Fix reading rows with empty columns at the end (#127)
* Fix reading rows with empty columns at the end * Update CHANGELOG for the recovery logs
This commit is contained in:
parent
92dfc93b20
commit
276e09d7d3
@ -22,6 +22,8 @@
|
||||
* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query.
|
||||
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for
|
||||
the entire query. It can be added only at the end of the entire Cypher query.
|
||||
* Added logs for the different parts of the recovery process. `INFO`, `DEBUG` and `TRACE` levels all contain
|
||||
additional information that is printed out while the recovery is in progress.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
|
@ -61,12 +61,7 @@ void Reader::TryInitializeHeader() {
|
||||
const Reader::Header &Reader::GetHeader() const { return header_; }
|
||||
|
||||
namespace {
|
||||
// States of the state machine that Reader::ParseRow steps through while scanning a CSV line.
enum class CsvParserState : uint8_t {
|
||||
INITIAL_FIELD,
|
||||
NEXT_FIELD,
|
||||
QUOTING,
|
||||
EXPECT_DELIMITER,
|
||||
};
|
||||
// States of the state machine that Reader::ParseRow steps through while scanning a CSV line.
// DONE is set when a field has no delimiter after it (the rest of the line is consumed),
// and it stops the per-line scanning loop.
enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE };
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -89,7 +84,7 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
||||
|
||||
std::string_view line_string_view = *maybe_line;
|
||||
|
||||
while (!line_string_view.empty()) {
|
||||
while (state != CsvParserState::DONE && !line_string_view.empty()) {
|
||||
const auto c = line_string_view[0];
|
||||
|
||||
// Line feeds and carriage returns are ignored in CSVs.
|
||||
@ -120,11 +115,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
||||
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
|
||||
row.emplace_back(line_string_view.substr(0, delimiter_idx));
|
||||
if (delimiter_idx == std::string_view::npos) {
|
||||
line_string_view.remove_prefix(line_string_view.size());
|
||||
state = CsvParserState::DONE;
|
||||
} else {
|
||||
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
}
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -159,15 +154,21 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CsvParserState::DONE: {
|
||||
LOG_FATAL("Invalid state of the CSV parser!");
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (state == CsvParserState::QUOTING);
|
||||
|
||||
switch (state) {
|
||||
case CsvParserState::INITIAL_FIELD:
|
||||
case CsvParserState::NEXT_FIELD:
|
||||
case CsvParserState::DONE:
|
||||
case CsvParserState::EXPECT_DELIMITER:
|
||||
break;
|
||||
case CsvParserState::NEXT_FIELD:
|
||||
row.emplace_back("");
|
||||
break;
|
||||
case CsvParserState::QUOTING: {
|
||||
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
|
||||
"There is no more data left to load while inside a quoted string. "
|
||||
|
@ -283,3 +283,35 @@ TEST_F(CsvReaderTest, MultilineQuotedString) {
|
||||
parsed_row = reader.GetNextRow(mem);
|
||||
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
|
||||
}
|
||||
|
||||
TEST_F(CsvReaderTest, EmptyColumns) {
|
||||
// Write rows whose first, middle, or last column is empty;
|
||||
// the parser should return every row with the empty column kept as an empty string.
|
||||
const auto filepath = csv_directory / "bla.csv";
|
||||
auto writer = FileWriter(filepath);
|
||||
|
||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||
|
||||
const utils::pmr::string delimiter{",", mem};
|
||||
const utils::pmr::string quote{"\"", mem};
|
||||
|
||||
std::vector<std::vector<std::string>> expected_rows{{"", "B", "C"}, {"A", "", "C"}, {"A", "B", ""}};
|
||||
|
||||
for (const auto &row : expected_rows) {
|
||||
writer.WriteLine(CreateRow(row, delimiter));
|
||||
}
|
||||
|
||||
writer.Close();
|
||||
|
||||
const bool with_header = false;
|
||||
const bool ignore_bad = false;
|
||||
const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
|
||||
auto reader = csv::Reader(filepath, cfg);
|
||||
|
||||
for (const auto &expected_row : expected_rows) {
|
||||
const auto pmr_expected_row = ToPmrColumns(expected_row);
|
||||
const auto parsed_row = reader.GetNextRow(mem);
|
||||
ASSERT_TRUE(parsed_row.has_value());
|
||||
ASSERT_EQ(*parsed_row, pmr_expected_row);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user