Fix reading rows with empty columns at the end (#127)

* Fix reading rows with empty columns at the end

* Update CHANGELOG for the recovery logs
This commit is contained in:
antonio2368 2021-03-27 09:47:41 +01:00 committed by GitHub
parent 92dfc93b20
commit 276e09d7d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 10 deletions

View File

@ -22,6 +22,8 @@
* Added the memory limit and amount of currently allocated bytes in the result of the `SHOW STORAGE INFO` query.
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for
the entire query. It can be added only at the end of the entire Cypher query.
* Added logs for the different parts of the recovery process. The `INFO`, `DEBUG` and `TRACE` levels all contain
additional information that is printed out while the recovery is in progress.
### Bug Fixes

View File

@ -61,12 +61,7 @@ void Reader::TryInitializeHeader() {
const Reader::Header &Reader::GetHeader() const { return header_; } const Reader::Header &Reader::GetHeader() const { return header_; }
namespace { namespace {
enum class CsvParserState : uint8_t { enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE };
INITIAL_FIELD,
NEXT_FIELD,
QUOTING,
EXPECT_DELIMITER,
};
} // namespace } // namespace
@ -89,7 +84,7 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
std::string_view line_string_view = *maybe_line; std::string_view line_string_view = *maybe_line;
while (!line_string_view.empty()) { while (state != CsvParserState::DONE && !line_string_view.empty()) {
const auto c = line_string_view[0]; const auto c = line_string_view[0];
// Line feeds and carriage returns are ignored in CSVs. // Line feeds and carriage returns are ignored in CSVs.
@ -120,11 +115,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter); const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
row.emplace_back(line_string_view.substr(0, delimiter_idx)); row.emplace_back(line_string_view.substr(0, delimiter_idx));
if (delimiter_idx == std::string_view::npos) { if (delimiter_idx == std::string_view::npos) {
line_string_view.remove_prefix(line_string_view.size()); state = CsvParserState::DONE;
} else { } else {
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size()); line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
state = CsvParserState::NEXT_FIELD;
} }
state = CsvParserState::NEXT_FIELD;
} }
break; break;
} }
@ -159,15 +154,21 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
} }
break; break;
} }
case CsvParserState::DONE: {
LOG_FATAL("Invalid state of the CSV parser!");
}
} }
} }
} while (state == CsvParserState::QUOTING); } while (state == CsvParserState::QUOTING);
switch (state) { switch (state) {
case CsvParserState::INITIAL_FIELD: case CsvParserState::INITIAL_FIELD:
case CsvParserState::NEXT_FIELD: case CsvParserState::DONE:
case CsvParserState::EXPECT_DELIMITER: case CsvParserState::EXPECT_DELIMITER:
break; break;
case CsvParserState::NEXT_FIELD:
row.emplace_back("");
break;
case CsvParserState::QUOTING: { case CsvParserState::QUOTING: {
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE, return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
"There is no more data left to load while inside a quoted string. " "There is no more data left to load while inside a quoted string. "

View File

@ -283,3 +283,35 @@ TEST_F(CsvReaderTest, MultilineQuotedString) {
parsed_row = reader.GetNextRow(mem); parsed_row = reader.GetNextRow(mem);
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline)); ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
} }
// Checks that rows containing an empty column are read back unchanged,
// whether the empty column is the first, a middle, or the last one in the row.
TEST_F(CsvReaderTest, EmptyColumns) {
  utils::MemoryResource *mem = utils::NewDeleteResource();

  const utils::pmr::string delimiter{",", mem};
  const utils::pmr::string quote{"\"", mem};

  // One row per position of the empty column: leading, middle, trailing.
  const std::vector<std::vector<std::string>> expected_rows{
      {"", "B", "C"},
      {"A", "", "C"},
      {"A", "B", ""},
  };

  // Write every expected row to the CSV file, then close it before reading.
  const auto filepath = csv_directory / "bla.csv";
  FileWriter writer(filepath);
  for (const auto &columns : expected_rows) {
    writer.WriteLine(CreateRow(columns, delimiter));
  }
  writer.Close();

  // Read the file without a header row and without skipping bad rows.
  const bool with_header = false;
  const bool ignore_bad = false;
  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
  csv::Reader reader(filepath, cfg);

  // Each row read back must exist and match the row that was written.
  for (const auto &columns : expected_rows) {
    const auto expected_pmr_columns = ToPmrColumns(columns);
    const auto parsed_row = reader.GetNextRow(mem);
    ASSERT_TRUE(parsed_row.has_value());
    ASSERT_EQ(*parsed_row, expected_pmr_columns);
  }
}