Fix reading rows with empty columns at the end (#127)

* Fix reading rows with empty columns at the end

* Update CHANGELOG for the recovery logs
This commit is contained in:
antonio2368 2021-03-27 09:47:41 +01:00 committed by GitHub
parent 92dfc93b20
commit 276e09d7d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 10 deletions

View File

@ -22,6 +22,8 @@
* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query.
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for
the entire query. It can be added only at the end of the entire Cypher query.
* Added logs for the different parts of the recovery process. The `INFO`, `DEBUG` and `TRACE` levels all contain
additional information that is printed out while the recovery is in progress.
### Bug Fixes

View File

@ -61,12 +61,7 @@ void Reader::TryInitializeHeader() {
const Reader::Header &Reader::GetHeader() const { return header_; }
namespace {
enum class CsvParserState : uint8_t {
INITIAL_FIELD,
NEXT_FIELD,
QUOTING,
EXPECT_DELIMITER,
};
enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE };
} // namespace
@ -89,7 +84,7 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
std::string_view line_string_view = *maybe_line;
while (!line_string_view.empty()) {
while (state != CsvParserState::DONE && !line_string_view.empty()) {
const auto c = line_string_view[0];
// Line feeds and carriage returns are ignored in CSVs.
@ -120,11 +115,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
row.emplace_back(line_string_view.substr(0, delimiter_idx));
if (delimiter_idx == std::string_view::npos) {
line_string_view.remove_prefix(line_string_view.size());
state = CsvParserState::DONE;
} else {
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
state = CsvParserState::NEXT_FIELD;
}
state = CsvParserState::NEXT_FIELD;
}
break;
}
@ -159,15 +154,21 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
}
break;
}
case CsvParserState::DONE: {
LOG_FATAL("Invalid state of the CSV parser!");
}
}
}
} while (state == CsvParserState::QUOTING);
switch (state) {
case CsvParserState::INITIAL_FIELD:
case CsvParserState::NEXT_FIELD:
case CsvParserState::DONE:
case CsvParserState::EXPECT_DELIMITER:
break;
case CsvParserState::NEXT_FIELD:
row.emplace_back("");
break;
case CsvParserState::QUOTING: {
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
"There is no more data left to load while inside a quoted string. "

View File

@ -283,3 +283,35 @@ TEST_F(CsvReaderTest, MultilineQuotedString) {
parsed_row = reader.GetNextRow(mem);
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
}
// Writes rows that contain an empty column at the start, in the middle and at the end,
// then reads the file back and requires every parsed row to match the row that was written.
TEST_F(CsvReaderTest, EmptyColumns) {
  utils::MemoryResource *mem = utils::NewDeleteResource();
  const utils::pmr::string delimiter{",", mem};
  const utils::pmr::string quote{"\"", mem};

  // One row per position of the empty column: first, middle, last.
  const std::vector<std::vector<std::string>> expected_rows{{"", "B", "C"}, {"A", "", "C"}, {"A", "B", ""}};

  // Dump the rows into the CSV file used as the reader's input.
  const auto filepath = csv_directory / "bla.csv";
  FileWriter writer(filepath);
  for (const auto &columns : expected_rows) {
    writer.WriteLine(CreateRow(columns, delimiter));
  }
  writer.Close();

  // No header row, and malformed rows are not skipped.
  constexpr bool kWithHeader = false;
  constexpr bool kIgnoreBad = false;
  const csv::Reader::Config cfg{kWithHeader, kIgnoreBad, delimiter, quote};
  csv::Reader reader(filepath, cfg);

  // Rows must come back in the order they were written, with empty columns preserved.
  for (const auto &columns : expected_rows) {
    const auto pmr_columns = ToPmrColumns(columns);
    const auto maybe_row = reader.GetNextRow(mem);
    ASSERT_TRUE(maybe_row.has_value());
    ASSERT_EQ(*maybe_row, pmr_columns);
  }
}