Fix reading rows with empty columns at the end (#127)
* Fix reading rows with empty columns at the end
* Update CHANGELOG for the recovery logs
This commit is contained in:
parent
92dfc93b20
commit
276e09d7d3
@ -22,6 +22,8 @@
|
|||||||
* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query.
|
* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query.
|
||||||
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for
|
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for
|
||||||
the entire query. It can be added only at the end of the entire Cypher query.
|
the entire query. It can be added only at the end of the entire Cypher query.
|
||||||
|
* Added logs for the different parts of the recovery process. The `INFO`, `DEBUG` and `TRACE` levels all contain
|
||||||
|
additional information that is printed out while the recovery is in progress.
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
|
@ -61,12 +61,7 @@ void Reader::TryInitializeHeader() {
|
|||||||
const Reader::Header &Reader::GetHeader() const { return header_; }
|
const Reader::Header &Reader::GetHeader() const { return header_; }
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
enum class CsvParserState : uint8_t {
|
enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE };
|
||||||
INITIAL_FIELD,
|
|
||||||
NEXT_FIELD,
|
|
||||||
QUOTING,
|
|
||||||
EXPECT_DELIMITER,
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
@ -89,7 +84,7 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
|||||||
|
|
||||||
std::string_view line_string_view = *maybe_line;
|
std::string_view line_string_view = *maybe_line;
|
||||||
|
|
||||||
while (!line_string_view.empty()) {
|
while (state != CsvParserState::DONE && !line_string_view.empty()) {
|
||||||
const auto c = line_string_view[0];
|
const auto c = line_string_view[0];
|
||||||
|
|
||||||
// Line feeds and carriage returns are ignored in CSVs.
|
// Line feeds and carriage returns are ignored in CSVs.
|
||||||
@ -120,11 +115,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
|||||||
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
|
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
|
||||||
row.emplace_back(line_string_view.substr(0, delimiter_idx));
|
row.emplace_back(line_string_view.substr(0, delimiter_idx));
|
||||||
if (delimiter_idx == std::string_view::npos) {
|
if (delimiter_idx == std::string_view::npos) {
|
||||||
line_string_view.remove_prefix(line_string_view.size());
|
state = CsvParserState::DONE;
|
||||||
} else {
|
} else {
|
||||||
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
|
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
|
||||||
|
state = CsvParserState::NEXT_FIELD;
|
||||||
}
|
}
|
||||||
state = CsvParserState::NEXT_FIELD;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -159,15 +154,21 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CsvParserState::DONE: {
|
||||||
|
LOG_FATAL("Invalid state of the CSV parser!");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (state == CsvParserState::QUOTING);
|
} while (state == CsvParserState::QUOTING);
|
||||||
|
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case CsvParserState::INITIAL_FIELD:
|
case CsvParserState::INITIAL_FIELD:
|
||||||
case CsvParserState::NEXT_FIELD:
|
case CsvParserState::DONE:
|
||||||
case CsvParserState::EXPECT_DELIMITER:
|
case CsvParserState::EXPECT_DELIMITER:
|
||||||
break;
|
break;
|
||||||
|
case CsvParserState::NEXT_FIELD:
|
||||||
|
row.emplace_back("");
|
||||||
|
break;
|
||||||
case CsvParserState::QUOTING: {
|
case CsvParserState::QUOTING: {
|
||||||
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
|
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
|
||||||
"There is no more data left to load while inside a quoted string. "
|
"There is no more data left to load while inside a quoted string. "
|
||||||
|
@ -283,3 +283,35 @@ TEST_F(CsvReaderTest, MultilineQuotedString) {
|
|||||||
parsed_row = reader.GetNextRow(mem);
|
parsed_row = reader.GetNextRow(mem);
|
||||||
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
|
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(CsvReaderTest, EmptyColumns) {
|
||||||
|
// create a file in which every row has one empty column (first, middle or last);
|
||||||
|
// parser should return each row with the empty column preserved as an empty string
|
||||||
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
|
auto writer = FileWriter(filepath);
|
||||||
|
|
||||||
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
|
const utils::pmr::string delimiter{",", mem};
|
||||||
|
const utils::pmr::string quote{"\"", mem};
|
||||||
|
|
||||||
|
std::vector<std::vector<std::string>> expected_rows{{"", "B", "C"}, {"A", "", "C"}, {"A", "B", ""}};
|
||||||
|
|
||||||
|
for (const auto &row : expected_rows) {
|
||||||
|
writer.WriteLine(CreateRow(row, delimiter));
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.Close();
|
||||||
|
|
||||||
|
const bool with_header = false;
|
||||||
|
const bool ignore_bad = false;
|
||||||
|
const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
|
||||||
|
auto reader = csv::Reader(filepath, cfg);
|
||||||
|
|
||||||
|
for (const auto &expected_row : expected_rows) {
|
||||||
|
const auto pmr_expected_row = ToPmrColumns(expected_row);
|
||||||
|
const auto parsed_row = reader.GetNextRow(mem);
|
||||||
|
ASSERT_TRUE(parsed_row.has_value());
|
||||||
|
ASSERT_EQ(*parsed_row, pmr_expected_row);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user