parent
f950a91732
commit
d913a67e16
@ -3720,8 +3720,10 @@ auto ToOptionalString(ExpressionEvaluator *evaluator, Expression *expression) ->
|
||||
|
||||
/// Converts one parsed CSV row into a TypedValue built from a vector of per-column TypedValues.
///
/// `row` is taken by value, so each column is moved into its TypedValue element instead of
/// being copied.
///
/// @param row CSV row whose columns become the elements, in their original order.
/// @param mem Memory resource used for the intermediate vector and passed to the returned TypedValue.
/// @return TypedValue constructed from the vector of converted columns and `mem`.
TypedValue CsvRowToTypedList(csv::Reader::Row row, utils::MemoryResource *mem) {
  auto typed_columns = utils::pmr::vector<TypedValue>(mem);
  // Reserve before inserting so the vector is sized once and the loop never reallocates.
  typed_columns.reserve(row.size());
  for (auto &column : row) {
    // The row is a local copy owned by this function, so its columns can be moved from.
    typed_columns.emplace_back(std::move(column));
  }
  return TypedValue(typed_columns, mem);
}
|
||||
|
||||
@ -3729,7 +3731,7 @@ TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header, ut
|
||||
// a valid row has the same number of elements as the header
|
||||
utils::pmr::map<utils::pmr::string, TypedValue> m(mem);
|
||||
for (auto i = 0; i < row.size(); ++i) {
|
||||
m.emplace(header[i], TypedValue(row[i], mem));
|
||||
m.emplace(std::move(header[i]), std::move(row[i]));
|
||||
}
|
||||
return TypedValue(m, mem);
|
||||
}
|
||||
@ -3800,9 +3802,10 @@ class LoadCsvCursor : public Cursor {
|
||||
|
||||
// no need to check if maybe_file is std::nullopt, as the parser makes sure
|
||||
// we can't get a nullptr for the 'file_' member in the LoadCsv clause
|
||||
return csv::Reader(*maybe_file,
|
||||
csv::Reader::Config(self_->with_header_, self_->ignore_bad_, maybe_delim, maybe_quote),
|
||||
eval_context->memory);
|
||||
return csv::Reader(
|
||||
*maybe_file,
|
||||
csv::Reader::Config(self_->with_header_, self_->ignore_bad_, std::move(maybe_delim), std::move(maybe_quote)),
|
||||
eval_context->memory);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -65,17 +65,17 @@ enum class CsvParserState : uint8_t {
|
||||
INITIAL_FIELD,
|
||||
NEXT_FIELD,
|
||||
QUOTING,
|
||||
NOT_QUOTING,
|
||||
EXPECT_DELIMITER,
|
||||
};
|
||||
|
||||
bool SubstringStartsWith(const std::string_view str, size_t pos, const std::string_view what) {
|
||||
return utils::StartsWith(utils::Substr(str, pos), what);
|
||||
}
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
Reader::ParsingResult Reader::ParseRow() {
|
||||
utils::pmr::vector<utils::pmr::string> row(memory_);
|
||||
if (number_of_columns_ != 0) {
|
||||
row.reserve(number_of_columns_);
|
||||
}
|
||||
|
||||
utils::pmr::string column(memory_);
|
||||
|
||||
auto state = CsvParserState::INITIAL_FIELD;
|
||||
@ -87,11 +87,16 @@ Reader::ParsingResult Reader::ParseRow() {
|
||||
break;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < maybe_line->size(); ++i) {
|
||||
const auto c = (*maybe_line)[i];
|
||||
std::string_view line_string_view = *maybe_line;
|
||||
|
||||
while (!line_string_view.empty()) {
|
||||
const auto c = line_string_view[0];
|
||||
|
||||
// Line feeds and carriage returns are ignored in CSVs.
|
||||
if (c == '\n' || c == '\r') continue;
|
||||
if (c == '\n' || c == '\r') {
|
||||
line_string_view.remove_prefix(1);
|
||||
continue;
|
||||
}
|
||||
// Null bytes aren't allowed in CSVs.
|
||||
if (c == '\0') {
|
||||
return ParseError(ParseError::ErrorCode::NULL_BYTE,
|
||||
@ -101,53 +106,52 @@ Reader::ParsingResult Reader::ParseRow() {
|
||||
switch (state) {
|
||||
case CsvParserState::INITIAL_FIELD:
|
||||
case CsvParserState::NEXT_FIELD: {
|
||||
if (SubstringStartsWith(*maybe_line, i, *read_config_.quote)) {
|
||||
if (utils::StartsWith(line_string_view, *read_config_.quote)) {
|
||||
// The current field is a quoted field.
|
||||
state = CsvParserState::QUOTING;
|
||||
i += read_config_.quote->size() - 1;
|
||||
} else if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) {
|
||||
line_string_view.remove_prefix(read_config_.quote->size());
|
||||
} else if (utils::StartsWith(line_string_view, *read_config_.delimiter)) {
|
||||
// The current field has an empty value.
|
||||
row.emplace_back("");
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
i += read_config_.delimiter->size() - 1;
|
||||
line_string_view.remove_prefix(read_config_.delimiter->size());
|
||||
} else {
|
||||
// The current field is a regular field.
|
||||
column.push_back(c);
|
||||
state = CsvParserState::NOT_QUOTING;
|
||||
const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
|
||||
row.emplace_back(line_string_view.substr(0, delimiter_idx));
|
||||
if (delimiter_idx == std::string_view::npos) {
|
||||
line_string_view.remove_prefix(line_string_view.size());
|
||||
} else {
|
||||
line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
|
||||
}
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CsvParserState::QUOTING: {
|
||||
auto quote_now = SubstringStartsWith(*maybe_line, i, *read_config_.quote);
|
||||
auto quote_next = SubstringStartsWith(*maybe_line, i + read_config_.quote->size(), *read_config_.quote);
|
||||
const auto quote_now = utils::StartsWith(line_string_view, *read_config_.quote);
|
||||
const auto quote_next =
|
||||
utils::StartsWith(line_string_view.substr(read_config_.quote->size()), *read_config_.quote);
|
||||
if (quote_now && quote_next) {
|
||||
// This is an escaped quote character.
|
||||
column += *read_config_.quote;
|
||||
i += read_config_.quote->size() * 2 - 1;
|
||||
} else if (quote_now && !quote_next) {
|
||||
line_string_view.remove_prefix(read_config_.quote->size() * 2);
|
||||
} else if (quote_now) {
|
||||
// This is the end of the quoted field.
|
||||
row.emplace_back(std::move(column));
|
||||
column.clear();
|
||||
state = CsvParserState::EXPECT_DELIMITER;
|
||||
i += read_config_.quote->size() - 1;
|
||||
} else {
|
||||
column.push_back(c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CsvParserState::NOT_QUOTING: {
|
||||
if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) {
|
||||
row.emplace_back(std::move(column));
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
i += read_config_.delimiter->size() - 1;
|
||||
line_string_view.remove_prefix(read_config_.quote->size());
|
||||
} else {
|
||||
column.push_back(c);
|
||||
line_string_view.remove_prefix(1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CsvParserState::EXPECT_DELIMITER: {
|
||||
if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) {
|
||||
if (utils::StartsWith(line_string_view, *read_config_.delimiter)) {
|
||||
state = CsvParserState::NEXT_FIELD;
|
||||
i += read_config_.delimiter->size() - 1;
|
||||
line_string_view.remove_prefix(read_config_.delimiter->size());
|
||||
} else {
|
||||
return ParseError(ParseError::ErrorCode::UNEXPECTED_TOKEN,
|
||||
fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}' at line {:d}",
|
||||
@ -160,26 +164,16 @@ Reader::ParsingResult Reader::ParseRow() {
|
||||
} while (state == CsvParserState::QUOTING);
|
||||
|
||||
switch (state) {
|
||||
case CsvParserState::INITIAL_FIELD: {
|
||||
case CsvParserState::INITIAL_FIELD:
|
||||
case CsvParserState::NEXT_FIELD:
|
||||
case CsvParserState::EXPECT_DELIMITER:
|
||||
break;
|
||||
}
|
||||
case CsvParserState::NEXT_FIELD: {
|
||||
row.emplace_back(std::move(column));
|
||||
break;
|
||||
}
|
||||
case CsvParserState::QUOTING: {
|
||||
return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
|
||||
"There is no more data left to load while inside a quoted string. "
|
||||
"Did you forget to close the quote?");
|
||||
break;
|
||||
}
|
||||
case CsvParserState::NOT_QUOTING: {
|
||||
row.emplace_back(std::move(column));
|
||||
break;
|
||||
}
|
||||
case CsvParserState::EXPECT_DELIMITER: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// reached the end of file - return empty row
|
||||
@ -202,7 +196,7 @@ Reader::ParsingResult Reader::ParseRow() {
|
||||
line_count_ - 1, row.size()));
|
||||
}
|
||||
|
||||
return row;
|
||||
return std::move(row);
|
||||
}
|
||||
|
||||
// Returns Reader::Row if the read row is valid;
|
||||
@ -231,7 +225,7 @@ std::optional<Reader::Row> Reader::GetNextRow() {
|
||||
// reached end of file
|
||||
return std::nullopt;
|
||||
}
|
||||
return *row;
|
||||
return std::move(*row);
|
||||
}
|
||||
|
||||
} // namespace csv
|
||||
|
@ -437,7 +437,7 @@ inline std::string Escape(const std::string_view &src) {
|
||||
/// Returns a view over part of `string`, never throwing on an out-of-range position.
///
/// @param string View to take the sub-view from.
/// @param pos Offset of the first character to include (defaults to 0).
/// @param count Maximum number of characters to include (defaults to "until the end").
/// @return An empty view anchored at `string.data()` when `pos` is at or past the end of
///         `string`; otherwise the view starting at `pos` holding at most `count` characters,
///         clamped to the end of `string`.
inline std::string_view Substr(const std::string_view &string, size_t pos = 0, size_t count = std::string::npos) {
  // Guard first: std::string_view::substr throws std::out_of_range when pos > size().
  if (pos >= string.size()) return std::string_view(string.data(), 0);
  // substr already clamps `count` to the characters remaining after `pos`.
  return string.substr(pos, count);
}
|
||||
|
||||
} // namespace utils
|
||||
|
Loading…
Reference in New Issue
Block a user