Ignore carriage return at the end of line for a CSV file (#151)
This commit is contained in:
parent
1def0c9104
commit
30413a7b4f
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
* Fixed parsing of types for Python procedures for types nested in `mgp.List`.
|
* Fixed parsing of types for Python procedures for types nested in `mgp.List`.
|
||||||
For example, parsing of `mgp.List[mgp.Map]` works now.
|
For example, parsing of `mgp.List[mgp.Map]` works now.
|
||||||
|
* Fixed reading CSV files that are using CRLF as the newline symbol.
|
||||||
|
|
||||||
## v1.4.0
|
## v1.4.0
|
||||||
|
|
||||||
|
@ -84,6 +84,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) {
|
|||||||
|
|
||||||
std::string_view line_string_view = *maybe_line;
|
std::string_view line_string_view = *maybe_line;
|
||||||
|
|
||||||
|
// remove '\r' from the end in case we have dos file format
|
||||||
|
// an empty line has no last character; back() on an empty view is undefined behavior
if (!line_string_view.empty() && line_string_view.back() == '\r') {
|
||||||
|
line_string_view.remove_suffix(1);
|
||||||
|
}
|
||||||
|
|
||||||
while (state != CsvParserState::DONE && !line_string_view.empty()) {
|
while (state != CsvParserState::DONE && !line_string_view.empty()) {
|
||||||
const auto c = line_string_view[0];
|
const auto c = line_string_view[0];
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
#include "utils/string.hpp"
|
#include "utils/string.hpp"
|
||||||
|
|
||||||
class CsvReaderTest : public ::testing::Test {
|
class CsvReaderTest : public ::testing::TestWithParam<const char *> {
|
||||||
protected:
|
protected:
|
||||||
const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"};
|
const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"};
|
||||||
|
|
||||||
@ -30,7 +30,9 @@ class CsvReaderTest : public ::testing::Test {
|
|||||||
namespace {
|
namespace {
|
||||||
class FileWriter {
|
class FileWriter {
|
||||||
public:
|
public:
|
||||||
explicit FileWriter(const std::filesystem::path path) { stream_.open(path); }
|
explicit FileWriter(const std::filesystem::path path, std::string newline = "\n") : newline_{std::move(newline)} {
|
||||||
|
stream_.open(path);
|
||||||
|
}
|
||||||
|
|
||||||
FileWriter(const FileWriter &) = delete;
|
FileWriter(const FileWriter &) = delete;
|
||||||
FileWriter &operator=(const FileWriter &) = delete;
|
FileWriter &operator=(const FileWriter &) = delete;
|
||||||
@ -45,7 +47,7 @@ class FileWriter {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
stream_ << line << std::endl;
|
stream_ << line << newline_;
|
||||||
|
|
||||||
// including the newline character
|
// including the newline character
|
||||||
return line.size() + 1;
|
// count the newline sequence actually written ("\n" is 1 char, "\r\n" is 2)
return line.size() + newline_.size();
|
||||||
@ -53,6 +55,7 @@ class FileWriter {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::ofstream stream_;
|
std::ofstream stream_;
|
||||||
|
std::string newline_;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string CreateRow(const std::vector<std::string> &columns, const std::string_view delim) {
|
std::string CreateRow(const std::vector<std::string> &columns, const std::string_view delim) {
|
||||||
@ -69,10 +72,10 @@ auto ToPmrColumns(const std::vector<std::string> &columns) {
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, CommaDelimiter) {
|
TEST_P(CsvReaderTest, CommaDelimiter) {
|
||||||
// create a file with a single valid row;
|
// create a file with a single valid row;
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
const std::vector<std::string> columns{"A", "B", "C"};
|
const std::vector<std::string> columns{"A", "B", "C"};
|
||||||
writer.WriteLine(CreateRow(columns, ","));
|
writer.WriteLine(CreateRow(columns, ","));
|
||||||
@ -93,9 +96,9 @@ TEST_F(CsvReaderTest, CommaDelimiter) {
|
|||||||
ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
|
ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, SemicolonDelimiter) {
|
TEST_P(CsvReaderTest, SemicolonDelimiter) {
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -116,12 +119,12 @@ TEST_F(CsvReaderTest, SemicolonDelimiter) {
|
|||||||
ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
|
ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, SkipBad) {
|
TEST_P(CsvReaderTest, SkipBad) {
|
||||||
// create a file with invalid first two rows (containing a string with a
|
// create a file with invalid first two rows (containing a string with a
|
||||||
// missing closing quote);
|
// missing closing quote);
|
||||||
// the last row is valid;
|
// the last row is valid;
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -161,11 +164,11 @@ TEST_F(CsvReaderTest, SkipBad) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, AllRowsValid) {
|
TEST_P(CsvReaderTest, AllRowsValid) {
|
||||||
// create a file with all rows valid;
|
// create a file with all rows valid;
|
||||||
// parser should return 'std::nullopt'
|
// parser should return 'std::nullopt'
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -190,11 +193,11 @@ TEST_F(CsvReaderTest, AllRowsValid) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, SkipAllRows) {
|
TEST_P(CsvReaderTest, SkipAllRows) {
|
||||||
// create a file with all rows invalid (containing a string with a missing closing quote);
|
// create a file with all rows invalid (containing a string with a missing closing quote);
|
||||||
// parser should return 'std::nullopt'
|
// parser should return 'std::nullopt'
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -217,9 +220,9 @@ TEST_F(CsvReaderTest, SkipAllRows) {
|
|||||||
ASSERT_EQ(parsed_row, std::nullopt);
|
ASSERT_EQ(parsed_row, std::nullopt);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, WithHeader) {
|
TEST_P(CsvReaderTest, WithHeader) {
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -249,12 +252,12 @@ TEST_F(CsvReaderTest, WithHeader) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, MultilineQuotedString) {
|
TEST_P(CsvReaderTest, MultilineQuotedString) {
|
||||||
// create a file with first row valid and the second row containing a quoted
|
// create a file with first row valid and the second row containing a quoted
|
||||||
// string spanning two lines;
|
// string spanning two lines;
|
||||||
// parser should return two valid rows
|
// parser should return two valid rows
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -284,11 +287,11 @@ TEST_F(CsvReaderTest, MultilineQuotedString) {
|
|||||||
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
|
ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CsvReaderTest, EmptyColumns) {
|
TEST_P(CsvReaderTest, EmptyColumns) {
|
||||||
// create a file with all rows valid;
|
// create a file with all rows valid;
|
||||||
// parser should return 'std::nullopt'
|
// parser should return 'std::nullopt'
|
||||||
const auto filepath = csv_directory / "bla.csv";
|
const auto filepath = csv_directory / "bla.csv";
|
||||||
auto writer = FileWriter(filepath);
|
auto writer = FileWriter(filepath, GetParam());
|
||||||
|
|
||||||
utils::MemoryResource *mem(utils::NewDeleteResource());
|
utils::MemoryResource *mem(utils::NewDeleteResource());
|
||||||
|
|
||||||
@ -315,3 +318,5 @@ TEST_F(CsvReaderTest, EmptyColumns) {
|
|||||||
ASSERT_EQ(*parsed_row, pmr_expected_row);
|
ASSERT_EQ(*parsed_row, pmr_expected_row);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(NewlineParameterizedTest, CsvReaderTest, ::testing::Values("\n", "\r\n"));
|
||||||
|
Loading…
Reference in New Issue
Block a user