Implement new CSV parser

Summary:
When importing a CSV file, the new CSV parser in `mg_import_csv` behaves the
same as the standard Python CSV parser. Tests are added for all CSV field
edge cases.

Reviewers: teon.banek, ipaljak

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2702
This commit is contained in:
Matej Ferencevic 2020-03-06 10:49:09 +01:00
parent 1d30a2e5cb
commit 8363991575
66 changed files with 939 additions and 43 deletions

View File

@ -16,12 +16,16 @@
#include "utils/timer.hpp"
#include "version.hpp"
bool ValidateNotNewline(const char *flagname, const std::string &value) {
auto has_no_newline = value.find('\n') == std::string::npos;
if (!has_no_newline) {
printf("The argument '%s' cannot contain newline character\n", flagname);
bool ValidateControlCharacter(const char *flagname, const std::string &value) {
if (value.empty()) {
printf("The argument '%s' cannot be empty\n", flagname);
return false;
}
return has_no_newline;
if (value.find('\n') != std::string::npos) {
printf("The argument '%s' cannot contain a newline character\n", flagname);
return false;
}
return true;
}
bool ValidateNoWhitespace(const char *flagname, const std::string &value) {
@ -53,10 +57,12 @@ DEFINE_bool(storage_properties_on_edges, false,
// CSV import flags.
DEFINE_string(array_delimiter, ";",
"Delimiter between elements of array values.");
DEFINE_validator(array_delimiter, &ValidateControlCharacter);
DEFINE_string(delimiter, ",", "Delimiter between each field in the CSV.");
DEFINE_validator(delimiter, &ValidateControlCharacter);
DEFINE_string(quote, "\"",
"Quotation character for data in the CSV. Cannot contain '\n'");
DEFINE_validator(quote, &ValidateNotNewline);
DEFINE_validator(quote, &ValidateControlCharacter);
DEFINE_bool(skip_duplicate_nodes, false,
"Set to true to skip duplicate nodes instead of raising an error.");
// Arguments `--nodes` and `--relationships` can be input multiple times and are
@ -139,60 +145,168 @@ class LoadException : public utils::BasicException {
using utils::BasicException::BasicException;
};
/// States of the state machine that `ReadRow` uses while parsing one CSV row.
enum class CsvParserState {
// Start of a row: no field has been started yet. A row that ends in this
// state produces no fields at all.
INITIAL_FIELD,
// A delimiter was just consumed, so a new field begins at the next
// character. A row that ends in this state gets one more (empty) field.
NEXT_FIELD,
// Inside a quoted field. `ReadRow` keeps reading further lines while in this
// state; running out of input here is reported as an unclosed quote.
QUOTING,
// Inside an unquoted field; characters are collected until a delimiter or
// the end of the row.
NOT_QUOTING,
// A quoted field was just closed. Only a delimiter (or the end of the row)
// may follow; any other character makes `ReadRow` throw `LoadException`.
EXPECT_DELIMITER,
};
/// Tells whether the part of `str` that begins at offset `pos` starts with
/// `what`.
///
/// The work is delegated to `utils::Substr` (to take the remainder of `str`
/// from `pos`) and `utils::StartsWith` (to compare its beginning with `what`).
/// NOTE(review): callers may pass a `pos` past the end of `str`; the result in
/// that case is whatever `utils::Substr` yields -- confirm it is safe there.
bool SubstringStartsWith(const std::string_view &str, size_t pos,
                         const std::string_view &what) {
  const auto remainder = utils::Substr(str, pos);
  return utils::StartsWith(remainder, what);
}
/// This function reads a row from a CSV stream.
///
/// Each CSV field must be divided using the `delimiter` and each CSV field can
/// either be quoted or unquoted. When the field is quoted, the first and last
/// character in the field *must* be the quote character. If the field isn't
/// quoted, and a quote character appears in it, it is treated as a regular
/// character. If a quote character appears inside a quoted string then the
/// quote character must be doubled in order to escape it. Line feeds and
/// carriage returns are ignored in the CSV file. The file must not contain a
/// NULL character.
///
/// The function uses the same logic as the standard Python CSV parser. The data
/// is parsed in the same way as the following snippet:
/// ```
/// import csv
/// for row in csv.reader(stream, strict=True):
/// # process `row`
/// ```
///
/// Python uses 'excel' as the default dialect when parsing CSV files and the
/// default settings for the CSV parser are:
/// - delimiter: ','
/// - doublequote: True
/// - escapechar: None
/// - lineterminator: '\r\n'
/// - quotechar: '"'
/// - skipinitialspace: False
///
/// The above snippet can be expanded to:
/// ```
/// import csv
/// for row in csv.reader(stream, delimiter=',', doublequote=True,
/// escapechar=None, lineterminator='\r\n',
/// quotechar='"', skipinitialspace=False,
/// strict=True):
/// # process `row`
/// ```
///
/// For more information about the meaning of the above values, see:
/// https://docs.python.org/3/library/csv.html#csv.Dialect
///
/// @throw LoadException
std::pair<std::vector<std::string>, uint64_t> ReadRow(std::istream &stream) {
std::vector<std::string> row;
bool quoting = false;
std::vector<char> column;
std::string line;
std::string column;
uint64_t lines_count = 0;
auto check_quote = [&line](int curr_idx) {
return curr_idx + FLAGS_quote.size() <= line.size() &&
line.compare(curr_idx, FLAGS_quote.size(), FLAGS_quote) == 0;
};
auto state = CsvParserState::INITIAL_FIELD;
do {
std::string line;
if (!std::getline(stream, line)) {
if (quoting) {
throw LoadException(
"There is no more data left to load while inside a quoted string. "
"Did you forget to close the quote?");
} else {
// The whole row was processed.
break;
}
// The whole file was processed.
break;
}
++lines_count;
for (size_t i = 0; i < line.size(); ++i) {
auto c = line[i];
if (quoting) {
if (check_quote(i)) {
quoting = false;
i += FLAGS_quote.size() - 1;
} else {
column.push_back(c);
// Line feeds and carriage returns are ignored in CSVs.
if (c == '\n' || c == '\r') continue;
// Null bytes aren't allowed in CSVs.
if (c == '\0') throw LoadException("Line contains NULL byte");
switch (state) {
case CsvParserState::INITIAL_FIELD:
case CsvParserState::NEXT_FIELD: {
if (SubstringStartsWith(line, i, FLAGS_quote)) {
// The current field is a quoted field.
state = CsvParserState::QUOTING;
i += FLAGS_quote.size() - 1;
} else if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
// The current field has an empty value.
row.emplace_back("");
state = CsvParserState::NEXT_FIELD;
i += FLAGS_delimiter.size() - 1;
} else {
// The current field is a regular field.
column.push_back(c);
state = CsvParserState::NOT_QUOTING;
}
break;
}
} else if (check_quote(i)) {
// Hopefully, escaping isn't needed
quoting = true;
i += FLAGS_quote.size() - 1;
} else if (c == FLAGS_delimiter.front()) {
row.emplace_back(column.begin(), column.end());
column.clear();
// Handle special case when delimiter is the last
// character in line. This means that another
// empty column needs to be added.
if (i == line.size() - 1) {
row.emplace_back("");
case CsvParserState::QUOTING: {
auto quote_now = SubstringStartsWith(line, i, FLAGS_quote);
auto quote_next =
SubstringStartsWith(line, i + FLAGS_quote.size(), FLAGS_quote);
if (quote_now && quote_next) {
// This is an escaped quote character.
column += FLAGS_quote;
i += FLAGS_quote.size() * 2 - 1;
} else if (quote_now && !quote_next) {
// This is the end of the quoted field.
row.emplace_back(std::move(column));
state = CsvParserState::EXPECT_DELIMITER;
i += FLAGS_quote.size() - 1;
} else {
column.push_back(c);
}
break;
}
case CsvParserState::NOT_QUOTING: {
if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
row.emplace_back(std::move(column));
state = CsvParserState::NEXT_FIELD;
i += FLAGS_delimiter.size() - 1;
} else {
column.push_back(c);
}
break;
}
case CsvParserState::EXPECT_DELIMITER: {
if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
state = CsvParserState::NEXT_FIELD;
i += FLAGS_delimiter.size() - 1;
} else {
throw LoadException("Expected '{}' after '{}', but got '{}'",
FLAGS_delimiter, FLAGS_quote, c);
}
break;
}
} else {
column.push_back(c);
}
}
} while (quoting);
} while (state == CsvParserState::QUOTING);
switch (state) {
case CsvParserState::INITIAL_FIELD: {
break;
}
case CsvParserState::NEXT_FIELD: {
row.emplace_back(std::move(column));
break;
}
case CsvParserState::QUOTING: {
throw LoadException(
"There is no more data left to load while inside a quoted string. "
"Did you forget to close the quote?");
break;
}
case CsvParserState::NOT_QUOTING: {
row.emplace_back(std::move(column));
break;
}
case CsvParserState::EXPECT_DELIMITER: {
break;
}
}
if (!column.empty()) row.emplace_back(column.begin(), column.end());
return {std::move(row), lines_count};
}

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:ID,value
0,
1,""
2,hello
1 :ID value
2 0
3 1
4 2 hello

View File

@ -0,0 +1,8 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,Öworld
fghÖ,asd
1,,string
2,hello,Ö
will this work?Ö
1 :ID,value,test
2 0,Öworld
3 fghÖ,asd
4 1,,string
5 2,hello,Ö
6 will this work?Ö

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:ID,value,test
0,Öworld
fghÖ,asd
1,,string
2,Ö
will this work?Ö ,hello
1 :ID,value,test
2 0,Öworld
3 fghÖ,asd
4 1,,string
5 2,Ö
6 will this work?Ö ,hello

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "ÖworldÖÖÖ fÖgh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl this work?Ö", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,ÖÖÖworld
ÖÖÖÖÖÖ
fÖÖghÖ,asd
1,,string
2,hello,Ö
wilÖÖl this work?ÖÖÖ
1 :ID,value,test
2 0,ÖÖÖworld
3 ÖÖÖÖÖÖ
4 fÖÖghÖ,asd
5 1,,string
6 2,hello,Ö
7 wilÖÖl this work?ÖÖÖ

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "Öworld,Ö,ÖÖ, fÖ,gh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl t,his work?Ö", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,ÖÖÖworld,
ÖÖ,ÖÖÖÖ,
fÖÖ,ghÖ,asd
1,,string
2,hello,Ö
wilÖÖl t,his work?ÖÖÖ
Can't render this file because it has a wrong number of fields in line 22.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:ID,value,test
0,Öworld
fghÖ,asd
1,,string
2,hello,Ö
will this work?Öa
1 :ID,value,test
2 0,Öworld
3 fghÖ,asd
4 1,,string
5 2,hello,Ö
6 will this work?Öa

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "woÖrld"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl this work?ÖÖ", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:ID,value,test
0,woÖrld,asd
1,,string
2,hello,wilÖl this work?ÖÖ
1 :ID value test
2 0 woÖrld asd
3 1 string
4 2 hello wilÖl this work?ÖÖ

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
expected: expected.cypher

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:IDÖvalue
1Ö""
2Öhello
Can't render this file because it contains an unexpected character in line 3 and column 4.

View File

@ -0,0 +1,9 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "my", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:IDÖvalueÖtest
0ÖworldÖasd
1ÖÖstring
2ÖhelloÖmy
1 :IDÖvalueÖtest
2 0ÖworldÖasd
3 1ÖÖstring
4 2ÖhelloÖmy

View File

@ -0,0 +1,9 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:IDÖvalueÖtest
0Ö"world
fgh"Öasd
1ÖÖstring
2ÖhelloÖ"
will this work?"
Can't render this file because it contains an unexpected character in line 2 and column 4.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:IDÖvalueÖtest
0Ö"world
fgh"Öasd
1ÖÖstring
2Ö"
will this work?" Öhello
Can't render this file because it contains an unexpected character in line 2 and column 4.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "my", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:ID,value,test
0,world,asd
1,,string
2,hello,my
1 :ID value test
2 0 world asd
3 1 string
4 2 hello my

View File

@ -0,0 +1,8 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world\"\"\" f\"gh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:IDÖvalueÖtest
0Ö"""world
""""""
f""gh"Öasd
1ÖÖstring
2ÖhelloÖ"
wil""l this work?"""
Can't render this file because it contains an unexpected character in line 2 and column 4.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"worldÖ\"Ö\"\"Ö f\"Ögh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l tÖhis work?\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:IDÖvalueÖtest
0Ö"""worldÖ
""Ö""""Ö
f""Ögh"Öasd
1ÖÖstring
2ÖhelloÖ"
wil""l tÖhis work?"""
Can't render this file because it contains an unexpected character in line 2 and column 4.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:IDÖvalueÖtest
0Ö"world
fgh"Öasd
1ÖÖstring
2ÖhelloÖ"
will this work?"a
Can't render this file because it contains an unexpected character in line 2 and column 4.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "wo\"rld"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:IDÖvalueÖtest
0Öwo"rldÖasd
1ÖÖstring
2ÖhelloÖwil"l this work?""
Can't render this file because it contains an unexpected character in line 2 and column 6.

View File

@ -0,0 +1,15 @@
- name: good_configuration
nodes: "nodes.csv"
delimiter: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
delimiter: "Ö"
quote: "-"
expected: expected.cypher

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,"world
fgh",asd
1,,string
2,hello,"
will this work?"
1 :ID value test
2 0 world fgh asd
3 1 string
4 2 hello will this work?

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:ID,value,test
0,"world
fgh",asd
1,,string
2,"
will this work?" ,hello
Can't render this file because it contains an unexpected character in line 22 and column 16.

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world\"\"\" f\"gh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,"""world
""""""
f""gh",asd
1,,string
2,hello,"
wil""l this work?"""
1 :ID value test
2 0 "world """ f"gh asd
3 1 string
4 2 hello wil"l this work?"

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world,\",\"\", f\",gh"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l t,his work?\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,22 @@
:ID,value,test
0,"""world,
"","""",
f"",gh",asd
1,,string
2,hello,"
wil""l t,his work?"""
1 :ID value test
2 0 "world, ","", f",gh asd
3 1 string
4 2 hello wil"l t,his work?"

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,22 @@
:ID,value,test
0,"world
fgh",asd
1,,string
2,hello,"
will this work?"a
Can't render this file because it contains an unexpected character in line 22 and column 16.

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
import_should_fail: True
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
import_should_fail: True

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "wo\"rld"});
CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"\"", id: "2", value: "hello"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:ID,value,test
0,wo"rld,asd
1,,string
2,hello,wil"l this work?""
Can't render this file because it contains an unexpected character in line 2 and column 5.

View File

@ -0,0 +1,13 @@
- name: good_configuration
nodes: "nodes.csv"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
delimiter: "-"
import_should_fail: True
- name: wrong_quote
nodes: "nodes.csv"
quote: "-"
expected: expected.cypher

View File

@ -0,0 +1,6 @@
CREATE INDEX ON :__mg_vertex__(__mg_id__);
CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
DROP INDEX ON :__mg_vertex__(__mg_id__);
MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;

View File

@ -0,0 +1,4 @@
:ID,value
0,
1,ÖÖ
2,hello
1 :ID value
2 0
3 1 ÖÖ
4 2 hello

View File

@ -0,0 +1,10 @@
- name: good_configuration
nodes: "nodes.csv"
quote: "Ö"
expected: expected.cypher
- name: wrong_delimiter
nodes: "nodes.csv"
quote: "Ö"
delimiter: "-"
import_should_fail: True

View File

@ -0,0 +1,2 @@
value
1
1 value
2 1

View File

@ -0,0 +1,14 @@
- name: empty_array_delimiter
nodes: "nodes.csv"
array_delimiter: ""
import_should_fail: True
- name: empty_delimiter
nodes: "nodes.csv"
delimiter: ""
import_should_fail: True
- name: empty_quote
nodes: "nodes.csv"
quote: ""
import_should_fail: True