Implement new CSV parser

Summary: The new CSV parser in `mg_import_csv` behaves the same as the standard Python CSV importer when importing a CSV file. Tests are added for all CSV field edge cases. Reviewers: teon.banek, ipaljak Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2702
2020-03-06 10:49:09 +01:00 · 2020-03-06 10:49:09 +01:00 · 8363991575
commit 8363991575
parent 1d30a2e5cb
66 changed files with 939 additions and 43 deletions
--- a/src/mg_import_csv.cpp
+++ b/src/mg_import_csv.cpp
@ -16,12 +16,16 @@
 #include "utils/timer.hpp"
 #include "version.hpp"

-bool ValidateNotNewline(const char *flagname, const std::string &value) {
-  auto has_no_newline = value.find('\n') == std::string::npos;
-  if (!has_no_newline) {
-    printf("The argument '%s' cannot contain newline character\n", flagname);
+bool ValidateControlCharacter(const char *flagname, const std::string &value) {
+  if (value.empty()) {
+    printf("The argument '%s' cannot be empty\n", flagname);
+    return false;
  }
-  return has_no_newline;
+  if (value.find('\n') != std::string::npos) {
+    printf("The argument '%s' cannot contain a newline character\n", flagname);
+    return false;
+  }
+  return true;
 }

 bool ValidateNoWhitespace(const char *flagname, const std::string &value) {
@ -53,10 +57,12 @@ DEFINE_bool(storage_properties_on_edges, false,
 // CSV import flags.
 DEFINE_string(array_delimiter, ";",
              "Delimiter between elements of array values.");
+DEFINE_validator(array_delimiter, &ValidateControlCharacter);
 DEFINE_string(delimiter, ",", "Delimiter between each field in the CSV.");
+DEFINE_validator(delimiter, &ValidateControlCharacter);
 DEFINE_string(quote, "\"",
              "Quotation character for data in the CSV. Cannot contain '\n'");
-DEFINE_validator(quote, &ValidateNotNewline);
+DEFINE_validator(quote, &ValidateControlCharacter);
 DEFINE_bool(skip_duplicate_nodes, false,
            "Set to true to skip duplicate nodes instead of raising an error.");
 // Arguments `--nodes` and `--relationships` can be input multiple times and are
@ -139,60 +145,168 @@ class LoadException : public utils::BasicException {
  using utils::BasicException::BasicException;
 };

+enum class CsvParserState {
+  INITIAL_FIELD,
+  NEXT_FIELD,
+  QUOTING,
+  NOT_QUOTING,
+  EXPECT_DELIMITER,
+};
+
+bool SubstringStartsWith(const std::string_view &str, size_t pos,
+                         const std::string_view &what) {
+  return utils::StartsWith(utils::Substr(str, pos), what);
+}
+
+/// This function reads a row from a CSV stream.
+///
+/// CSV fields must be separated by the `delimiter` and each CSV field can
+/// either be quoted or unquoted. When the field is quoted, the first and last
+/// character in the field *must* be the quote character. If the field isn't
+/// quoted, and a quote character appears in it, it is treated as a regular
+/// character. If a quote character appears inside a quoted string then the
+/// quote character must be doubled in order to escape it. Line feeds and
+/// carriage returns are ignored in the CSV file. Additionally, the file must
+/// not contain a NULL character.
+///
+/// The function uses the same logic as the standard Python CSV parser. The data
+/// is parsed in the same way as the following snippet:
+/// ```
+/// import csv
+/// for row in csv.reader(stream, strict=True):
+///     # process `row`
+/// ```
+///
+/// Python uses 'excel' as the default dialect when parsing CSV files, and the
+/// default settings of that dialect are:
+///  - delimiter: ','
+///  - doublequote: True
+///  - escapechar: None
+///  - lineterminator: '\r\n'
+///  - quotechar: '"'
+///  - skipinitialspace: False
+///
+/// The above snippet can be expanded to:
+/// ```
+/// import csv
+/// for row in csv.reader(stream, delimiter=',', doublequote=True,
+///                       escapechar=None, lineterminator='\r\n',
+///                       quotechar='"', skipinitialspace=False,
+///                       strict=True):
+///     # process `row`
+/// ```
+///
+/// For more information about the meaning of the above values, see:
+/// https://docs.python.org/3/library/csv.html#csv.Dialect
+///
 /// @throw LoadException
 std::pair<std::vector<std::string>, uint64_t> ReadRow(std::istream &stream) {
  std::vector<std::string> row;
-  bool quoting = false;
-  std::vector<char> column;
-  std::string line;
+  std::string column;
  uint64_t lines_count = 0;

-  auto check_quote = [&line](int curr_idx) {
-    return curr_idx + FLAGS_quote.size() <= line.size() &&
-           line.compare(curr_idx, FLAGS_quote.size(), FLAGS_quote) == 0;
-  };
+  auto state = CsvParserState::INITIAL_FIELD;

  do {
+    std::string line;
    if (!std::getline(stream, line)) {
-      if (quoting) {
-        throw LoadException(
-            "There is no more data left to load while inside a quoted string. "
-            "Did you forget to close the quote?");
-      } else {
-        // The whole row was processed.
-        break;
-      }
+      // The whole file was processed.
+      break;
    }
    ++lines_count;
+
    for (size_t i = 0; i < line.size(); ++i) {
      auto c = line[i];
-      if (quoting) {
-        if (check_quote(i)) {
-          quoting = false;
-          i += FLAGS_quote.size() - 1;
-        } else {
-          column.push_back(c);
+
+      // Line feeds and carriage returns are ignored in CSVs.
+      if (c == '\n' || c == '\r') continue;
+      // Null bytes aren't allowed in CSVs.
+      if (c == '\0') throw LoadException("Line contains NULL byte");
+
+      switch (state) {
+        case CsvParserState::INITIAL_FIELD:
+        case CsvParserState::NEXT_FIELD: {
+          if (SubstringStartsWith(line, i, FLAGS_quote)) {
+            // The current field is a quoted field.
+            state = CsvParserState::QUOTING;
+            i += FLAGS_quote.size() - 1;
+          } else if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
+            // The current field has an empty value.
+            row.emplace_back("");
+            state = CsvParserState::NEXT_FIELD;
+            i += FLAGS_delimiter.size() - 1;
+          } else {
+            // The current field is a regular field.
+            column.push_back(c);
+            state = CsvParserState::NOT_QUOTING;
+          }
+          break;
        }
-      } else if (check_quote(i)) {
-        // Hopefully, escaping isn't needed
-        quoting = true;
-        i += FLAGS_quote.size() - 1;
-      } else if (c == FLAGS_delimiter.front()) {
-        row.emplace_back(column.begin(), column.end());
-        column.clear();
-        // Handle special case when delimiter is the last
-        // character in line. This means that another
-        // empty column needs to be added.
-        if (i == line.size() - 1) {
-          row.emplace_back("");
+        case CsvParserState::QUOTING: {
+          auto quote_now = SubstringStartsWith(line, i, FLAGS_quote);
+          auto quote_next =
+              SubstringStartsWith(line, i + FLAGS_quote.size(), FLAGS_quote);
+          if (quote_now && quote_next) {
+            // This is an escaped quote character.
+            column += FLAGS_quote;
+            i += FLAGS_quote.size() * 2 - 1;
+          } else if (quote_now && !quote_next) {
+            // This is the end of the quoted field.
+            row.emplace_back(std::move(column));
+            state = CsvParserState::EXPECT_DELIMITER;
+            i += FLAGS_quote.size() - 1;
+          } else {
+            column.push_back(c);
+          }
+          break;
+        }
+        case CsvParserState::NOT_QUOTING: {
+          if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
+            row.emplace_back(std::move(column));
+            state = CsvParserState::NEXT_FIELD;
+            i += FLAGS_delimiter.size() - 1;
+          } else {
+            column.push_back(c);
+          }
+          break;
+        }
+        case CsvParserState::EXPECT_DELIMITER: {
+          if (SubstringStartsWith(line, i, FLAGS_delimiter)) {
+            state = CsvParserState::NEXT_FIELD;
+            i += FLAGS_delimiter.size() - 1;
+          } else {
+            throw LoadException("Expected '{}' after '{}', but got '{}'",
+                                FLAGS_delimiter, FLAGS_quote, c);
+          }
+          break;
        }
-      } else {
-        column.push_back(c);
      }
    }
-  } while (quoting);
+  } while (state == CsvParserState::QUOTING);
+
+  switch (state) {
+    case CsvParserState::INITIAL_FIELD: {
+      break;
+    }
+    case CsvParserState::NEXT_FIELD: {
+      row.emplace_back(std::move(column));
+      break;
+    }
+    case CsvParserState::QUOTING: {
+      throw LoadException(
+          "There is no more data left to load while inside a quoted string. "
+          "Did you forget to close the quote?");
+      break;
+    }
+    case CsvParserState::NOT_QUOTING: {
+      row.emplace_back(std::move(column));
+      break;
+    }
+    case CsvParserState::EXPECT_DELIMITER: {
+      break;
+    }
+  }

-  if (!column.empty()) row.emplace_back(column.begin(), column.end());
  return {std::move(row), lines_count};
 }

--- a/tests/integration/mg_import_csv/tests/csv_parser_test1/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test1/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test1/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test1/nodes.csv
@ -0,0 +1,4 @@
+:ID,value
+0,
+1,""
+2,hello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test1/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test1/test.yaml
@ -0,0 +1,8 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test10/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test10/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test10/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test10/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,Öworld
+
+
+
+
+
+
+
+
+ fghÖ,asd
+1,,string
+2,hello,Ö
+
+
+
+
+
+
+
+
+will this work?Ö
--- a/tests/integration/mg_import_csv/tests/csv_parser_test10/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test10/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test11/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test11/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,Öworld
+
+
+
+
+
+
+
+
+ fghÖ,asd
+1,,string
+2,Ö
+
+
+
+
+
+
+
+
+will this work?Ö ,hello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test11/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test11/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test12/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test12/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "ÖworldÖÖÖ fÖgh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl this work?Ö", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test12/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test12/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,ÖÖÖworld
+
+
+
+ÖÖÖÖÖÖ
+
+
+
+
+ fÖÖghÖ,asd
+1,,string
+2,hello,Ö
+
+
+
+
+
+
+
+
+wilÖÖl this work?ÖÖÖ
--- a/tests/integration/mg_import_csv/tests/csv_parser_test12/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test12/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test13/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test13/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "Öworld,Ö,ÖÖ, fÖ,gh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl t,his work?Ö", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test13/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test13/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,ÖÖÖworld,
+
+
+
+ÖÖ,ÖÖÖÖ,
+
+
+
+
+ fÖÖ,ghÖ,asd
+1,,string
+2,hello,Ö
+
+
+
+
+
+
+
+
+wilÖÖl t,his work?ÖÖÖ
--- a/tests/integration/mg_import_csv/tests/csv_parser_test13/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test13/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test14/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test14/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,Öworld
+
+
+
+
+
+
+
+
+ fghÖ,asd
+1,,string
+2,hello,Ö
+
+
+
+
+
+
+
+
+will this work?Öa
--- a/tests/integration/mg_import_csv/tests/csv_parser_test14/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test14/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test15/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test15/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "woÖrld"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wilÖl this work?ÖÖ", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test15/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test15/nodes.csv
@ -0,0 +1,4 @@
+:ID,value,test
+0,woÖrld,asd
+1,,string
+2,hello,wilÖl this work?ÖÖ
--- a/tests/integration/mg_import_csv/tests/csv_parser_test15/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test15/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  expected: expected.cypher
--- a/tests/integration/mg_import_csv/tests/csv_parser_test16/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test16/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test16/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test16/nodes.csv
@ -0,0 +1,4 @@
+:IDÖvalue
+0Ö
+1Ö""
+2Öhello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test16/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test16/test.yaml
@ -0,0 +1,9 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test17/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test17/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "my", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test17/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test17/nodes.csv
@ -0,0 +1,4 @@
+:IDÖvalueÖtest
+0ÖworldÖasd
+1ÖÖstring
+2ÖhelloÖmy
--- a/tests/integration/mg_import_csv/tests/csv_parser_test17/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test17/test.yaml
@ -0,0 +1,9 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test18/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test18/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test18/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test18/nodes.csv
@ -0,0 +1,22 @@
+:IDÖvalueÖtest
+0Ö"world
+
+
+
+
+
+
+
+
+ fgh"Öasd
+1ÖÖstring
+2ÖhelloÖ"
+
+
+
+
+
+
+
+
+will this work?"
--- a/tests/integration/mg_import_csv/tests/csv_parser_test18/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test18/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test19/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test19/nodes.csv
@ -0,0 +1,22 @@
+:IDÖvalueÖtest
+0Ö"world
+
+
+
+
+
+
+
+
+ fgh"Öasd
+1ÖÖstring
+2Ö"
+
+
+
+
+
+
+
+
+will this work?" Öhello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test19/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test19/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test2/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test2/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "my", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test2/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test2/nodes.csv
@ -0,0 +1,4 @@
+:ID,value,test
+0,world,asd
+1,,string
+2,hello,my
--- a/tests/integration/mg_import_csv/tests/csv_parser_test2/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test2/test.yaml
@ -0,0 +1,8 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test20/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test20/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world\"\"\" f\"gh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test20/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test20/nodes.csv
@ -0,0 +1,22 @@
+:IDÖvalueÖtest
+0Ö"""world
+
+
+
+""""""
+
+
+
+
+ f""gh"Öasd
+1ÖÖstring
+2ÖhelloÖ"
+
+
+
+
+
+
+
+
+wil""l this work?"""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test20/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test20/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test21/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test21/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"worldÖ\"Ö\"\"Ö f\"Ögh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l tÖhis work?\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test21/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test21/nodes.csv
@ -0,0 +1,22 @@
+:IDÖvalueÖtest
+0Ö"""worldÖ
+
+
+
+""Ö""""Ö
+
+
+
+
+ f""Ögh"Öasd
+1ÖÖstring
+2ÖhelloÖ"
+
+
+
+
+
+
+
+
+wil""l tÖhis work?"""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test21/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test21/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test22/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test22/nodes.csv
@ -0,0 +1,22 @@
+:IDÖvalueÖtest
+0Ö"world
+
+
+
+
+
+
+
+
+ fgh"Öasd
+1ÖÖstring
+2ÖhelloÖ"
+
+
+
+
+
+
+
+
+will this work?"a
--- a/tests/integration/mg_import_csv/tests/csv_parser_test22/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test22/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test23/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test23/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "wo\"rld"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test23/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test23/nodes.csv
@ -0,0 +1,4 @@
+:IDÖvalueÖtest
+0Öwo"rldÖasd
+1ÖÖstring
+2ÖhelloÖwil"l this work?""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test23/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test23/test.yaml
@ -0,0 +1,15 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  delimiter: "Ö"
+  quote: "-"
+  expected: expected.cypher
--- a/tests/integration/mg_import_csv/tests/csv_parser_test3/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test3/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "world fgh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "will this work?", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test3/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test3/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,"world
+
+
+
+
+
+
+
+
+ fgh",asd
+1,,string
+2,hello,"
+
+
+
+
+
+
+
+
+will this work?"
--- a/tests/integration/mg_import_csv/tests/csv_parser_test3/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test3/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test4/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test4/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,"world
+
+
+
+
+
+
+
+
+ fgh",asd
+1,,string
+2,"
+
+
+
+
+
+
+
+
+will this work?" ,hello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test4/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test4/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test5/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test5/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world\"\"\" f\"gh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test5/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test5/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,"""world
+
+
+
+""""""
+
+
+
+
+ f""gh",asd
+1,,string
+2,hello,"
+
+
+
+
+
+
+
+
+wil""l this work?"""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test5/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test5/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test6/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test6/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "\"world,\",\"\", f\",gh"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l t,his work?\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test6/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test6/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,"""world,
+
+
+
+"","""",
+
+
+
+
+ f"",gh",asd
+1,,string
+2,hello,"
+
+
+
+
+
+
+
+
+wil""l t,his work?"""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test6/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test6/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test7/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test7/nodes.csv
@ -0,0 +1,22 @@
+:ID,value,test
+0,"world
+
+
+
+
+
+
+
+
+ fgh",asd
+1,,string
+2,hello,"
+
+
+
+
+
+
+
+
+will this work?"a
--- a/tests/integration/mg_import_csv/tests/csv_parser_test7/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test7/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  import_should_fail: True
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/csv_parser_test8/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test8/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, test: "asd", id: "0", value: "wo\"rld"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, test: "string", id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, test: "wil\"l this work?\"\"", id: "2", value: "hello"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test8/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test8/nodes.csv
@ -0,0 +1,4 @@
+:ID,value,test
+0,wo"rld,asd
+1,,string
+2,hello,wil"l this work?""
--- a/tests/integration/mg_import_csv/tests/csv_parser_test8/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test8/test.yaml
@ -0,0 +1,13 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  delimiter: "-"
+  import_should_fail: True
+
+- name: wrong_quote
+  nodes: "nodes.csv"
+  quote: "-"
+  expected: expected.cypher
--- a/tests/integration/mg_import_csv/tests/csv_parser_test9/expected.cypher
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test9/expected.cypher
@ -0,0 +1,6 @@
+CREATE INDEX ON :__mg_vertex__(__mg_id__);
+CREATE (:__mg_vertex__ {__mg_id__: 0, id: "0"});
+CREATE (:__mg_vertex__ {__mg_id__: 1, id: "1"});
+CREATE (:__mg_vertex__ {__mg_id__: 2, value: "hello", id: "2"});
+DROP INDEX ON :__mg_vertex__(__mg_id__);
+MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__;
--- a/tests/integration/mg_import_csv/tests/csv_parser_test9/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test9/nodes.csv
@ -0,0 +1,4 @@
+:ID,value
+0,
+1,ÖÖ
+2,hello
--- a/tests/integration/mg_import_csv/tests/csv_parser_test9/test.yaml
+++ b/tests/integration/mg_import_csv/tests/csv_parser_test9/test.yaml
@ -0,0 +1,10 @@
+- name: good_configuration
+  nodes: "nodes.csv"
+  quote: "Ö"
+  expected: expected.cypher
+
+- name: wrong_delimiter
+  nodes: "nodes.csv"
+  quote: "Ö"
+  delimiter: "-"
+  import_should_fail: True
--- a/tests/integration/mg_import_csv/tests/empty_control_characters/nodes.csv
+++ b/tests/integration/mg_import_csv/tests/empty_control_characters/nodes.csv
@ -0,0 +1,2 @@
+value
+1
--- a/tests/integration/mg_import_csv/tests/empty_control_characters/test.yaml
+++ b/tests/integration/mg_import_csv/tests/empty_control_characters/test.yaml
@ -0,0 +1,14 @@
+- name: empty_array_delimiter
+  nodes: "nodes.csv"
+  array_delimiter: ""
+  import_should_fail: True
+
+- name: empty_delimiter
+  nodes: "nodes.csv"
+  delimiter: ""
+  import_should_fail: True
+
+- name: empty_quote
+  nodes: "nodes.csv"
+  quote: ""
+  import_should_fail: True