Fix MATCH + LOAD CSV to load CSV only once (#916)
* update profile query to use poolresource * Optimize update of indexes * Add ignore empty strings to load csv * Add operator changes to support handling of nulls * Store chunks in memory pools ordered * Use same max block per chunks number * Remove redundant return statement * add hacky cached solution * change map to set * remove memory * Add match load csv invalid behaviour commit * Accept input on LOAD CSV * Ommit changes not tied to the PR * Add tests for match + load csv * Add gqlalchemy installation for e2e tests * Modify setup script to update packages * Revert gqlalchemy to 1.3.3 * Revert gqlalchemy to 1.3.3 * Address PR review comments * Ommit semicolon --------- Co-authored-by: antoniofilipovic <filipovicantonio1998@gmail.com> Co-authored-by: János Benjamin Antal <benjamin.antal@memgraph.io>
This commit is contained in:
parent
df95775222
commit
63f8298033
@ -4648,7 +4648,7 @@ LoadCsv::LoadCsv(std::shared_ptr<LogicalOperator> input, Expression *file, bool
|
|||||||
MG_ASSERT(file_, "Something went wrong - '{}' member file_ shouldn't be a nullptr", __func__);
|
MG_ASSERT(file_, "Something went wrong - '{}' member file_ shouldn't be a nullptr", __func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoadCsv::Accept(HierarchicalLogicalOperatorVisitor &visitor) { return false; };
|
ACCEPT_WITH_INPUT(LoadCsv)
|
||||||
|
|
||||||
class LoadCsvCursor;
|
class LoadCsvCursor;
|
||||||
|
|
||||||
@ -4699,14 +4699,12 @@ TypedValue CsvRowToTypedMap(csv::Reader::Row &row, csv::Reader::Header header) {
|
|||||||
class LoadCsvCursor : public Cursor {
|
class LoadCsvCursor : public Cursor {
|
||||||
const LoadCsv *self_;
|
const LoadCsv *self_;
|
||||||
const UniqueCursorPtr input_cursor_;
|
const UniqueCursorPtr input_cursor_;
|
||||||
bool input_is_once_;
|
bool did_pull_;
|
||||||
std::optional<csv::Reader> reader_{};
|
std::optional<csv::Reader> reader_{};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
LoadCsvCursor(const LoadCsv *self, utils::MemoryResource *mem)
|
LoadCsvCursor(const LoadCsv *self, utils::MemoryResource *mem)
|
||||||
: self_(self), input_cursor_(self_->input_->MakeCursor(mem)) {
|
: self_(self), input_cursor_(self_->input_->MakeCursor(mem)), did_pull_{false} {}
|
||||||
input_is_once_ = dynamic_cast<Once *>(self_->input_.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Pull(Frame &frame, ExecutionContext &context) override {
|
bool Pull(Frame &frame, ExecutionContext &context) override {
|
||||||
SCOPED_PROFILE_OP("LoadCsv");
|
SCOPED_PROFILE_OP("LoadCsv");
|
||||||
@ -4722,14 +4720,14 @@ class LoadCsvCursor : public Cursor {
|
|||||||
reader_ = MakeReader(&context.evaluation_context);
|
reader_ = MakeReader(&context.evaluation_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool input_pulled = input_cursor_->Pull(frame, context);
|
if (input_cursor_->Pull(frame, context)) {
|
||||||
|
if (did_pull_) {
|
||||||
|
throw QueryRuntimeException(
|
||||||
|
"LOAD CSV can be executed only once, please check if the cardinality of the operator before LOAD CSV is 1");
|
||||||
|
}
|
||||||
|
did_pull_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
// If the input is Once, we have to keep going until we read all the rows,
|
|
||||||
// regardless of whether the pull on Once returned false.
|
|
||||||
// If we have e.g. MATCH(n) LOAD CSV ... AS x SET n.name = x.name, then we
|
|
||||||
// have to read at most cardinality(n) rows (but we can read less and stop
|
|
||||||
// pulling MATCH).
|
|
||||||
if (!input_is_once_ && !input_pulled) return false;
|
|
||||||
auto row = reader_->GetNextRow(context.evaluation_context.memory);
|
auto row = reader_->GetNextRow(context.evaluation_context.memory);
|
||||||
if (!row) {
|
if (!row) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -874,11 +874,27 @@ bool PlanToJsonVisitor::PreVisit(query::plan::CallProcedure &op) {
|
|||||||
bool PlanToJsonVisitor::PreVisit(query::plan::LoadCsv &op) {
|
bool PlanToJsonVisitor::PreVisit(query::plan::LoadCsv &op) {
|
||||||
json self;
|
json self;
|
||||||
self["name"] = "LoadCsv";
|
self["name"] = "LoadCsv";
|
||||||
self["file"] = ToJson(op.file_);
|
|
||||||
self["with_header"] = op.with_header_;
|
if (op.file_) {
|
||||||
self["ignore_bad"] = op.ignore_bad_;
|
self["file"] = ToJson(op.file_);
|
||||||
self["delimiter"] = ToJson(op.delimiter_);
|
}
|
||||||
self["quote"] = ToJson(op.quote_);
|
|
||||||
|
if (op.with_header_) {
|
||||||
|
self["with_header"] = op.with_header_;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op.ignore_bad_) {
|
||||||
|
self["ignore_bad"] = op.ignore_bad_;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op.delimiter_) {
|
||||||
|
self["delimiter"] = ToJson(op.delimiter_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op.quote_) {
|
||||||
|
self["quote"] = ToJson(op.quote_);
|
||||||
|
}
|
||||||
|
|
||||||
self["row_variable"] = ToJson(op.row_var_);
|
self["row_variable"] = ToJson(op.row_var_);
|
||||||
|
|
||||||
op.input_->Accept(*this);
|
op.input_->Accept(*this);
|
||||||
|
@ -477,6 +477,16 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PreVisit(LoadCsv &op) override {
|
||||||
|
prev_ops_.push_back(&op);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PostVisit(LoadCsv & /*op*/) override {
|
||||||
|
prev_ops_.pop_back();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
std::shared_ptr<LogicalOperator> new_root_;
|
std::shared_ptr<LogicalOperator> new_root_;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
@ -55,6 +55,7 @@ add_subdirectory(python_query_modules_reloading)
|
|||||||
add_subdirectory(analyze_graph)
|
add_subdirectory(analyze_graph)
|
||||||
add_subdirectory(transaction_queue)
|
add_subdirectory(transaction_queue)
|
||||||
add_subdirectory(mock_api)
|
add_subdirectory(mock_api)
|
||||||
|
add_subdirectory(load_csv)
|
||||||
add_subdirectory(init_file_flags)
|
add_subdirectory(init_file_flags)
|
||||||
|
|
||||||
copy_e2e_python_files(pytest_runner pytest_runner.sh "")
|
copy_e2e_python_files(pytest_runner pytest_runner.sh "")
|
||||||
|
10
tests/e2e/load_csv/CMakeLists.txt
Normal file
10
tests/e2e/load_csv/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
function(copy_load_csv_e2e_python_files FILE_NAME)
|
||||||
|
copy_e2e_python_files(load_csv ${FILE_NAME})
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
function(copy_load_csv_e2e_files FILE_NAME)
|
||||||
|
copy_e2e_python_files(load_csv ${FILE_NAME})
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
copy_load_csv_e2e_python_files(load_csv.py)
|
||||||
|
copy_load_csv_e2e_files(simple.csv)
|
56
tests/e2e/load_csv/load_csv.py
Normal file
56
tests/e2e/load_csv/load_csv.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# Copyright 2022 Memgraph Ltd.
|
||||||
|
#
|
||||||
|
# Use of this software is governed by the Business Source License
|
||||||
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
# License, and you may not use this file except in compliance with the Business Source License.
|
||||||
|
#
|
||||||
|
# As of the Change Date specified in that file, in accordance with
|
||||||
|
# the Business Source License, use of this software will be governed
|
||||||
|
# by the Apache License, Version 2.0, included in the file
|
||||||
|
# licenses/APL.txt.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from gqlalchemy import Memgraph
|
||||||
|
from mgclient import DatabaseError
|
||||||
|
|
||||||
|
SIMPLE_CSV_FILE = "simple.csv"
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_path(file: str) -> str:
|
||||||
|
return os.path.join(Path(__file__).parent.absolute(), file)
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_two_rows_in_db_when_load_csv_after_match_then_throw_exception():
|
||||||
|
memgraph = Memgraph("localhost", 7687)
|
||||||
|
|
||||||
|
with pytest.raises(DatabaseError):
|
||||||
|
next(
|
||||||
|
memgraph.execute_and_fetch(
|
||||||
|
f"""MATCH (n) LOAD CSV
|
||||||
|
FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row
|
||||||
|
CREATE (:Person {{name: row.name}})
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_one_row_in_db_when_load_csv_after_match_then_pass():
|
||||||
|
memgraph = Memgraph("localhost", 7687)
|
||||||
|
|
||||||
|
results = memgraph.execute_and_fetch(
|
||||||
|
f"""MATCH (n {{prop: 1}}) LOAD CSV
|
||||||
|
FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row
|
||||||
|
CREATE (:Person {{name: row.name}})
|
||||||
|
RETURN n
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(list(results)) == 4
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(pytest.main([__file__, "-rA"]))
|
5
tests/e2e/load_csv/simple.csv
Normal file
5
tests/e2e/load_csv/simple.csv
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
id,name
|
||||||
|
1,Joseph
|
||||||
|
2,Peter
|
||||||
|
3,Ella
|
||||||
|
4,Joe
|
|
15
tests/e2e/load_csv/workloads.yaml
Normal file
15
tests/e2e/load_csv/workloads.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
load_csv_cluster: &load_csv_cluster
|
||||||
|
cluster:
|
||||||
|
main:
|
||||||
|
args: ["--bolt-port", "7687", "--log-level=TRACE"]
|
||||||
|
log_file: "load_csv_log_file.txt"
|
||||||
|
setup_queries:
|
||||||
|
- "CREATE (n {prop: 1});"
|
||||||
|
- "CREATE (n {prop: 2});"
|
||||||
|
validation_queries: []
|
||||||
|
|
||||||
|
workloads:
|
||||||
|
- name: "MATCH + LOAD CSV"
|
||||||
|
binary: "tests/e2e/pytest_runner.sh"
|
||||||
|
args: ["load_csv/load_csv.py"]
|
||||||
|
<<: *load_csv_cluster
|
@ -1,6 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# shellcheck disable=1091
|
# shellcheck disable=1091
|
||||||
|
|
||||||
set -Eeuo pipefail
|
set -Eeuo pipefail
|
||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
Loading…
Reference in New Issue
Block a user