From d4bcdb77ad7301073aa51a31ddb12adb9bcdbed4 Mon Sep 17 00:00:00 2001 From: DavIvek <david.ivekovic@memgraph.io> Date: Wed, 10 Jan 2024 11:46:20 +0100 Subject: [PATCH 1/7] Fix using path identifier after CREATE (#1629) --- src/query/plan/rule_based_planner.hpp | 3 ++- .../tests/memgraph_V1/features/match.feature | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 074bd1c88..bdd7bdbd9 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -380,6 +380,7 @@ class RuleBasedPlanner { if (pattern.identifier_->user_declared_) { std::vector<Symbol> path_elements; for (const PatternAtom *atom : pattern.atoms_) path_elements.emplace_back(symbol_table.at(*atom->identifier_)); + bound_symbols.insert(symbol_table.at(*pattern.identifier_)); last_op = std::make_unique<ConstructNamedPath>(std::move(last_op), symbol_table.at(*pattern.identifier_), path_elements); } diff --git a/tests/gql_behave/tests/memgraph_V1/features/match.feature b/tests/gql_behave/tests/memgraph_V1/features/match.feature index 227ad9ad6..0d0477ad9 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/match.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/match.feature @@ -771,3 +771,17 @@ Feature: Match Then the result should be: | path | | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | + + Scenario: Using path indentifier from CREATE in MERGE + Given an empty graph + And having executed: + """ + CREATE p0=()-[:T0]->() MERGE ({k:(size(p0))}); + """ + When executing query: + """ + MATCH (n {k: 1}) RETURN n; + """ + Then the result should be: + | n | + | ({k: 1}) | From b3d0c2ccc2578bd343f757cb2563883fba7af9c4 Mon Sep 17 00:00:00 2001 From: DavIvek <david.ivekovic@memgraph.io> Date: Wed, 10 Jan 2024 15:08:21 +0100 Subject: [PATCH 2/7] Add query parameters support for labels (#1602) --- src/query/cypher_query_interpreter.cpp | 5 +- .../frontend/ast/cypher_main_visitor.cpp | 10 ++- .../frontend/ast/cypher_main_visitor.hpp | 6 +- .../frontend/opencypher/grammar/Cypher.g4 | 2 +- tests/benchmark/query/execution.cpp | 4 +- .../memgraph_V1/features/parameters.feature | 89 ++++++++++++++++++- tests/manual/expression_pretty_printer.cpp | 6 +- tests/manual/interactive_planning.cpp | 3 +- tests/unit/cypher_main_visitor.cpp | 10 ++- 9 files changed, 121 insertions(+), 14 deletions(-) diff --git a/src/query/cypher_query_interpreter.cpp b/src/query/cypher_query_interpreter.cpp index d3ddc22c4..30966119b 100644 --- a/src/query/cypher_query_interpreter.cpp +++ b/src/query/cypher_query_interpreter.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -12,7 +12,6 @@ #include "query/cypher_query_interpreter.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/opencypher/parser.hpp" -#include "utils/synchronized.hpp" // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) DEFINE_bool(query_cost_planner, true, "Use the cost-estimating query planner."); @@ -80,7 +79,7 @@ ParsedQuery ParseQuery(const std::string &query_string, const std::map<std::stri // Convert the ANTLR4 parse tree into an AST. AstStorage ast_storage; frontend::ParsingContext context{.is_query_cached = true}; - frontend::CypherMainVisitor visitor(context, &ast_storage); + frontend::CypherMainVisitor visitor(context, &ast_storage, ¶meters); visitor.visit(parser->tree()); diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 7002ee4b9..b62c9e301 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -1825,7 +1825,15 @@ antlrcpp::Any CypherMainVisitor::visitNodePattern(MemgraphCypher::NodePatternCon antlrcpp::Any CypherMainVisitor::visitNodeLabels(MemgraphCypher::NodeLabelsContext *ctx) { std::vector<LabelIx> labels; for (auto *node_label : ctx->nodeLabel()) { - labels.push_back(AddLabel(std::any_cast<std::string>(node_label->accept(this)))); + if (node_label->labelName()->symbolicName()) { + labels.emplace_back(AddLabel(std::any_cast<std::string>(node_label->accept(this)))); + } else { + // If we have a parameter, we have to resolve it. + const auto *param_lookup = std::any_cast<ParameterLookup *>(node_label->accept(this)); + const auto label_name = parameters_->AtTokenPosition(param_lookup->token_position_).ValueString(); + labels.emplace_back(storage_->GetLabelIx(label_name)); + query_info_.is_cacheable = false; // We can't cache queries with label parameters. + } } return labels; } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 9e828674f..7f75b0050 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -17,6 +17,7 @@ #include "query/frontend/ast/ast.hpp" #include "query/frontend/opencypher/generated/MemgraphCypherBaseVisitor.h" +#include "query/parameters.hpp" #include "utils/exceptions.hpp" #include "utils/logging.hpp" @@ -30,7 +31,8 @@ struct ParsingContext { class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { public: - explicit CypherMainVisitor(ParsingContext context, AstStorage *storage) : context_(context), storage_(storage) {} + explicit CypherMainVisitor(ParsingContext context, AstStorage *storage, Parameters *parameters) + : context_(context), storage_(storage), parameters_(parameters) {} private: Expression *CreateBinaryOperatorByToken(size_t token, Expression *e1, Expression *e2) { @@ -1022,6 +1024,8 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { // return. bool in_with_ = false; + Parameters *parameters_; + QueryInfo query_info_; }; } // namespace memgraph::query::frontend diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index d387002d8..bb435b85d 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -193,7 +193,7 @@ nodeLabels : nodeLabel ( nodeLabel )* ; nodeLabel : ':' labelName ; -labelName : symbolicName ; +labelName : symbolicName | parameter; relTypeName : symbolicName ; diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index aa72e21d7..750dd5564 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -23,6 +23,7 @@ // variable of the same name, EOF. // This hides the definition of the macro which causes // the compilation to fail. +#include "query/parameters.hpp" #include "query/plan/planner.hpp" ////////////////////////////////////////////////////// #include "communication/result_stream_faker.hpp" @@ -119,10 +120,11 @@ static void AddTree(memgraph::storage::Storage *db, int vertex_count) { static memgraph::query::CypherQuery *ParseCypherQuery(const std::string &query_string, memgraph::query::AstStorage *ast) { memgraph::query::frontend::ParsingContext parsing_context; + memgraph::query::Parameters parameters; parsing_context.is_query_cached = false; memgraph::query::frontend::opencypher::Parser parser(query_string); // Convert antlr4 AST into Memgraph AST. - memgraph::query::frontend::CypherMainVisitor cypher_visitor(parsing_context, ast); + memgraph::query::frontend::CypherMainVisitor cypher_visitor(parsing_context, ast, ¶meters); cypher_visitor.visit(parser.tree()); return memgraph::utils::Downcast<memgraph::query::CypherQuery>(cypher_visitor.query()); }; diff --git a/tests/gql_behave/tests/memgraph_V1/features/parameters.feature b/tests/gql_behave/tests/memgraph_V1/features/parameters.feature index 908507e43..288f93206 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/parameters.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/parameters.feature @@ -52,7 +52,7 @@ Feature: Parameters | [1, 2, 3] | Scenario: Parameters in match: - Given an empty graph + Given an empty graph And having executed: """ CREATE (a {x : 10}) @@ -66,3 +66,90 @@ Feature: Parameters Then the result should be: | a.x | | 10 | + + Scenario: Label parameters in match: + Given an empty graph + And having executed: + """ + CREATE (a:Label1 {x : 10}) + """ + And parameters are: + | a | 10 | + | label | Label1 | + When executing query: + """ + MATCH (a:$label {x : $a}) RETURN a + """ + Then the result should be: + | a | + | (:Label1{x: 10}) | + + Scenario: Label parameters in create and match + Given an empty graph + And parameters are: + | a | 10 | + | label | Label1 | + When executing query: + """ + CREATE (a:$label {x: $a}) + """ + When executing query: + """ + MATCH (a:$label {x: $a}) RETURN a + """ + Then the result should be: + | a | + | (:Label1{x: 10}) | + + Scenario: Label parameters in merge + Given an empty graph + And parameters are: + | a | 10 | + | label | Label1 | + When executing query: + """ + MERGE (a:$label {x: $a}) RETURN a + """ + Then the result should be: + | a | + | (:Label1{x: 10}) | + + Scenario: Label parameters in set label + Given an empty graph + And having executed: + """ + CREATE (a:Label1 {x : 10}) + """ + And parameters are: + | new_label | Label2 | + When executing query: + """ + MATCH (a:Label1 {x: 10}) SET a:$new_label + """ + When executing query: + """ + MATCH (a:Label1:Label1 {x: 10}) RETURN a + """ + Then the result should be: + | a | + | (:Label1:Label2 {x: 10}) | + + Scenario: Label parameters in remove label + Given an empty graph + And having executed: + """ + CREATE (a:Label1:LabelToRemove {x : 10}) + """ + And parameters are: + | label_to_remove | LabelToRemove | + When executing query: + """ + MATCH (a {x: 10}) REMOVE a:$label_to_remove + """ + When executing query: + """ + MATCH (a {x: 10}) RETURN a + """ + Then the result should be: + | a | + | (:Label1 {x: 10}) | diff --git a/tests/manual/expression_pretty_printer.cpp b/tests/manual/expression_pretty_printer.cpp index e2d58b350..20757b195 100644 --- a/tests/manual/expression_pretty_printer.cpp +++ b/tests/manual/expression_pretty_printer.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -17,6 +17,7 @@ #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/ast/pretty_print.hpp" #include "query/frontend/opencypher/parser.hpp" +#include "query/parameters.hpp" std::string AssembleQueryString(const std::string &expression_string) { return "return " + expression_string + " as expr"; @@ -24,8 +25,9 @@ std::string AssembleQueryString(const std::string &expression_string) { memgraph::query::Query *ParseQuery(const std::string &query_string, memgraph::query::AstStorage *ast_storage) { memgraph::query::frontend::ParsingContext context; + memgraph::query::Parameters parameters; memgraph::query::frontend::opencypher::Parser parser(query_string); - memgraph::query::frontend::CypherMainVisitor visitor(context, ast_storage); + memgraph::query::frontend::CypherMainVisitor visitor(context, ast_storage, ¶meters); visitor.visit(parser.tree()); return visitor.query(); diff --git a/tests/manual/interactive_planning.cpp b/tests/manual/interactive_planning.cpp index d5da0ba2b..76ae4cc1a 100644 --- a/tests/manual/interactive_planning.cpp +++ b/tests/manual/interactive_planning.cpp @@ -434,11 +434,12 @@ void ExaminePlans(memgraph::query::DbAccessor *dba, const memgraph::query::Symbo memgraph::query::Query *MakeAst(const std::string &query, memgraph::query::AstStorage *storage) { memgraph::query::frontend::ParsingContext parsing_context; + memgraph::query::Parameters parameters; parsing_context.is_query_cached = false; // query -> AST auto parser = std::make_unique<memgraph::query::frontend::opencypher::Parser>(query); // AST -> high level tree - memgraph::query::frontend::CypherMainVisitor visitor(parsing_context, storage); + memgraph::query::frontend::CypherMainVisitor visitor(parsing_context, storage, ¶meters); visitor.visit(parser->tree()); return visitor.query(); } diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 31fd95c6c..9cb33589c 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -37,6 +37,7 @@ #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/opencypher/parser.hpp" #include "query/frontend/stripped.hpp" +#include "query/parameters.hpp" #include "query/procedure/cypher_types.hpp" #include "query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" @@ -118,7 +119,8 @@ class AstGenerator : public Base { public: Query *ParseQuery(const std::string &query_string) override { ::frontend::opencypher::Parser parser(query_string); - CypherMainVisitor visitor(context_, &ast_storage_); + Parameters parameters; + CypherMainVisitor visitor(context_, &ast_storage_, ¶meters); visitor.visit(parser.tree()); return visitor.query(); } @@ -151,6 +153,7 @@ class ClonedAstGenerator : public Base { public: Query *ParseQuery(const std::string &query_string) override { ::frontend::opencypher::Parser parser(query_string); + Parameters parameters; AstStorage tmp_storage; { // Add a label, property and edge type into temporary storage so @@ -159,7 +162,7 @@ class ClonedAstGenerator : public Base { tmp_storage.GetPropertyIx("fdjakfjdklfjdaslk"); tmp_storage.GetEdgeTypeIx("fdjkalfjdlkajfdkla"); } - CypherMainVisitor visitor(context_, &tmp_storage); + CypherMainVisitor visitor(context_, &tmp_storage, ¶meters); visitor.visit(parser.tree()); return visitor.query()->Clone(&ast_storage_); } @@ -182,8 +185,9 @@ class CachedAstGenerator : public Base { StrippedQuery stripped(query_string); parameters_ = stripped.literals(); ::frontend::opencypher::Parser parser(stripped.query()); + Parameters parameters; AstStorage tmp_storage; - CypherMainVisitor visitor(context_, &tmp_storage); + CypherMainVisitor visitor(context_, &tmp_storage, ¶meters); visitor.visit(parser.tree()); return visitor.query()->Clone(&ast_storage_); } From 31f15b36514e81d0bf0386268707bfe62284b2da Mon Sep 17 00:00:00 2001 From: DavIvek <david.ivekovic@memgraph.io> Date: Thu, 11 Jan 2024 10:10:06 +0100 Subject: [PATCH 3/7] Fix index hints (#1606) --- src/query/plan/cost_estimator.hpp | 50 +++++++++++++++++++------ src/query/plan/planner.hpp | 29 ++++++++++---- src/query/plan/rewrite/index_lookup.hpp | 24 ++++++++++++ tests/benchmark/query/planner.cpp | 7 +++- tests/e2e/index_hints/index_hints.py | 39 +++++++++++++++++++ tests/manual/interactive_planning.cpp | 4 +- tests/unit/query_cost_estimator.cpp | 4 +- 7 files changed, 132 insertions(+), 25 deletions(-) diff --git a/src/query/plan/cost_estimator.hpp b/src/query/plan/cost_estimator.hpp index 47da0a23b..ede4a89fc 100644 --- a/src/query/plan/cost_estimator.hpp +++ b/src/query/plan/cost_estimator.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -14,6 +14,7 @@ #include "query/frontend/ast/ast.hpp" #include "query/parameters.hpp" #include "query/plan/operator.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "query/typed_value.hpp" #include "utils/algorithm.hpp" #include "utils/math.hpp" @@ -46,6 +47,11 @@ struct CostEstimation { double cardinality; }; +struct PlanCost { + double cost; + bool use_index_hints; +}; + /** * Query plan execution time cost estimator, for comparing and choosing optimal * execution plans. @@ -109,11 +115,13 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { using HierarchicalLogicalOperatorVisitor::PostVisit; using HierarchicalLogicalOperatorVisitor::PreVisit; - CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters) - : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()} {} + CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, + const IndexHints &index_hints) + : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()}, index_hints_(index_hints) {} - CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope) - : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope} {} + CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope, + const IndexHints &index_hints) + : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope}, index_hints_(index_hints) {} bool PostVisit(ScanAll &) override { cardinality_ *= db_accessor_->VerticesCount(); @@ -129,7 +137,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { } cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_); - // ScanAll performs some work for every element that is produced + if (index_hints_.HasLabelIndex(db_accessor_, scan_all_by_label.label_)) { + use_index_hints_ = true; + } + IncrementCost(CostParam::kScanAllByLabel); return true; } @@ -154,6 +165,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + // ScanAll performs some work for every element that is produced IncrementCost(CostParam::MakeScanAllByLabelPropertyValue); return true; @@ -184,6 +199,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + // ScanAll performs some work for every element that is produced IncrementCost(CostParam::MakeScanAllByLabelPropertyRange); return true; @@ -197,6 +216,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_); cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + IncrementCost(CostParam::MakeScanAllByLabelProperty); return true; } @@ -375,6 +398,7 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { auto cost() const { return cost_; } auto cardinality() const { return cardinality_; } + auto use_index_hints() const { return use_index_hints_; } private: // cost estimation that gets accumulated as the visitor @@ -390,17 +414,19 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { const SymbolTable &table_; const Parameters ¶meters; std::vector<Scope> scopes_; + IndexHints index_hints_; + bool use_index_hints_{false}; void IncrementCost(double param) { cost_ += param * cardinality_; } CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch) { - CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters); + CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, index_hints_); (*branch)->Accept(cost_estimator); return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()}; } CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch, Scope scope) { - CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope); + CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope, index_hints_); (*branch)->Accept(cost_estimator); return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()}; } @@ -450,11 +476,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { /** Returns the estimated cost of the given plan. */ template <class TDbAccessor> -double EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters, - LogicalOperator &plan) { - CostEstimator<TDbAccessor> estimator(db, table, parameters); +PlanCost EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters, + LogicalOperator &plan, const IndexHints &index_hints) { + CostEstimator<TDbAccessor> estimator(db, table, parameters, index_hints); plan.Accept(estimator); - return estimator.cost(); + return PlanCost{.cost = estimator.cost(), .use_index_hints = estimator.use_index_hints()}; } } // namespace memgraph::query::plan diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index 10318e6b9..e8ca80e39 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -59,9 +59,9 @@ class PostProcessor final { } template <class TVertexCounts> - double EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts, - const SymbolTable &table) { - return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan); + PlanCost EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts, + const SymbolTable &table) { + return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan, index_hints_); } }; @@ -99,6 +99,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process, auto query_parts = CollectQueryParts(*context->symbol_table, *context->ast_storage, context->query); auto &vertex_counts = *context->db; double total_cost = std::numeric_limits<double>::max(); + bool curr_uses_index_hint = false; using ProcessedPlan = typename TPlanPostProcess::ProcessedPlan; ProcessedPlan plan_with_least_cost; @@ -110,16 +111,28 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process, // Plans are generated lazily and the current plan will disappear, so // it's ok to move it. auto rewritten_plan = post_process->Rewrite(std::move(plan), context); - double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); - if (!curr_plan || cost < total_cost) { + auto plan_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); + // if we have a plan that uses index hints, we reject all the plans that don't use index hinting because we want + // to force the plan using the index hints to be executed + if (curr_uses_index_hint && !plan_cost.use_index_hints) continue; + // if a plan uses index hints, and there is currently not yet a plan that utilizes it, we will take it regardless + if (plan_cost.use_index_hints && !curr_uses_index_hint) { + curr_uses_index_hint = plan_cost.use_index_hints; curr_plan.emplace(std::move(rewritten_plan)); - total_cost = cost; + total_cost = plan_cost.cost; + continue; + } + // if both plans either use or don't use index hints, we want to use the one with the least cost + if (!curr_plan || plan_cost.cost < total_cost) { + curr_uses_index_hint = plan_cost.use_index_hints; + curr_plan.emplace(std::move(rewritten_plan)); + total_cost = plan_cost.cost; } } } else { auto plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(query_parts, context); auto rewritten_plan = post_process->Rewrite(std::move(plan), context); - total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); + total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table).cost; curr_plan.emplace(std::move(rewritten_plan)); } diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 590bad5f4..407c32ba0 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -28,6 +28,7 @@ #include "query/plan/operator.hpp" #include "query/plan/preprocess.hpp" +#include "storage/v2/id_types.hpp" DECLARE_int64(query_vertex_count_to_expand_existing); @@ -59,6 +60,29 @@ struct IndexHints { } } + template <class TDbAccessor> + bool HasLabelIndex(TDbAccessor *db, storage::LabelId label) const { + for (const auto &[index_type, label_hint, _] : label_index_hints_) { + auto label_id = db->NameToLabel(label_hint.name); + if (label_id == label) { + return true; + } + } + return false; + } + + template <class TDbAccessor> + bool HasLabelPropertyIndex(TDbAccessor *db, storage::LabelId label, storage::PropertyId property) const { + for (const auto &[index_type, label_hint, property_hint] : label_property_index_hints_) { + auto label_id = db->NameToLabel(label_hint.name); + auto property_id = db->NameToProperty(property_hint->name); + if (label_id == label && property_id == property) { + return true; + } + } + return false; + } + std::vector<IndexHint> label_index_hints_{}; std::vector<IndexHint> label_property_index_hints_{}; }; diff --git a/tests/benchmark/query/planner.cpp b/tests/benchmark/query/planner.cpp index 52cb44490..b64c4c39f 100644 --- a/tests/benchmark/query/planner.cpp +++ b/tests/benchmark/query/planner.cpp @@ -16,6 +16,7 @@ #include "query/frontend/semantic/symbol_generator.hpp" #include "query/plan/cost_estimator.hpp" #include "query/plan/planner.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "query/plan/vertex_count_cache.hpp" #include "storage/v2/inmemory/storage.hpp" @@ -136,7 +137,8 @@ static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) { auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>( query_parts, &ctx); for (auto plan : plans) { - memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan); + memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan, + memgraph::query::plan::IndexHints()); } } } @@ -166,7 +168,8 @@ static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State & auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>( query_parts, &ctx); for (auto plan : plans) { - memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan); + memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan, + memgraph::query::plan::IndexHints()); } } } diff --git a/tests/e2e/index_hints/index_hints.py b/tests/e2e/index_hints/index_hints.py index 70d3ce6b6..b59d60103 100644 --- a/tests/e2e/index_hints/index_hints.py +++ b/tests/e2e/index_hints/index_hints.py @@ -478,5 +478,44 @@ def test_nonexistent_label_property_index(memgraph): assert False +def test_index_hint_on_expand(memgraph): + # Prefer expanding from the node with the given hint even if estimator estimates higher cost for that plan + + memgraph.execute("FOREACH (i IN range(1, 1000) | CREATE (n:Label1 {id: i}));") + memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i}));") + memgraph.execute("CREATE INDEX ON :Label1;") + memgraph.execute("CREATE INDEX ON :Label2;") + + expected_explain_without_hint = [ + " * Produce {n, m}", + " * Filter (n :Label1)", + " * Expand (m)<-[anon1:rel]-(n)", + " * ScanAllByLabel (m :Label2)", + " * Once", + ] + + expected_explain_with_hint = [ + " * Produce {n, m}", + " * Filter (m :Label2)", + " * Expand (n)-[anon1:rel]->(m)", + " * ScanAllByLabel (n :Label1)", + " * Once", + ] + + explain_without_hint = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;") + ] + + explain_with_hint = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch( + "EXPLAIN USING INDEX :Label1 MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;" + ) + ] + + assert explain_without_hint == expected_explain_without_hint and explain_with_hint == expected_explain_with_hint + + if __name__ == "__main__": sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/manual/interactive_planning.cpp b/tests/manual/interactive_planning.cpp index 76ae4cc1a..f550b9724 100644 --- a/tests/manual/interactive_planning.cpp +++ b/tests/manual/interactive_planning.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -463,7 +463,7 @@ auto MakeLogicalPlans(memgraph::query::CypherQuery *query, memgraph::query::AstS memgraph::query::AstStorage ast_copy; auto unoptimized_plan = plan->Clone(&ast_copy); auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx); - double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table); + double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table).cost; interactive_plans.push_back( InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost}); } diff --git a/tests/unit/query_cost_estimator.cpp b/tests/unit/query_cost_estimator.cpp index ff9525cdc..631d17414 100644 --- a/tests/unit/query_cost_estimator.cpp +++ b/tests/unit/query_cost_estimator.cpp @@ -17,6 +17,7 @@ #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/cost_estimator.hpp" #include "query/plan/operator.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/storage.hpp" @@ -83,7 +84,8 @@ class QueryCostEstimator : public ::testing::Test { } auto Cost() { - CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_); + CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_, + memgraph::query::plan::IndexHints()); last_op_->Accept(cost_estimator); return cost_estimator.cost(); } From 2e4d27c59a548fb0571d8e482d50581b738026a6 Mon Sep 17 00:00:00 2001 From: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io> Date: Thu, 11 Jan 2024 17:20:21 +0300 Subject: [PATCH 4/7] Add List Pattern Comprehension grammar. (#1588) --- src/query/frontend/ast/ast.cpp | 3 + src/query/frontend/ast/ast.hpp | 59 +++++++++++++++++++ src/query/frontend/ast/ast_visitor.hpp | 5 +- .../frontend/ast/cypher_main_visitor.cpp | 27 +++++++++ .../frontend/ast/cypher_main_visitor.hpp | 10 ++++ src/query/frontend/ast/pretty_print.cpp | 5 ++ .../frontend/opencypher/grammar/Cypher.g4 | 2 +- .../frontend/semantic/symbol_generator.hpp | 1 + src/query/frontend/semantic/symbol_table.hpp | 3 + src/query/interpret/eval.hpp | 8 ++- src/query/plan/preprocess.hpp | 1 + src/query/plan/rule_based_planner.cpp | 10 +++- src/utils/typeinfo.hpp | 1 + .../features/list_operations.feature | 12 ++++ .../memgraph_V1/graphs/graph_keanu.cypher | 16 +++++ 15 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher diff --git a/src/query/frontend/ast/ast.cpp b/src/query/frontend/ast/ast.cpp index c5e4c84c4..6a9f05bad 100644 --- a/src/query/frontend/ast/ast.cpp +++ b/src/query/frontend/ast/ast.cpp @@ -293,4 +293,7 @@ constexpr utils::TypeInfo query::ShowDatabasesQuery::kType{utils::TypeId::AST_SH constexpr utils::TypeInfo query::EdgeImportModeQuery::kType{utils::TypeId::AST_EDGE_IMPORT_MODE_QUERY, "EdgeImportModeQuery", &query::Query::kType}; +constexpr utils::TypeInfo query::PatternComprehension::kType{utils::TypeId::AST_PATTERN_COMPREHENSION, + "PatternComprehension", &query::Expression::kType}; + } // namespace memgraph diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index 59860d5b0..b5e058491 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -3520,6 +3520,65 @@ class Exists : public memgraph::query::Expression { friend class AstStorage; }; +class PatternComprehension : public memgraph::query::Expression { + public: + static const utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + PatternComprehension() = default; + + DEFVISITABLE(ExpressionVisitor<TypedValue>); + DEFVISITABLE(ExpressionVisitor<TypedValue *>); + DEFVISITABLE(ExpressionVisitor<void>); + + bool Accept(HierarchicalTreeVisitor &visitor) override { + if (visitor.PreVisit(*this)) { + if (variable_) { + variable_->Accept(visitor); + } + pattern_->Accept(visitor); + if (filter_) { + filter_->Accept(visitor); + } + resultExpr_->Accept(visitor); + } + return visitor.PostVisit(*this); + } + + PatternComprehension *MapTo(const Symbol &symbol) { + symbol_pos_ = symbol.position(); + return this; + } + + // The variable name. + Identifier *variable_{nullptr}; + // The pattern to match. + Pattern *pattern_{nullptr}; + // Optional WHERE clause for filtering. + Where *filter_{nullptr}; + // The projection expression. + Expression *resultExpr_{nullptr}; + + /// Symbol table position of the symbol this Aggregation is mapped to. + int32_t symbol_pos_{-1}; + + PatternComprehension *Clone(AstStorage *storage) const override { + PatternComprehension *object = storage->Create<PatternComprehension>(); + object->pattern_ = pattern_ ? pattern_->Clone(storage) : nullptr; + object->filter_ = filter_ ? filter_->Clone(storage) : nullptr; + object->resultExpr_ = resultExpr_ ? resultExpr_->Clone(storage) : nullptr; + + object->symbol_pos_ = symbol_pos_; + return object; + } + + protected: + PatternComprehension(Identifier *variable, Pattern *pattern) : variable_(variable), pattern_(pattern) {} + + private: + friend class AstStorage; +}; + class CallSubquery : public memgraph::query::Clause { public: static const utils::TypeInfo kType; diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index 793c15a95..ff1586fe4 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -107,6 +107,7 @@ class Exists; class MultiDatabaseQuery; class ShowDatabasesQuery; class EdgeImportModeQuery; +class PatternComprehension; using TreeCompositeVisitor = utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -116,7 +117,7 @@ using TreeCompositeVisitor = utils::CompositeVisitor< MapProjectionLiteral, PropertyLookup, AllPropertiesLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Any, None, CallProcedure, Create, Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where, SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge, Unwind, RegexMatch, LoadCsv, - Foreach, Exists, CallSubquery, CypherQuery>; + Foreach, Exists, CallSubquery, CypherQuery, PatternComprehension>; using TreeLeafVisitor = utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>; @@ -137,7 +138,7 @@ class ExpressionVisitor ListSlicingOperator, IfOperator, UnaryPlusOperator, UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral, MapProjectionLiteral, PropertyLookup, AllPropertiesLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Any, None, - ParameterLookup, Identifier, PrimitiveLiteral, RegexMatch, Exists> {}; + ParameterLookup, Identifier, PrimitiveLiteral, RegexMatch, Exists, PatternComprehension> {}; template <class TResult> class QueryVisitor diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index b62c9e301..2d93fd757 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -1969,6 +1969,18 @@ antlrcpp::Any CypherMainVisitor::visitPatternElement(MemgraphCypher::PatternElem return pattern; } +antlrcpp::Any CypherMainVisitor::visitRelationshipsPattern(MemgraphCypher::RelationshipsPatternContext *ctx) { + auto *pattern = storage_->Create<Pattern>(); + pattern->atoms_.push_back(std::any_cast<NodeAtom *>(ctx->nodePattern()->accept(this))); + for (auto *pattern_element_chain : ctx->patternElementChain()) { + auto element = std::any_cast<std::pair<PatternAtom *, PatternAtom *>>(pattern_element_chain->accept(this)); + pattern->atoms_.push_back(element.first); + pattern->atoms_.push_back(element.second); + } + anonymous_identifiers.push_back(&pattern->identifier_); + return pattern; +} + antlrcpp::Any CypherMainVisitor::visitPatternElementChain(MemgraphCypher::PatternElementChainContext *ctx) { return std::pair<PatternAtom *, PatternAtom *>(std::any_cast<EdgeAtom *>(ctx->relationshipPattern()->accept(this)), std::any_cast<NodeAtom *>(ctx->nodePattern()->accept(this))); @@ -2463,6 +2475,8 @@ antlrcpp::Any CypherMainVisitor::visitAtom(MemgraphCypher::AtomContext *ctx) { return static_cast<Expression *>(storage_->Create<Extract>(ident, list, expr)); } else if (ctx->existsExpression()) { return std::any_cast<Expression *>(ctx->existsExpression()->accept(this)); + } else if (ctx->patternComprehension()) { + return std::any_cast<Expression *>(ctx->patternComprehension()->accept(this)); } // TODO: Implement this. We don't support comprehensions, filtering... at @@ -2523,6 +2537,19 @@ antlrcpp::Any CypherMainVisitor::visitExistsExpression(MemgraphCypher::ExistsExp return static_cast<Expression *>(exists); } +antlrcpp::Any CypherMainVisitor::visitPatternComprehension(MemgraphCypher::PatternComprehensionContext *ctx) { + auto *comprehension = storage_->Create<PatternComprehension>(); + if (ctx->variable()) { + comprehension->variable_ = storage_->Create<Identifier>(std::any_cast<std::string>(ctx->variable()->accept(this))); + } + comprehension->pattern_ = std::any_cast<Pattern *>(ctx->relationshipsPattern()->accept(this)); + if (ctx->where()) { + comprehension->filter_ = std::any_cast<Where *>(ctx->where()->accept(this)); + } + comprehension->resultExpr_ = std::any_cast<Expression *>(ctx->expression()->accept(this)); + return static_cast<Expression *>(comprehension); +} + antlrcpp::Any CypherMainVisitor::visitParenthesizedExpression(MemgraphCypher::ParenthesizedExpressionContext *ctx) { return std::any_cast<Expression *>(ctx->expression()->accept(this)); } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 7f75b0050..1aa887ad7 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -678,6 +678,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitPatternElement(MemgraphCypher::PatternElementContext *ctx) override; + /** + * @return Pattern* + */ + antlrcpp::Any visitRelationshipsPattern(MemgraphCypher::RelationshipsPatternContext *ctx) override; + /** * @return vector<pair<EdgeAtom*, NodeAtom*>> */ @@ -843,6 +848,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitExistsExpression(MemgraphCypher::ExistsExpressionContext *ctx) override; + /** + * @return pattern comprehension (Expression) + */ + antlrcpp::Any visitPatternComprehension(MemgraphCypher::PatternComprehensionContext *ctx) override; + /** * @return Expression* */ diff --git a/src/query/frontend/ast/pretty_print.cpp b/src/query/frontend/ast/pretty_print.cpp index ef45afd7d..61bd23797 100644 --- a/src/query/frontend/ast/pretty_print.cpp +++ b/src/query/frontend/ast/pretty_print.cpp @@ -73,6 +73,7 @@ class ExpressionPrettyPrinter : public ExpressionVisitor<void> { void Visit(ParameterLookup &op) override; void Visit(NamedExpression &op) override; void Visit(RegexMatch &op) override; + void Visit(PatternComprehension &op) override; private: std::ostream *out_; @@ -323,6 +324,10 @@ void ExpressionPrettyPrinter::Visit(NamedExpression &op) { void ExpressionPrettyPrinter::Visit(RegexMatch &op) { PrintOperator(out_, "=~", op.string_expr_, op.regex_); } +void ExpressionPrettyPrinter::Visit(PatternComprehension &op) { + PrintOperator(out_, "Pattern Comprehension", op.variable_, op.pattern_, op.filter_, op.resultExpr_); +} + } // namespace void PrintExpression(Expression *expr, std::ostream *out) { diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index bb435b85d..f4830ccef 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -296,7 +296,7 @@ functionName : symbolicName ( '.' symbolicName )* ; listComprehension : '[' filterExpression ( '|' expression )? ']' ; -patternComprehension : '[' ( variable '=' )? relationshipsPattern ( WHERE expression )? '|' expression ']' ; +patternComprehension : '[' ( variable '=' )? relationshipsPattern ( where )? '|' resultExpr=expression ']' ; propertyLookup : '.' ( propertyKeyName ) ; diff --git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index 207bbddbd..f9e6468f6 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -249,6 +249,7 @@ class PropertyLookupEvaluationModeVisitor : public ExpressionVisitor<void> { void Visit(ParameterLookup &op) override{}; void Visit(NamedExpression &op) override { op.expression_->Accept(*this); }; void Visit(RegexMatch &op) override{}; + void Visit(PatternComprehension &op) override{}; void Visit(PropertyLookup & /*property_lookup*/) override; diff --git a/src/query/frontend/semantic/symbol_table.hpp b/src/query/frontend/semantic/symbol_table.hpp index 0b521356c..cf462c437 100644 --- a/src/query/frontend/semantic/symbol_table.hpp +++ b/src/query/frontend/semantic/symbol_table.hpp @@ -52,6 +52,9 @@ class SymbolTable final { const Symbol &at(const NamedExpression &nexpr) const { return table_.at(nexpr.symbol_pos_); } const Symbol &at(const Aggregation &aggr) const { return table_.at(aggr.symbol_pos_); } const Symbol &at(const Exists &exists) const { return table_.at(exists.symbol_pos_); } + const Symbol &at(const PatternComprehension &pattern_comprehension) const { + return table_.at(pattern_comprehension.symbol_pos_); + } // TODO: Remove these since members are public int32_t max_position() const { return static_cast<int32_t>(table_.size()); } diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index f4f3126cd..017dc9101 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -101,6 +101,7 @@ class ReferenceExpressionEvaluator : public ExpressionVisitor<TypedValue *> { UNSUCCESSFUL_VISIT(ParameterLookup); UNSUCCESSFUL_VISIT(RegexMatch); UNSUCCESSFUL_VISIT(Exists); + UNSUCCESSFUL_VISIT(PatternComprehension); #undef UNSUCCESSFUL_VISIT @@ -170,6 +171,7 @@ class PrimitiveLiteralExpressionEvaluator : public ExpressionVisitor<TypedValue> INVALID_VISIT(Identifier) INVALID_VISIT(RegexMatch) INVALID_VISIT(Exists) + INVALID_VISIT(PatternComprehension) #undef INVALID_VISIT private: @@ -1090,6 +1092,10 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> { } } + TypedValue Visit(PatternComprehension & /*pattern_comprehension*/) override { + throw utils::NotYetImplemented("Expression evaluator can not handle pattern comprehension."); + } + private: template <class TRecordAccessor> std::map<storage::PropertyId, storage::PropertyValue> GetAllProperties(const TRecordAccessor &record_accessor) { diff --git a/src/query/plan/preprocess.hpp b/src/query/plan/preprocess.hpp index 8e1955907..322da545a 100644 --- a/src/query/plan/preprocess.hpp +++ b/src/query/plan/preprocess.hpp @@ -230,6 +230,7 @@ class PatternFilterVisitor : public ExpressionVisitor<void> { void Visit(ParameterLookup &op) override{}; void Visit(NamedExpression &op) override{}; void Visit(RegexMatch &op) override{}; + void Visit(PatternComprehension &op) override{}; std::vector<FilterMatching> getMatchings() { return matchings_; } diff --git a/src/query/plan/rule_based_planner.cpp b/src/query/plan/rule_based_planner.cpp index f3d0c1487..bf5e66158 100644 --- a/src/query/plan/rule_based_planner.cpp +++ b/src/query/plan/rule_based_planner.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -373,12 +373,12 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { return true; } - bool Visit(ParameterLookup &) override { + bool Visit(ParameterLookup & /*unused*/) override { has_aggregation_.emplace_back(false); return true; } - bool PostVisit(RegexMatch ®ex_match) override { + bool PostVisit(RegexMatch & /*unused*/) override { MG_ASSERT(has_aggregation_.size() >= 2U, "Expected 2 has_aggregation_ flags for RegexMatch arguments"); bool has_aggr = has_aggregation_.back(); has_aggregation_.pop_back(); @@ -386,6 +386,10 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { return true; } + bool PostVisit(PatternComprehension & /*unused*/) override { + throw utils::NotYetImplemented("Planner can not handle pattern comprehension."); + } + // Creates NamedExpression with an Identifier for each user declared symbol. // This should be used when body.all_identifiers is true, to generate // expressions for Produce operator. diff --git a/src/utils/typeinfo.hpp b/src/utils/typeinfo.hpp index 682b5ac55..944d35fab 100644 --- a/src/utils/typeinfo.hpp +++ b/src/utils/typeinfo.hpp @@ -190,6 +190,7 @@ enum class TypeId : uint64_t { AST_MULTI_DATABASE_QUERY, AST_SHOW_DATABASES, AST_EDGE_IMPORT_MODE_QUERY, + AST_PATTERN_COMPREHENSION, // Symbol SYMBOL, }; diff --git a/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature b/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature index bfe6b6225..8c5538d6b 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature @@ -279,3 +279,15 @@ Feature: List operators Then the result should be: | o | | (:Node {Status: 'This is the status'}) | + + Scenario: Simple list pattern comprehension + Given graph "graph_keanu" + When executing query: + """ + MATCH (keanu:Person {name: 'Keanu Reeves'}) + RETURN [(keanu)-->(b:Movie) WHERE b.title CONTAINS 'Matrix' | b.released] AS years + """ + Then an error should be raised +# Then the result should be: +# | years | +# | [2021,2003,2003,1999] | diff --git a/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher b/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher new file mode 100644 index 000000000..a7a72aced --- /dev/null +++ b/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher @@ -0,0 +1,16 @@ +CREATE + (keanu:Person {name: 'Keanu Reeves'}), + (johnnyMnemonic:Movie {title: 'Johnny Mnemonic', released: 1995}), + (theMatrixRevolutions:Movie {title: 'The Matrix Revolutions', released: 2003}), + (theMatrixReloaded:Movie {title: 'The Matrix Reloaded', released: 2003}), + (theReplacements:Movie {title: 'The Replacements', released: 2000}), + (theMatrix:Movie {title: 'The Matrix', released: 1999}), + (theDevilsAdvocate:Movie {title: 'The Devils Advocate', released: 1997}), + (theMatrixResurrections:Movie {title: 'The Matrix Resurrections', released: 2021}), + (keanu)-[:ACTED_IN]->(johnnyMnemonic), + (keanu)-[:ACTED_IN]->(theMatrixRevolutions), + (keanu)-[:ACTED_IN]->(theMatrixReloaded), + (keanu)-[:ACTED_IN]->(theReplacements), + (keanu)-[:ACTED_IN]->(theMatrix), + (keanu)-[:ACTED_IN]->(theDevilsAdvocate), + (keanu)-[:ACTED_IN]->(theMatrixResurrections); From c772cab766bf29eb94b98500049875a5d4ab495e Mon Sep 17 00:00:00 2001 From: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io> Date: Fri, 12 Jan 2024 11:32:34 +0300 Subject: [PATCH 5/7] ToString function now returns double values with precision 15 (#1576) The DoubleToString function has been updated to handle higher precision doubles correctly. The unnecessary string length restriction has been removed, allowing the function to convert the full double value without prematurely truncating it. This change ensures that the string representation of doubles is more accurate, especially for very large or very small numbers. Unit tests have been added to verify the correct behavior for a range of double values. --- .../interpret/awesome_memgraph_functions.cpp | 2 +- src/utils/string.hpp | 29 ++++++++++++++++++- tests/unit/query_expression_evaluator.cpp | 7 +++-- tests/unit/utils_string.cpp | 21 +++++++++++++- 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/query/interpret/awesome_memgraph_functions.cpp b/src/query/interpret/awesome_memgraph_functions.cpp index 7e1a7290f..ece0aec78 100644 --- a/src/query/interpret/awesome_memgraph_functions.cpp +++ b/src/query/interpret/awesome_memgraph_functions.cpp @@ -957,7 +957,7 @@ TypedValue ToString(const TypedValue *args, int64_t nargs, const FunctionContext return TypedValue(std::to_string(arg.ValueInt()), ctx.memory); } if (arg.IsDouble()) { - return TypedValue(std::to_string(arg.ValueDouble()), ctx.memory); + return TypedValue(memgraph::utils::DoubleToString(arg.ValueDouble()), ctx.memory); } if (arg.IsDate()) { return TypedValue(arg.ValueDate().ToString(), ctx.memory); diff --git a/src/utils/string.hpp b/src/utils/string.hpp index 833c158c8..8593fc57f 100644 --- a/src/utils/string.hpp +++ b/src/utils/string.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -17,6 +17,7 @@ #include <charconv> #include <cstdint> #include <cstring> +#include <iomanip> #include <iostream> #include <iterator> #include <random> @@ -459,4 +460,30 @@ inline std::string_view Substr(const std::string_view string, size_t pos = 0, si return string.substr(pos, len); } +/** + * Convert a double value to a string representation. + * Precision of converted value is 16. + * Function also removes trailing zeros after the dot. + * + * @param value The double value to be converted. + * + * @return The string representation of the double value. + * + * @throws None + */ +inline std::string DoubleToString(const double value) { + static const int PRECISION = 15; + + std::stringstream ss; + ss << std::setprecision(PRECISION) << std::fixed << value; + auto sv = ss.view(); + + // Because of setprecision and fixed manipulator we are guaranteed to have the dot + sv = sv.substr(0, sv.find_last_not_of('0') + 1); + if (sv.ends_with('.')) { + sv = sv.substr(0, sv.size() - 1); + } + return std::string(sv); +} + } // namespace memgraph::utils diff --git a/tests/unit/query_expression_evaluator.cpp b/tests/unit/query_expression_evaluator.cpp index c070aaa32..b2a7c1f7a 100644 --- a/tests/unit/query_expression_evaluator.cpp +++ b/tests/unit/query_expression_evaluator.cpp @@ -2075,9 +2075,10 @@ TYPED_TEST(FunctionTest, ToStringInteger) { } TYPED_TEST(FunctionTest, ToStringDouble) { - EXPECT_EQ(this->EvaluateFunction("TOSTRING", -42.42).ValueString(), "-42.420000"); - EXPECT_EQ(this->EvaluateFunction("TOSTRING", 0.0).ValueString(), "0.000000"); - EXPECT_EQ(this->EvaluateFunction("TOSTRING", 238910.2313217).ValueString(), "238910.231322"); + EXPECT_EQ(this->EvaluateFunction("TOSTRING", -42.42).ValueString(), "-42.420000000000002"); + EXPECT_EQ(this->EvaluateFunction("TOSTRING", 0.0).ValueString(), "0"); + EXPECT_EQ(this->EvaluateFunction("TOSTRING", 238910.2313217).ValueString(), "238910.231321700004628"); + EXPECT_EQ(this->EvaluateFunction("TOSTRING", 238910.23132171234).ValueString(), "238910.231321712344652"); } TYPED_TEST(FunctionTest, ToStringBool) { diff --git a/tests/unit/utils_string.cpp b/tests/unit/utils_string.cpp index cefe57a6a..fdf64ae9f 100644 --- a/tests/unit/utils_string.cpp +++ b/tests/unit/utils_string.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -171,3 +171,22 @@ TEST(String, Substr) { EXPECT_EQ(Substr(string, string.size() - 1, 1), string.substr(string.size() - 1, 1)); EXPECT_EQ(Substr(string, string.size() - 1, 2), string.substr(string.size() - 1, 2)); } + +TEST(String, DoubleToString) { + EXPECT_EQ(DoubleToString(0), "0"); + EXPECT_EQ(DoubleToString(1), "1"); + EXPECT_EQ(DoubleToString(1234567890123456), "1234567890123456"); + EXPECT_EQ(DoubleToString(static_cast<double>(12345678901234567)), "12345678901234568"); + EXPECT_EQ(DoubleToString(0.5), "0.5"); + EXPECT_EQ(DoubleToString(1.0), "1"); + EXPECT_EQ(DoubleToString(5.8), "5.8"); + EXPECT_EQ(DoubleToString(1.01234000), "1.01234"); + EXPECT_EQ(DoubleToString(1.036837585345), "1.036837585345"); + EXPECT_EQ(DoubleToString(103.6837585345), "103.683758534500001"); + EXPECT_EQ(DoubleToString(1.01234567890123456789), "1.012345678901235"); + EXPECT_EQ(DoubleToString(1234567.01234567891234567), "1234567.012345678871498"); + EXPECT_EQ(DoubleToString(0.00001), "0.00001"); + EXPECT_EQ(DoubleToString(0.00000000000001), "0.00000000000001"); + EXPECT_EQ(DoubleToString(0.000000000000001), "0.000000000000001"); + EXPECT_EQ(DoubleToString(0.0000000000000001), "0"); +} From 0a7a7bc0d154e7a7eaf0f5682b9a3109f017e413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= <marko.budiselic@memgraph.com> Date: Sat, 13 Jan 2024 08:43:33 +0100 Subject: [PATCH 6/7] Add Tantivy ADR (#1633) --- ADRs/001_tantivy.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 ADRs/001_tantivy.md diff --git a/ADRs/001_tantivy.md b/ADRs/001_tantivy.md new file mode 100644 index 000000000..ac887c861 --- /dev/null +++ b/ADRs/001_tantivy.md @@ -0,0 +1,32 @@ +# Tantivy ADR + +**Author** +Marko Budiselic (github.com/gitbuda) + +**Status** +APPROVED + +**Date** +January 5, 2024 + +**Problem** + +For some of Memgraph workloads, text search is a required feature. We don't +want to build a new text search engine because that's not Memgraph's core +value. + +**Criteria** + +- easy integration with our C++ codebase +- ability to operate in-memory and on-disk +- sufficient features (regex, full-text search, fuzzy search, aggregations over + text data) +- production-ready + +**Decision** + +All known C++ libraries are not production-ready. Recent Rust libraries, in +particular [Tantivy](https://github.com/quickwit-oss/tantivy), seem to provide +much more features, it is production ready. The way how we'll integrate Tantivy +into the current Memgraph codebase is via +[cxx](https://github.com/dtolnay/cxx). **We select Tantivy.** From 23dff58d22945960db71d911680cb89415548a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ivan=20Milinovi=C4=87?= <44698587+imilinovic@users.noreply.github.com> Date: Sun, 14 Jan 2024 11:14:46 +0100 Subject: [PATCH 7/7] Improve memory tracking (#1631) --- src/memory/query_memory_control.cpp | 14 +++++++++++--- src/memory/query_memory_control.hpp | 18 ++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/memory/query_memory_control.cpp b/src/memory/query_memory_control.cpp index 91730c900..5e569bd13 100644 --- a/src/memory/query_memory_control.cpp +++ b/src/memory/query_memory_control.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -36,14 +36,22 @@ namespace memgraph::memory { void QueriesMemoryControl::UpdateThreadToTransactionId(const std::thread::id &thread_id, uint64_t transaction_id) { auto accessor = thread_id_to_transaction_id.access(); - accessor.insert({thread_id, transaction_id}); + auto elem = accessor.find(thread_id); + if (elem == accessor.end()) { + accessor.insert({thread_id, {transaction_id, 1}}); + } else { + elem->transaction_id.cnt++; + } } void QueriesMemoryControl::EraseThreadToTransactionId(const std::thread::id &thread_id, uint64_t transaction_id) { auto accessor = thread_id_to_transaction_id.access(); auto elem = accessor.find(thread_id); MG_ASSERT(elem != accessor.end() && elem->transaction_id == transaction_id); - accessor.remove(thread_id); + elem->transaction_id.cnt--; + if (elem->transaction_id.cnt == 0) { + accessor.remove(thread_id); + } } void QueriesMemoryControl::TrackAllocOnCurrentThread(size_t size) { diff --git a/src/memory/query_memory_control.hpp b/src/memory/query_memory_control.hpp index 901917757..3852027a5 100644 --- a/src/memory/query_memory_control.hpp +++ b/src/memory/query_memory_control.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -78,9 +78,20 @@ class QueriesMemoryControl { bool IsThreadTracked(); private: + struct TransactionId { + uint64_t id; + uint64_t cnt; + + bool operator<(const TransactionId &other) const { return id < other.id; } + bool operator==(const TransactionId &other) const { return id == other.id; } + + bool operator<(uint64_t other) const { return id < other; } + bool operator==(uint64_t other) const { return id == other; } + }; + struct ThreadIdToTransactionId { std::thread::id thread_id; - uint64_t transaction_id; + TransactionId transaction_id; bool operator<(const ThreadIdToTransactionId &other) const { return thread_id < other.thread_id; } bool operator==(const ThreadIdToTransactionId &other) const { return thread_id == other.thread_id; } @@ -98,6 +109,9 @@ class QueriesMemoryControl { bool operator<(uint64_t other) const { return transaction_id < other; } bool operator==(uint64_t other) const { return transaction_id == other; } + + bool operator<(TransactionId other) const { return transaction_id < other.id; } + bool operator==(TransactionId other) const { return transaction_id == other.id; } }; utils::SkipList<ThreadIdToTransactionId> thread_id_to_transaction_id;