From 62f6a58c32437b61887bdc8a3166e3a271d1e8cf Mon Sep 17 00:00:00 2001 From: Teon Banek Date: Wed, 24 May 2017 16:13:25 +0200 Subject: [PATCH] Generalize MakeLogicalPlan with regards to planner Summary: This change modifies the planning API to be more general, in order to support picking different planning strategies. The current planning strategy has been named RuleBasedPlanner. Reviewers: florijan, mislav.bradac Reviewed By: florijan Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D411 --- src/query/interpreter.hpp | 3 +- src/query/plan/planner.cpp | 147 ++++++++++------------------------- src/query/plan/planner.hpp | 90 ++++++++++++++++++++- tests/unit/query_planner.cpp | 20 ++--- 4 files changed, 139 insertions(+), 121 deletions(-) diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 4b4371d47..a348485a7 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -39,7 +39,8 @@ void Interpret(const std::string &query, GraphDbAccessor &db_accessor, high_level_tree->Accept(symbol_generator); // high level tree -> logical plan - auto logical_plan = plan::MakeLogicalPlan(visitor.storage(), symbol_table); + auto logical_plan = plan::MakeLogicalPlan( + visitor.storage(), symbol_table); // generate frame based on symbol table max_position Frame frame(symbol_table.max_position()); diff --git a/src/query/plan/planner.cpp b/src/query/plan/planner.cpp index cb673f934..c698cb2d9 100644 --- a/src/query/plan/planner.cpp +++ b/src/query/plan/planner.cpp @@ -582,74 +582,6 @@ LogicalOperator *HandleWriteClause(Clause *clause, LogicalOperator *input_op, return nullptr; } -// Normalized representation of a pattern that needs to be matched. -struct Expansion { - // The first node in the expansion, it can be a single node. - NodeAtom *node1 = nullptr; - // Optional edge which connects the 2 nodes. - EdgeAtom *edge = nullptr; - // Optional node at the other end of an edge. If the expansion contains an - // edge, then this node is required. - NodeAtom *node2 = nullptr; -}; - -// Normalized representation of a single or multiple Match clauses. -// -// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m) -// WHERE c.prop < 42` will produce the following. -// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and `(n) -[e3]- (m)`. -// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the -// first `MATCH` and the set `{e3}` for the second. -// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the -// other obtained from `WHERE` on symbol `c`. -struct Matching { - // All expansions that need to be performed across Match clauses. - std::vector expansions; - // Symbols for edges established in match, used to ensure Cyphermorphism. - // There are multiple sets, because each Match clause determines a single set. - std::vector> edge_symbols; - // Pairs of filter expression and symbols used in them. The list should be - // filled using CollectPatternFilters function. - std::vector>> filters; -}; - -// Represents a read (+ write) part of a query. Each part ends with either: -// * RETURN clause; -// * WITH clause or -// * any of the write clauses. -// -// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the -// generated QueryPart will have `matching` generated for the `MATCH`. -// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses -// in that exact order. The pattern inside the first `MERGE` will be used to -// generate the first `merge_matching` element, and the second `MERGE` pattern -// will produce the second `merge_matching` element. This way, if someone -// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is -// in the same order as their respective `merge_matching` elements. -struct QueryPart { - // All MATCH clauses merged into one Matching. - Matching matching; - // Each OPTIONAL MATCH converted to Matching. - std::vector optional_matching; - // Matching for each MERGE clause. Since Merge is contained in - // remaining_clauses, this vector contains matching in the same order as Merge - // appears. - std::vector merge_matching; - // All the remaining clauses (without Match). - std::vector remaining_clauses; -}; - -// Context which contains variables commonly used during planning. -struct PlanningContext { - SymbolTable &symbol_table; - AstTreeStorage &ast_storage; - // bound_symbols set is used to differentiate cycles in pattern matching, so - // that the operator can be correctly initialized whether to read the symbol - // or write it. E.g. `MATCH (n) -[r]- (n)` would bind (and write) the first - // `n`, but the latter `n` would only read the already written information. - std::unordered_set bound_symbols; -}; - // Converts multiple Patterns to Expansions. Each Pattern can contain an // arbitrarily long chain of nodes and edges. The conversion to an Expansion is // done by splitting a pattern into triplets (node1, edge, node2). The triplets @@ -709,42 +641,6 @@ void AddMatching(const Match &match, const SymbolTable &symbol_table, matching); } -// Converts a Query to multiple QueryParts. In the process new Ast nodes may be -// created, e.g. filter expressions. -std::vector CollectQueryParts(const SymbolTable &symbol_table, - AstTreeStorage &storage) { - auto query = storage.query(); - std::vector query_parts(1); - auto *query_part = &query_parts.back(); - for (auto &clause : query->clauses_) { - if (auto *match = dynamic_cast(clause)) { - if (match->optional_) { - query_part->optional_matching.emplace_back(Matching{}); - AddMatching(*match, symbol_table, storage, - query_part->optional_matching.back()); - } else { - debug_assert(query_part->optional_matching.empty(), - "Match clause cannot follow optional match."); - AddMatching(*match, symbol_table, storage, query_part->matching); - } - } else { - query_part->remaining_clauses.push_back(clause); - if (auto *merge = dynamic_cast(clause)) { - query_part->merge_matching.emplace_back(Matching{}); - AddMatching({merge->pattern_}, nullptr, symbol_table, storage, - query_part->merge_matching.back()); - } else if (dynamic_cast(clause)) { - query_parts.emplace_back(QueryPart{}); - query_part = &query_parts.back(); - } else if (dynamic_cast(clause)) { - // TODO: Support RETURN UNION ... - return query_parts; - } - } - } - return query_parts; -} - LogicalOperator *PlanMatching(const Matching &matching, LogicalOperator *input_op, AstTreeStorage &storage, MatchContext &context) { @@ -854,10 +750,45 @@ auto GenMerge(query::Merge &merge, LogicalOperator *input_op, } // namespace -std::unique_ptr MakeLogicalPlan(AstTreeStorage &storage, - SymbolTable &symbol_table) { - auto query_parts = CollectQueryParts(symbol_table, storage); - PlanningContext context{symbol_table, storage}; +// Converts a Query to multiple QueryParts. In the process new Ast nodes may be +// created, e.g. filter expressions. +std::vector CollectQueryParts(const SymbolTable &symbol_table, + AstTreeStorage &storage) { + auto query = storage.query(); + std::vector query_parts(1); + auto *query_part = &query_parts.back(); + for (auto &clause : query->clauses_) { + if (auto *match = dynamic_cast(clause)) { + if (match->optional_) { + query_part->optional_matching.emplace_back(Matching{}); + AddMatching(*match, symbol_table, storage, + query_part->optional_matching.back()); + } else { + debug_assert(query_part->optional_matching.empty(), + "Match clause cannot follow optional match."); + AddMatching(*match, symbol_table, storage, query_part->matching); + } + } else { + query_part->remaining_clauses.push_back(clause); + if (auto *merge = dynamic_cast(clause)) { + query_part->merge_matching.emplace_back(Matching{}); + AddMatching({merge->pattern_}, nullptr, symbol_table, storage, + query_part->merge_matching.back()); + } else if (dynamic_cast(clause)) { + query_parts.emplace_back(QueryPart{}); + query_part = &query_parts.back(); + } else if (dynamic_cast(clause)) { + // TODO: Support RETURN UNION ... + return query_parts; + } + } + } + return query_parts; +} + +std::unique_ptr RuleBasedPlanner::Plan( + std::vector &query_parts) { + auto &context = context_; LogicalOperator *input_op = nullptr; // Set to true if a query command writes to the database. bool is_write = false; diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index e008e9855..ae93f3c37 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -11,14 +11,100 @@ class SymbolTable; namespace plan { +// Normalized representation of a pattern that needs to be matched. +struct Expansion { + // The first node in the expansion, it can be a single node. + NodeAtom *node1 = nullptr; + // Optional edge which connects the 2 nodes. + EdgeAtom *edge = nullptr; + // Optional node at the other end of an edge. If the expansion contains an + // edge, then this node is required. + NodeAtom *node2 = nullptr; +}; + +// Normalized representation of a single or multiple Match clauses. +// +// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m) +// WHERE c.prop < 42` will produce the following. +// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and `(n) -[e3]- (m)`. +// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the +// first `MATCH` and the set `{e3}` for the second. +// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the +// other obtained from `WHERE` on symbol `c`. +struct Matching { + // All expansions that need to be performed across Match clauses. + std::vector expansions; + // Symbols for edges established in match, used to ensure Cyphermorphism. + // There are multiple sets, because each Match clause determines a single set. + std::vector> edge_symbols; + // Pairs of filter expression and symbols used in them. The list should be + // filled using CollectPatternFilters function. + std::vector>> filters; +}; + +// Represents a read (+ write) part of a query. Each part ends with either: +// * RETURN clause; +// * WITH clause or +// * any of the write clauses. +// +// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the +// generated QueryPart will have `matching` generated for the `MATCH`. +// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses +// in that exact order. The pattern inside the first `MERGE` will be used to +// generate the first `merge_matching` element, and the second `MERGE` pattern +// will produce the second `merge_matching` element. This way, if someone +// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is +// in the same order as their respective `merge_matching` elements. +struct QueryPart { + // All MATCH clauses merged into one Matching. + Matching matching; + // Each OPTIONAL MATCH converted to Matching. + std::vector optional_matching; + // Matching for each MERGE clause. Since Merge is contained in + // remaining_clauses, this vector contains matching in the same order as Merge + // appears. + std::vector merge_matching; + // All the remaining clauses (without Match). + std::vector remaining_clauses; +}; + +// Context which contains variables commonly used during planning. +struct PlanningContext { + SymbolTable &symbol_table; + AstTreeStorage &ast_storage; + // bound_symbols set is used to differentiate cycles in pattern matching, so + // that the operator can be correctly initialized whether to read the symbol + // or write it. E.g. `MATCH (n) -[r]- (n)` would bind (and write) the first + // `n`, but the latter `n` would only read the already written information. + std::unordered_set bound_symbols; +}; + +class RuleBasedPlanner { + public: + RuleBasedPlanner(PlanningContext &context) : context_(context) {} + + using PlanResult = std::unique_ptr; + PlanResult Plan(std::vector &); + + private: + PlanningContext &context_; +}; + +std::vector CollectQueryParts(const SymbolTable &, AstTreeStorage &); + /// @brief Generates the LogicalOperator tree and returns the root operation. /// /// The tree is constructed by traversing the @c Query node from given /// @c AstTreeStorage. The storage may also be used to create new AST nodes for /// use in operators. @c SymbolTable is used to determine inputs and outputs of /// certain operators. -std::unique_ptr MakeLogicalPlan( - AstTreeStorage &storage, query::SymbolTable &symbol_table); +template +typename TPlanner::PlanResult MakeLogicalPlan(AstTreeStorage &storage, + SymbolTable &symbol_table) { + auto query_parts = CollectQueryParts(symbol_table, storage); + PlanningContext context{symbol_table, storage}; + return TPlanner(context).Plan(query_parts); +} } // namespace plan diff --git a/tests/unit/query_planner.cpp b/tests/unit/query_planner.cpp index 76d7cdb9d..2a00d51ce 100644 --- a/tests/unit/query_planner.cpp +++ b/tests/unit/query_planner.cpp @@ -222,7 +222,7 @@ auto CheckPlan(LogicalOperator &plan, const SymbolTable &symbol_table, template auto CheckPlan(AstTreeStorage &storage, TChecker... checker) { auto symbol_table = MakeSymbolTable(*storage.query()); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, checker...); } @@ -240,7 +240,7 @@ TEST(TestLogicalPlanner, CreateNodeReturn) { auto query = QUERY(CREATE(PATTERN(NODE("n"))), RETURN(ident_n, AS("n"))); auto symbol_table = MakeSymbolTable(*query); auto acc = ExpectAccumulate({symbol_table.at(*ident_n)}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectCreateNode(), acc, ExpectProduce()); } @@ -516,7 +516,7 @@ TEST(TestLogicalPlanner, CreateWithSum) { auto symbol_table = MakeSymbolTable(*query); auto acc = ExpectAccumulate({symbol_table.at(*n_prop->expression_)}); auto aggr = ExpectAggregate({sum}, {}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); // We expect both the accumulation and aggregation because the part before // WITH updates the database. CheckPlan(*plan, symbol_table, ExpectCreateNode(), acc, aggr, @@ -553,7 +553,7 @@ TEST(TestLogicalPlanner, CreateWithSkipReturnLimit) { RETURN(IDENT("m"), AS("m"), LIMIT(LITERAL(1)))); auto symbol_table = MakeSymbolTable(*query); auto acc = ExpectAccumulate({symbol_table.at(*ident_n)}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); // Since we have a write query, we need to have Accumulate. This is a bit // different than Neo4j 3.0, which optimizes WITH followed by RETURN as a // single RETURN clause and then moves Skip and Limit before Accumulate. This @@ -576,7 +576,7 @@ TEST(TestLogicalPlanner, CreateReturnSumSkipLimit) { auto symbol_table = MakeSymbolTable(*query); auto acc = ExpectAccumulate({symbol_table.at(*n_prop->expression_)}); auto aggr = ExpectAggregate({sum}, {}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectCreateNode(), acc, aggr, ExpectProduce(), ExpectSkip(), ExpectLimit()); } @@ -615,7 +615,7 @@ TEST(TestLogicalPlanner, CreateWithOrderByWhere) { symbol_table.at(*r_prop->expression_), // `r` in ORDER BY symbol_table.at(*m_prop->expression_), // `m` in WHERE }); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectCreateNode(), ExpectCreateExpand(), acc, ExpectProduce(), ExpectFilter(), ExpectOrderBy()); } @@ -653,7 +653,7 @@ TEST(TestLogicalPlanner, MatchMerge) { auto symbol_table = MakeSymbolTable(*query); // We expect Accumulate after Merge, because it is considered as a write. auto acc = ExpectAccumulate({symbol_table.at(*ident_n)}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectMerge(on_match, on_create), acc, ExpectProduce()); for (auto &op : on_match) delete op; @@ -710,7 +710,7 @@ TEST(TestLogicalPlanner, CreateWithDistinctSumWhereReturn) { auto symbol_table = MakeSymbolTable(*query); auto acc = ExpectAccumulate({symbol_table.at(*node_n->identifier_)}); auto aggr = ExpectAggregate({sum}, {}); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectCreateNode(), acc, aggr, ExpectProduce(), ExpectFilter(), ExpectDistinct(), ExpectProduce()); } @@ -792,7 +792,7 @@ TEST(TestLogicalPlanner, MatchReturnAsterisk) { ret->body_.all_identifiers = true; auto query = QUERY(MATCH(PATTERN(NODE("n"), EDGE("e"), NODE("m"))), ret); auto symbol_table = MakeSymbolTable(*query); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectExpand(), ExpectProduce()); std::vector output_names; @@ -814,7 +814,7 @@ TEST(TestLogicalPlanner, MatchReturnAsteriskSum) { ret->body_.all_identifiers = true; auto query = QUERY(MATCH(PATTERN(NODE("n"))), ret); auto symbol_table = MakeSymbolTable(*query); - auto plan = MakeLogicalPlan(storage, symbol_table); + auto plan = MakeLogicalPlan(storage, symbol_table); auto *produce = dynamic_cast(plan.get()); ASSERT_TRUE(produce); const auto &named_expressions = produce->named_expressions();