From b33b6715055def7f2923f893420c96f07364ac1c Mon Sep 17 00:00:00 2001 From: Teon Banek Date: Tue, 4 Apr 2017 11:18:50 +0200 Subject: [PATCH] Plan multiple MATCH clauses and multiple patterns Reviewers: mislav.bradac, florijan Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D218 --- src/query/frontend/logical/operator.hpp | 3 +- src/query/frontend/logical/planner.cpp | 51 ++++--- tests/unit/query_planner.cpp | 185 +++++++++++++++++------- 3 files changed, 165 insertions(+), 74 deletions(-) diff --git a/src/query/frontend/logical/operator.hpp b/src/query/frontend/logical/operator.hpp index 3152c1e38..d33c5f923 100644 --- a/src/query/frontend/logical/operator.hpp +++ b/src/query/frontend/logical/operator.hpp @@ -60,7 +60,8 @@ using LogicalOperatorVisitor = ::utils::Visitor, ExpandUniquenessFilter>; + ExpandUniquenessFilter, + ExpandUniquenessFilter>; /** @brief Base class for logical operators. * diff --git a/src/query/frontend/logical/planner.cpp b/src/query/frontend/logical/planner.cpp index 543bec97e..9d08892a3 100644 --- a/src/query/frontend/logical/planner.cpp +++ b/src/query/frontend/logical/planner.cpp @@ -100,20 +100,19 @@ auto GenCreate(Create &create, LogicalOperator *input_op, return last_op; } -auto GenMatch(Match &match, LogicalOperator *input_op, - const query::SymbolTable &symbol_table, - std::unordered_set &bound_symbols) { +auto GenMatchForPattern(Pattern &pattern, LogicalOperator *input_op, + const query::SymbolTable &symbol_table, + std::unordered_set &bound_symbols, + std::vector &edge_symbols) { auto base = [&](NodeAtom *node) { - if (input_op) { - // TODO: Support clauses before match. - throw NotYetImplemented(); + LogicalOperator *last_op = input_op; + // If the first atom binds a symbol, we generate a ScanAll which writes it. + // Otherwise, someone else generates it (e.g. a previous ScanAll). + if (BindSymbol(bound_symbols, symbol_table.at(*node->identifier_))) { + last_op = new ScanAll(node, std::shared_ptr(last_op)); } - // First atom always binds a symbol, and we don't care if it already - // existed, - // because we create a ScanAll which writes that symbol. This may need to - // change when we support clauses before match. - BindSymbol(bound_symbols, symbol_table.at(*node->identifier_)); - LogicalOperator *last_op = new ScanAll(node); + // Even though we may skip generating ScanAll, we still want to add a filter + // in case this atom adds more labels/properties for filtering. if (!node->labels_.empty() || !node->properties_.empty()) { last_op = new NodeFilter(std::shared_ptr(last_op), symbol_table.at(*node->identifier_), node); @@ -132,11 +131,22 @@ auto GenMatch(Match &match, LogicalOperator *input_op, if (!BindSymbol(bound_symbols, symbol_table.at(*node->identifier_))) { node_cycle = true; } - if (!BindSymbol(bound_symbols, symbol_table.at(*edge->identifier_))) { + const auto &edge_symbol = symbol_table.at(*edge->identifier_); + if (!BindSymbol(bound_symbols, edge_symbol)) { edge_cycle = true; } last_op = new Expand(node, edge, std::shared_ptr(last_op), input_symbol, node_cycle, edge_cycle); + if (!edge_cycle) { + // Ensure Cyphermorphism (different edge symbols always map to different + // edges). + if (!edge_symbols.empty()) { + last_op = new ExpandUniquenessFilter( + std::shared_ptr(last_op), edge_symbol, + edge_symbols); + } + edge_symbols.emplace_back(edge_symbol); + } if (!edge->edge_types_.empty() || !edge->properties_.empty()) { last_op = new EdgeFilter(std::shared_ptr(last_op), symbol_table.at(*edge->identifier_), edge); @@ -147,13 +157,18 @@ auto GenMatch(Match &match, LogicalOperator *input_op, } return last_op; }; + return ReducePattern(pattern, base, collect); +} - if (match.patterns_.size() != 1) { - // TODO: Support matching multiple patterns. - throw NotYetImplemented(); +auto GenMatch(Match &match, LogicalOperator *input_op, + const query::SymbolTable &symbol_table, + std::unordered_set &bound_symbols) { + auto last_op = input_op; + std::vector edge_symbols; + for (auto pattern : match.patterns_) { + last_op = GenMatchForPattern(*pattern, last_op, symbol_table, bound_symbols, + edge_symbols); } - auto last_op = - ReducePattern(*match.patterns_[0], base, collect); if (match.where_) { last_op = new Filter(std::shared_ptr(last_op), match.where_->expression_); diff --git a/tests/unit/query_planner.cpp b/tests/unit/query_planner.cpp index d2d4218d3..fdb118e2d 100644 --- a/tests/unit/query_planner.cpp +++ b/tests/unit/query_planner.cpp @@ -3,6 +3,7 @@ #include "gtest/gtest.h" +#include "dbms/dbms.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/logical/operator.hpp" #include "query/frontend/logical/planner.hpp" @@ -24,7 +25,7 @@ class PlanChecker : public LogicalOperatorVisitor { using LogicalOperatorVisitor::Visit; using LogicalOperatorVisitor::PostVisit; - PlanChecker(std::list types) : types_(types) {} + PlanChecker(const std::list &types) : types_(types) {} void Visit(CreateNode &op) override { AssertType(op); } void Visit(CreateExpand &op) override { AssertType(op); } @@ -40,6 +41,14 @@ class PlanChecker : public LogicalOperatorVisitor { void Visit(SetLabels &op) override { AssertType(op); } void Visit(RemoveProperty &op) override { AssertType(op); } void Visit(RemoveLabels &op) override { AssertType(op); } + void Visit(ExpandUniquenessFilter &op) override { + AssertType(op); + } + void Visit(ExpandUniquenessFilter &op) override { + AssertType(op); + } + + std::list types_; private: void AssertType(const LogicalOperator &op) { @@ -47,140 +56,206 @@ class PlanChecker : public LogicalOperatorVisitor { ASSERT_EQ(types_.back(), typeid(op).hash_code()); types_.pop_back(); } - std::list types_; }; -auto CheckPlan(query::Query &query, std::list expected_types) { +template +auto CheckPlan(query::Query &query) { SymbolTable symbol_table; SymbolGenerator symbol_generator(symbol_table); query.Accept(symbol_generator); auto plan = MakeLogicalPlan(query, symbol_table); - PlanChecker plan_checker(expected_types); + std::list type_hashes{typeid(TOps).hash_code()...}; + PlanChecker plan_checker(type_hashes); plan->Accept(plan_checker); + EXPECT_TRUE(plan_checker.types_.empty()); } TEST(TestLogicalPlanner, MatchNodeReturn) { // Test MATCH (n) RETURN n AS n AstTreeStorage storage; auto query = QUERY(MATCH(PATTERN(NODE("n"))), RETURN(IDENT("n"), AS("n"))); - CheckPlan(*query, {typeid(ScanAll).hash_code(), typeid(Produce).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, CreateNodeReturn) { // Test CREATE (n) RETURN n AS n AstTreeStorage storage; auto query = QUERY(CREATE(PATTERN(NODE("n"))), RETURN(IDENT("n"), AS("n"))); - CheckPlan(*query, - {typeid(CreateNode).hash_code(), typeid(Produce).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, CreateExpand) { // Test CREATE (n) -[r :rel1]-> (m) AstTreeStorage storage; - std::string relationship("relationship"); + Dbms dbms; + auto dba = dbms.active(); + auto relationship = dba->edge_type("relationship"); auto query = QUERY(CREATE(PATTERN( - NODE("n"), EDGE("r", &relationship, Direction::RIGHT), NODE("m")))); - CheckPlan(*query, - {typeid(CreateNode).hash_code(), typeid(CreateExpand).hash_code()}); + NODE("n"), EDGE("r", relationship, Direction::RIGHT), NODE("m")))); + CheckPlan(*query); } TEST(TestLogicalPlanner, CreateMultipleNode) { // Test CREATE (n), (m) AstTreeStorage storage; auto query = QUERY(CREATE(PATTERN(NODE("n")), PATTERN(NODE("m")))); - CheckPlan(*query, - {typeid(CreateNode).hash_code(), typeid(CreateNode).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, CreateNodeExpandNode) { // Test CREATE (n) -[r :rel]-> (m), (l) AstTreeStorage storage; - std::string relationship("rel"); + Dbms dbms; + auto dba = dbms.active(); + auto relationship = dba->edge_type("rel"); auto query = QUERY(CREATE( - PATTERN(NODE("n"), EDGE("r", &relationship, Direction::RIGHT), NODE("m")), + PATTERN(NODE("n"), EDGE("r", relationship, Direction::RIGHT), NODE("m")), PATTERN(NODE("l")))); - CheckPlan(*query, - {typeid(CreateNode).hash_code(), typeid(CreateExpand).hash_code(), - typeid(CreateNode).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchCreateExpand) { // Test MATCH (n) CREATE (n) -[r :rel1]-> (m) AstTreeStorage storage; - std::string relationship("relationship"); - auto query = QUERY( - MATCH(PATTERN(NODE("n"))), - CREATE(PATTERN(NODE("n"), EDGE("r", &relationship, Direction::RIGHT), - NODE("m")))); - CheckPlan(*query, - {typeid(ScanAll).hash_code(), typeid(CreateExpand).hash_code()}); + Dbms dbms; + auto dba = dbms.active(); + auto relationship = dba->edge_type("relationship"); + auto query = + QUERY(MATCH(PATTERN(NODE("n"))), + CREATE(PATTERN(NODE("n"), EDGE("r", relationship, Direction::RIGHT), + NODE("m")))); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchLabeledNodes) { // Test MATCH (n :label) RETURN n AS n AstTreeStorage storage; - std::string label("label"); + Dbms dbms; + auto dba = dbms.active(); + auto label = dba->label("label"); auto query = - QUERY(MATCH(PATTERN(NODE("n", &label))), RETURN(IDENT("n"), AS("n"))); - CheckPlan(*query, - {typeid(ScanAll).hash_code(), typeid(NodeFilter).hash_code(), - typeid(Produce).hash_code()}); + QUERY(MATCH(PATTERN(NODE("n", label))), RETURN(IDENT("n"), AS("n"))); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchPathReturn) { // Test MATCH (n) -[r :relationship]- (m) RETURN n AS n AstTreeStorage storage; - std::string relationship("relationship"); + Dbms dbms; + auto dba = dbms.active(); + auto relationship = dba->edge_type("relationship"); auto query = - QUERY(MATCH(PATTERN(NODE("n"), EDGE("r", &relationship), NODE("m"))), + QUERY(MATCH(PATTERN(NODE("n"), EDGE("r", relationship), NODE("m"))), RETURN(IDENT("n"), AS("n"))); - CheckPlan(*query, - {typeid(ScanAll).hash_code(), typeid(Expand).hash_code(), - typeid(EdgeFilter).hash_code(), typeid(Produce).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchWhereReturn) { // Test MATCH (n) WHERE n.property < 42 RETURN n AS n AstTreeStorage storage; - std::string property("property"); + Dbms dbms; + auto dba = dbms.active(); + auto property = dba->property("property"); auto query = QUERY(MATCH(PATTERN(NODE("n"))), - WHERE(LESS(PROPERTY_LOOKUP("n", &property), LITERAL(42))), + WHERE(LESS(PROPERTY_LOOKUP("n", property), LITERAL(42))), RETURN(IDENT("n"), AS("n"))); - CheckPlan(*query, {typeid(ScanAll).hash_code(), typeid(Filter).hash_code(), - typeid(Produce).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchDelete) { // Test MATCH (n) DELETE n AstTreeStorage storage; auto query = QUERY(MATCH(PATTERN(NODE("n"))), DELETE(IDENT("n"))); - CheckPlan(*query, {typeid(ScanAll).hash_code(), typeid(Delete).hash_code()}); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchNodeSet) { // Test MATCH (n) SET n.prop = 42, n = n, n :label AstTreeStorage storage; - std::string prop("prop"); - std::string label("label"); + Dbms dbms; + auto dba = dbms.active(); + auto prop = dba->property("prop"); + auto label = dba->label("label"); auto query = QUERY(MATCH(PATTERN(NODE("n"))), - SET(PROPERTY_LOOKUP("n", &prop), LITERAL(42)), - SET("n", IDENT("n")), SET("n", {&label})); - CheckPlan(*query, - {typeid(ScanAll).hash_code(), typeid(SetProperty).hash_code(), - typeid(SetProperties).hash_code(), typeid(SetLabels).hash_code()}); + SET(PROPERTY_LOOKUP("n", prop), LITERAL(42)), + SET("n", IDENT("n")), SET("n", {label})); + CheckPlan(*query); } TEST(TestLogicalPlanner, MatchRemove) { // Test MATCH (n) REMOVE n.prop REMOVE n :label AstTreeStorage storage; - std::string prop("prop"); - std::string label("label"); - auto query = - QUERY(MATCH(PATTERN(NODE("n"))), REMOVE(PROPERTY_LOOKUP("n", &prop)), - REMOVE("n", {&label})); - CheckPlan(*query, - {typeid(ScanAll).hash_code(), typeid(RemoveProperty).hash_code(), - typeid(RemoveLabels).hash_code()}); + Dbms dbms; + auto dba = dbms.active(); + auto prop = dba->property("prop"); + auto label = dba->label("label"); + auto query = QUERY(MATCH(PATTERN(NODE("n"))), + REMOVE(PROPERTY_LOOKUP("n", prop)), REMOVE("n", {label})); + CheckPlan(*query); } +TEST(TestLogicalPlanner, MatchMultiPattern) { + // Test MATCH (n) -[r]- (m), (j) -[e]- (i) + AstTreeStorage storage; + auto query = QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m")), + PATTERN(NODE("j"), EDGE("e"), NODE("i")))); + // We expect the expansions after the first to have a uniqueness filter in a + // single MATCH clause. + CheckPlan>(*query); } + +TEST(TestLogicalPlanner, MatchMultiPatternSameStart) { + // Test MATCH (n), (n) -[e]- (m) + AstTreeStorage storage; + auto query = QUERY( + MATCH(PATTERN(NODE("n")), PATTERN(NODE("n"), EDGE("e"), NODE("m")))); + // We expect the second pattern to generate only an Expand, since another + // ScanAll would be redundant. + CheckPlan(*query); +} + +TEST(TestLogicalPlanner, MatchMultiPatternSameExpandStart) { + // Test MATCH (n) -[r]- (m), (m) -[e]- (l) + AstTreeStorage storage; + auto query = QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m")), + PATTERN(NODE("m"), EDGE("e"), NODE("l")))); + // We expect the second pattern to generate only an Expand. Another + // ScanAll would be redundant, as it would generate the nodes obtained from + // expansion. Additionally, a uniqueness filter is expected. + CheckPlan>( + *query); +} + +TEST(TestLogicalPlanner, MultiMatch) { + // Test MATCH (n) -[r]- (m) MATCH (j) -[e]- (i) -[f]- (h) + AstTreeStorage storage; + auto query = QUERY( + MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m"))), + MATCH(PATTERN(NODE("j"), EDGE("e"), NODE("i"), EDGE("f"), NODE("h")))); + // Multiple MATCH clauses form a Cartesian product, so the uniqueness should + // not cross MATCH boundaries. + CheckPlan>(*query); +} + +TEST(TestLogicalPlanner, MultiMatchSameStart) { + // Test MATCH (n) MATCH (n) -[r]- (m) + AstTreeStorage storage; + auto query = QUERY(MATCH(PATTERN(NODE("n"))), + MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m")))); + // Similar to MatchMultiPatternSameStart, we expect only Expand from second + // MATCH clause. + CheckPlan(*query); +} + +TEST(TestLogicalPlanner, MatchEdgeCycle) { + // Test MATCH (n) -[r]- (m) -[r]- (j) + AstTreeStorage storage; + auto query = QUERY( + MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m"), EDGE("r"), NODE("j")))); + // There is no ExpandUniquenessFilter for referencing the same edge. + CheckPlan(*query); +} + +} // namespace