diff --git a/CHANGELOG.md b/CHANGELOG.md index 81b29bd0f..8099e15e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Major Features and Improvements +* Support for variable length path `MATCH`. * Support for `all` function in openCypher. * User specified transaction execution timeout. * Support for query parameters (except for parameters in place of property maps). diff --git a/docs/user_technical/open-cypher.md b/docs/user_technical/open-cypher.md index c39cb5e5f..705064b7a 100644 --- a/docs/user_technical/open-cypher.md +++ b/docs/user_technical/open-cypher.md @@ -47,9 +47,25 @@ While their friends can be found with the following. MATCH (n :Person {age: 42}) -[:FriendOf]- (friend) RETURN friend. +There are cases when a user needs to find data which is connected by +traversing a path of connections, but the user doesn't know how many +connections need to be traversed. openCypher allows for designating patterns +with *variable path lengths*. Matching such a path is achieved by using the +`*` (*asterisk*) symbol inside the pattern for a connection. For example, +traversing from `node1` to `node2` by following any number of connections in a +single direction can be achieved with: + + MATCH (node1) -[*]-> (node2) + +If paths are very long, finding them could take a long time. To prevent that, +a user can provide the minimum and maximum length of the path. For example, +paths of length between 2 and 4 can be obtained with a query like: + + MATCH (node1) -[*2..4]-> (node2) + More details on how `MATCH` works can be found [here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/match/). -Note that *variable length paths* and *named paths* are not yet supported. +Note that *named paths* are not yet supported. The `MATCH` clause can be modified by prepending the `OPTIONAL` keyword. `OPTIONAL MATCH` clause behaves the same as a regular `MATCH`, but when it diff --git a/docs/user_technical/upcoming-features.md b/docs/user_technical/upcoming-features.md index 1d1584eb0..bfd90a57a 100644 --- a/docs/user_technical/upcoming-features.md +++ b/docs/user_technical/upcoming-features.md @@ -48,26 +48,7 @@ example: MATCH path = (node1) -[connection]-> (node2) -Path naming is especially useful with another upcoming feature, *variable -length paths*. - -#### Variable Length Paths - -There are cases when a user needs to find data which is connected by -traversing a path of connections, but the user doesn't know how many -connections need to be traversed. openCypher allows for designating patterns -with variable path lengths. Matching such a path is achieved by using the `*` -(*asterisk*) symbol inside the pattern for a connection. For example, -traversing from `node1` to `node2` by following any number of connections in a -single direction can be achieved with: - - MATCH (node1) -[*]-> (node2) - -If paths are very long, finding them could take a long time. To prevent that, -a user can provide the minimum and maximum length of the path. For example, -paths of length between 2 and 4 can be obtained with a query like: - - MATCH (node1) -[*2..4]-> (node2) +Path naming is especially useful with the *variable length paths* feature. #### Functions diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 6704a05fc..f0cbffe3f 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -592,10 +592,15 @@ class ExpandVariableCursor : public Cursor { if (PullInput(frame, symbol_table)) { // if lower bound is zero we also yield empty paths if (self_.lower_bound_ && self_.lower_bound_.value() == 0) { - // take into account existing_edge when yielding empty paths auto &edges_on_frame = frame[self_.edge_symbol_].Value>(); - if (!self_.existing_edge_ || edges_on_frame.empty()) return true; + auto &start_vertex = + frame[self_.input_symbol_].Value(); + // take into account existing_edge when yielding empty paths + if ((!self_.existing_edge_ || edges_on_frame.empty()) && + // Place the start vertex on the frame. + self_.HandleExistingNode(start_vertex, frame)) + return true; } // if lower bound is not zero, we just continue, the next // loop iteration will attempt to expand and we're good diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index a7b3942c2..08318917d 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -12,16 +12,18 @@ class SymbolTable; namespace plan { -/// @brief Normalized representation of a pattern that needs to be matched. +/// Normalized representation of a pattern that needs to be matched. struct Expansion { - /// @brief The first node in the expansion, it can be a single node. + /// The first node in the expansion, it can be a single node. NodeAtom *node1 = nullptr; - /// @brief Optional edge which connects the 2 nodes. + /// Optional edge which connects the 2 nodes. EdgeAtom *edge = nullptr; - /// @brief Direction of the edge, it may be flipped compared to original + /// Direction of the edge, it may be flipped compared to original /// @c EdgeAtom during plan generation. EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH; - /// @brief Optional node at the other end of an edge. If the expansion + /// Set of symbols found inside the range expressions of a variable path edge. + std::unordered_set symbols_in_range; + /// Optional node at the other end of an edge. If the expansion /// contains an edge, then this node is required. NodeAtom *node2 = nullptr; }; diff --git a/src/query/plan/rule_based_planner.cpp b/src/query/plan/rule_based_planner.cpp index fed8abf2a..ede9f459e 100644 --- a/src/query/plan/rule_based_planner.cpp +++ b/src/query/plan/rule_based_planner.cpp @@ -622,13 +622,20 @@ LogicalOperator *HandleWriteClause(Clause *clause, LogicalOperator *input_op, // This representation makes it easier to permute from which node or edge we // want to start expanding. std::vector NormalizePatterns( - const std::vector &patterns) { + const SymbolTable &symbol_table, const std::vector &patterns) { std::vector expansions; auto ignore_node = [&](auto *node) {}; auto collect_expansion = [&](auto *prev_node, auto *edge, auto *current_node) { - expansions.emplace_back( - Expansion{prev_node, edge, edge->direction_, current_node}); + UsedSymbolsCollector collector(symbol_table); + if (edge->lower_bound_) { + edge->lower_bound_->Accept(collector); + } + if (edge->upper_bound_) { + edge->upper_bound_->Accept(collector); + } + expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, + collector.symbols_, current_node}); }; for (const auto &pattern : patterns) { if (pattern->atoms_.size() == 1U) { @@ -650,7 +657,7 @@ std::vector NormalizePatterns( void AddMatching(const std::vector &patterns, Where *where, const SymbolTable &symbol_table, AstTreeStorage &storage, Matching &matching) { - auto expansions = NormalizePatterns(patterns); + auto expansions = NormalizePatterns(symbol_table, patterns); std::unordered_set edge_symbols; for (const auto &expansion : expansions) { if (expansion.edge) { @@ -813,10 +820,6 @@ LogicalOperator *PlanMatching(const Matching &matching, } // We have an edge, so generate Expand. if (expansion.edge) { - if (expansion.edge->has_range_) { - throw utils::NotYetImplemented( - "planning variable length relationships"); - } // If the expand symbols were already bound, then we need to indicate // that they exist. The Expand will then check whether the pattern holds // instead of writing the expansion to symbols. @@ -834,10 +837,36 @@ LogicalOperator *PlanMatching(const Matching &matching, } else { context.new_symbols.emplace_back(edge_symbol); } - last_op = - new Expand(node_symbol, edge_symbol, expansion.direction, - std::shared_ptr(last_op), node1_symbol, - existing_node, existing_edge, context.graph_view); + if (expansion.edge->has_range_) { + std::experimental::optional lower_bound; + std::experimental::optional upper_bound; + auto get_bound = [](auto *bound_expr) { + auto *literal = dynamic_cast(bound_expr); + if (!literal || literal->value_.type() != TypedValue::Type::Int || + literal->value_.Value() < 0) { + throw SemanticException( + "Length of variable path must be a non-negative integer " + "literal."); + } + return literal->value_.Value(); + }; + // Default lower bound to 1 if none provided. + lower_bound = expansion.edge->lower_bound_ + ? get_bound(expansion.edge->lower_bound_) + : 1U; + if (expansion.edge->upper_bound_) { + upper_bound = get_bound(expansion.edge->upper_bound_); + } + last_op = new ExpandVariable( + node_symbol, edge_symbol, expansion.direction, lower_bound, + upper_bound, std::shared_ptr(last_op), + node1_symbol, existing_node, existing_edge, context.graph_view); + } else { + last_op = + new Expand(node_symbol, edge_symbol, expansion.direction, + std::shared_ptr(last_op), node1_symbol, + existing_node, existing_edge, context.graph_view); + } if (!existing_edge) { // Ensure Cyphermorphism (different edge symbols always map to different // edges). @@ -1023,6 +1052,10 @@ void Filters::CollectPatternFilters(Pattern &pattern, auto add_expand_filter = [&](NodeAtom *prev_node, EdgeAtom *edge, NodeAtom *node) { const auto &edge_symbol = symbol_table.at(*edge->identifier_); + if (edge->has_range_ && + (!edge->edge_types_.empty() || !edge->properties_.empty())) { + throw utils::NotYetImplemented("filtering variable length paths"); + } if (!edge->edge_types_.empty()) { all_filters_.emplace_back( storage.Create(edge->identifier_, edge->edge_types_), diff --git a/src/query/plan/variable_start_planner.cpp b/src/query/plan/variable_start_planner.cpp index 3ab7ebbd8..04b7540b9 100644 --- a/src/query/plan/variable_start_planner.cpp +++ b/src/query/plan/variable_start_planner.cpp @@ -44,19 +44,44 @@ class NodeSymbolEqual { }; // Finds the next Expansion which has one of its nodes among the already -// expanded nodes. The function may modify expansions, by flipping their nodes +// expanded symbols. The function may modify expansions, by flipping their nodes // and direction. This is done, so that the return iterator always points to the // expansion whose node1 is the already expanded one, while node2 may not be. -auto NextExpansion(const std::unordered_set &expanded_nodes, +auto NextExpansion(const SymbolTable &symbol_table, + const std::unordered_set &expanded_symbols, + const std::unordered_set &all_expansion_symbols, std::vector &expansions) { + // Returns true if the expansion is a regular expand or if it is a variable + // path expand, but with bound symbols used inside the range expression. + auto can_expand = [&](auto &expansion) { + for (const auto &range_symbol : expansion.symbols_in_range) { + // If the symbols used in range need to be bound during this whole + // expansion, we must check whether they have already been expanded and + // therefore bound. If the symbols are not found in the whole expansion, + // then the semantic analysis should guarantee that the symbols have been + // bound long before we expand. + if (all_expansion_symbols.find(range_symbol) != + all_expansion_symbols.end() && + expanded_symbols.find(range_symbol) == expanded_symbols.end()) { + return false; + } + } + return true; + }; auto expansion_it = expansions.begin(); for (; expansion_it != expansions.end(); ++expansion_it) { - if (expanded_nodes.find(expansion_it->node1) != expanded_nodes.end()) { + if (!can_expand(*expansion_it)) { + continue; + } + const auto &node1_symbol = + symbol_table.at(*expansion_it->node1->identifier_); + if (expanded_symbols.find(node1_symbol) != expanded_symbols.end()) { return expansion_it; } auto *node2 = expansion_it->node2; - if (node2 && expanded_nodes.find(node2) != expanded_nodes.end()) { + if (node2 && + expanded_symbols.find(symbol_table.at(*node2->identifier_)) != + expanded_symbols.end()) { // We need to flip the expansion, since we want to expand from node2. std::swap(expansion_it->node2, expansion_it->node1); if (expansion_it->direction != EdgeAtom::Direction::BOTH) { @@ -79,19 +104,30 @@ std::vector ExpansionsFrom( const NodeAtom *start_node, std::vector original_expansions, const SymbolTable &symbol_table) { std::vector expansions; - std::unordered_set - expanded_nodes({start_node}, original_expansions.size(), - NodeSymbolHash(symbol_table), - NodeSymbolEqual(symbol_table)); + std::unordered_set expanded_symbols( + {symbol_table.at(*start_node->identifier_)}); + std::unordered_set all_expansion_symbols; + for (const auto &expansion : original_expansions) { + all_expansion_symbols.insert( + symbol_table.at(*expansion.node1->identifier_)); + if (expansion.edge) { + all_expansion_symbols.insert( + symbol_table.at(*expansion.edge->identifier_)); + all_expansion_symbols.insert( + symbol_table.at(*expansion.node2->identifier_)); + } + } while (!original_expansions.empty()) { - auto next_it = NextExpansion(expanded_nodes, original_expansions); + auto next_it = NextExpansion(symbol_table, expanded_symbols, + all_expansion_symbols, original_expansions); if (next_it == original_expansions.end()) { // Pick a new starting expansion, since we cannot continue the chain. next_it = original_expansions.begin(); } - expanded_nodes.insert(next_it->node1); + expanded_symbols.insert(symbol_table.at(*next_it->node1->identifier_)); if (next_it->node2) { - expanded_nodes.insert(next_it->node2); + expanded_symbols.insert(symbol_table.at(*next_it->edge->identifier_)); + expanded_symbols.insert(symbol_table.at(*next_it->node2->identifier_)); } expansions.emplace_back(*next_it); original_expansions.erase(next_it); @@ -271,7 +307,7 @@ auto VaryMultiMatchingStarts(const std::vector &matchings, // Produces alternative query parts out of a single part by varying how each // graph matching is done. -std::vector VaryQuertPartMatching(const QueryPart &query_part, +std::vector VaryQueryPartMatching(const QueryPart &query_part, const SymbolTable &symbol_table) { std::vector variants; // Get multiple regular matchings, each starting from different node. @@ -333,7 +369,7 @@ auto VaryQueryMatching(const std::vector &query_parts, std::vector> alternative_query_parts; for (const auto &query_part : query_parts) { alternative_query_parts.emplace_back( - VaryQuertPartMatching(query_part, symbol_table)); + VaryQueryPartMatching(query_part, symbol_table)); } return iter::slice( CartesianProduct(std::move(alternative_query_parts)), 0UL, diff --git a/tests/qa/tck_engine/tests/memgraph_V1/features/match.feature b/tests/qa/tck_engine/tests/memgraph_V1/features/match.feature index 4713fdcba..8bd497802 100644 --- a/tests/qa/tck_engine/tests/memgraph_V1/features/match.feature +++ b/tests/qa/tck_engine/tests/memgraph_V1/features/match.feature @@ -406,3 +406,54 @@ Feature: Match | 1 | | 3 | | 4 | + + Scenario: Test match unbounded variable path + Given an empty graph + And having executed: + """ + CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3}) + """ + When executing query: + """ + MATCH (n) -[r*]-> (m) RETURN n.a, m.a + """ + Then the result should be: + | n.a | m.a | + | 1 | 2 | + | 1 | 3 | + | 2 | 3 | + + Scenario: Test match 0 length variable path + Given an empty graph + And having executed: + """ + CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3}) + """ + When executing query: + """ + MATCH (n) -[r*0]-> (m) RETURN n.a, m.a + """ + Then the result should be: + | n.a | m.a | + | 1 | 1 | + | 2 | 2 | + | 3 | 3 | + + Scenario: Test match bounded variable path + Given an empty graph + And having executed: + """ + CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3}) + """ + When executing query: + """ + MATCH (n) -[r*0..1]-> (m) RETURN n.a, m.a + """ + Then the result should be: + | n.a | m.a | + | 1 | 1 | + | 1 | 2 | + | 2 | 2 | + | 2 | 3 | + | 3 | 3 | + diff --git a/tests/unit/query_planner.cpp b/tests/unit/query_planner.cpp index 1046c1a24..61439f53c 100644 --- a/tests/unit/query_planner.cpp +++ b/tests/unit/query_planner.cpp @@ -54,6 +54,7 @@ class PlanChecker : public HierarchicalLogicalOperatorVisitor { PRE_VISIT(ScanAllByLabelPropertyValue); PRE_VISIT(ScanAllByLabelPropertyRange); PRE_VISIT(Expand); + PRE_VISIT(ExpandVariable); PRE_VISIT(Filter); PRE_VISIT(Produce); PRE_VISIT(SetProperty); @@ -122,6 +123,7 @@ using ExpectDelete = OpChecker; using ExpectScanAll = OpChecker; using ExpectScanAllByLabel = OpChecker; using ExpectExpand = OpChecker; +using ExpectExpandVariable = OpChecker; using ExpectFilter = OpChecker; using ExpectProduce = OpChecker; using ExpectSetProperty = OpChecker; @@ -1215,4 +1217,23 @@ TEST(TestLogicalPlanner, ReturnSumGroupByAll) { CheckPlan(storage, aggr, ExpectProduce()); } +TEST(TestLogicalPlanner, MatchExpandVariable) { + // Test MATCH (n) -[r *..3]-> (m) RETURN r + AstTreeStorage storage; + auto edge = EDGE("r"); + edge->has_range_ = true; + edge->upper_bound_ = LITERAL(3); + QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("r")); + CheckPlan(storage, ExpectScanAll(), ExpectExpandVariable(), ExpectProduce()); +} + +TEST(TestLogicalPlanner, MatchExpandVariableNoBounds) { + // Test MATCH (n) -[r *]-> (m) RETURN r + AstTreeStorage storage; + auto edge = EDGE("r"); + edge->has_range_ = true; + QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("r")); + CheckPlan(storage, ExpectScanAll(), ExpectExpandVariable(), ExpectProduce()); +} + } // namespace