Plan variable length expand
Summary: Handle reordering expansions with variable path length. Reviewers: florijan, mislav.bradac. Reviewed By: mislav.bradac. Subscribers: pullbot. Differential Revision: https://phabricator.memgraph.io/D579
This commit is contained in:
parent
b33aae42ab
commit
9ae1a9a585
@ -4,6 +4,7 @@
|
||||
|
||||
### Major Features and Improvements
|
||||
|
||||
* Support for variable length path `MATCH`.
|
||||
* Support for `all` function in openCypher.
|
||||
* User specified transaction execution timeout.
|
||||
* Support for query parameters (except for parameters in place of property maps).
|
||||
|
@ -47,9 +47,25 @@ While their friends can be found with the following.
|
||||
|
||||
MATCH (n :Person {age: 42}) -[:FriendOf]- (friend) RETURN friend
|
||||
|
||||
There are cases when a user needs to find data which is connected by
|
||||
traversing a path of connections, but the user doesn't know how many
|
||||
connections need to be traversed. openCypher allows for designating patterns
|
||||
with *variable path lengths*. Matching such a path is achieved by using the
|
||||
`*` (*asterisk*) symbol inside the pattern for a connection. For example,
|
||||
traversing from `node1` to `node2` by following any number of connections in a
|
||||
single direction can be achieved with:
|
||||
|
||||
MATCH (node1) -[*]-> (node2)
|
||||
|
||||
If paths are very long, finding them could take a long time. To prevent that,
|
||||
a user can provide the minimum and maximum length of the path. For example,
|
||||
paths of length between 2 and 4 can be obtained with a query like:
|
||||
|
||||
MATCH (node1) -[*2..4]-> (node2)
|
||||
|
||||
More details on how `MATCH` works can be found
|
||||
[here](https://neo4j.com/docs/developer-manual/current/cypher/clauses/match/).
|
||||
Note that *variable length paths* and *named paths* are not yet supported.
|
||||
Note that *named paths* are not yet supported.
|
||||
|
||||
The `MATCH` clause can be modified by prepending the `OPTIONAL` keyword.
|
||||
The `OPTIONAL MATCH` clause behaves the same as a regular `MATCH`, but when it
|
||||
|
@ -48,26 +48,7 @@ example:
|
||||
|
||||
MATCH path = (node1) -[connection]-> (node2)
|
||||
|
||||
Path naming is especially useful with another upcoming feature, *variable
|
||||
length paths*.
|
||||
|
||||
#### Variable Length Paths
|
||||
|
||||
There are cases when a user needs to find data which is connected by
|
||||
traversing a path of connections, but the user doesn't know how many
|
||||
connections need to be traversed. openCypher allows for designating patterns
|
||||
with variable path lengths. Matching such a path is achieved by using the `*`
|
||||
(*asterisk*) symbol inside the pattern for a connection. For example,
|
||||
traversing from `node1` to `node2` by following any number of connections in a
|
||||
single direction can be achieved with:
|
||||
|
||||
MATCH (node1) -[*]-> (node2)
|
||||
|
||||
If paths are very long, finding them could take a long time. To prevent that,
|
||||
a user can provide the minimum and maximum length of the path. For example,
|
||||
paths of length between 2 and 4 can be obtained with a query like:
|
||||
|
||||
MATCH (node1) -[*2..4]-> (node2)
|
||||
Path naming is especially useful with the *variable length paths* feature.
|
||||
|
||||
#### Functions
|
||||
|
||||
|
@ -592,10 +592,15 @@ class ExpandVariableCursor : public Cursor {
|
||||
if (PullInput(frame, symbol_table)) {
|
||||
// if lower bound is zero we also yield empty paths
|
||||
if (self_.lower_bound_ && self_.lower_bound_.value() == 0) {
|
||||
// take into account existing_edge when yielding empty paths
|
||||
auto &edges_on_frame =
|
||||
frame[self_.edge_symbol_].Value<std::vector<TypedValue>>();
|
||||
if (!self_.existing_edge_ || edges_on_frame.empty()) return true;
|
||||
auto &start_vertex =
|
||||
frame[self_.input_symbol_].Value<VertexAccessor>();
|
||||
// take into account existing_edge when yielding empty paths
|
||||
if ((!self_.existing_edge_ || edges_on_frame.empty()) &&
|
||||
// Place the start vertex on the frame.
|
||||
self_.HandleExistingNode(start_vertex, frame))
|
||||
return true;
|
||||
}
|
||||
// if lower bound is not zero, we just continue, the next
|
||||
// loop iteration will attempt to expand and we're good
|
||||
|
@ -12,16 +12,18 @@ class SymbolTable;
|
||||
|
||||
namespace plan {
|
||||
|
||||
/// @brief Normalized representation of a pattern that needs to be matched.
|
||||
/// Normalized representation of a pattern that needs to be matched.
|
||||
struct Expansion {
|
||||
/// @brief The first node in the expansion, it can be a single node.
|
||||
/// The first node in the expansion, it can be a single node.
|
||||
NodeAtom *node1 = nullptr;
|
||||
/// @brief Optional edge which connects the 2 nodes.
|
||||
/// Optional edge which connects the 2 nodes.
|
||||
EdgeAtom *edge = nullptr;
|
||||
/// @brief Direction of the edge, it may be flipped compared to original
|
||||
/// Direction of the edge, it may be flipped compared to original
|
||||
/// @c EdgeAtom during plan generation.
|
||||
EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
|
||||
/// @brief Optional node at the other end of an edge. If the expansion
|
||||
/// Set of symbols found inside the range expressions of a variable path edge.
|
||||
std::unordered_set<Symbol> symbols_in_range;
|
||||
/// Optional node at the other end of an edge. If the expansion
|
||||
/// contains an edge, then this node is required.
|
||||
NodeAtom *node2 = nullptr;
|
||||
};
|
||||
|
@ -622,13 +622,20 @@ LogicalOperator *HandleWriteClause(Clause *clause, LogicalOperator *input_op,
|
||||
// This representation makes it easier to permute from which node or edge we
|
||||
// want to start expanding.
|
||||
std::vector<Expansion> NormalizePatterns(
|
||||
const std::vector<Pattern *> &patterns) {
|
||||
const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
|
||||
std::vector<Expansion> expansions;
|
||||
auto ignore_node = [&](auto *node) {};
|
||||
auto collect_expansion = [&](auto *prev_node, auto *edge,
|
||||
auto *current_node) {
|
||||
expansions.emplace_back(
|
||||
Expansion{prev_node, edge, edge->direction_, current_node});
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
if (edge->lower_bound_) {
|
||||
edge->lower_bound_->Accept(collector);
|
||||
}
|
||||
if (edge->upper_bound_) {
|
||||
edge->upper_bound_->Accept(collector);
|
||||
}
|
||||
expansions.emplace_back(Expansion{prev_node, edge, edge->direction_,
|
||||
collector.symbols_, current_node});
|
||||
};
|
||||
for (const auto &pattern : patterns) {
|
||||
if (pattern->atoms_.size() == 1U) {
|
||||
@ -650,7 +657,7 @@ std::vector<Expansion> NormalizePatterns(
|
||||
void AddMatching(const std::vector<Pattern *> &patterns, Where *where,
|
||||
const SymbolTable &symbol_table, AstTreeStorage &storage,
|
||||
Matching &matching) {
|
||||
auto expansions = NormalizePatterns(patterns);
|
||||
auto expansions = NormalizePatterns(symbol_table, patterns);
|
||||
std::unordered_set<Symbol> edge_symbols;
|
||||
for (const auto &expansion : expansions) {
|
||||
if (expansion.edge) {
|
||||
@ -813,10 +820,6 @@ LogicalOperator *PlanMatching(const Matching &matching,
|
||||
}
|
||||
// We have an edge, so generate Expand.
|
||||
if (expansion.edge) {
|
||||
if (expansion.edge->has_range_) {
|
||||
throw utils::NotYetImplemented(
|
||||
"planning variable length relationships");
|
||||
}
|
||||
// If the expand symbols were already bound, then we need to indicate
|
||||
// that they exist. The Expand will then check whether the pattern holds
|
||||
// instead of writing the expansion to symbols.
|
||||
@ -834,10 +837,36 @@ LogicalOperator *PlanMatching(const Matching &matching,
|
||||
} else {
|
||||
context.new_symbols.emplace_back(edge_symbol);
|
||||
}
|
||||
last_op =
|
||||
new Expand(node_symbol, edge_symbol, expansion.direction,
|
||||
std::shared_ptr<LogicalOperator>(last_op), node1_symbol,
|
||||
existing_node, existing_edge, context.graph_view);
|
||||
if (expansion.edge->has_range_) {
|
||||
std::experimental::optional<size_t> lower_bound;
|
||||
std::experimental::optional<size_t> upper_bound;
|
||||
auto get_bound = [](auto *bound_expr) {
|
||||
auto *literal = dynamic_cast<PrimitiveLiteral *>(bound_expr);
|
||||
if (!literal || literal->value_.type() != TypedValue::Type::Int ||
|
||||
literal->value_.Value<int64_t>() < 0) {
|
||||
throw SemanticException(
|
||||
"Length of variable path must be a non-negative integer "
|
||||
"literal.");
|
||||
}
|
||||
return literal->value_.Value<int64_t>();
|
||||
};
|
||||
// Default lower bound to 1 if none provided.
|
||||
lower_bound = expansion.edge->lower_bound_
|
||||
? get_bound(expansion.edge->lower_bound_)
|
||||
: 1U;
|
||||
if (expansion.edge->upper_bound_) {
|
||||
upper_bound = get_bound(expansion.edge->upper_bound_);
|
||||
}
|
||||
last_op = new ExpandVariable(
|
||||
node_symbol, edge_symbol, expansion.direction, lower_bound,
|
||||
upper_bound, std::shared_ptr<LogicalOperator>(last_op),
|
||||
node1_symbol, existing_node, existing_edge, context.graph_view);
|
||||
} else {
|
||||
last_op =
|
||||
new Expand(node_symbol, edge_symbol, expansion.direction,
|
||||
std::shared_ptr<LogicalOperator>(last_op), node1_symbol,
|
||||
existing_node, existing_edge, context.graph_view);
|
||||
}
|
||||
if (!existing_edge) {
|
||||
// Ensure Cyphermorphism (different edge symbols always map to different
|
||||
// edges).
|
||||
@ -1023,6 +1052,10 @@ void Filters::CollectPatternFilters(Pattern &pattern,
|
||||
auto add_expand_filter = [&](NodeAtom *prev_node, EdgeAtom *edge,
|
||||
NodeAtom *node) {
|
||||
const auto &edge_symbol = symbol_table.at(*edge->identifier_);
|
||||
if (edge->has_range_ &&
|
||||
(!edge->edge_types_.empty() || !edge->properties_.empty())) {
|
||||
throw utils::NotYetImplemented("filtering variable length paths");
|
||||
}
|
||||
if (!edge->edge_types_.empty()) {
|
||||
all_filters_.emplace_back(
|
||||
storage.Create<EdgeTypeTest>(edge->identifier_, edge->edge_types_),
|
||||
|
@ -44,19 +44,44 @@ class NodeSymbolEqual {
|
||||
};
|
||||
|
||||
// Finds the next Expansion which has one of its nodes among the already
|
||||
// expanded nodes. The function may modify expansions, by flipping their nodes
|
||||
// expanded symbols. The function may modify expansions, by flipping their nodes
|
||||
// and direction. This is done so that the returned iterator always points to the
|
||||
// expansion whose node1 is the already expanded one, while node2 may not be.
|
||||
auto NextExpansion(const std::unordered_set<const NodeAtom *, NodeSymbolHash,
|
||||
NodeSymbolEqual> &expanded_nodes,
|
||||
auto NextExpansion(const SymbolTable &symbol_table,
|
||||
const std::unordered_set<Symbol> &expanded_symbols,
|
||||
const std::unordered_set<Symbol> &all_expansion_symbols,
|
||||
std::vector<Expansion> &expansions) {
|
||||
// Returns true if the expansion is a regular expand or if it is a variable
|
||||
// path expand, but with bound symbols used inside the range expression.
|
||||
auto can_expand = [&](auto &expansion) {
|
||||
for (const auto &range_symbol : expansion.symbols_in_range) {
|
||||
// If the symbols used in range need to be bound during this whole
|
||||
// expansion, we must check whether they have already been expanded and
|
||||
// therefore bound. If the symbols are not found in the whole expansion,
|
||||
// then the semantic analysis should guarantee that the symbols have been
|
||||
// bound long before we expand.
|
||||
if (all_expansion_symbols.find(range_symbol) !=
|
||||
all_expansion_symbols.end() &&
|
||||
expanded_symbols.find(range_symbol) == expanded_symbols.end()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
auto expansion_it = expansions.begin();
|
||||
for (; expansion_it != expansions.end(); ++expansion_it) {
|
||||
if (expanded_nodes.find(expansion_it->node1) != expanded_nodes.end()) {
|
||||
if (!can_expand(*expansion_it)) {
|
||||
continue;
|
||||
}
|
||||
const auto &node1_symbol =
|
||||
symbol_table.at(*expansion_it->node1->identifier_);
|
||||
if (expanded_symbols.find(node1_symbol) != expanded_symbols.end()) {
|
||||
return expansion_it;
|
||||
}
|
||||
auto *node2 = expansion_it->node2;
|
||||
if (node2 && expanded_nodes.find(node2) != expanded_nodes.end()) {
|
||||
if (node2 &&
|
||||
expanded_symbols.find(symbol_table.at(*node2->identifier_)) !=
|
||||
expanded_symbols.end()) {
|
||||
// We need to flip the expansion, since we want to expand from node2.
|
||||
std::swap(expansion_it->node2, expansion_it->node1);
|
||||
if (expansion_it->direction != EdgeAtom::Direction::BOTH) {
|
||||
@ -79,19 +104,30 @@ std::vector<Expansion> ExpansionsFrom(
|
||||
const NodeAtom *start_node, std::vector<Expansion> original_expansions,
|
||||
const SymbolTable &symbol_table) {
|
||||
std::vector<Expansion> expansions;
|
||||
std::unordered_set<const NodeAtom *, NodeSymbolHash, NodeSymbolEqual>
|
||||
expanded_nodes({start_node}, original_expansions.size(),
|
||||
NodeSymbolHash(symbol_table),
|
||||
NodeSymbolEqual(symbol_table));
|
||||
std::unordered_set<Symbol> expanded_symbols(
|
||||
{symbol_table.at(*start_node->identifier_)});
|
||||
std::unordered_set<Symbol> all_expansion_symbols;
|
||||
for (const auto &expansion : original_expansions) {
|
||||
all_expansion_symbols.insert(
|
||||
symbol_table.at(*expansion.node1->identifier_));
|
||||
if (expansion.edge) {
|
||||
all_expansion_symbols.insert(
|
||||
symbol_table.at(*expansion.edge->identifier_));
|
||||
all_expansion_symbols.insert(
|
||||
symbol_table.at(*expansion.node2->identifier_));
|
||||
}
|
||||
}
|
||||
while (!original_expansions.empty()) {
|
||||
auto next_it = NextExpansion(expanded_nodes, original_expansions);
|
||||
auto next_it = NextExpansion(symbol_table, expanded_symbols,
|
||||
all_expansion_symbols, original_expansions);
|
||||
if (next_it == original_expansions.end()) {
|
||||
// Pick a new starting expansion, since we cannot continue the chain.
|
||||
next_it = original_expansions.begin();
|
||||
}
|
||||
expanded_nodes.insert(next_it->node1);
|
||||
expanded_symbols.insert(symbol_table.at(*next_it->node1->identifier_));
|
||||
if (next_it->node2) {
|
||||
expanded_nodes.insert(next_it->node2);
|
||||
expanded_symbols.insert(symbol_table.at(*next_it->edge->identifier_));
|
||||
expanded_symbols.insert(symbol_table.at(*next_it->node2->identifier_));
|
||||
}
|
||||
expansions.emplace_back(*next_it);
|
||||
original_expansions.erase(next_it);
|
||||
@ -271,7 +307,7 @@ auto VaryMultiMatchingStarts(const std::vector<Matching> &matchings,
|
||||
|
||||
// Produces alternative query parts out of a single part by varying how each
|
||||
// graph matching is done.
|
||||
std::vector<QueryPart> VaryQuertPartMatching(const QueryPart &query_part,
|
||||
std::vector<QueryPart> VaryQueryPartMatching(const QueryPart &query_part,
|
||||
const SymbolTable &symbol_table) {
|
||||
std::vector<QueryPart> variants;
|
||||
// Get multiple regular matchings, each starting from different node.
|
||||
@ -333,7 +369,7 @@ auto VaryQueryMatching(const std::vector<QueryPart> &query_parts,
|
||||
std::vector<std::vector<QueryPart>> alternative_query_parts;
|
||||
for (const auto &query_part : query_parts) {
|
||||
alternative_query_parts.emplace_back(
|
||||
VaryQuertPartMatching(query_part, symbol_table));
|
||||
VaryQueryPartMatching(query_part, symbol_table));
|
||||
}
|
||||
return iter::slice(
|
||||
CartesianProduct<QueryPart>(std::move(alternative_query_parts)), 0UL,
|
||||
|
@ -406,3 +406,54 @@ Feature: Match
|
||||
| 1 |
|
||||
| 3 |
|
||||
| 4 |
|
||||
|
||||
Scenario: Test match unbounded variable path
|
||||
Given an empty graph
|
||||
And having executed:
|
||||
"""
|
||||
CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3})
|
||||
"""
|
||||
When executing query:
|
||||
"""
|
||||
MATCH (n) -[r*]-> (m) RETURN n.a, m.a
|
||||
"""
|
||||
Then the result should be:
|
||||
| n.a | m.a |
|
||||
| 1 | 2 |
|
||||
| 1 | 3 |
|
||||
| 2 | 3 |
|
||||
|
||||
Scenario: Test match 0 length variable path
|
||||
Given an empty graph
|
||||
And having executed:
|
||||
"""
|
||||
CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3})
|
||||
"""
|
||||
When executing query:
|
||||
"""
|
||||
MATCH (n) -[r*0]-> (m) RETURN n.a, m.a
|
||||
"""
|
||||
Then the result should be:
|
||||
| n.a | m.a |
|
||||
| 1 | 1 |
|
||||
| 2 | 2 |
|
||||
| 3 | 3 |
|
||||
|
||||
Scenario: Test match bounded variable path
|
||||
Given an empty graph
|
||||
And having executed:
|
||||
"""
|
||||
CREATE ({a: 1}) -[:r]-> ({a:2}) -[:r]-> ({a:3})
|
||||
"""
|
||||
When executing query:
|
||||
"""
|
||||
MATCH (n) -[r*0..1]-> (m) RETURN n.a, m.a
|
||||
"""
|
||||
Then the result should be:
|
||||
| n.a | m.a |
|
||||
| 1 | 1 |
|
||||
| 1 | 2 |
|
||||
| 2 | 2 |
|
||||
| 2 | 3 |
|
||||
| 3 | 3 |
|
||||
|
||||
|
@ -54,6 +54,7 @@ class PlanChecker : public HierarchicalLogicalOperatorVisitor {
|
||||
PRE_VISIT(ScanAllByLabelPropertyValue);
|
||||
PRE_VISIT(ScanAllByLabelPropertyRange);
|
||||
PRE_VISIT(Expand);
|
||||
PRE_VISIT(ExpandVariable);
|
||||
PRE_VISIT(Filter);
|
||||
PRE_VISIT(Produce);
|
||||
PRE_VISIT(SetProperty);
|
||||
@ -122,6 +123,7 @@ using ExpectDelete = OpChecker<Delete>;
|
||||
using ExpectScanAll = OpChecker<ScanAll>;
|
||||
using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>;
|
||||
using ExpectExpand = OpChecker<Expand>;
|
||||
using ExpectExpandVariable = OpChecker<ExpandVariable>;
|
||||
using ExpectFilter = OpChecker<Filter>;
|
||||
using ExpectProduce = OpChecker<Produce>;
|
||||
using ExpectSetProperty = OpChecker<SetProperty>;
|
||||
@ -1215,4 +1217,23 @@ TEST(TestLogicalPlanner, ReturnSumGroupByAll) {
|
||||
CheckPlan(storage, aggr, ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, MatchExpandVariable) {
|
||||
// Test MATCH (n) -[r *..3]-> (m) RETURN r
|
||||
AstTreeStorage storage;
|
||||
auto edge = EDGE("r");
|
||||
edge->has_range_ = true;
|
||||
edge->upper_bound_ = LITERAL(3);
|
||||
QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("r"));
|
||||
CheckPlan(storage, ExpectScanAll(), ExpectExpandVariable(), ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, MatchExpandVariableNoBounds) {
|
||||
// Test MATCH (n) -[r *]-> (m) RETURN r
|
||||
AstTreeStorage storage;
|
||||
auto edge = EDGE("r");
|
||||
edge->has_range_ = true;
|
||||
QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("r"));
|
||||
CheckPlan(storage, ExpectScanAll(), ExpectExpandVariable(), ExpectProduce());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user