Generate multiple plans depending on starting node
Summary:
Permute query parts. Permute matching only by selecting the starting node.
Flip the expansion when expanding from the other node.

Split planner into rule_based_planner and variable_start_planner.

Use symbol hash when collecting expansion nodes. Multiple node atoms may point
to the same symbol, and we could generate multiple starting positions per atom
which are the same. Using symbol hash and equality prevents generating those
redundant plans.

Correctly permute optional and merge matchings.

Test VariableStartPlanner.

Reviewers: florijan, mislav.bradac, buda, lion

Reviewed By: florijan, buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D417
parent b1536ea1ec
commit f0422c0e11
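The "flip the expansion" part works by reversing the stored edge direction whenever the traversal chain is continued from an expansion's second node instead of its first. A minimal sketch of that flip (illustration only; it mirrors the logic inside NextExpansion in the new variable_start_planner.cpp below, with EdgeAtom::Direction coming from the existing AST):

// Sketch: the direction seen from node2 is the reverse of the one seen from
// node1; undirected (BOTH) expansions read the same from either end.
EdgeAtom::Direction FlipDirection(EdgeAtom::Direction direction) {
  switch (direction) {
    case EdgeAtom::Direction::IN:
      return EdgeAtom::Direction::OUT;
    case EdgeAtom::Direction::OUT:
      return EdgeAtom::Direction::IN;
    default:
      return EdgeAtom::Direction::BOTH;
  }
}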
@@ -360,7 +360,8 @@ set(memgraph_src_files
    ${src_dir}/query/typed_value.cpp
    ${src_dir}/query/interpret/awesome_memgraph_functions.cpp
    ${src_dir}/query/plan/operator.cpp
    ${src_dir}/query/plan/planner.cpp
    ${src_dir}/query/plan/rule_based_planner.cpp
    ${src_dir}/query/plan/variable_start_planner.cpp
    ${src_dir}/query/frontend/semantic/symbol_generator.cpp
)
# -----------------------------------------------------------------------------

@@ -12,60 +12,76 @@ class SymbolTable;

namespace plan {

// Normalized representation of a pattern that needs to be matched.
/// @brief Normalized representation of a pattern that needs to be matched.
struct Expansion {
  // The first node in the expansion, it can be a single node.
  /// @brief The first node in the expansion, it can be a single node.
  NodeAtom *node1 = nullptr;
  // Optional edge which connects the 2 nodes.
  /// @brief Optional edge which connects the 2 nodes.
  EdgeAtom *edge = nullptr;
  // Optional node at the other end of an edge. If the expansion contains an
  // edge, then this node is required.
  /// @brief Direction of the edge, it may be flipped compared to original
  /// @c EdgeAtom during plan generation.
  EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
  /// @brief Optional node at the other end of an edge. If the expansion
  /// contains an edge, then this node is required.
  NodeAtom *node2 = nullptr;
};
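To make the new `direction` member concrete, here is how a pattern like the one in the `Matching` documentation below is normalized (illustration only; `a`, `e1`, `b`, `e2`, `c` are hypothetical NodeAtom/EdgeAtom pointers taken from the AST, and the initializer order follows the struct above and the NormalizePatterns change further down):

// Sketch: MATCH (a) -[e1]- (b) -[e2]- (c) normalizes into two chained
// expansions; the direction initially copies each edge's own direction.
std::vector<Expansion> expansions = {
    Expansion{a, e1, e1->direction_, b},
    Expansion{b, e2, e2->direction_, c},
};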

// Normalized representation of a single or multiple Match clauses.
//
// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
// WHERE c.prop < 42` will produce the following.
// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and `(n) -[e3]- (m)`.
// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the
// first `MATCH` and the set `{e3}` for the second.
// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the
// other obtained from `WHERE` on symbol `c`.
/// @brief Normalized representation of a single or multiple Match clauses.
///
/// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
/// WHERE c.prop < 42` will produce the following.
/// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and
/// `(n) -[e3]- (m)`.
/// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the
/// first `MATCH` and the set `{e3}` for the second.
/// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the
/// other obtained from `WHERE` on symbol `c`.
struct Matching {
  // All expansions that need to be performed across Match clauses.
  /// @brief All expansions that need to be performed across @c Match clauses.
  std::vector<Expansion> expansions;
  // Symbols for edges established in match, used to ensure Cyphermorphism.
  // There are multiple sets, because each Match clause determines a single set.
  /// @brief Symbols for edges established in match, used to ensure
  /// Cyphermorphism.
  ///
  /// There are multiple sets, because each Match clause determines a single
  /// set.
  std::vector<std::unordered_set<Symbol>> edge_symbols;
  // Pairs of filter expression and symbols used in them. The list should be
  // filled using CollectPatternFilters function.
  /// @brief Pairs of filter expression and symbols used in them.
  std::vector<std::pair<Expression *, std::unordered_set<Symbol>>> filters;
};

// Represents a read (+ write) part of a query. Each part ends with either:
//  * RETURN clause;
//  * WITH clause or
//  * any of the write clauses.
//
// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
// generated QueryPart will have `matching` generated for the `MATCH`.
// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
// in that exact order. The pattern inside the first `MERGE` will be used to
// generate the first `merge_matching` element, and the second `MERGE` pattern
// will produce the second `merge_matching` element. This way, if someone
// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
// in the same order as their respective `merge_matching` elements.
/// @brief Represents a read (+ write) part of a query. Parts are split on
/// `WITH` clauses.
///
/// Each part ends with either:
///
///  * `RETURN` clause;
///  * `WITH` clause or
///  * any of the write clauses.
///
/// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
/// generated QueryPart will have `matching` generated for the `MATCH`.
/// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
/// in that exact order. The pattern inside the first `MERGE` will be used to
/// generate the first `merge_matching` element, and the second `MERGE` pattern
/// will produce the second `merge_matching` element. This way, if someone
/// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
/// in the same order as their respective `merge_matching` elements.
struct QueryPart {
  // All MATCH clauses merged into one Matching.
  /// @brief All `MATCH` clauses merged into one @c Matching.
  Matching matching;
  // Each OPTIONAL MATCH converted to Matching.
  /// @brief Each `OPTIONAL MATCH` converted to @c Matching.
  std::vector<Matching> optional_matching;
  // Matching for each MERGE clause. Since Merge is contained in
  // remaining_clauses, this vector contains matching in the same order as Merge
  // appears.
  /// @brief @c Matching for each `MERGE` clause.
  ///
  /// Storing the normalized pattern of a @c Merge does not preclude storing the
  /// @c Merge clause itself inside `remaining_clauses`. The reason is that we
  /// need to have access to other parts of the clause, such as `SET` clauses
  /// which need to be run.
  ///
  /// Since @c Merge is contained in `remaining_clauses`, this vector contains
  /// matching in the same order as @c Merge appears.
  std::vector<Matching> merge_matching;
  // All the remaining clauses (without Match).
  /// @brief All the remaining clauses (without @c Match).
  std::vector<Clause *> remaining_clauses;
};
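The guarantee that Merge clauses appear in `remaining_clauses` in the same order as their `merge_matching` entries means the two can be walked in lockstep. A minimal sketch of that pairing (illustration only; `part` is assumed to come from CollectQueryParts, and the `Merge` cast mirrors how clauses are inspected elsewhere in the planner):

// Sketch: pair each Merge clause with its normalized pattern by index.
size_t merge_index = 0;
for (auto *clause : part.remaining_clauses) {
  if (auto *merge = dynamic_cast<Merge *>(clause)) {
    const Matching &merge_pattern = part.merge_matching[merge_index++];
    // Plan merge_pattern here, then run the SET parts of the Merge clause
    // itself.
  }
}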

@@ -93,17 +109,49 @@ struct PlanningContext {
  std::unordered_set<Symbol> bound_symbols;
};

/// @brief Planner which uses hardcoded rules to produce operators.
///
/// @sa MakeLogicalPlan
class RuleBasedPlanner {
 public:
  RuleBasedPlanner(PlanningContext &context) : context_(context) {}

  /// @brief The result of plan generation is the root of the generated operator
  /// tree.
  using PlanResult = std::unique_ptr<LogicalOperator>;
  /// @brief Generates the operator tree based on explicitly set rules.
  PlanResult Plan(std::vector<QueryPart> &);

 private:
  PlanningContext &context_;
};

/// @brief Planner which generates multiple plans by changing the order of graph
/// traversal.
///
/// This planner picks different starting nodes from which to start graph
/// traversal. Generating a single plan is backed by @c RuleBasedPlanner.
///
/// @sa MakeLogicalPlan
class VariableStartPlanner {
 public:
  VariableStartPlanner(PlanningContext &context) : context_(context) {}

  /// @brief The result of plan generation is a vector of roots to multiple
  /// generated operator trees.
  using PlanResult = std::vector<std::unique_ptr<LogicalOperator>>;
  /// @brief Generate multiple plans by varying the order of graph traversal.
  PlanResult Plan(std::vector<QueryPart> &);

 private:
  PlanningContext &context_;
};
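In practice both planners are invoked through MakeLogicalPlan, declared below. This sketch mirrors the new unit test at the end of this commit (the `storage`, `symbol_table` and `dba` setup is assumed to be the same as there):

auto plans = MakeLogicalPlan<VariableStartPlanner>(storage, symbol_table, &dba);
for (const auto &plan : plans) {
  // Each element is the root of an independent, equivalent operator tree;
  // every one of them produces the same results for the query.
}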

/// @brief Convert the AST to multiple @c QueryParts.
///
/// This function will normalize patterns inside @c Match and @c Merge clauses
/// and do some other preprocessing in order to generate multiple @c QueryPart
/// structures.
std::vector<QueryPart> CollectQueryParts(const SymbolTable &, AstTreeStorage &);

/// @brief Generates the LogicalOperator tree and returns the resulting plan.
@@ -117,6 +165,10 @@ std::vector<QueryPart> CollectQueryParts(const SymbolTable &, AstTreeStorage &);
/// table.
/// @param db Optional @c GraphDbAccessor, which is used to query database
/// information in order to improve generated plans.
/// @return @c PlanResult which depends on the @c TPlanner used.
///
/// @sa RuleBasedPlanner
/// @sa VariableStartPlanner
template <class TPlanner>
typename TPlanner::PlanResult MakeLogicalPlan(
    AstTreeStorage &storage, SymbolTable &symbol_table,

@@ -596,7 +596,8 @@ std::vector<Expansion> NormalizePatterns(
  auto ignore_node = [&](auto *node) {};
  auto collect_expansion = [&](auto *prev_node, auto *edge,
                               auto *current_node) {
    expansions.emplace_back(Expansion{prev_node, edge, current_node});
    expansions.emplace_back(
        Expansion{prev_node, edge, edge->direction_, current_node});
  };
  for (const auto &pattern : patterns) {
    if (pattern->atoms_.size() == 1U) {
@@ -710,7 +711,7 @@ LogicalOperator *PlanMatching(const Matching &matching,
      context.new_symbols.emplace_back(edge_symbol);
    }
    last_op =
        new Expand(node_symbol, edge_symbol, expansion.edge->direction_,
        new Expand(node_symbol, edge_symbol, expansion.direction,
                   std::shared_ptr<LogicalOperator>(last_op), node1_symbol,
                   existing_node, existing_edge, context.graph_view);
    if (!existing_edge) {

src/query/plan/variable_start_planner.cpp (new file, 253 lines)
@@ -0,0 +1,253 @@
#include "query/plan/planner.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
namespace {
|
||||
|
||||
class NodeSymbolHash {
|
||||
public:
|
||||
NodeSymbolHash(const SymbolTable &symbol_table)
|
||||
: symbol_table_(symbol_table) {}
|
||||
|
||||
size_t operator()(const NodeAtom *node_atom) const {
|
||||
return std::hash<Symbol>{}(symbol_table_.at(*node_atom->identifier_));
|
||||
}
|
||||
|
||||
private:
|
||||
const SymbolTable &symbol_table_;
|
||||
};
|
||||
|
||||
class NodeSymbolEqual {
 public:
  NodeSymbolEqual(const SymbolTable &symbol_table)
      : symbol_table_(symbol_table) {}

  bool operator()(const NodeAtom *node_atom1,
                  const NodeAtom *node_atom2) const {
    return symbol_table_.at(*node_atom1->identifier_) ==
           symbol_table_.at(*node_atom2->identifier_);
  }

 private:
  const SymbolTable &symbol_table_;
};
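Both functors key on the symbol behind a NodeAtom rather than on the atom pointer itself, because distinct atoms can name the same variable; deduplicating by symbol is what prevents redundant plans. A small sketch (illustration only; `m_first` and `m_second` are hypothetical atoms for the two occurrences of `m` in `MATCH (n) -[r]-> (m), (m) -[e]-> (l)`, as in the unit test below):

// The two atoms are different objects, but they share a symbol, so they hash
// and compare as equal and yield only one candidate starting node.
NodeSymbolHash hash(symbol_table);
NodeSymbolEqual equal(symbol_table);
bool same_node = equal(m_first, m_second);           // true
bool same_bucket = hash(m_first) == hash(m_second);  // true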

// Finds the next Expansion which has one of its nodes among the already
// expanded nodes. The function may modify expansions by flipping their nodes
// and direction. This is done so that the returned iterator always points to
// the expansion whose node1 is the already expanded one, while node2 may not be.
auto NextExpansion(const std::unordered_set<const NodeAtom *, NodeSymbolHash,
                                            NodeSymbolEqual> &expanded_nodes,
                   std::vector<Expansion> &expansions) {
  auto expansion_it = expansions.begin();
  for (; expansion_it != expansions.end(); ++expansion_it) {
    if (expanded_nodes.find(expansion_it->node1) != expanded_nodes.end()) {
      return expansion_it;
    }
    auto *node2 = expansion_it->node2;
    if (node2 && expanded_nodes.find(node2) != expanded_nodes.end()) {
      // We need to flip the expansion, since we want to expand from node2.
      std::swap(expansion_it->node2, expansion_it->node1);
      if (expansion_it->direction != EdgeAtom::Direction::BOTH) {
        expansion_it->direction =
            expansion_it->direction == EdgeAtom::Direction::IN
                ? EdgeAtom::Direction::OUT
                : EdgeAtom::Direction::IN;
      }
      return expansion_it;
    }
  }
  return expansion_it;
}

// Generates expansions emanating from the start_node by forming a chain. When
// the chain can no longer be continued, a different starting node is picked
// among remaining expansions and the process continues. This is done until all
// original_expansions are used.
std::vector<Expansion> ExpansionsFrom(
    const NodeAtom *start_node, std::vector<Expansion> original_expansions,
    const SymbolTable &symbol_table) {
  std::vector<Expansion> expansions;
  std::unordered_set<const NodeAtom *, NodeSymbolHash, NodeSymbolEqual>
      expanded_nodes({start_node}, original_expansions.size(),
                     NodeSymbolHash(symbol_table),
                     NodeSymbolEqual(symbol_table));
  while (!original_expansions.empty()) {
    auto next_it = NextExpansion(expanded_nodes, original_expansions);
    if (next_it == original_expansions.end()) {
      // Pick a new starting expansion, since we cannot continue the chain.
      next_it = original_expansions.begin();
    }
    expanded_nodes.insert(next_it->node1);
    if (next_it->node2) {
      expanded_nodes.insert(next_it->node2);
    }
    expansions.emplace_back(*next_it);
    original_expansions.erase(next_it);
  }
  return expansions;
}

// Collect all unique nodes from expansions. Uniqueness is determined by
// symbol uniqueness.
auto ExpansionNodes(const std::vector<Expansion> &expansions,
                    const SymbolTable &symbol_table) {
  std::unordered_set<NodeAtom *, NodeSymbolHash, NodeSymbolEqual> nodes(
      expansions.size(), NodeSymbolHash(symbol_table),
      NodeSymbolEqual(symbol_table));
  for (const auto &expansion : expansions) {
    // TODO: Handle labels and properties from different node atoms.
    nodes.insert(expansion.node1);
    if (expansion.node2) {
      nodes.insert(expansion.node2);
    }
  }
  return nodes;
}

// Generates n matchings, where n is the number of nodes to match. Each Matching
// will have a different node as a starting node for expansion.
std::vector<Matching> VaryMatchingStart(const Matching &matching,
                                        const SymbolTable &symbol_table) {
  if (matching.expansions.empty()) {
    return std::vector<Matching>{matching};
  }
  const auto start_nodes = ExpansionNodes(matching.expansions, symbol_table);
  std::vector<Matching> permutations;
  for (const auto &start_node : start_nodes) {
    permutations.emplace_back(
        Matching{ExpansionsFrom(start_node, matching.expansions, symbol_table),
                 matching.edge_symbols, matching.filters});
  }
  return permutations;
}
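A concrete expectation, matching the plan counts asserted in the new unit tests: a matching whose expansions cover three distinct node symbols yields three permutations, one per possible starting node (illustration only; `matching` and `symbol_table` are assumed to describe `MATCH (n) -[r]-> (m) -[e]-> (l)`):

auto permutations = VaryMatchingStart(matching, symbol_table);
// permutations.size() == 3, and each element chains the expansions starting
// from n, m or l respectively.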

// Produces a Cartesian product among vectors between begin and end iterator.
// For example:
//
//    std::vector<int> first_set{1,2,3};
//    std::vector<int> second_set{4,5};
//    std::vector<std::vector<int>> all_sets{first_set, second_set};
//    // prod should be {{1, 4}, {1, 5}, {2, 4}, {2, 5}, {3, 4}, {3, 5}}
//    auto prod = CartesianProduct(all_sets.cbegin(), all_sets.cend())
template <typename T>
std::vector<std::vector<T>> CartesianProduct(
    typename std::vector<std::vector<T>>::const_iterator begin,
    typename std::vector<std::vector<T>>::const_iterator end) {
  std::vector<std::vector<T>> products;
  if (begin == end) {
    return products;
  }
  auto later_products = CartesianProduct<T>(begin + 1, end);
  for (const auto &elem : *begin) {
    if (later_products.empty()) {
      products.emplace_back(std::vector<T>{elem});
    } else {
      for (const auto &rest : later_products) {
        std::vector<T> product{elem};
        product.insert(product.end(), rest.begin(), rest.end());
        products.emplace_back(std::move(product));
      }
    }
  }
  return products;
}
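The example from the comment above, spelled out as a statement sequence (note the explicit template argument; `T` cannot be deduced from the iterator type alone):

std::vector<std::vector<int>> all_sets{{1, 2, 3}, {4, 5}};
auto prod = CartesianProduct<int>(all_sets.cbegin(), all_sets.cend());
// prod == {{1, 4}, {1, 5}, {2, 4}, {2, 5}, {3, 4}, {3, 5}}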

// Similar to VaryMatchingStart, but varies the starting nodes for all given
// matchings. After all matchings produce multiple alternative starts, the
// Cartesian product of all of them is returned.
std::vector<std::vector<Matching>> VaryMultiMatchingStarts(
    const std::vector<Matching> &matchings, const SymbolTable &symbol_table) {
  std::vector<std::vector<Matching>> variants;
  for (const auto &matching : matchings) {
    variants.emplace_back(VaryMatchingStart(matching, symbol_table));
  }
  return CartesianProduct<Matching>(variants.cbegin(), variants.cend());
}

// Produces alternative query parts out of a single part by varying how each
// graph matching is done.
std::vector<QueryPart> VaryQueryPartMatching(const QueryPart &query_part,
                                             const SymbolTable &symbol_table) {
  std::vector<QueryPart> variants;
  // Get multiple regular matchings, each starting from different node.
  auto matchings = VaryMatchingStart(query_part.matching, symbol_table);
  // Get multiple optional matchings, where each combination has different
  // starting nodes.
  auto optional_matchings =
      VaryMultiMatchingStarts(query_part.optional_matching, symbol_table);
  // Like optional matching, but for merge matchings.
  auto merge_matchings =
      VaryMultiMatchingStarts(query_part.merge_matching, symbol_table);
  // After we have all valid combinations of each matching, we need to produce
  // combinations of them. This is similar to Cartesian product, but some
  // matchings can be empty (optional and merge) and `matchings` is of
  // different type (vector) than `optional_matchings` and `merge_matchings`
  // (which are vectors of vectors).
  for (const auto &matching : matchings) {
    // matchings will always have at least a single element, so we can use a
    // for loop. On the other hand, optional and merge matchings can be empty
    // so we need an iterator and do...while loop.
    auto optional_it = optional_matchings.begin();
    auto optional_end = optional_matchings.end();
    do {
      auto merge_it = merge_matchings.begin();
      auto merge_end = merge_matchings.end();
      do {
        // Produce parts for each possible combination. E.g. if we have:
        //  * matchings (m1) and (m2)
        //  * optional matchings (o1) and (o2)
        //  * merge matching (g1)
        // We want to produce parts for:
        //  * (m1), (o1), (g1)
        //  * (m1), (o2), (g1)
        //  * (m2), (o1), (g1)
        //  * (m2), (o2), (g1)
        variants.emplace_back(QueryPart{matching});
        variants.back().remaining_clauses = query_part.remaining_clauses;
        if (optional_it != optional_matchings.end()) {
          // In case we started with empty optional matchings.
          variants.back().optional_matching = *optional_it;
        }
        if (merge_it != merge_matchings.end()) {
          // In case we started with empty merge matchings.
          variants.back().merge_matching = *merge_it;
        }
        // Since we can start with the iterator at the end, we have to first
        // compare it and then increment it. After we increment, we need to
        // check again to avoid generating with empty matching.
      } while (merge_it != merge_end && ++merge_it != merge_end);
    } while (optional_it != optional_end && ++optional_it != optional_end);
  }
  return variants;
}

// Generates different, equivalent query parts by taking different graph
// matching routes for each query part.
std::vector<std::vector<QueryPart>> VaryQueryMatching(
    const std::vector<QueryPart> &query_parts,
    const SymbolTable &symbol_table) {
  std::vector<std::vector<QueryPart>> alternative_query_parts;
  for (const auto &query_part : query_parts) {
    alternative_query_parts.emplace_back(
        VaryQueryPartMatching(query_part, symbol_table));
  }
  return CartesianProduct<QueryPart>(alternative_query_parts.cbegin(),
                                     alternative_query_parts.cend());
}

}  // namespace

std::vector<std::unique_ptr<LogicalOperator>> VariableStartPlanner::Plan(
    std::vector<QueryPart> &query_parts) {
  std::vector<std::unique_ptr<LogicalOperator>> plans;
  auto alternatives = VaryQueryMatching(query_parts, context_.symbol_table);
  RuleBasedPlanner rule_planner(context_);
  for (auto &alternative_query_parts : alternatives) {
    context_.bound_symbols.clear();
    plans.emplace_back(rule_planner.Plan(alternative_query_parts));
  }
  return plans;
}

}  // namespace query::plan

tests/unit/query_variable_start_planner.cpp (new file, 214 lines)
@@ -0,0 +1,214 @@
#include <algorithm>

#include "gtest/gtest.h"

#include "dbms/dbms.hpp"
#include "query/frontend/semantic/symbol_generator.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/planner.hpp"
#include "utils/algorithm.hpp"

#include "query_plan_common.hpp"

using namespace query::plan;
using query::AstTreeStorage;
using Direction = query::EdgeAtom::Direction;

namespace std {

// Overloads for printing resulting rows from a query.
std::ostream &operator<<(std::ostream &stream,
                         const std::vector<TypedValue> &row) {
  PrintIterable(stream, row);
  return stream;
}
std::ostream &operator<<(std::ostream &stream,
                         const std::vector<std::vector<TypedValue>> &rows) {
  PrintIterable(stream, rows, "\n");
  return stream;
}

}  // namespace std

namespace {

auto MakeSymbolTable(query::Query &query) {
  query::SymbolTable symbol_table;
  query::SymbolGenerator symbol_generator(symbol_table);
  query.Accept(symbol_generator);
  return symbol_table;
}

void AssertRows(const std::vector<std::vector<TypedValue>> &datum,
                std::vector<std::vector<TypedValue>> expected) {
  auto row_equal = [](const auto &row1, const auto &row2) {
    if (row1.size() != row2.size()) {
      return false;
    }
    TypedValue::BoolEqual value_eq;
    auto row1_it = row1.begin();
    for (auto row2_it = row2.begin(); row2_it != row2.end();
         ++row1_it, ++row2_it) {
      if (!value_eq(*row1_it, *row2_it)) {
        return false;
      }
    }
    return true;
  };
  ASSERT_TRUE(std::is_permutation(datum.begin(), datum.end(), expected.begin(),
                                  expected.end(), row_equal))
      << "Actual rows:" << std::endl
      << datum << std::endl
      << "Expected rows:" << std::endl
      << expected;
};

void CheckPlansProduce(
    size_t expected_plan_count, AstTreeStorage &storage, GraphDbAccessor &dba,
    std::function<void(const std::vector<std::vector<TypedValue>> &)> check) {
  auto symbol_table = MakeSymbolTable(*storage.query());
  auto plans =
      MakeLogicalPlan<VariableStartPlanner>(storage, symbol_table, &dba);
  EXPECT_EQ(std::distance(plans.begin(), plans.end()), expected_plan_count);
  for (const auto &plan : plans) {
    auto *produce = dynamic_cast<Produce *>(plan.get());
    ASSERT_TRUE(produce);
    auto results = CollectProduce(produce, symbol_table, dba);
    check(results);
  }
}

TEST(TestVariableStartPlanner, MatchReturn) {
  Dbms dbms;
  auto dba = dbms.active();
  // Make a graph (v1) -[:r]-> (v2)
  auto v1 = dba->insert_vertex();
  auto v2 = dba->insert_vertex();
  dba->insert_edge(v1, v2, dba->edge_type("r"));
  dba->advance_command();
  // Test MATCH (n) -[r]-> (m) RETURN n
  AstTreeStorage storage;
  QUERY(
      MATCH(PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m"))),
      RETURN("n"));
  // We have 2 nodes `n` and `m` from which we could start, so expect 2 plans.
  CheckPlansProduce(2, storage, *dba, [&](const auto &results) {
    // We expect to produce only a single (v1) node.
    AssertRows(results, {{v1}});
  });
}

TEST(TestVariableStartPlanner, MatchTripletPatternReturn) {
  Dbms dbms;
  auto dba = dbms.active();
  // Make a graph (v1) -[:r]-> (v2) -[:r]-> (v3)
  auto v1 = dba->insert_vertex();
  auto v2 = dba->insert_vertex();
  auto v3 = dba->insert_vertex();
  dba->insert_edge(v1, v2, dba->edge_type("r"));
  dba->insert_edge(v2, v3, dba->edge_type("r"));
  dba->advance_command();
  {
    // Test `MATCH (n) -[r]-> (m) -[e]-> (l) RETURN n`
    AstTreeStorage storage;
    QUERY(
        MATCH(PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m"),
                      EDGE("e", nullptr, Direction::OUT), NODE("l"))),
        RETURN("n"));
    // We have 3 nodes: `n`, `m` and `l` from which we could start.
    CheckPlansProduce(3, storage, *dba, [&](const auto &results) {
      // We expect to produce only a single (v1) node.
      AssertRows(results, {{v1}});
    });
  }
  {
    // Equivalent to `MATCH (n) -[r]-> (m), (m) -[e]-> (l) RETURN n`.
    AstTreeStorage storage;
    QUERY(
        MATCH(
            PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m")),
            PATTERN(NODE("m"), EDGE("e", nullptr, Direction::OUT), NODE("l"))),
        RETURN("n"));
    CheckPlansProduce(3, storage, *dba, [&](const auto &results) {
      AssertRows(results, {{v1}});
    });
  }
}

TEST(TestVariableStartPlanner, MatchOptionalMatchReturn) {
  Dbms dbms;
  auto dba = dbms.active();
  // Make a graph (v1) -[:r]-> (v2) -[:r]-> (v3)
  auto v1 = dba->insert_vertex();
  auto v2 = dba->insert_vertex();
  auto v3 = dba->insert_vertex();
  dba->insert_edge(v1, v2, dba->edge_type("r"));
  dba->insert_edge(v2, v3, dba->edge_type("r"));
  dba->advance_command();
  // Test MATCH (n) -[r]-> (m) OPTIONAL MATCH (m) -[e]-> (l) RETURN n, l
  AstTreeStorage storage;
  QUERY(
      MATCH(PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m"))),
      OPTIONAL_MATCH(
          PATTERN(NODE("m"), EDGE("e", nullptr, Direction::OUT), NODE("l"))),
      RETURN("n", "l"));
  // We have 2 nodes `n` and `m` from which we could start the MATCH, and 2
  // nodes for OPTIONAL MATCH. This should produce 2 * 2 plans.
  CheckPlansProduce(4, storage, *dba, [&](const auto &results) {
    // We expect to produce 2 rows:
    //  * (v1), (v3)
    //  * (v2), null
    AssertRows(results, {{v1, v3}, {v2, TypedValue::Null}});
  });
}

TEST(TestVariableStartPlanner, MatchOptionalMatchMergeReturn) {
  Dbms dbms;
  auto dba = dbms.active();
  // Graph (v1) -[:r]-> (v2)
  auto v1 = dba->insert_vertex();
  auto v2 = dba->insert_vertex();
  auto r_type = dba->edge_type("r");
  dba->insert_edge(v1, v2, r_type);
  dba->advance_command();
  // Test MATCH (n) -[r]-> (m) OPTIONAL MATCH (m) -[e]-> (l)
  //      MERGE (u) -[q:r]-> (v) RETURN n, m, l, u, v
  AstTreeStorage storage;
  QUERY(
      MATCH(PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m"))),
      OPTIONAL_MATCH(
          PATTERN(NODE("m"), EDGE("e", nullptr, Direction::OUT), NODE("l"))),
      MERGE(PATTERN(NODE("u"), EDGE("q", r_type, Direction::OUT), NODE("v"))),
      RETURN("n", "m", "l", "u", "v"));
  // Since MATCH, OPTIONAL MATCH and MERGE each have 2 nodes from which we can
  // start, we generate 2 * 2 * 2 plans.
  CheckPlansProduce(8, storage, *dba, [&](const auto &results) {
    // We expect to produce a single row: (v1), (v2), null, (v1), (v2)
    AssertRows(results, {{v1, v2, TypedValue::Null, v1, v2}});
  });
}

TEST(TestVariableStartPlanner, MatchWithMatchReturn) {
  Dbms dbms;
  auto dba = dbms.active();
  // Graph (v1) -[:r]-> (v2)
  auto v1 = dba->insert_vertex();
  auto v2 = dba->insert_vertex();
  dba->insert_edge(v1, v2, dba->edge_type("r"));
  dba->advance_command();
  // Test MATCH (n) -[r]-> (m) WITH n MATCH (m) -[r]-> (l) RETURN n, m, l
  AstTreeStorage storage;
  QUERY(
      MATCH(PATTERN(NODE("n"), EDGE("r", nullptr, Direction::OUT), NODE("m"))),
      WITH("n"),
      MATCH(PATTERN(NODE("m"), EDGE("r", nullptr, Direction::OUT), NODE("l"))),
      RETURN("n", "m", "l"));
  // We can start from 2 nodes in each match. Since WITH separates query parts,
  // we expect to get 2 plans for each, which totals 2 * 2.
  CheckPlansProduce(4, storage, *dba, [&](const auto &results) {
    // We expect to produce a single row: (v1), (v1), (v2)
    AssertRows(results, {{v1, v1, v2}});
  });
}

}  // namespace