Refactor collecting filters during planning

Summary:
Move QueryParts and Filters to a new file.
Reorganize FilterInfo struct.
Remove label filter if we do indexed scan by label.
Remove property filter used in indexed scan.

Reviewers: florijan

Reviewed By: florijan

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D915
This commit is contained in:
Teon Banek 2017-10-24 10:18:20 +02:00
parent cbf8dacc11
commit db34cb2a40
7 changed files with 828 additions and 683 deletions

View File

@ -204,6 +204,7 @@ set(memgraph_src_files
${src_dir}/query/interpret/awesome_memgraph_functions.cpp
${src_dir}/query/interpreter.cpp
${src_dir}/query/plan/operator.cpp
${src_dir}/query/plan/preprocess.cpp
${src_dir}/query/plan/rule_based_planner.cpp
${src_dir}/query/plan/variable_start_planner.cpp
${src_dir}/query/typed_value.cpp

View File

@ -4,6 +4,7 @@
#pragma once
#include "query/plan/preprocess.hpp"
#include "query/plan/rule_based_planner.hpp"
#include "query/plan/variable_start_planner.hpp"

View File

@ -0,0 +1,489 @@
#include "query/plan/preprocess.hpp"
#include <algorithm>
#include <functional>
#include <stack>
namespace query::plan {
namespace {
// Walks the atoms of `pattern`, which are expected to be laid out as
// NodeAtom (EdgeAtom NodeAtom)*. `base` is invoked once with the first node;
// `collect` is invoked for each following (previous node, edge, node) triplet
// in pattern order.
void ForEachPattern(
    Pattern &pattern, std::function<void(NodeAtom *)> base,
    std::function<void(NodeAtom *, EdgeAtom *, NodeAtom *)> collect) {
  DCHECK(!pattern.atoms_.empty()) << "Missing atoms in pattern";
  auto atom_it = pattern.atoms_.begin();
  auto *left_node = dynamic_cast<NodeAtom *>(*atom_it);
  ++atom_it;
  DCHECK(left_node) << "First pattern atom is not a node";
  base(left_node);
  // Consume the rest two atoms at a time: an edge followed by its end node.
  for (; atom_it != pattern.atoms_.end();) {
    auto *edge = dynamic_cast<EdgeAtom *>(*atom_it);
    ++atom_it;
    DCHECK(edge) << "Expected an edge atom in pattern.";
    DCHECK(atom_it != pattern.atoms_.end())
        << "Edge atom should not end the pattern.";
    auto *right_node = dynamic_cast<NodeAtom *>(*atom_it);
    ++atom_it;
    DCHECK(right_node) << "Expected a node atom in pattern.";
    collect(left_node, edge, right_node);
    // The end node of this triplet starts the next one.
    left_node = right_node;
  }
}
// Visitor which gathers the symbols of identifiers found in the visited AST
// nodes. The result accumulates in `symbols_`; a symbol bound by an `All`
// expression is dropped again once that expression has been fully visited.
class UsedSymbolsCollector : public HierarchicalTreeVisitor {
 public:
  explicit UsedSymbolsCollector(const SymbolTable &symbol_table)
      : symbol_table_(symbol_table) {}

  using HierarchicalTreeVisitor::PostVisit;
  using HierarchicalTreeVisitor::PreVisit;
  using HierarchicalTreeVisitor::Visit;

  bool PostVisit(All &all) override {
    // `all` binds its own identifier, so that symbol is not free (unbound)
    // and must not be reported.
    const auto &bound_symbol = symbol_table_.at(*all.identifier_);
    symbols_.erase(bound_symbol);
    return true;
  }

  bool Visit(Identifier &ident) override {
    const auto &used_symbol = symbol_table_.at(ident);
    symbols_.insert(used_symbol);
    return true;
  }

  // Leaves which carry no identifiers; nothing to collect.
  bool Visit(PrimitiveLiteral &) override { return true; }
  bool Visit(ParameterLookup &) override { return true; }
  bool Visit(query::CreateIndex &) override { return true; }

  // Symbols collected so far.
  std::unordered_set<Symbol> symbols_;
  // Table used to resolve identifiers to symbols.
  const SymbolTable &symbol_table_;
};
// Converts multiple Patterns to Expansions. Each Pattern can contain an
// arbitrarily long chain of nodes and edges. The conversion to an Expansion is
// done by splitting a pattern into triplets (node1, edge, node2). The triplets
// conserve the semantics of the pattern. For example, in a pattern:
// (m) -[e]- (n) -[f]- (o) the same can be achieved with:
// (m) -[e]- (n), (n) -[f]- (o).
// This representation makes it easier to permute from which node or edge we
// want to start expanding.
//
// A pattern consisting of a single node yields an Expansion with only `node1`
// set. For variable length edges, the symbols used by the bound and filter
// expressions are stored in `Expansion::symbols_in_range`, excluding the
// symbols which the expansion binds itself (inner edge and inner node).
std::vector<Expansion> NormalizePatterns(
    const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
  std::vector<Expansion> expansions;
  auto ignore_node = [&](auto *) {};
  auto collect_expansion = [&](auto *prev_node, auto *edge,
                               auto *current_node) {
    UsedSymbolsCollector collector(symbol_table);
    if (edge->IsVariable()) {
      if (edge->lower_bound_) edge->lower_bound_->Accept(collector);
      if (edge->upper_bound_) edge->upper_bound_->Accept(collector);
      if (edge->filter_expression_) edge->filter_expression_->Accept(collector);
      // Remove symbols which are bound by variable expansions. This has to
      // happen after *all* expressions were visited; erasing before visiting
      // the filter expression would let the inner symbols it references leak
      // into the collected set.
      collector.symbols_.erase(symbol_table.at(*edge->inner_edge_));
      collector.symbols_.erase(symbol_table.at(*edge->inner_node_));
    }
    expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, false,
                                      collector.symbols_, current_node});
  };
  for (const auto &pattern : patterns) {
    if (pattern->atoms_.size() == 1U) {
      // A lone node has no (node, edge, node) triplet to report.
      auto *node = dynamic_cast<NodeAtom *>(pattern->atoms_[0]);
      DCHECK(node) << "First pattern atom is not a node";
      expansions.emplace_back(Expansion{node});
    } else {
      ForEachPattern(*pattern, ignore_node, collect_expansion);
    }
  }
  return expansions;
}
// Populates `matching` from the given patterns and the optional `where`.
// Patterns are normalized to Expansions and appended to those already present
// in `matching`. The edge symbols of this call form one Cyphermorphism set.
// Filters written inside patterns are lifted out into new expressions (as if
// they were in a Where clause), and user declared pattern identifiers are
// recorded as named paths.
void AddMatching(const std::vector<Pattern *> &patterns, Where *where,
                 SymbolTable &symbol_table, AstTreeStorage &storage,
                 Matching &matching) {
  std::unordered_set<Symbol> clause_edge_symbols;
  for (const auto &expansion : NormalizePatterns(symbol_table, patterns)) {
    // Earlier calls may have stored expansions, so the index is the current
    // size rather than a local counter.
    const int index = matching.expansions.size();
    const auto &first_symbol = symbol_table.at(*expansion.node1->identifier_);
    matching.node_symbol_to_expansions[first_symbol].insert(index);
    matching.expansion_symbols.insert(first_symbol);
    if (expansion.edge != nullptr) {
      const auto &edge_symbol = symbol_table.at(*expansion.edge->identifier_);
      // Edges of one clause must be pairwise distinct (Cyphermorphism).
      clause_edge_symbols.insert(edge_symbol);
      const auto &second_symbol =
          symbol_table.at(*expansion.node2->identifier_);
      matching.node_symbol_to_expansions[second_symbol].insert(index);
      matching.expansion_symbols.insert(edge_symbol);
      matching.expansion_symbols.insert(second_symbol);
    }
    matching.expansions.push_back(expansion);
  }
  if (!clause_edge_symbols.empty()) {
    matching.edge_symbols.emplace_back(clause_edge_symbols);
  }
  for (auto *pattern : patterns) {
    matching.filters.CollectPatternFilters(*pattern, symbol_table, storage);
    if (!pattern->identifier_->user_declared_) continue;
    // A named path is described by the symbols of all of its atoms, in order.
    std::vector<Symbol> atom_symbols;
    for (auto *atom : pattern->atoms_) {
      atom_symbols.emplace_back(symbol_table.at(*atom->identifier_));
    }
    matching.named_paths.emplace(symbol_table.at(*pattern->identifier_),
                                 std::move(atom_symbols));
  }
  if (where != nullptr) {
    matching.filters.CollectWhereFilter(*where, symbol_table);
  }
}
// Convenience overload which forwards the patterns and the where of `match`
// to the pattern based AddMatching.
void AddMatching(const Match &match, SymbolTable &symbol_table,
                 AstTreeStorage &storage, Matching &matching) {
  AddMatching(match.patterns_, match.where_, symbol_table, storage, matching);
}
// Flattens a tree of AndOperators into the list of its non-And operands.
// Operands are discovered depth first, taking the right operand of each
// AndOperator before its left one.
auto SplitExpressionOnAnd(Expression *expression) {
  std::vector<Expression *> operands;
  // Used as a LIFO work list.
  std::vector<Expression *> todo{expression};
  while (!todo.empty()) {
    auto *candidate = todo.back();
    todo.pop_back();
    auto *conjunction = dynamic_cast<AndOperator *>(candidate);
    if (!conjunction) {
      operands.push_back(candidate);
      continue;
    }
    todo.push_back(conjunction->expression1_);
    todo.push_back(conjunction->expression2_);
  }
  return operands;
}
} // namespace
// Creates a filter which requires that `property` of `symbol` equals `value`.
// `symbol_table` is used to find out whether `value` itself refers to
// `symbol`.
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table,
                               const Symbol &symbol,
                               const GraphDbTypes::Property &property,
                               Expression *value)
    : symbol_(symbol), property_(property), value_(value) {
  UsedSymbolsCollector value_symbols(symbol_table);
  value->Accept(value_symbols);
  const auto &symbols_in_value = value_symbols.symbols_;
  is_symbol_in_value_ = utils::Contains(symbols_in_value, symbol);
}
// Creates a filter which requires that `property` of `symbol` lies within the
// given bounds; either bound may be absent. `symbol_table` is used to find
// out whether a bound expression itself refers to `symbol`.
PropertyFilter::PropertyFilter(
    const SymbolTable &symbol_table, const Symbol &symbol,
    const GraphDbTypes::Property &property,
    const std::experimental::optional<PropertyFilter::Bound> &lower_bound,
    const std::experimental::optional<PropertyFilter::Bound> &upper_bound)
    : symbol_(symbol),
      property_(property),
      lower_bound_(lower_bound),
      upper_bound_(upper_bound) {
  UsedSymbolsCollector bound_symbols(symbol_table);
  // Visits the expression of a bound, if the bound is present.
  auto collect_from = [&bound_symbols](const auto &maybe_bound) {
    if (maybe_bound) maybe_bound->value()->Accept(bound_symbols);
  };
  collect_from(lower_bound);
  collect_from(upper_bound);
  is_symbol_in_value_ = utils::Contains(bound_symbols.symbols_, symbol);
}
// Member-wise equality of two PropertyFilters. Value and bound expressions
// are compared through whatever `==` yields for their stored handles; bounds
// additionally need to agree on presence and type.
bool operator==(const PropertyFilter &a, const PropertyFilter &b) {
  auto same_bound = [](const auto &lhs, const auto &rhs) {
    const bool lhs_set = static_cast<bool>(lhs);
    const bool rhs_set = static_cast<bool>(rhs);
    if (lhs_set != rhs_set) return false;
    if (!lhs_set) return true;
    return lhs->value() == rhs->value() && lhs->type() == rhs->type();
  };
  if (!(a.symbol_ == b.symbol_)) return false;
  if (!(a.property_ == b.property_)) return false;
  if (!(a.is_symbol_in_value_ == b.is_symbol_in_value_)) return false;
  if (!(a.value_ == b.value_)) return false;
  if (!same_bound(a.lower_bound_, b.lower_bound_)) return false;
  return same_bound(a.upper_bound_, b.upper_bound_);
}
// Member-wise equality of two FilterInfos.
bool operator==(const FilterInfo &a, const FilterInfo &b) {
  if (!(a.type == b.type)) return false;
  if (!(a.expression == b.expression)) return false;
  if (!(a.used_symbols == b.used_symbols)) return false;
  if (!(a.labels == b.labels)) return false;
  return a.property_filter == b.property_filter;
}
// Removes the first stored filter which compares equal to `filter`; does
// nothing when there is no such filter.
void Filters::EraseFilter(const FilterInfo &filter) {
  const auto position =
      std::find(all_filters_.begin(), all_filters_.end(), filter);
  if (position != all_filters_.end()) {
    all_filters_.erase(position);
  }
}
// Removes `label` from every Label filter which uses `symbol`. A filter that
// is left without labels is erased as a whole.
void Filters::EraseLabelFilter(const Symbol &symbol,
                               const GraphDbTypes::Label &label) {
  auto it = all_filters_.begin();
  while (it != all_filters_.end()) {
    auto &labels = it->labels;
    const bool is_candidate = it->type == FilterInfo::Type::Label &&
                              utils::Contains(it->used_symbols, symbol);
    // Only search the labels of filters which may be affected.
    const auto found =
        is_candidate ? std::find(labels.begin(), labels.end(), label)
                     : labels.end();
    if (found == labels.end()) {
      ++it;
      continue;
    }
    labels.erase(found);
    DCHECK(!utils::Contains(it->labels, label))
        << "Didn't expect duplicated labels";
    if (it->labels.empty()) {
      // Nothing left to test, so the whole FilterInfo goes away; erase hands
      // back the iterator to continue from.
      it = all_filters_.erase(it);
    } else {
      ++it;
    }
  }
}
// Lifts the constraints written inline in `pattern` (node labels, node and
// edge property maps) into FilterInfo entries appended to `all_filters_`.
// New AST nodes are created through `storage`; for variable length edges a new
// symbol is additionally registered in `symbol_table`.
void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
AstTreeStorage &storage) {
// One collector is shared by the helpers below; each of them clears
// `symbols_` before collecting.
UsedSymbolsCollector collector(symbol_table);
// Handles the property map of a variable length edge. Two Generic filters are
// emitted per property.
auto add_properties_variable = [&](EdgeAtom *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
for (auto &prop_pair : atom->properties_) {
// We need to store two property-lookup filters in all_filters. One is
// used for inlining property filters into variable expansion, and
// utilizes the inner_edge symbol. The other is used for post-expansion
// filtering and does not use the inner_edge symbol, but the edge symbol
// (a list of edges).
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
// The inner symbols are added explicitly to the used symbols of the inline
// filter, on top of whatever the property value expression uses.
collector.symbols_.emplace(symbol_table.at(*atom->inner_node_));
collector.symbols_.emplace(symbol_table.at(*atom->inner_edge_));
// First handle the inline property filter.
auto *property_lookup =
storage.Create<PropertyLookup>(atom->inner_edge_, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// Currently, variable expand has no gains if we set PropertyFilter.
all_filters_.emplace_back(FilterInfo{FilterInfo::Type::Generic,
prop_equal, collector.symbols_});
}
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
// Now handle the post-expansion filter.
// Create a new identifier and a symbol which will be filled in All.
auto *identifier = atom->identifier_->Clone(storage);
symbol_table[*identifier] =
symbol_table.CreateSymbol(identifier->name_, false);
// Create an equality expression and store it in all_filters_.
auto *property_lookup =
storage.Create<PropertyLookup>(identifier, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// Currently, variable expand has no gains if we set PropertyFilter.
// The equality is wrapped in All over the edge identifier, with the cloned
// identifier as the variable bound by All.
all_filters_.emplace_back(
FilterInfo{FilterInfo::Type::Generic,
storage.Create<All>(identifier, atom->identifier_,
storage.Create<Where>(prop_equal)),
collector.symbols_});
}
}
};
// Handles the property map of a node or of a regular (non variable) edge.
// One Property filter, carrying a PropertyFilter, is emitted per property.
auto add_properties = [&](auto *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
for (auto &prop_pair : atom->properties_) {
// Create an equality expression and store it in all_filters_.
auto *property_lookup =
storage.Create<PropertyLookup>(atom->identifier_, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// Used symbols are collected from the whole equality expression.
collector.symbols_.clear();
prop_equal->Accept(collector);
FilterInfo filter_info{FilterInfo::Type::Property, prop_equal,
collector.symbols_};
// Store a PropertyFilter on the value of the property.
filter_info.property_filter.emplace(
symbol_table, symbol, prop_pair.first.second, prop_pair.second);
all_filters_.emplace_back(filter_info);
}
};
// Emits the label filter of a node (if it has labels) followed by the
// filters for its property map.
auto add_node_filter = [&](NodeAtom *node) {
const auto &node_symbol = symbol_table.at(*node->identifier_);
if (!node->labels_.empty()) {
// Create a LabelsTest and store it.
auto *labels_test =
storage.Create<LabelsTest>(node->identifier_, node->labels_);
auto label_filter = FilterInfo{FilterInfo::Type::Label, labels_test,
std::unordered_set<Symbol>{node_symbol}};
label_filter.labels = node->labels_;
all_filters_.emplace_back(label_filter);
}
add_properties(node);
};
// Emits the filters of an edge and then of the node it leads to. The first
// node of the triplet is ignored here; it was handled either as the first
// node of the pattern or as the end node of the previous triplet.
auto add_expand_filter = [&](NodeAtom *, EdgeAtom *edge, NodeAtom *node) {
if (edge->IsVariable())
add_properties_variable(edge);
else
add_properties(edge);
add_node_filter(node);
};
ForEachPattern(pattern, add_node_filter, add_expand_filter);
}
// Splits the expression of `where` on AND and appends one analyzed FilterInfo
// per resulting operand to `all_filters_`. The analysis collects additional
// information for potential property and label indexing.
void Filters::CollectWhereFilter(Where &where,
                                 const SymbolTable &symbol_table) {
  for (auto *conjunct : SplitExpressionOnAnd(where.expression_)) {
    all_filters_.emplace_back(AnalyzeFilter(conjunct, symbol_table));
  }
}
// Analyzes the filter expression by collecting information on filtering labels
// and properties to be used with indexing.
//
// The returned FilterInfo always carries `expr` and its used symbols. Its type
// starts as Generic and is changed to Label or Property only when `expr` is a
// label test on a plain identifier, or an (in)equality comparison with a
// property lookup on a plain identifier as one of its operands. `expr` is
// expected not to be an AndOperator.
FilterInfo Filters::AnalyzeFilter(Expression *expr,
const SymbolTable &symbol_table) {
using Bound = PropertyFilter::Bound;
// Create the base filter info.
FilterInfo filter{FilterInfo::Type::Generic, expr};
{
UsedSymbolsCollector collector(symbol_table);
expr->Accept(collector);
filter.used_symbols = collector.symbols_;
}
// Yields true when `maybe_lookup` is a PropertyLookup whose looked up
// expression is an Identifier. Results are written to the out parameters;
// `prop_lookup` is assigned even when the identifier check fails.
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
auto *&ident) {
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
(ident = dynamic_cast<Identifier *>(prop_lookup->expression_));
};
// Marks `filter` as an equality Property filter if `maybe_lookup` is a
// property lookup on an identifier; `val_expr` is the compared value.
auto add_prop_equal = [&](auto *maybe_lookup, auto *val_expr) {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
filter.type = FilterInfo::Type::Property;
filter.property_filter =
PropertyFilter(symbol_table, symbol_table.at(*ident),
prop_lookup->property_, val_expr);
}
};
// Handles `expr1 > expr2` (or `>=`, depending on `bound_type`). Both checks
// below are independent, so when both operands are property lookups the
// second emplace replaces the result of the first.
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(expr1, prop_lookup, ident)) {
// n.prop > value
// The value acts as the lower bound of the property.
filter.type = FilterInfo::Type::Property;
filter.property_filter.emplace(
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
Bound(expr2, bound_type), std::experimental::nullopt);
}
if (get_property_lookup(expr2, prop_lookup, ident)) {
// value > n.prop
// The value acts as the upper bound of the property.
filter.type = FilterInfo::Type::Property;
filter.property_filter.emplace(
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
std::experimental::nullopt, Bound(expr1, bound_type));
}
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
DCHECK(!dynamic_cast<AndOperator *>(expr))
<< "Expected AndOperators have been split.";
if (auto *labels_test = dynamic_cast<LabelsTest *>(expr)) {
// Since LabelsTest may contain any expression, we can only use the
// simplest test on an identifier.
if (auto *ident = dynamic_cast<Identifier *>(labels_test->expression_)) {
filter.type = FilterInfo::Type::Label;
filter.labels = labels_test->labels_;
}
} else if (auto *eq = dynamic_cast<EqualOperator *>(expr)) {
// Try to get property equality test from the top expressions.
// Unfortunately, we cannot go deeper inside Equal, because chained equals
// need not correspond to And. For example, `(n.prop = value) = false`:
// EQ
// / \
// EQ false -- top expressions
// / \
// n.prop value
// Here the `prop` may be different than `value` resulting in `false`. This
// would compare with the top level `false`, producing `true`. Therefore, it
// is incorrect to pick up `n.prop = value` for scanning by property index.
add_prop_equal(eq->expression1_, eq->expression2_);
// And reversed.
// If both operands are property lookups, this second call overwrites the
// PropertyFilter stored by the first one.
add_prop_equal(eq->expression2_, eq->expression1_);
// TODO: What about n.prop = m.prop case? Do we generate 2 PropertyFilters?
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
add_prop_greater(gt->expression1_, gt->expression2_,
Bound::Type::EXCLUSIVE);
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
add_prop_greater(ge->expression1_, ge->expression2_,
Bound::Type::INCLUSIVE);
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
// Like greater, but in reverse.
add_prop_greater(lt->expression2_, lt->expression1_,
Bound::Type::EXCLUSIVE);
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
// Like greater equal, but in reverse.
add_prop_greater(le->expression2_, le->expression1_,
Bound::Type::INCLUSIVE);
}
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
// indexing by range. Note, that the generated Ast uses AND for chained
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
// as `expr1 < n.prop AND n.prop < expr2`.
return filter;
}
// Splits the clauses of the query held by `storage` into QueryParts. A new
// part is started after each With or Unwind clause, and collecting stops at
// the first Return clause. In the process new Ast nodes may be created, e.g.
// filter expressions.
std::vector<QueryPart> CollectQueryParts(SymbolTable &symbol_table,
                                         AstTreeStorage &storage) {
  auto query = storage.query();
  // There is always at least one part; the last one is the part being filled.
  std::vector<QueryPart> parts(1);
  for (auto &clause : query->clauses_) {
    auto &current = parts.back();
    if (auto *match = dynamic_cast<Match *>(clause)) {
      if (!match->optional_) {
        DCHECK(current.optional_matching.empty())
            << "Match clause cannot follow optional match.";
        AddMatching(*match, symbol_table, storage, current.matching);
      } else {
        current.optional_matching.emplace_back(Matching{});
        AddMatching(*match, symbol_table, storage,
                    current.optional_matching.back());
      }
      continue;
    }
    // Everything which is not a Match is kept as is, in order of appearance.
    current.remaining_clauses.push_back(clause);
    if (auto *merge = dynamic_cast<query::Merge *>(clause)) {
      current.merge_matching.emplace_back(Matching{});
      AddMatching({merge->pattern_}, nullptr, symbol_table, storage,
                  current.merge_matching.back());
      continue;
    }
    const bool ends_part = dynamic_cast<With *>(clause) ||
                           dynamic_cast<query::Unwind *>(clause);
    if (ends_part) {
      // This query part is done, continue with a new one.
      parts.emplace_back(QueryPart{});
      continue;
    }
    if (dynamic_cast<Return *>(clause)) {
      // TODO: Support RETURN UNION ...
      return parts;
    }
  }
  return parts;
}
} // namespace query::plan

View File

@ -0,0 +1,234 @@
/// @file
#pragma once
#include <experimental/optional>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace query::plan {
/// Normalized representation of a pattern that needs to be matched.
///
/// This is a plain aggregate. NormalizePatterns brace-initializes it
/// positionally (node1, edge, direction, is_flipped, symbols_in_range, node2),
/// so the order of the members below is significant.
struct Expansion {
/// The first node in the expansion, it can be a single node.
NodeAtom *node1 = nullptr;
/// Optional edge which connects the 2 nodes.
EdgeAtom *edge = nullptr;
/// Direction of the edge, it may be flipped compared to original
/// @c EdgeAtom during plan generation.
EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
/// True if the direction and nodes were flipped.
bool is_flipped = false;
/// Set of symbols found inside the range expressions of a variable path edge.
/// Left empty for edges which are not variable.
std::unordered_set<Symbol> symbols_in_range{};
/// Optional node at the other end of an edge. If the expansion
/// contains an edge, then this node is required.
NodeAtom *node2 = nullptr;
};
/// Stores the symbols and expression used to filter a property.
///
/// A filter is either an equality test against @c value_ or a range test
/// against @c lower_bound_ and/or @c upper_bound_, depending on which
/// constructor was used.
class PropertyFilter {
public:
using Bound = ScanAllByLabelPropertyRange::Bound;
/// Construct an equality filter: the property of the symbol must equal the
/// value produced by the given expression.
PropertyFilter(const SymbolTable &, const Symbol &,
const GraphDbTypes::Property &, Expression *);
/// Construct a range filter from an optional lower and an optional upper
/// bound (in that order).
PropertyFilter(const SymbolTable &, const Symbol &,
const GraphDbTypes::Property &,
const std::experimental::optional<Bound> &,
const std::experimental::optional<Bound> &);
/// Symbol whose property is looked up.
Symbol symbol_;
/// Property which is looked up on @c symbol_.
GraphDbTypes::Property property_;
/// True if the same symbol is used in expressions for value or bounds.
bool is_symbol_in_value_ = false;
/// Expression which when evaluated produces the value a property must
/// equal. Remains null for range filters.
Expression *value_ = nullptr;
/// Expressions which produce lower and upper bounds for a property.
std::experimental::optional<Bound> lower_bound_{};
std::experimental::optional<Bound> upper_bound_{};
};
bool operator==(const PropertyFilter &, const PropertyFilter &);
/// Negation of the PropertyFilter equality operator.
inline bool operator!=(const PropertyFilter &a, const PropertyFilter &b) {
  const bool equal = (a == b);
  return !equal;
}
/// Stores additional information for a filter expression.
///
/// This is a plain aggregate which is brace-initialized positionally as
/// (type, expression[, used_symbols]) by the filter collecting code, so the
/// order of the members below is significant.
struct FilterInfo {
/// A FilterInfo can be a generic filter expression or a specific filtering
/// applied for labels or a property. Non generic types contain extra
/// information which can be used to produce indexed scans of graph
/// elements.
enum class Type { Generic, Label, Property };
/// Kind of the filter; tells which of the optional members below are filled.
Type type;
/// The filter expression which must be satisfied.
Expression *expression;
/// Set of used symbols by the filter @c expression.
std::unordered_set<Symbol> used_symbols;
/// Labels for Type::Label filtering.
std::vector<GraphDbTypes::Label> labels;
/// Property information for Type::Property filtering.
std::experimental::optional<PropertyFilter> property_filter;
};
bool operator==(const FilterInfo &, const FilterInfo &);
/// Negation of the FilterInfo equality operator.
inline bool operator!=(const FilterInfo &a, const FilterInfo &b) {
  const bool equal = (a == b);
  return !equal;
}
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
///
/// Info is stored as a list of FilterInfo objects corresponding to all filter
/// expressions that should be generated.
class Filters {
 public:
  using iterator = std::vector<FilterInfo>::iterator;
  using const_iterator = std::vector<FilterInfo>::const_iterator;

  // Container-like access to the stored FilterInfo objects.
  auto begin() { return all_filters_.begin(); }
  auto begin() const { return all_filters_.begin(); }
  auto end() { return all_filters_.end(); }
  auto end() const { return all_filters_.end(); }
  auto empty() const { return all_filters_.empty(); }

  // Erase a single filter or a range of filters; forwards to the underlying
  // vector, so the usual iterator invalidation rules apply.
  auto erase(iterator pos) { return all_filters_.erase(pos); }
  auto erase(const_iterator pos) { return all_filters_.erase(pos); }
  auto erase(iterator first, iterator last) {
    return all_filters_.erase(first, last);
  }
  auto erase(const_iterator first, const_iterator last) {
    return all_filters_.erase(first, last);
  }

  /// Returns the set of all labels which are tested on `symbol` by the stored
  /// label filters.
  auto FilteredLabels(const Symbol &symbol) const {
    std::unordered_set<GraphDbTypes::Label> labels;
    for (const auto &filter : all_filters_) {
      if (filter.type == FilterInfo::Type::Label &&
          utils::Contains(filter.used_symbols, symbol)) {
        DCHECK(filter.used_symbols.size() == 1U)
            << "Expected a single used symbol for label filter";
        labels.insert(filter.labels.begin(), filter.labels.end());
      }
    }
    return labels;
  }

  // Remove a filter; may invalidate iterators.
  void EraseFilter(const FilterInfo &);

  // Remove a label filter for symbol; may invalidate iterators.
  void EraseLabelFilter(const Symbol &, const GraphDbTypes::Label &);

  /// Returns copies of the stored property filters which look up a property
  /// on `symbol`.
  ///
  /// Filters on properties of other symbols are skipped, so that the result
  /// can be used for an indexed scan of `symbol`.
  auto PropertyFilters(const Symbol &symbol) const {
    std::vector<FilterInfo> filters;
    for (const auto &filter : all_filters_) {
      if (filter.type != FilterInfo::Type::Property) continue;
      // A Property filter without the property information cannot be
      // attributed to any symbol.
      if (!filter.property_filter) continue;
      if (filter.property_filter->symbol_ == symbol) {
        filters.push_back(filter);
      }
    }
    return filters;
  }

  /// Collects filtering information from a pattern.
  ///
  /// Goes through all the atoms in a pattern and generates filter expressions
  /// for found labels, properties and edge types. The generated expressions are
  /// stored.
  void CollectPatternFilters(Pattern &, SymbolTable &, AstTreeStorage &);

  /// Collects filtering information from a where expression.
  ///
  /// Takes the where expression and stores it, then analyzes the expression for
  /// additional information. The additional information is used to populate
  /// label filters and property filters, so that indexed scanning can use it.
  void CollectWhereFilter(Where &, const SymbolTable &);

 private:
  // Builds a FilterInfo for a single (already AND-split) filter expression.
  FilterInfo AnalyzeFilter(Expression *, const SymbolTable &);

  // All collected filters, in order of collection.
  std::vector<FilterInfo> all_filters_;
};
/// Normalized representation of a single or multiple Match clauses.
///
/// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
/// WHERE c.prop < 42` will produce the following.
/// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and
/// `(n) -[e3]- (m)`.
/// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the
/// first `MATCH` and the set `{e3}` for the second.
/// Filters will contain 2 @c FilterInfo entries. One for testing `:Label` on
/// symbol `a` and the other obtained from `WHERE` on symbol `c`.
struct Matching {
/// All expansions that need to be performed across @c Match clauses.
std::vector<Expansion> expansions;
/// Symbols for edges established in match, used to ensure Cyphermorphism.
///
/// There are multiple sets, because each Match clause determines a single
/// set.
std::vector<std::unordered_set<Symbol>> edge_symbols;
/// Information on used filter expressions while matching.
Filters filters;
/// Maps node symbols to expansions which bind them. The stored integers are
/// indices into @c expansions.
std::unordered_map<Symbol, std::set<int>> node_symbol_to_expansions{};
/// Maps named path symbols to a vector of Symbols that define its pattern.
std::unordered_map<Symbol, std::vector<Symbol>> named_paths{};
/// All node and edge symbols across all expansions (from all matches).
std::unordered_set<Symbol> expansion_symbols{};
};
/// @brief Represents a read (+ write) part of a query. Parts are split on
/// `WITH` clauses.
///
/// Each part ends with either:
///
///  * `RETURN` clause;
///  * `WITH` clause or
///  * any of the write clauses.
///
/// NOTE(review): @c CollectQueryParts also starts a new part after an `UNWIND`
/// clause, which the list above does not mention -- confirm and update.
///
/// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
/// generated QueryPart will have `matching` generated for the `MATCH`.
/// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
/// in that exact order. The pattern inside the first `MERGE` will be used to
/// generate the first `merge_matching` element, and the second `MERGE` pattern
/// will produce the second `merge_matching` element. This way, if someone
/// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
/// in the same order as their respective `merge_matching` elements.
struct QueryPart {
/// @brief All `MATCH` clauses merged into one @c Matching.
Matching matching;
/// @brief Each `OPTIONAL MATCH` converted to @c Matching.
std::vector<Matching> optional_matching{};
/// @brief @c Matching for each `MERGE` clause.
///
/// Storing the normalized pattern of a @c Merge does not preclude storing the
/// @c Merge clause itself inside `remaining_clauses`. The reason is that we
/// need to have access to other parts of the clause, such as `SET` clauses
/// which need to be run.
///
/// Since @c Merge is contained in `remaining_clauses`, this vector contains
/// matching in the same order as @c Merge appears.
std::vector<Matching> merge_matching{};
/// @brief All the remaining clauses (without @c Match).
std::vector<Clause *> remaining_clauses{};
};
/// @brief Convert the AST to multiple @c QueryParts.
///
/// This function will normalize patterns inside @c Match and @c Merge clauses
/// and do some other preprocessing in order to generate multiple @c QueryPart
/// structures. @c AstTreeStorage and @c SymbolTable may be used to create new
/// AST nodes.
std::vector<QueryPart> CollectQueryParts(SymbolTable &, AstTreeStorage &);
} // namespace query::plan

View File

@ -62,27 +62,6 @@ auto ReducePattern(
return last_res;
}
// Walks the atoms of `pattern`, expected as NodeAtom (EdgeAtom NodeAtom)*.
// Calls `base` with the first node and `collect` with each following
// (previous node, edge, node) triplet.
void ForEachPattern(
Pattern &pattern, std::function<void(NodeAtom *)> base,
std::function<void(NodeAtom *, EdgeAtom *, NodeAtom *)> collect) {
DCHECK(!pattern.atoms_.empty()) << "Missing atoms in pattern";
auto atoms_it = pattern.atoms_.begin();
auto current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
DCHECK(current_node) << "First pattern atom is not a node";
base(current_node);
// Remaining atoms need to follow sequentially as (EdgeAtom, NodeAtom)*
while (atoms_it != pattern.atoms_.end()) {
auto edge = dynamic_cast<EdgeAtom *>(*atoms_it++);
DCHECK(edge) << "Expected an edge atom in pattern.";
DCHECK(atoms_it != pattern.atoms_.end())
<< "Edge atom should not end the pattern.";
// The end node of the previous triplet starts the current one.
auto prev_node = current_node;
current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
DCHECK(current_node) << "Expected a node atom in pattern.";
collect(prev_node, edge, current_node);
}
}
auto GenCreate(Create &create, LogicalOperator *input_op,
const SymbolTable &symbol_table,
std::unordered_set<Symbol> &bound_symbols) {
@ -94,38 +73,8 @@ auto GenCreate(Create &create, LogicalOperator *input_op,
return last_op;
}
// Collects symbols from identifiers found in visited AST nodes.
// The result accumulates in `symbols_`.
class UsedSymbolsCollector : public HierarchicalTreeVisitor {
public:
explicit UsedSymbolsCollector(const SymbolTable &symbol_table)
: symbol_table_(symbol_table) {}
using HierarchicalTreeVisitor::PostVisit;
using HierarchicalTreeVisitor::PreVisit;
using HierarchicalTreeVisitor::Visit;
bool PostVisit(All &all) override {
// Remove the symbol which is bound by all, because we are only interested
// in free (unbound) symbols.
symbols_.erase(symbol_table_.at(*all.identifier_));
return true;
}
bool Visit(Identifier &ident) override {
symbols_.insert(symbol_table_.at(ident));
return true;
}
// Leaves which carry no identifiers; nothing to collect.
bool Visit(PrimitiveLiteral &) override { return true; }
bool Visit(ParameterLookup &) override { return true; }
bool Visit(query::CreateIndex &) override { return true; }
// Symbols collected so far.
std::unordered_set<Symbol> symbols_;
// Table used to resolve identifiers to symbols.
const SymbolTable &symbol_table_;
};
bool HasBoundFilterSymbols(const std::unordered_set<Symbol> &bound_symbols,
const Filters::FilterInfo &filter) {
const FilterInfo &filter) {
for (const auto &symbol : filter.used_symbols) {
if (bound_symbols.find(symbol) == bound_symbols.end()) {
return false;
@ -505,137 +454,18 @@ auto GenReturnBody(LogicalOperator *input_op, bool advance_command,
return last_op;
}
// Converts multiple Patterns to Expansions. Each Pattern can contain an
// arbitrarily long chain of nodes and edges. The conversion to an Expansion is
// done by splitting a pattern into triplets (node1, edge, node2). The triplets
// conserve the semantics of the pattern. For example, in a pattern:
// (m) -[e]- (n) -[f]- (o) the same can be achieved with:
// (m) -[e]- (n), (n) -[f]- (o).
// This representation makes it easier to permute from which node or edge we
// want to start expanding.
// A pattern consisting of a single node yields an Expansion with only the
// first node set.
std::vector<Expansion> NormalizePatterns(
const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
std::vector<Expansion> expansions;
auto ignore_node = [&](auto *) {};
auto collect_expansion = [&](auto *prev_node, auto *edge,
auto *current_node) {
UsedSymbolsCollector collector(symbol_table);
// Remove symbols which are bound by variable expansions.
// NOTE(review): the inner symbols are erased before `filter_expression_` is
// visited, so inner symbols used by the filter expression stay in the
// collected set -- confirm whether that is intended.
if (edge->IsVariable()) {
if (edge->lower_bound_) edge->lower_bound_->Accept(collector);
if (edge->upper_bound_) edge->upper_bound_->Accept(collector);
collector.symbols_.erase(symbol_table.at(*edge->inner_edge_));
collector.symbols_.erase(symbol_table.at(*edge->inner_node_));
if (edge->filter_expression_) edge->filter_expression_->Accept(collector);
}
expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, false,
collector.symbols_, current_node});
};
for (const auto &pattern : patterns) {
if (pattern->atoms_.size() == 1U) {
auto *node = dynamic_cast<NodeAtom *>(pattern->atoms_[0]);
DCHECK(node) << "First pattern atom is not a node";
expansions.emplace_back(Expansion{node});
} else {
ForEachPattern(*pattern, ignore_node, collect_expansion);
}
}
return expansions;
}
// Populates `matching` from the given Match patterns and optional Where.
//
// The patterns are first normalized into Expansions, which are appended to
// the ones already stored in `matching`. While doing so, node symbols are
// mapped to the index of the expansion using them and all node and edge
// symbols are recorded. Edge symbols of this call form a single set used for
// Cyphermorphism. Finally, filters are lifted out of the patterns (as if they
// were written in a Where clause), named paths are registered and the Where
// expression, if any, is collected.
void AddMatching(const std::vector<Pattern *> &patterns, Where *where,
                 SymbolTable &symbol_table, AstTreeStorage &storage,
                 Matching &matching) {
  std::unordered_set<Symbol> clause_edge_symbols;
  for (const auto &expansion : NormalizePatterns(symbol_table, patterns)) {
    // `matching` may hold expansions from earlier calls, so the index of this
    // expansion is the current size and not the position in this loop.
    const int index = matching.expansions.size();
    const auto &first_node = symbol_table.at(*expansion.node1->identifier_);
    matching.node_symbol_to_expansions[first_node].insert(index);
    matching.expansion_symbols.insert(first_node);
    if (expansion.edge) {
      const auto &edge_symbol = symbol_table.at(*expansion.edge->identifier_);
      const auto &second_node = symbol_table.at(*expansion.node2->identifier_);
      // Edges of a single call must be mutually distinct (Cyphermorphism).
      clause_edge_symbols.insert(edge_symbol);
      matching.node_symbol_to_expansions[second_node].insert(index);
      matching.expansion_symbols.insert(edge_symbol);
      matching.expansion_symbols.insert(second_node);
    }
    matching.expansions.push_back(expansion);
  }
  if (!clause_edge_symbols.empty()) {
    matching.edge_symbols.emplace_back(clause_edge_symbols);
  }
  for (auto *pattern : patterns) {
    matching.filters.CollectPatternFilters(*pattern, symbol_table, storage);
    // Only paths which the user named need to be constructed later on.
    if (!pattern->identifier_->user_declared_) continue;
    std::vector<Symbol> path_elements;
    for (auto *atom : pattern->atoms_) {
      path_elements.emplace_back(symbol_table.at(*atom->identifier_));
    }
    matching.named_paths.emplace(symbol_table.at(*pattern->identifier_),
                                 std::move(path_elements));
  }
  if (where) {
    matching.filters.CollectWhereFilter(*where, symbol_table);
  }
}
// Convenience overload which unpacks the patterns and the Where of a Match
// clause and forwards them to the general AddMatching.
void AddMatching(const Match &match, SymbolTable &symbol_table,
                 AstTreeStorage &storage, Matching &matching) {
  AddMatching(match.patterns_, match.where_, symbol_table, storage, matching);
}
// Splits an expression on top level AndOperators and returns the operands
// which are not AndOperators themselves. Operands are processed in LIFO order,
// so the second operand of an AND ends up before the first one in the result.
auto SplitExpressionOnAnd(Expression *expression) {
  std::vector<Expression *> conjuncts;
  // Expressions which still need to be inspected; used as a stack.
  std::vector<Expression *> worklist{expression};
  while (!worklist.empty()) {
    auto *candidate = worklist.back();
    worklist.pop_back();
    auto *conjunction = dynamic_cast<AndOperator *>(candidate);
    if (!conjunction) {
      conjuncts.push_back(candidate);
      continue;
    }
    worklist.push_back(conjunction->expression1_);
    worklist.push_back(conjunction->expression2_);
  }
  return conjuncts;
}
} // namespace
namespace impl {
// Binds the symbol by adding it to `bound_symbols`. Returns true if the symbol
// was newly bound and false if it was already present in the set.
bool BindSymbol(std::unordered_set<Symbol> &bound_symbols,
                const Symbol &symbol) {
  return bound_symbols.insert(symbol).second;
}
Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
std::vector<Filters::FilterInfo> &all_filters,
AstTreeStorage &storage) {
Filters &filters, AstTreeStorage &storage) {
Expression *filter_expr = nullptr;
for (auto filters_it = all_filters.begin();
filters_it != all_filters.end();) {
for (auto filters_it = filters.begin(); filters_it != filters.end();) {
if (HasBoundFilterSymbols(bound_symbols, *filters_it)) {
filter_expr = impl::BoolJoin<AndOperator>(storage, filter_expr,
filters_it->expression);
filters_it = all_filters.erase(filters_it);
filters_it = filters.erase(filters_it);
} else {
filters_it++;
}
@ -645,9 +475,8 @@ Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
LogicalOperator *GenFilters(LogicalOperator *last_op,
const std::unordered_set<Symbol> &bound_symbols,
std::vector<Filters::FilterInfo> &all_filters,
AstTreeStorage &storage) {
auto *filter_expr = ExtractFilters(bound_symbols, all_filters, storage);
Filters &filters, AstTreeStorage &storage) {
auto *filter_expr = ExtractFilters(bound_symbols, filters, storage);
if (filter_expr) {
last_op =
new Filter(std::shared_ptr<LogicalOperator>(last_op), filter_expr);
@ -700,7 +529,7 @@ LogicalOperator *GenCreateForPattern(
const SymbolTable &symbol_table,
std::unordered_set<Symbol> &bound_symbols) {
auto base = [&](NodeAtom *node) -> LogicalOperator * {
if (BindSymbol(bound_symbols, symbol_table.at(*node->identifier_)))
if (bound_symbols.insert(symbol_table.at(*node->identifier_)).second)
return new CreateNode(node, std::shared_ptr<LogicalOperator>(input_op));
else
return input_op;
@ -713,10 +542,10 @@ LogicalOperator *GenCreateForPattern(
// If the expand node was already bound, then we need to indicate this,
// so that CreateExpand only creates an edge.
bool node_existing = false;
if (!BindSymbol(bound_symbols, symbol_table.at(*node->identifier_))) {
if (!bound_symbols.insert(symbol_table.at(*node->identifier_)).second) {
node_existing = true;
}
if (!BindSymbol(bound_symbols, symbol_table.at(*edge->identifier_))) {
if (!bound_symbols.insert(symbol_table.at(*edge->identifier_)).second) {
LOG(FATAL) << "Symbols used for created edges cannot be redeclared.";
}
return new CreateExpand(node, edge,
@ -792,238 +621,11 @@ LogicalOperator *GenWith(With &with, LogicalOperator *input_op,
// Reset bound symbols, so that only those in WITH are exposed.
bound_symbols.clear();
for (const auto &symbol : body.output_symbols()) {
BindSymbol(bound_symbols, symbol);
bound_symbols.insert(symbol);
}
return last_op;
}
} // namespace impl
// Analyzes the filter expression by collecting information on filtering labels
// and properties to be used with indexing. Note that `all_filters_` are never
// updated here, but only `label_filters_` and `property_filters_` are.
//
// Only the following shapes of `expr` are recognized, anything else is ignored:
//  * AndOperator -- both operands are analyzed recursively;
//  * LabelsTest applied directly on an Identifier;
//  * EqualOperator where at least one side is `identifier.property` and
//  * `>`, `>=`, `<` and `<=` where at least one side is `identifier.property`.
void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
using Bound = ScanAllByLabelPropertyRange::Bound;
// Returns true if `maybe_lookup` is a PropertyLookup whose expression is a
// plain Identifier. The found nodes are stored in the `prop_lookup` and
// `ident` output arguments; they should be used only if true is returned.
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
auto *&ident) {
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
(ident = dynamic_cast<Identifier *>(prop_lookup->expression_));
};
// If `maybe_lookup` is `identifier.property`, stores a PropertyFilter which
// requires the property to be equal to `val_expr`. The stored used symbols
// are those found in `val_expr`.
auto add_prop_equal = [&](auto *maybe_lookup, auto *val_expr) {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
UsedSymbolsCollector collector(symbol_table);
val_expr->Accept(collector);
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
.emplace_back(PropertyFilter{collector.symbols_, val_expr});
}
};
// Handles `expr1 > expr2` (or `>=`, depending on `bound_type`). Both checks
// are independent, so if both sides are property lookups, a filter is stored
// for each of them.
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(expr1, prop_lookup, ident)) {
// n.prop > value
// The value is the lower bound of the property.
UsedSymbolsCollector collector(symbol_table);
expr2->Accept(collector);
auto prop_filter = PropertyFilter{collector.symbols_};
prop_filter.lower_bound = Bound{expr2, bound_type};
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
.emplace_back(std::move(prop_filter));
}
if (get_property_lookup(expr2, prop_lookup, ident)) {
// value > n.prop
// The value is the upper bound of the property.
UsedSymbolsCollector collector(symbol_table);
expr1->Accept(collector);
auto prop_filter = PropertyFilter{collector.symbols_};
prop_filter.upper_bound = Bound{expr1, bound_type};
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
.emplace_back(std::move(prop_filter));
}
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
if (auto *and_op = dynamic_cast<AndOperator *>(expr)) {
AnalyzeFilter(and_op->expression1_, symbol_table);
AnalyzeFilter(and_op->expression2_, symbol_table);
} else if (auto *labels_test = dynamic_cast<LabelsTest *>(expr)) {
// Since LabelsTest may contain any expression, we can only use the
// simplest test on an identifier.
if (auto *ident = dynamic_cast<Identifier *>(labels_test->expression_)) {
const auto &symbol = symbol_table.at(*ident);
label_filters_[symbol].insert(labels_test->labels_.begin(),
labels_test->labels_.end());
}
} else if (auto *eq = dynamic_cast<EqualOperator *>(expr)) {
// Try to get property equality test from the top expressions.
// Unfortunately, we cannot go deeper inside Equal, because chained equals
// need not correspond to And. For example, `(n.prop = value) = false` is
// represented as:
//   EQ(EQ(n.prop, value), false)
// where only the operands of the outer EQ are the top expressions.
// Here the `prop` may be different than `value` resulting in `false`. This
// would compare with the top level `false`, producing `true`. Therefore, it
// is incorrect to pick up `n.prop = value` for scanning by property index.
add_prop_equal(eq->expression1_, eq->expression2_);
// And reversed.
add_prop_equal(eq->expression2_, eq->expression1_);
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
add_prop_greater(gt->expression1_, gt->expression2_,
Bound::Type::EXCLUSIVE);
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
add_prop_greater(ge->expression1_, ge->expression2_,
Bound::Type::INCLUSIVE);
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
// Like greater, but in reverse.
add_prop_greater(lt->expression2_, lt->expression1_,
Bound::Type::EXCLUSIVE);
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
// Like greater equal, but in reverse.
add_prop_greater(le->expression2_, le->expression1_,
Bound::Type::INCLUSIVE);
}
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
// indexing by range. Note, that the generated Ast uses AND for chained
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
// as `expr1 < n.prop AND n.prop < expr2`.
}
// Lifts the labels and properties written inside the pattern atoms into filter
// expressions, as if they were written in a Where clause.
//
// For every node, the labels are stored in `label_filters_` and a LabelsTest
// is added to `all_filters_`. Properties of nodes and regular edges are stored
// in `property_filters_` and an equality test is added to `all_filters_`.
// Properties of variable length edges produce 2 filters in `all_filters_`, but
// nothing in `property_filters_`. New Ast nodes are created in `storage` and,
// for variable length edges, a new symbol is created in `symbol_table`.
// Note that edge types are not collected by this function.
void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
AstTreeStorage &storage) {
// The collector is shared by the lambdas below, so its symbols are cleared
// before each use.
UsedSymbolsCollector collector(symbol_table);
auto add_properties_variable = [&](EdgeAtom *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
for (auto &prop_pair : atom->properties_) {
// We need to store two property-lookup filters in all_filters. One is
// used for inlining property filters into variable expansion, and
// utilizes the inner_edge symbol. The other is used for post-expansion
// filtering and does not use the inner_edge symbol, but the edge symbol
// (a list of edges).
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
// Mark the inline filter as using the inner symbols, in addition to the
// symbols found in the property value expression.
collector.symbols_.emplace(symbol_table.at(*atom->inner_node_));
collector.symbols_.emplace(symbol_table.at(*atom->inner_edge_));
// First handle the inline property filter.
auto *property_lookup =
storage.Create<PropertyLookup>(atom->inner_edge_, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
all_filters_.emplace_back(FilterInfo{prop_equal, collector.symbols_});
}
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
// Now handle the post-expansion filter.
// Create a new identifier and a symbol which will be filled in All.
auto *identifier = atom->identifier_->Clone(storage);
symbol_table[*identifier] =
symbol_table.CreateSymbol(identifier->name_, false);
// Create an equality expression and store it in all_filters_.
auto *property_lookup =
storage.Create<PropertyLookup>(identifier, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// The equality is wrapped in All, which is created over the new
// identifier and the identifier of the edge atom.
all_filters_.emplace_back(
FilterInfo{storage.Create<All>(identifier, atom->identifier_,
storage.Create<Where>(prop_equal)),
collector.symbols_});
}
}
};
// Used for nodes and regular (not variable length) edges.
auto add_properties = [&](auto *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
for (auto &prop_pair : atom->properties_) {
collector.symbols_.clear();
prop_pair.second->Accept(collector);
// Store a PropertyFilter on the value of the property.
// At this point the used symbols contain only those found in the value
// expression; the symbol of the atom is added later.
property_filters_[symbol][prop_pair.first.second].emplace_back(
PropertyFilter{collector.symbols_, prop_pair.second});
// Create an equality expression and store it in all_filters_.
auto *property_lookup =
storage.Create<PropertyLookup>(atom->identifier_, prop_pair.first);
auto *prop_equal =
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
all_filters_.emplace_back(FilterInfo{prop_equal, collector.symbols_});
}
};
auto add_node_filter = [&](NodeAtom *node) {
const auto &node_symbol = symbol_table.at(*node->identifier_);
if (!node->labels_.empty()) {
// Store the filtered labels.
label_filters_[node_symbol].insert(node->labels_.begin(),
node->labels_.end());
// Create a LabelsTest and store it in all_filters_.
all_filters_.emplace_back(FilterInfo{
storage.Create<LabelsTest>(node->identifier_, node->labels_),
std::unordered_set<Symbol>{node_symbol}});
}
add_properties(node);
};
// The first node of the triplet is ignored, because it was already handled
// either as the first node of the pattern or as the last node of the previous
// triplet.
auto add_expand_filter = [&](NodeAtom *, EdgeAtom *edge, NodeAtom *node) {
if (edge->IsVariable())
add_properties_variable(edge);
else
add_properties(edge);
add_node_filter(node);
};
ForEachPattern(pattern, add_node_filter, add_expand_filter);
}
// Splits the where expression on top level ANDs and stores each of the
// operands as a separate filter in `all_filters_`, along with the symbols it
// uses. Each operand is also analyzed for information on potential property
// and label indexing.
void Filters::CollectWhereFilter(Where &where,
                                 const SymbolTable &symbol_table) {
  for (const auto &conjunct : SplitExpressionOnAnd(where.expression_)) {
    UsedSymbolsCollector used(symbol_table);
    conjunct->Accept(used);
    all_filters_.emplace_back(FilterInfo{conjunct, used.symbols_});
    AnalyzeFilter(conjunct, symbol_table);
  }
}
// Walks through the clauses of the query stored in `storage` and groups them
// into QueryParts. New Ast nodes, e.g. filter expressions, may be created
// along the way.
//
// Match clauses are normalized into the matching (or optional matching) of
// the current part. Every other clause is appended to the remaining clauses
// of the current part, where Merge additionally gets its pattern normalized.
// With and Unwind start a new part, while Return stops the collection.
std::vector<QueryPart> CollectQueryParts(SymbolTable &symbol_table,
                                         AstTreeStorage &storage) {
  auto query = storage.query();
  std::vector<QueryPart> query_parts(1);
  for (auto &clause : query->clauses_) {
    // Take the last part on each iteration, because adding a new part may
    // reallocate the vector.
    auto &current_part = query_parts.back();
    if (auto *match = dynamic_cast<Match *>(clause)) {
      if (!match->optional_) {
        DCHECK(current_part.optional_matching.empty())
            << "Match clause cannot follow optional match.";
        AddMatching(*match, symbol_table, storage, current_part.matching);
      } else {
        current_part.optional_matching.emplace_back(Matching{});
        AddMatching(*match, symbol_table, storage,
                    current_part.optional_matching.back());
      }
      continue;
    }
    current_part.remaining_clauses.push_back(clause);
    if (auto *merge = dynamic_cast<query::Merge *>(clause)) {
      current_part.merge_matching.emplace_back(Matching{});
      AddMatching({merge->pattern_}, nullptr, symbol_table, storage,
                  current_part.merge_matching.back());
    } else if (dynamic_cast<With *>(clause) ||
               dynamic_cast<query::Unwind *>(clause)) {
      // This query part is done, continue with a new one.
      query_parts.emplace_back(QueryPart{});
    } else if (dynamic_cast<Return *>(clause)) {
      // TODO: Support RETURN UNION ...
      return query_parts;
    }
  }
  return query_parts;
}
} // namespace query::plan

View File

@ -7,159 +7,12 @@
#include "query/frontend/ast/ast.hpp"
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
DECLARE_int64(query_vertex_count_to_expand_existing);
namespace query::plan {
/// Normalized representation of a pattern that needs to be matched.
///
/// An expansion is either a single node (only @c node1 is set) or a triplet
/// of `(node1, edge, node2)`. Instances are created via aggregate
/// initialization, so the order of members must not be changed.
struct Expansion {
/// The first node in the expansion, it can be a single node.
NodeAtom *node1 = nullptr;
/// Optional edge which connects the 2 nodes.
EdgeAtom *edge = nullptr;
/// Direction of the edge, it may be flipped compared to original
/// @c EdgeAtom during plan generation.
EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
/// True if the direction and nodes were flipped.
bool is_flipped = false;
/// Set of symbols found inside the expressions of a variable path edge, i.e.
/// its lower bound, upper bound and filter expression. The set is empty for
/// regular edges.
std::unordered_set<Symbol> symbols_in_range{};
/// Optional node at the other end of an edge. If the expansion
/// contains an edge, then this node is required.
NodeAtom *node2 = nullptr;
};
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
///
/// The filter expressions which need to be generated are kept in
/// @c all_filters. The remaining mappings hold additional information
/// extracted from those filters, which is meant for generating indexed
/// operations.
class Filters {
public:
/// Stores the symbols and expression used to filter a property.
///
/// A filter is either an equality test, in which case @c expression is set,
/// or a range test with @c lower_bound or @c upper_bound set. Instances are
/// created via aggregate initialization, so the order of members must not be
/// changed.
struct PropertyFilter {
using Bound = ScanAllByLabelPropertyRange::Bound;
/// Set of used symbols in the @c expression.
std::unordered_set<Symbol> used_symbols;
/// Expression which when evaluated produces the value a property must
/// equal.
Expression *expression = nullptr;
/// Bound which the property value must be greater than (or equal to).
std::experimental::optional<Bound> lower_bound{};
/// Bound which the property value must be less than (or equal to).
std::experimental::optional<Bound> upper_bound{};
};
/// Stores additional information for a filter expression.
///
/// Instances are created via aggregate initialization, so the order of
/// members must not be changed.
struct FilterInfo {
/// The filter expression which must be satisfied.
Expression *expression;
/// Set of used symbols by the filter @c expression.
std::unordered_set<Symbol> used_symbols;
};
/// List of FilterInfo objects corresponding to all filter expressions that
/// should be generated.
auto &all_filters() { return all_filters_; }
const auto &all_filters() const { return all_filters_; }
/// Mapping from a symbol to labels that are filtered on it. These should be
/// used only for generating indexed scans.
const auto &label_filters() const { return label_filters_; }
/// Mapping from a symbol to edge types that are filtered on it. These should
/// be used for generating indexed expansions.
const auto &edge_type_filters() const { return edge_type_filters_; }
/// Mapping from a symbol to properties that are filtered on it. These should
/// be used only for generating indexed scans.
const auto &property_filters() const { return property_filters_; }
/// Collects filtering information from a pattern.
///
/// Goes through all the atoms in a pattern and generates filter expressions
/// for found labels and properties. The generated expressions are
/// stored in @c all_filters. Also, @c label_filters and @c property_filters
/// are populated.
///
/// NOTE(review): this used to mention edge types, but the definition of this
/// function does not appear to collect them -- confirm whether
/// @c edge_type_filters is populated anywhere.
void CollectPatternFilters(Pattern &, SymbolTable &, AstTreeStorage &);
/// Collects filtering information from a where expression.
///
/// Takes the where expression and stores it in @c all_filters, then analyzes
/// the expression for additional information. The additional information is
/// used to populate @c label_filters and @c property_filters, so that indexed
/// scanning can use it.
void CollectWhereFilter(Where &, const SymbolTable &);
private:
/// Extracts label and property filtering information from an expression,
/// without storing the expression itself in @c all_filters.
void AnalyzeFilter(Expression *, const SymbolTable &);
std::vector<FilterInfo> all_filters_;
std::unordered_map<Symbol, std::unordered_set<GraphDbTypes::Label>>
label_filters_;
std::unordered_map<Symbol, std::unordered_set<GraphDbTypes::EdgeType>>
edge_type_filters_;
std::unordered_map<Symbol, std::unordered_map<GraphDbTypes::Property,
std::vector<PropertyFilter>>>
property_filters_;
};
/// Normalized representation of a single or multiple Match clauses.
///
/// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
/// WHERE c.prop < 42` will produce the following.
/// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and
/// `(n) -[e3]- (m)`.
/// Edge symbols for Cyphermorphism will contain 2 sets: `{e1, e2}` for the
/// first `MATCH` and `{e3}` for the second.
/// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the
/// other obtained from `WHERE` on symbol `c`.
struct Matching {
/// All expansions that need to be performed across @c Match clauses.
std::vector<Expansion> expansions;
/// Symbols for edges established in match, used to ensure Cyphermorphism.
///
/// There are multiple sets, because each Match clause determines a single
/// set.
std::vector<std::unordered_set<Symbol>> edge_symbols;
/// Information on used filter expressions while matching.
Filters filters;
/// Maps node symbols to expansions which bind them. The expansions are
/// represented by their index in @c expansions.
std::unordered_map<Symbol, std::set<int>> node_symbol_to_expansions{};
/// Maps named path symbols to a vector of Symbols that define its pattern.
std::unordered_map<Symbol, std::vector<Symbol>> named_paths{};
/// All node and edge symbols across all expansions (from all matches).
std::unordered_set<Symbol> expansion_symbols{};
};
/// @brief Represents a read (+ write) part of a query.
///
/// When collecting parts via @c CollectQueryParts, a part ends with either:
///
///  * `WITH` clause;
///  * `UNWIND` clause or
///  * `RETURN` clause, which also ends the collection.
///
/// NOTE(review): this used to state that any of the write clauses end a part,
/// but @c CollectQueryParts only appends them to `remaining_clauses` of the
/// current part -- confirm the intended behaviour.
///
/// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
/// generated QueryPart will have `matching` generated for the `MATCH`.
/// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
/// in that exact order. The pattern inside the first `MERGE` will be used to
/// generate the first `merge_matching` element, and the second `MERGE` pattern
/// will produce the second `merge_matching` element. This way, if someone
/// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
/// in the same order as their respective `merge_matching` elements.
struct QueryPart {
/// @brief All `MATCH` clauses merged into one @c Matching.
Matching matching;
/// @brief Each `OPTIONAL MATCH` converted to @c Matching.
std::vector<Matching> optional_matching{};
/// @brief @c Matching for each `MERGE` clause.
///
/// Storing the normalized pattern of a @c Merge does not preclude storing the
/// @c Merge clause itself inside `remaining_clauses`. The reason is that we
/// need to have access to other parts of the clause, such as `SET` clauses
/// which need to be run.
///
/// Since @c Merge is contained in `remaining_clauses`, this vector contains
/// matching in the same order as @c Merge appears.
std::vector<Matching> merge_matching{};
/// @brief All the remaining clauses (without @c Match).
std::vector<Clause *> remaining_clauses{};
};
/// @brief Context which contains variables commonly used during planning.
template <class TDbAccessor>
struct PlanningContext {
@ -205,40 +58,26 @@ struct MatchContext {
std::vector<Symbol> new_symbols{};
};
/// @brief Convert the AST to multiple @c QueryParts.
///
/// This function will normalize patterns inside @c Match and @c Merge clauses
/// and do some other preprocessing in order to generate multiple @c QueryPart
/// structures. @c AstTreeStorage and @c SymbolTable may be used to create new
/// AST nodes.
std::vector<QueryPart> CollectQueryParts(SymbolTable &, AstTreeStorage &);
namespace impl {
// These functions are an internal implementation of RuleBasedPlanner. To avoid
// writing the whole code inline in this header file, they are declared here and
// defined in the cpp file.
bool BindSymbol(std::unordered_set<Symbol> &bound_symbols,
const Symbol &symbol);
// Iterates over `Filters` joining them in one expression via
// `AndOperator` if symbols they use are bound.. All the joined filters are
// removed from `Filters`.
Expression *ExtractFilters(const std::unordered_set<Symbol> &, Filters &,
AstTreeStorage &);
// Iterates over `all_filters` joining them in one expression via
// `AndOperator`. Filters which use unbound symbols are skipped
// The function takes a single argument, `FilterInfo`. All the joined filters
// are removed from `all_filters`.
Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
std::vector<Filters::FilterInfo> &all_filters,
AstTreeStorage &storage);
LogicalOperator *GenFilters(LogicalOperator *,
const std::unordered_set<Symbol> &, Filters &,
AstTreeStorage &);
LogicalOperator *GenFilters(LogicalOperator *last_op,
const std::unordered_set<Symbol> &bound_symbols,
std::vector<Filters::FilterInfo> &all_filters,
AstTreeStorage &storage);
//
/// For all given `named_paths` checks if the all it's symbols have been bound.
/// If so it creates a logical operator for named path generation, binds it's
/// symbol, removes that path from the collection of unhandled ones and returns
/// the new op. Otherwise it returns nullptr.
// For all given `named_paths` checks if all its symbols have been bound.
// If so, it creates a logical operator for named path generation, binds its
// symbol, removes that path from the collection of unhandled ones and returns
// the new op. Otherwise, returns `nullptr`.
LogicalOperator *GenNamedPaths(
LogicalOperator *last_op, std::unordered_set<Symbol> &bound_symbols,
std::unordered_map<Symbol, std::vector<Symbol>> &named_paths);
@ -306,7 +145,7 @@ class RuleBasedPlanner {
}
int merge_id = 0;
for (auto &clause : query_part.remaining_clauses) {
DCHECK(dynamic_cast<Match *>(clause) == nullptr)
DCHECK(!dynamic_cast<Match *>(clause))
<< "Unexpected Match in remaining clauses";
if (auto *ret = dynamic_cast<Return *>(clause)) {
input_op =
@ -332,7 +171,7 @@ class RuleBasedPlanner {
} else if (auto *unwind = dynamic_cast<query::Unwind *>(clause)) {
const auto &symbol =
context.symbol_table.at(*unwind->named_expression_);
impl::BindSymbol(context.bound_symbols, symbol);
context.bound_symbols.insert(symbol);
input_op =
new plan::Unwind(std::shared_ptr<LogicalOperator>(input_op),
unwind->named_expression_->expression_, symbol);
@ -352,59 +191,49 @@ class RuleBasedPlanner {
private:
TPlanningContext &context_;
struct LabelPropertyIndex {
GraphDbTypes::Label label;
// FilterInfo with PropertyFilter.
FilterInfo filter;
int64_t vertex_count;
};
// Finds the label-property combination which has indexed the lowest amount of
// vertices. `best_label` and `best_property` will be set to that combination
// and the function will return (`true`, vertex count in index). If the index
// cannot be found, the function will return (`false`, maximum int64_t), while
// leaving `best_label` and `best_property` unchanged.
std::pair<bool, int64_t> FindBestLabelPropertyIndex(
const std::unordered_set<GraphDbTypes::Label> &labels,
const std::unordered_map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>>
&property_filters,
const Symbol &symbol, const std::unordered_set<Symbol> &bound_symbols,
GraphDbTypes::Label &best_label,
std::pair<GraphDbTypes::Property, Filters::PropertyFilter>
&best_property) {
// vertices. If the index cannot be found, nullopt is returned.
std::experimental::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(
const Symbol &symbol, const Filters &filters,
const std::unordered_set<Symbol> &bound_symbols) {
auto are_bound = [&bound_symbols](const auto &used_symbols) {
for (const auto &used_symbol : used_symbols) {
if (bound_symbols.find(used_symbol) == bound_symbols.end()) {
if (!utils::Contains(bound_symbols, used_symbol)) {
return false;
}
}
return true;
};
bool found = false;
int64_t min_count = std::numeric_limits<int64_t>::max();
for (const auto &label : labels) {
for (const auto &prop_pair : property_filters) {
const auto &property = prop_pair.first;
std::experimental::optional<LabelPropertyIndex> found;
for (const auto &label : filters.FilteredLabels(symbol)) {
for (const auto &filter : filters.PropertyFilters(symbol)) {
const auto &property = filter.property_filter->property_;
if (context_.db.LabelPropertyIndexExists(label, property)) {
int64_t vertices_count = context_.db.VerticesCount(label, property);
if (vertices_count < min_count) {
for (const auto &prop_filter : prop_pair.second) {
if (prop_filter.used_symbols.find(symbol) !=
prop_filter.used_symbols.end()) {
// Skip filter expressions which use the symbol whose property
// we are looking up. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
// so we cannot scan `n` by property index.
continue;
}
if (are_bound(prop_filter.used_symbols)) {
// Take the first property filter which uses bound symbols.
best_label = label;
best_property = {property, prop_filter};
min_count = vertices_count;
found = true;
break;
}
int64_t vertex_count = context_.db.VerticesCount(label, property);
if (!found || vertex_count < found->vertex_count) {
if (filter.property_filter->is_symbol_in_value_) {
// Skip filter expressions which use the symbol whose property
// we are looking up. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
// so we cannot scan `n` by property index.
continue;
}
if (are_bound(filter.used_symbols)) {
// Take the property filter which uses bound symbols.
found = LabelPropertyIndex{label, filter, vertex_count};
}
}
}
}
}
return {found, min_count};
return found;
}
const GraphDbTypes::Label &FindBestLabelIndex(
@ -426,45 +255,37 @@ class RuleBasedPlanner {
// vertex count in the best index exceeds this number. In such a case,
// `nullptr` is returned and `last_op` is not chained.
ScanAll *GenScanByIndex(LogicalOperator *last_op, const Symbol &node_symbol,
const MatchContext &match_ctx,
const MatchContext &match_ctx, Filters &filters,
const std::experimental::optional<int64_t>
&max_vertex_count = std::experimental::nullopt) {
const auto labels =
utils::FindOr(match_ctx.matching.filters.label_filters(), node_symbol,
std::unordered_set<GraphDbTypes::Label>())
.first;
const auto labels = filters.FilteredLabels(node_symbol);
if (labels.empty()) {
// Without labels, we cannot generated any indexed ScanAll.
return nullptr;
}
const auto properties =
utils::FindOr(
match_ctx.matching.filters.property_filters(), node_symbol,
std::unordered_map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>>())
.first;
// First, try to see if we can use label+property index. If not, use just
// the label index (which ought to exist).
GraphDbTypes::Label best_label;
std::pair<GraphDbTypes::Property, Filters::PropertyFilter> best_property;
auto found_index = FindBestLabelPropertyIndex(
labels, properties, node_symbol, match_ctx.bound_symbols, best_label,
best_property);
if (found_index.first &&
auto found_index = FindBestLabelPropertyIndex(node_symbol, filters,
match_ctx.bound_symbols);
if (found_index &&
// Use label+property index if we satisfy max_vertex_count.
(!max_vertex_count || *max_vertex_count >= found_index.second)) {
const auto &prop_filter = best_property.second;
if (prop_filter.lower_bound || prop_filter.upper_bound) {
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
// Copy the property filter and then erase it from filters.
const auto prop_filter = *found_index->filter.property_filter;
filters.EraseFilter(found_index->filter);
filters.EraseLabelFilter(node_symbol, found_index->label);
if (prop_filter.lower_bound_ || prop_filter.upper_bound_) {
return new ScanAllByLabelPropertyRange(
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
best_property.first, prop_filter.lower_bound,
prop_filter.upper_bound, match_ctx.graph_view);
std::shared_ptr<LogicalOperator>(last_op), node_symbol,
found_index->label, prop_filter.property_, prop_filter.lower_bound_,
prop_filter.upper_bound_, match_ctx.graph_view);
} else {
DCHECK(prop_filter.expression)
<< "Property filter should either have bounds or an expression.";
DCHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";
return new ScanAllByLabelPropertyValue(
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
best_property.first, prop_filter.expression, match_ctx.graph_view);
std::shared_ptr<LogicalOperator>(last_op), node_symbol,
found_index->label, prop_filter.property_, prop_filter.value_,
match_ctx.graph_view);
}
}
auto label = FindBestLabelIndex(labels);
@ -474,6 +295,7 @@ class RuleBasedPlanner {
// than the allowed count.
return nullptr;
}
filters.EraseLabelFilter(node_symbol, label);
return new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
node_symbol, label, match_ctx.graph_view);
}
@ -484,21 +306,20 @@ class RuleBasedPlanner {
auto &storage = context_.ast_storage;
const auto &symbol_table = match_context.symbol_table;
const auto &matching = match_context.matching;
// Copy all_filters, because we will modify the list as we generate Filters.
auto all_filters = matching.filters.all_filters();
// Copy filters, because we will modify them as we generate Filters.
auto filters = matching.filters;
// Copy the named_paths for the same reason.
auto named_paths = matching.named_paths;
// Try to generate any filters even before the 1st match operator. This
// optimizes the optional match which filters only on symbols bound in
// regular match.
auto *last_op =
impl::GenFilters(input_op, bound_symbols, all_filters, storage);
auto *last_op = impl::GenFilters(input_op, bound_symbols, filters, storage);
for (const auto &expansion : matching.expansions) {
const auto &node1_symbol = symbol_table.at(*expansion.node1->identifier_);
if (impl::BindSymbol(bound_symbols, node1_symbol)) {
if (bound_symbols.insert(node1_symbol).second) {
// We have just bound this symbol, so generate ScanAll which fills it.
if (auto *indexed_scan =
GenScanByIndex(last_op, node1_symbol, match_context)) {
GenScanByIndex(last_op, node1_symbol, match_context, filters)) {
// First, try to get an indexed scan.
last_op = indexed_scan;
} else {
@ -508,11 +329,9 @@ class RuleBasedPlanner {
node1_symbol, match_context.graph_view);
}
match_context.new_symbols.emplace_back(node1_symbol);
last_op =
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
last_op = impl::GenNamedPaths(last_op, bound_symbols, named_paths);
last_op =
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
}
// We have an edge, so generate Expand.
if (expansion.edge) {
@ -533,28 +352,30 @@ class RuleBasedPlanner {
// Bind the inner edge and node symbols so they're available for
// inline filtering in ExpandVariable.
bool inner_edge_bound =
impl::BindSymbol(bound_symbols, inner_edge_symbol);
bound_symbols.insert(inner_edge_symbol).second;
bool inner_node_bound =
impl::BindSymbol(bound_symbols, inner_node_symbol);
bound_symbols.insert(inner_node_symbol).second;
DCHECK(inner_edge_bound && inner_node_bound)
<< "An inner edge and node can't be bound from before";
}
// Join regular filters with lambda filter expression, so that they
// are done inline together. Semantic analysis should guarantee that
// lambda filtering uses bound symbols.
auto *filter_expr = impl::BoolJoin<AndOperator>(
storage,
impl::ExtractFilters(bound_symbols, all_filters, storage),
storage, impl::ExtractFilters(bound_symbols, filters, storage),
edge->filter_expression_);
// At this point it's possible we have leftover filters for inline
// filtering (they use the inner symbols). If they were not collected,
// we have to remove them manually because no other filter-extraction
// will ever bind them again.
all_filters.erase(
std::remove_if(all_filters.begin(), all_filters.end(),
[ e = inner_edge_symbol, n = inner_node_symbol ](
Filters::FilterInfo & fi) {
filters.erase(
std::remove_if(filters.begin(), filters.end(),
[ e = inner_edge_symbol,
n = inner_node_symbol ](FilterInfo & fi) {
return utils::Contains(fi.used_symbols, e) ||
utils::Contains(fi.used_symbols, n);
}),
all_filters.end());
filters.end());
last_op = new ExpandVariable(
node_symbol, edge_symbol, edge->type_, expansion.direction,
@ -572,7 +393,7 @@ class RuleBasedPlanner {
// It would be better to somehow test whether the input vertex
// degree is larger than the destination vertex index count.
auto *indexed_scan =
GenScanByIndex(last_op, node_symbol, match_context,
GenScanByIndex(last_op, node_symbol, match_context, filters,
FLAGS_query_vertex_count_to_expand_existing);
if (indexed_scan) {
last_op = indexed_scan;
@ -586,9 +407,9 @@ class RuleBasedPlanner {
}
// Bind the expanded edge and node.
impl::BindSymbol(bound_symbols, edge_symbol);
bound_symbols.insert(edge_symbol);
match_context.new_symbols.emplace_back(edge_symbol);
if (impl::BindSymbol(bound_symbols, node_symbol)) {
if (bound_symbols.insert(node_symbol).second) {
match_context.new_symbols.emplace_back(node_symbol);
}
@ -612,14 +433,12 @@ class RuleBasedPlanner {
other_symbols);
}
}
last_op =
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
last_op = impl::GenNamedPaths(last_op, bound_symbols, named_paths);
last_op =
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
}
}
DCHECK(all_filters.empty()) << "Expected to generate all filters";
DCHECK(filters.empty()) << "Expected to generate all filters";
return last_op;
}

View File

@ -410,7 +410,7 @@ TEST(TestLogicalPlanner, MatchLabeledNodes) {
auto dba = dbms.active();
auto label = dba->Label("label");
QUERY(MATCH(PATTERN(NODE("n", label))), RETURN("n"));
CheckPlan(storage, ExpectScanAllByLabel(), ExpectFilter(), ExpectProduce());
CheckPlan(storage, ExpectScanAllByLabel(), ExpectProduce());
}
TEST(TestLogicalPlanner, MatchPathReturn) {
@ -1132,7 +1132,7 @@ TEST(TestLogicalPlanner, WhereIndexedLabelProperty) {
auto plan = MakeLogicalPlan<RuleBasedPlanner>(planning_context);
CheckPlan(*plan, symbol_table,
ExpectScanAllByLabelPropertyValue(label, property, lit_42),
ExpectFilter(), ExpectProduce());
ExpectProduce());
}
TEST(TestLogicalPlanner, BestPropertyIndexed) {
@ -1194,9 +1194,8 @@ TEST(TestLogicalPlanner, MultiPropertyIndexScan) {
auto plan = MakeLogicalPlan<RuleBasedPlanner>(planning_context);
CheckPlan(*plan, symbol_table,
ExpectScanAllByLabelPropertyValue(label1, prop1, lit_1),
ExpectFilter(),
ExpectScanAllByLabelPropertyValue(label2, prop2, lit_2),
ExpectFilter(), ExpectProduce());
ExpectProduce());
}
TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
@ -1223,7 +1222,7 @@ TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
CheckPlan(*plan, symbol_table,
ExpectScanAllByLabelPropertyRange(label, property, lower_bound,
upper_bound),
ExpectFilter(), ExpectProduce());
ExpectProduce());
};
{
// Test relation operators which form an upper bound for range.
@ -1373,7 +1372,7 @@ TEST(TestLogicalPlanner, MatchDoubleScanToExpandExisting) {
// We expect 2x ScanAll and then Expand, since we are guessing that is
// faster (due to low label index vertex count).
CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectScanAllByLabel(),
ExpectExpand(), ExpectFilter(), ExpectProduce());
ExpectExpand(), ExpectProduce());
}
TEST(TestLogicalPlanner, MatchScanToExpand) {