Refactor collecting filters during planning
Summary: Move QueryParts and Filters to a new file. Reorganize FilterInfo struct. Remove label filter if we do indexed scan by label. Remove property filter used in indexed scan. Reviewers: florijan Reviewed By: florijan Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D915
This commit is contained in:
parent
cbf8dacc11
commit
db34cb2a40
@ -204,6 +204,7 @@ set(memgraph_src_files
|
||||
${src_dir}/query/interpret/awesome_memgraph_functions.cpp
|
||||
${src_dir}/query/interpreter.cpp
|
||||
${src_dir}/query/plan/operator.cpp
|
||||
${src_dir}/query/plan/preprocess.cpp
|
||||
${src_dir}/query/plan/rule_based_planner.cpp
|
||||
${src_dir}/query/plan/variable_start_planner.cpp
|
||||
${src_dir}/query/typed_value.cpp
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "query/plan/preprocess.hpp"
|
||||
#include "query/plan/rule_based_planner.hpp"
|
||||
#include "query/plan/variable_start_planner.hpp"
|
||||
|
||||
|
489
src/query/plan/preprocess.cpp
Normal file
489
src/query/plan/preprocess.cpp
Normal file
@ -0,0 +1,489 @@
|
||||
#include "query/plan/preprocess.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <stack>
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
namespace {
|
||||
|
||||
void ForEachPattern(
|
||||
Pattern &pattern, std::function<void(NodeAtom *)> base,
|
||||
std::function<void(NodeAtom *, EdgeAtom *, NodeAtom *)> collect) {
|
||||
DCHECK(!pattern.atoms_.empty()) << "Missing atoms in pattern";
|
||||
auto atoms_it = pattern.atoms_.begin();
|
||||
auto current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
|
||||
DCHECK(current_node) << "First pattern atom is not a node";
|
||||
base(current_node);
|
||||
// Remaining atoms need to follow sequentially as (EdgeAtom, NodeAtom)*
|
||||
while (atoms_it != pattern.atoms_.end()) {
|
||||
auto edge = dynamic_cast<EdgeAtom *>(*atoms_it++);
|
||||
DCHECK(edge) << "Expected an edge atom in pattern.";
|
||||
DCHECK(atoms_it != pattern.atoms_.end())
|
||||
<< "Edge atom should not end the pattern.";
|
||||
auto prev_node = current_node;
|
||||
current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
|
||||
DCHECK(current_node) << "Expected a node atom in pattern.";
|
||||
collect(prev_node, edge, current_node);
|
||||
}
|
||||
}
|
||||
|
||||
// Collects symbols from identifiers found in visited AST nodes.
|
||||
class UsedSymbolsCollector : public HierarchicalTreeVisitor {
|
||||
public:
|
||||
explicit UsedSymbolsCollector(const SymbolTable &symbol_table)
|
||||
: symbol_table_(symbol_table) {}
|
||||
|
||||
using HierarchicalTreeVisitor::PostVisit;
|
||||
using HierarchicalTreeVisitor::PreVisit;
|
||||
using HierarchicalTreeVisitor::Visit;
|
||||
|
||||
bool PostVisit(All &all) override {
|
||||
// Remove the symbol which is bound by all, because we are only interested
|
||||
// in free (unbound) symbols.
|
||||
symbols_.erase(symbol_table_.at(*all.identifier_));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Visit(Identifier &ident) override {
|
||||
symbols_.insert(symbol_table_.at(ident));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Visit(PrimitiveLiteral &) override { return true; }
|
||||
bool Visit(ParameterLookup &) override { return true; }
|
||||
bool Visit(query::CreateIndex &) override { return true; }
|
||||
|
||||
std::unordered_set<Symbol> symbols_;
|
||||
const SymbolTable &symbol_table_;
|
||||
};
|
||||
|
||||
// Converts multiple Patterns to Expansions. Each Pattern can contain an
|
||||
// arbitrarily long chain of nodes and edges. The conversion to an Expansion is
|
||||
// done by splitting a pattern into triplets (node1, edge, node2). The triplets
|
||||
// conserve the semantics of the pattern. For example, in a pattern:
|
||||
// (m) -[e]- (n) -[f]- (o) the same can be achieved with:
|
||||
// (m) -[e]- (n), (n) -[f]- (o).
|
||||
// This representation makes it easier to permute from which node or edge we
|
||||
// want to start expanding.
|
||||
std::vector<Expansion> NormalizePatterns(
|
||||
const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
|
||||
std::vector<Expansion> expansions;
|
||||
auto ignore_node = [&](auto *) {};
|
||||
auto collect_expansion = [&](auto *prev_node, auto *edge,
|
||||
auto *current_node) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
// Remove symbols which are bound by variable expansions.
|
||||
if (edge->IsVariable()) {
|
||||
if (edge->lower_bound_) edge->lower_bound_->Accept(collector);
|
||||
if (edge->upper_bound_) edge->upper_bound_->Accept(collector);
|
||||
collector.symbols_.erase(symbol_table.at(*edge->inner_edge_));
|
||||
collector.symbols_.erase(symbol_table.at(*edge->inner_node_));
|
||||
if (edge->filter_expression_) edge->filter_expression_->Accept(collector);
|
||||
}
|
||||
expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, false,
|
||||
collector.symbols_, current_node});
|
||||
};
|
||||
for (const auto &pattern : patterns) {
|
||||
if (pattern->atoms_.size() == 1U) {
|
||||
auto *node = dynamic_cast<NodeAtom *>(pattern->atoms_[0]);
|
||||
DCHECK(node) << "First pattern atom is not a node";
|
||||
expansions.emplace_back(Expansion{node});
|
||||
} else {
|
||||
ForEachPattern(*pattern, ignore_node, collect_expansion);
|
||||
}
|
||||
}
|
||||
return expansions;
|
||||
}
|
||||
|
||||
// Fills the given Matching, by converting the Match patterns to normalized
|
||||
// representation as Expansions. Filters used in the Match are also collected,
|
||||
// as well as edge symbols which determine Cyphermorphism. Collecting filters
|
||||
// will lift them out of a pattern and generate new expressions (just like they
|
||||
// were in a Where clause).
|
||||
void AddMatching(const std::vector<Pattern *> &patterns, Where *where,
|
||||
SymbolTable &symbol_table, AstTreeStorage &storage,
|
||||
Matching &matching) {
|
||||
auto expansions = NormalizePatterns(symbol_table, patterns);
|
||||
std::unordered_set<Symbol> edge_symbols;
|
||||
for (const auto &expansion : expansions) {
|
||||
// Matching may already have some expansions, so offset our index.
|
||||
const int expansion_ix = matching.expansions.size();
|
||||
// Map node1 symbol to expansion
|
||||
const auto &node1_sym = symbol_table.at(*expansion.node1->identifier_);
|
||||
matching.node_symbol_to_expansions[node1_sym].insert(expansion_ix);
|
||||
// Add node1 to all symbols.
|
||||
matching.expansion_symbols.insert(node1_sym);
|
||||
if (expansion.edge) {
|
||||
const auto &edge_sym = symbol_table.at(*expansion.edge->identifier_);
|
||||
// Fill edge symbols for Cyphermorphism.
|
||||
edge_symbols.insert(edge_sym);
|
||||
// Map node2 symbol to expansion
|
||||
const auto &node2_sym = symbol_table.at(*expansion.node2->identifier_);
|
||||
matching.node_symbol_to_expansions[node2_sym].insert(expansion_ix);
|
||||
// Add edge and node2 to all symbols
|
||||
matching.expansion_symbols.insert(edge_sym);
|
||||
matching.expansion_symbols.insert(node2_sym);
|
||||
}
|
||||
matching.expansions.push_back(expansion);
|
||||
}
|
||||
if (!edge_symbols.empty()) {
|
||||
matching.edge_symbols.emplace_back(edge_symbols);
|
||||
}
|
||||
for (auto *pattern : patterns) {
|
||||
matching.filters.CollectPatternFilters(*pattern, symbol_table, storage);
|
||||
if (pattern->identifier_->user_declared_) {
|
||||
std::vector<Symbol> path_elements;
|
||||
for (auto *pattern_atom : pattern->atoms_)
|
||||
path_elements.emplace_back(symbol_table.at(*pattern_atom->identifier_));
|
||||
matching.named_paths.emplace(symbol_table.at(*pattern->identifier_),
|
||||
std::move(path_elements));
|
||||
}
|
||||
}
|
||||
if (where) {
|
||||
matching.filters.CollectWhereFilter(*where, symbol_table);
|
||||
}
|
||||
}
|
||||
void AddMatching(const Match &match, SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage, Matching &matching) {
|
||||
return AddMatching(match.patterns_, match.where_, symbol_table, storage,
|
||||
matching);
|
||||
}
|
||||
|
||||
auto SplitExpressionOnAnd(Expression *expression) {
|
||||
std::vector<Expression *> expressions;
|
||||
std::stack<Expression *> pending_expressions;
|
||||
pending_expressions.push(expression);
|
||||
while (!pending_expressions.empty()) {
|
||||
auto *current_expression = pending_expressions.top();
|
||||
pending_expressions.pop();
|
||||
if (auto *and_op = dynamic_cast<AndOperator *>(current_expression)) {
|
||||
pending_expressions.push(and_op->expression1_);
|
||||
pending_expressions.push(and_op->expression2_);
|
||||
} else {
|
||||
expressions.push_back(current_expression);
|
||||
}
|
||||
}
|
||||
return expressions;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table,
|
||||
const Symbol &symbol,
|
||||
const GraphDbTypes::Property &property,
|
||||
Expression *value)
|
||||
: symbol_(symbol), property_(property), value_(value) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
value->Accept(collector);
|
||||
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
|
||||
}
|
||||
|
||||
PropertyFilter::PropertyFilter(
|
||||
const SymbolTable &symbol_table, const Symbol &symbol,
|
||||
const GraphDbTypes::Property &property,
|
||||
const std::experimental::optional<PropertyFilter::Bound> &lower_bound,
|
||||
const std::experimental::optional<PropertyFilter::Bound> &upper_bound)
|
||||
: symbol_(symbol),
|
||||
property_(property),
|
||||
lower_bound_(lower_bound),
|
||||
upper_bound_(upper_bound) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
if (lower_bound) {
|
||||
lower_bound->value()->Accept(collector);
|
||||
}
|
||||
if (upper_bound) {
|
||||
upper_bound->value()->Accept(collector);
|
||||
}
|
||||
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
|
||||
}
|
||||
|
||||
bool operator==(const PropertyFilter &a, const PropertyFilter &b) {
|
||||
auto bound_eq = [](const auto &a_bound, const auto &b_bound) {
|
||||
if (!a_bound && !b_bound) return true;
|
||||
if (a_bound && b_bound)
|
||||
return a_bound->value() == b_bound->value() &&
|
||||
a_bound->type() == b_bound->type();
|
||||
return false;
|
||||
};
|
||||
return a.symbol_ == b.symbol_ && a.property_ == b.property_ &&
|
||||
a.is_symbol_in_value_ == b.is_symbol_in_value_ &&
|
||||
a.value_ == b.value_ && bound_eq(a.lower_bound_, b.lower_bound_) &&
|
||||
bound_eq(a.upper_bound_, b.upper_bound_);
|
||||
}
|
||||
|
||||
bool operator==(const FilterInfo &a, const FilterInfo &b) {
|
||||
return a.type == b.type && a.expression == b.expression &&
|
||||
a.used_symbols == b.used_symbols && a.labels == b.labels &&
|
||||
a.property_filter == b.property_filter;
|
||||
}
|
||||
|
||||
void Filters::EraseFilter(const FilterInfo &filter) {
|
||||
auto filter_it = std::find(all_filters_.begin(), all_filters_.end(), filter);
|
||||
if (filter_it == all_filters_.end()) return;
|
||||
all_filters_.erase(filter_it);
|
||||
}
|
||||
|
||||
void Filters::EraseLabelFilter(const Symbol &symbol,
|
||||
const GraphDbTypes::Label &label) {
|
||||
for (auto filter_it = all_filters_.begin();
|
||||
filter_it != all_filters_.end();) {
|
||||
if (filter_it->type != FilterInfo::Type::Label) {
|
||||
++filter_it;
|
||||
continue;
|
||||
}
|
||||
if (!utils::Contains(filter_it->used_symbols, symbol)) {
|
||||
++filter_it;
|
||||
continue;
|
||||
}
|
||||
auto label_it =
|
||||
std::find(filter_it->labels.begin(), filter_it->labels.end(), label);
|
||||
if (label_it == filter_it->labels.end()) {
|
||||
++filter_it;
|
||||
continue;
|
||||
}
|
||||
filter_it->labels.erase(label_it);
|
||||
DCHECK(!utils::Contains(filter_it->labels, label))
|
||||
<< "Didn't expect duplicated labels";
|
||||
if (filter_it->labels.empty()) {
|
||||
// If there are no labels to filter, then erase the whole FilterInfo.
|
||||
filter_it = all_filters_.erase(filter_it);
|
||||
} else {
|
||||
++filter_it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
auto add_properties_variable = [&](EdgeAtom *atom) {
|
||||
const auto &symbol = symbol_table.at(*atom->identifier_);
|
||||
for (auto &prop_pair : atom->properties_) {
|
||||
// We need to store two property-lookup filters in all_filters. One is
|
||||
// used for inlining property filters into variable expansion, and
|
||||
// utilizes the inner_edge symbol. The other is used for post-expansion
|
||||
// filtering and does not use the inner_edge symbol, but the edge symbol
|
||||
// (a list of edges).
|
||||
{
|
||||
collector.symbols_.clear();
|
||||
prop_pair.second->Accept(collector);
|
||||
collector.symbols_.emplace(symbol_table.at(*atom->inner_node_));
|
||||
collector.symbols_.emplace(symbol_table.at(*atom->inner_edge_));
|
||||
// First handle the inline property filter.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(atom->inner_edge_, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
// Currently, variable expand has no gains if we set PropertyFilter.
|
||||
all_filters_.emplace_back(FilterInfo{FilterInfo::Type::Generic,
|
||||
prop_equal, collector.symbols_});
|
||||
}
|
||||
{
|
||||
collector.symbols_.clear();
|
||||
prop_pair.second->Accept(collector);
|
||||
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
|
||||
// Now handle the post-expansion filter.
|
||||
// Create a new identifier and a symbol which will be filled in All.
|
||||
auto *identifier = atom->identifier_->Clone(storage);
|
||||
symbol_table[*identifier] =
|
||||
symbol_table.CreateSymbol(identifier->name_, false);
|
||||
// Create an equality expression and store it in all_filters_.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(identifier, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
// Currently, variable expand has no gains if we set PropertyFilter.
|
||||
all_filters_.emplace_back(
|
||||
FilterInfo{FilterInfo::Type::Generic,
|
||||
storage.Create<All>(identifier, atom->identifier_,
|
||||
storage.Create<Where>(prop_equal)),
|
||||
collector.symbols_});
|
||||
}
|
||||
}
|
||||
};
|
||||
auto add_properties = [&](auto *atom) {
|
||||
const auto &symbol = symbol_table.at(*atom->identifier_);
|
||||
for (auto &prop_pair : atom->properties_) {
|
||||
// Create an equality expression and store it in all_filters_.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(atom->identifier_, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
collector.symbols_.clear();
|
||||
prop_equal->Accept(collector);
|
||||
FilterInfo filter_info{FilterInfo::Type::Property, prop_equal,
|
||||
collector.symbols_};
|
||||
// Store a PropertyFilter on the value of the property.
|
||||
filter_info.property_filter.emplace(
|
||||
symbol_table, symbol, prop_pair.first.second, prop_pair.second);
|
||||
all_filters_.emplace_back(filter_info);
|
||||
}
|
||||
};
|
||||
auto add_node_filter = [&](NodeAtom *node) {
|
||||
const auto &node_symbol = symbol_table.at(*node->identifier_);
|
||||
if (!node->labels_.empty()) {
|
||||
// Create a LabelsTest and store it.
|
||||
auto *labels_test =
|
||||
storage.Create<LabelsTest>(node->identifier_, node->labels_);
|
||||
auto label_filter = FilterInfo{FilterInfo::Type::Label, labels_test,
|
||||
std::unordered_set<Symbol>{node_symbol}};
|
||||
label_filter.labels = node->labels_;
|
||||
all_filters_.emplace_back(label_filter);
|
||||
}
|
||||
add_properties(node);
|
||||
};
|
||||
auto add_expand_filter = [&](NodeAtom *, EdgeAtom *edge, NodeAtom *node) {
|
||||
if (edge->IsVariable())
|
||||
add_properties_variable(edge);
|
||||
else
|
||||
add_properties(edge);
|
||||
add_node_filter(node);
|
||||
};
|
||||
ForEachPattern(pattern, add_node_filter, add_expand_filter);
|
||||
}
|
||||
|
||||
// Adds the where filter expression to `all_filters_` and collects additional
|
||||
// information for potential property and label indexing.
|
||||
void Filters::CollectWhereFilter(Where &where,
|
||||
const SymbolTable &symbol_table) {
|
||||
auto where_filters = SplitExpressionOnAnd(where.expression_);
|
||||
for (const auto &filter : where_filters) {
|
||||
all_filters_.emplace_back(AnalyzeFilter(filter, symbol_table));
|
||||
}
|
||||
}
|
||||
|
||||
// Analyzes the filter expression by collecting information on filtering labels
|
||||
// and properties to be used with indexing.
|
||||
FilterInfo Filters::AnalyzeFilter(Expression *expr,
|
||||
const SymbolTable &symbol_table) {
|
||||
using Bound = PropertyFilter::Bound;
|
||||
// Create the base filter info.
|
||||
FilterInfo filter{FilterInfo::Type::Generic, expr};
|
||||
{
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
expr->Accept(collector);
|
||||
filter.used_symbols = collector.symbols_;
|
||||
}
|
||||
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
|
||||
auto *&ident) {
|
||||
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
|
||||
(ident = dynamic_cast<Identifier *>(prop_lookup->expression_));
|
||||
};
|
||||
auto add_prop_equal = [&](auto *maybe_lookup, auto *val_expr) {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
|
||||
filter.type = FilterInfo::Type::Property;
|
||||
filter.property_filter =
|
||||
PropertyFilter(symbol_table, symbol_table.at(*ident),
|
||||
prop_lookup->property_, val_expr);
|
||||
}
|
||||
};
|
||||
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(expr1, prop_lookup, ident)) {
|
||||
// n.prop > value
|
||||
filter.type = FilterInfo::Type::Property;
|
||||
filter.property_filter.emplace(
|
||||
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
|
||||
Bound(expr2, bound_type), std::experimental::nullopt);
|
||||
}
|
||||
if (get_property_lookup(expr2, prop_lookup, ident)) {
|
||||
// value > n.prop
|
||||
filter.type = FilterInfo::Type::Property;
|
||||
filter.property_filter.emplace(
|
||||
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
|
||||
std::experimental::nullopt, Bound(expr1, bound_type));
|
||||
}
|
||||
};
|
||||
// We are only interested to see the insides of And, because Or prevents
|
||||
// indexing since any labels and properties found there may be optional.
|
||||
DCHECK(!dynamic_cast<AndOperator *>(expr))
|
||||
<< "Expected AndOperators have been split.";
|
||||
if (auto *labels_test = dynamic_cast<LabelsTest *>(expr)) {
|
||||
// Since LabelsTest may contain any expression, we can only use the
|
||||
// simplest test on an identifier.
|
||||
if (auto *ident = dynamic_cast<Identifier *>(labels_test->expression_)) {
|
||||
filter.type = FilterInfo::Type::Label;
|
||||
filter.labels = labels_test->labels_;
|
||||
}
|
||||
} else if (auto *eq = dynamic_cast<EqualOperator *>(expr)) {
|
||||
// Try to get property equality test from the top expressions.
|
||||
// Unfortunately, we cannot go deeper inside Equal, because chained equals
|
||||
// need not correspond to And. For example, `(n.prop = value) = false)`:
|
||||
// EQ
|
||||
// / \
|
||||
// EQ false -- top expressions
|
||||
// / \
|
||||
// n.prop value
|
||||
// Here the `prop` may be different than `value` resulting in `false`. This
|
||||
// would compare with the top level `false`, producing `true`. Therefore, it
|
||||
// is incorrect to pick up `n.prop = value` for scanning by property index.
|
||||
add_prop_equal(eq->expression1_, eq->expression2_);
|
||||
// And reversed.
|
||||
add_prop_equal(eq->expression2_, eq->expression1_);
|
||||
// TODO: What about n.prop = m.prop case? Do we generate 2 PropertyFilters?
|
||||
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
|
||||
add_prop_greater(gt->expression1_, gt->expression2_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
|
||||
add_prop_greater(ge->expression1_, ge->expression2_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
|
||||
// Like greater, but in reverse.
|
||||
add_prop_greater(lt->expression2_, lt->expression1_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
|
||||
// Like greater equal, but in reverse.
|
||||
add_prop_greater(le->expression2_, le->expression1_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
}
|
||||
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
|
||||
// indexing by range. Note, that the generated Ast uses AND for chained
|
||||
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
|
||||
// as `expr1 < n.prop AND n.prop < expr2`.
|
||||
return filter;
|
||||
}
|
||||
|
||||
// Converts a Query to multiple QueryParts. In the process new Ast nodes may be
|
||||
// created, e.g. filter expressions.
|
||||
std::vector<QueryPart> CollectQueryParts(SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage) {
|
||||
auto query = storage.query();
|
||||
std::vector<QueryPart> query_parts(1);
|
||||
auto *query_part = &query_parts.back();
|
||||
for (auto &clause : query->clauses_) {
|
||||
if (auto *match = dynamic_cast<Match *>(clause)) {
|
||||
if (match->optional_) {
|
||||
query_part->optional_matching.emplace_back(Matching{});
|
||||
AddMatching(*match, symbol_table, storage,
|
||||
query_part->optional_matching.back());
|
||||
} else {
|
||||
DCHECK(query_part->optional_matching.empty())
|
||||
<< "Match clause cannot follow optional match.";
|
||||
AddMatching(*match, symbol_table, storage, query_part->matching);
|
||||
}
|
||||
} else {
|
||||
query_part->remaining_clauses.push_back(clause);
|
||||
if (auto *merge = dynamic_cast<query::Merge *>(clause)) {
|
||||
query_part->merge_matching.emplace_back(Matching{});
|
||||
AddMatching({merge->pattern_}, nullptr, symbol_table, storage,
|
||||
query_part->merge_matching.back());
|
||||
} else if (dynamic_cast<With *>(clause) ||
|
||||
dynamic_cast<query::Unwind *>(clause)) {
|
||||
// This query part is done, continue with a new one.
|
||||
query_parts.emplace_back(QueryPart{});
|
||||
query_part = &query_parts.back();
|
||||
} else if (dynamic_cast<Return *>(clause)) {
|
||||
// TODO: Support RETURN UNION ...
|
||||
return query_parts;
|
||||
}
|
||||
}
|
||||
}
|
||||
return query_parts;
|
||||
}
|
||||
|
||||
} // namespace query::plan
|
234
src/query/plan/preprocess.hpp
Normal file
234
src/query/plan/preprocess.hpp
Normal file
@ -0,0 +1,234 @@
|
||||
/// @file
|
||||
#pragma once
|
||||
|
||||
#include <experimental/optional>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/frontend/semantic/symbol_table.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
/// Normalized representation of a pattern that needs to be matched.
|
||||
struct Expansion {
|
||||
/// The first node in the expansion, it can be a single node.
|
||||
NodeAtom *node1 = nullptr;
|
||||
/// Optional edge which connects the 2 nodes.
|
||||
EdgeAtom *edge = nullptr;
|
||||
/// Direction of the edge, it may be flipped compared to original
|
||||
/// @c EdgeAtom during plan generation.
|
||||
EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
|
||||
/// True if the direction and nodes were flipped.
|
||||
bool is_flipped = false;
|
||||
/// Set of symbols found inside the range expressions of a variable path edge.
|
||||
std::unordered_set<Symbol> symbols_in_range{};
|
||||
/// Optional node at the other end of an edge. If the expansion
|
||||
/// contains an edge, then this node is required.
|
||||
NodeAtom *node2 = nullptr;
|
||||
};
|
||||
|
||||
/// Stores the symbols and expression used to filter a property.
|
||||
class PropertyFilter {
|
||||
public:
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
PropertyFilter(const SymbolTable &, const Symbol &,
|
||||
const GraphDbTypes::Property &, Expression *);
|
||||
PropertyFilter(const SymbolTable &, const Symbol &,
|
||||
const GraphDbTypes::Property &,
|
||||
const std::experimental::optional<Bound> &,
|
||||
const std::experimental::optional<Bound> &);
|
||||
|
||||
/// Symbol whose property is looked up.
|
||||
Symbol symbol_;
|
||||
GraphDbTypes::Property property_;
|
||||
/// True if the same symbol is used in expressions for value or bounds.
|
||||
bool is_symbol_in_value_ = false;
|
||||
/// Expression which when evaluated produces the value a property must
|
||||
/// equal.
|
||||
Expression *value_ = nullptr;
|
||||
/// Expressions which produce lower and upper bounds for a property.
|
||||
std::experimental::optional<Bound> lower_bound_{};
|
||||
std::experimental::optional<Bound> upper_bound_{};
|
||||
};
|
||||
|
||||
bool operator==(const PropertyFilter &, const PropertyFilter &);
|
||||
inline bool operator!=(const PropertyFilter &a, const PropertyFilter &b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
/// Stores additional information for a filter expression.
|
||||
struct FilterInfo {
|
||||
/// A FilterInfo can be a generic filter expression or a specific filtering
|
||||
/// applied for labels or a property. Non generic types contain extra
|
||||
/// information which can be used to produce indexed scans of graph
|
||||
/// elements.
|
||||
enum class Type { Generic, Label, Property };
|
||||
|
||||
Type type;
|
||||
/// The filter expression which must be satisfied.
|
||||
Expression *expression;
|
||||
/// Set of used symbols by the filter @c expression.
|
||||
std::unordered_set<Symbol> used_symbols;
|
||||
/// Labels for Type::Label filtering.
|
||||
std::vector<GraphDbTypes::Label> labels;
|
||||
/// Property information for Type::Property filtering.
|
||||
std::experimental::optional<PropertyFilter> property_filter;
|
||||
};
|
||||
|
||||
bool operator==(const FilterInfo &, const FilterInfo &);
|
||||
inline bool operator!=(const FilterInfo &a, const FilterInfo &b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
|
||||
///
|
||||
/// Info is stored as a list of FilterInfo objects corresponding to all filter
|
||||
/// expressions that should be generated.
|
||||
class Filters {
|
||||
public:
|
||||
using iterator = std::vector<FilterInfo>::iterator;
|
||||
using const_iterator = std::vector<FilterInfo>::const_iterator;
|
||||
|
||||
auto begin() { return all_filters_.begin(); }
|
||||
auto begin() const { return all_filters_.begin(); }
|
||||
auto end() { return all_filters_.end(); }
|
||||
auto end() const { return all_filters_.end(); }
|
||||
|
||||
auto empty() const { return all_filters_.empty(); }
|
||||
|
||||
auto erase(iterator pos) { return all_filters_.erase(pos); }
|
||||
auto erase(const_iterator pos) { return all_filters_.erase(pos); }
|
||||
auto erase(iterator first, iterator last) {
|
||||
return all_filters_.erase(first, last);
|
||||
}
|
||||
auto erase(const_iterator first, const_iterator last) {
|
||||
return all_filters_.erase(first, last);
|
||||
}
|
||||
|
||||
auto FilteredLabels(const Symbol &symbol) const {
|
||||
std::unordered_set<GraphDbTypes::Label> labels;
|
||||
for (const auto &filter : all_filters_) {
|
||||
if (filter.type == FilterInfo::Type::Label &&
|
||||
utils::Contains(filter.used_symbols, symbol)) {
|
||||
DCHECK(filter.used_symbols.size() == 1U)
|
||||
<< "Expected a single used symbol for label filter";
|
||||
labels.insert(filter.labels.begin(), filter.labels.end());
|
||||
}
|
||||
}
|
||||
return labels;
|
||||
}
|
||||
|
||||
// Remove a filter; may invalidate iterators.
|
||||
void EraseFilter(const FilterInfo &);
|
||||
|
||||
// Remove a label filter for symbol; may invalidate iterators.
|
||||
void EraseLabelFilter(const Symbol &, const GraphDbTypes::Label &);
|
||||
|
||||
// Returns a vector of FilterInfo for properties.
|
||||
auto PropertyFilters(const Symbol &symbol) const {
|
||||
std::vector<FilterInfo> filters;
|
||||
for (const auto &filter : all_filters_) {
|
||||
if (filter.type == FilterInfo::Type::Property) {
|
||||
filters.push_back(filter);
|
||||
}
|
||||
}
|
||||
return filters;
|
||||
}
|
||||
|
||||
/// Collects filtering information from a pattern.
|
||||
///
|
||||
/// Goes through all the atoms in a pattern and generates filter expressions
|
||||
/// for found labels, properties and edge types. The generated expressions are
|
||||
/// stored.
|
||||
void CollectPatternFilters(Pattern &, SymbolTable &, AstTreeStorage &);
|
||||
/// Collects filtering information from a where expression.
|
||||
///
|
||||
/// Takes the where expression and stores it, then analyzes the expression for
|
||||
/// additional information. The additional information is used to populate
|
||||
/// label filters and property filters, so that indexed scanning can use it.
|
||||
void CollectWhereFilter(Where &, const SymbolTable &);
|
||||
|
||||
private:
|
||||
FilterInfo AnalyzeFilter(Expression *, const SymbolTable &);
|
||||
|
||||
std::vector<FilterInfo> all_filters_;
|
||||
};
|
||||
|
||||
/// Normalized representation of a single or multiple Match clauses.
|
||||
///
|
||||
/// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
|
||||
/// WHERE c.prop < 42` will produce the following.
|
||||
/// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and
|
||||
/// `(n) -[e3]- (m)`.
|
||||
/// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the
|
||||
/// first `MATCH` and the set `{e3}` for the second.
|
||||
/// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the
|
||||
/// other obtained from `WHERE` on symbol `c`.
|
||||
struct Matching {
|
||||
/// All expansions that need to be performed across @c Match clauses.
|
||||
std::vector<Expansion> expansions;
|
||||
/// Symbols for edges established in match, used to ensure Cyphermorphism.
|
||||
///
|
||||
/// There are multiple sets, because each Match clause determines a single
|
||||
/// set.
|
||||
std::vector<std::unordered_set<Symbol>> edge_symbols;
|
||||
/// Information on used filter expressions while matching.
|
||||
Filters filters;
|
||||
/// Maps node symbols to expansions which bind them.
|
||||
std::unordered_map<Symbol, std::set<int>> node_symbol_to_expansions{};
|
||||
/// Maps named path symbols to a vector of Symbols that define its pattern.
|
||||
std::unordered_map<Symbol, std::vector<Symbol>> named_paths{};
|
||||
/// All node and edge symbols across all expansions (from all matches).
|
||||
std::unordered_set<Symbol> expansion_symbols{};
|
||||
};
|
||||
|
||||
/// @brief Represents a read (+ write) part of a query. Parts are split on
|
||||
/// `WITH` clauses.
|
||||
///
|
||||
/// Each part ends with either:
|
||||
///
|
||||
/// * `RETURN` clause;
|
||||
/// * `WITH` clause or
|
||||
/// * any of the write clauses.
|
||||
///
|
||||
/// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
|
||||
/// generated QueryPart will have `matching` generated for the `MATCH`.
|
||||
/// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
|
||||
/// in that exact order. The pattern inside the first `MERGE` will be used to
|
||||
/// generate the first `merge_matching` element, and the second `MERGE` pattern
|
||||
/// will produce the second `merge_matching` element. This way, if someone
|
||||
/// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
|
||||
/// in the same order as their respective `merge_matching` elements.
|
||||
struct QueryPart {
|
||||
/// @brief All `MATCH` clauses merged into one @c Matching.
|
||||
Matching matching;
|
||||
/// @brief Each `OPTIONAL MATCH` converted to @c Matching.
|
||||
std::vector<Matching> optional_matching{};
|
||||
/// @brief @c Matching for each `MERGE` clause.
|
||||
///
|
||||
/// Storing the normalized pattern of a @c Merge does not preclude storing the
|
||||
/// @c Merge clause itself inside `remaining_clauses`. The reason is that we
|
||||
/// need to have access to other parts of the clause, such as `SET` clauses
|
||||
/// which need to be run.
|
||||
///
|
||||
/// Since @c Merge is contained in `remaining_clauses`, this vector contains
|
||||
/// matching in the same order as @c Merge appears.
|
||||
std::vector<Matching> merge_matching{};
|
||||
/// @brief All the remaining clauses (without @c Match).
|
||||
std::vector<Clause *> remaining_clauses{};
|
||||
};
|
||||
|
||||
/// @brief Convert the AST to multiple @c QueryParts.
|
||||
///
|
||||
/// This function will normalize patterns inside @c Match and @c Merge clauses
|
||||
/// and do some other preprocessing in order to generate multiple @c QueryPart
|
||||
/// structures. @c AstTreeStorage and @c SymbolTable may be used to create new
|
||||
/// AST nodes.
|
||||
std::vector<QueryPart> CollectQueryParts(SymbolTable &, AstTreeStorage &);
|
||||
|
||||
} // namespace query::plan
|
@ -62,27 +62,6 @@ auto ReducePattern(
|
||||
return last_res;
|
||||
}
|
||||
|
||||
void ForEachPattern(
|
||||
Pattern &pattern, std::function<void(NodeAtom *)> base,
|
||||
std::function<void(NodeAtom *, EdgeAtom *, NodeAtom *)> collect) {
|
||||
DCHECK(!pattern.atoms_.empty()) << "Missing atoms in pattern";
|
||||
auto atoms_it = pattern.atoms_.begin();
|
||||
auto current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
|
||||
DCHECK(current_node) << "First pattern atom is not a node";
|
||||
base(current_node);
|
||||
// Remaining atoms need to follow sequentially as (EdgeAtom, NodeAtom)*
|
||||
while (atoms_it != pattern.atoms_.end()) {
|
||||
auto edge = dynamic_cast<EdgeAtom *>(*atoms_it++);
|
||||
DCHECK(edge) << "Expected an edge atom in pattern.";
|
||||
DCHECK(atoms_it != pattern.atoms_.end())
|
||||
<< "Edge atom should not end the pattern.";
|
||||
auto prev_node = current_node;
|
||||
current_node = dynamic_cast<NodeAtom *>(*atoms_it++);
|
||||
DCHECK(current_node) << "Expected a node atom in pattern.";
|
||||
collect(prev_node, edge, current_node);
|
||||
}
|
||||
}
|
||||
|
||||
auto GenCreate(Create &create, LogicalOperator *input_op,
|
||||
const SymbolTable &symbol_table,
|
||||
std::unordered_set<Symbol> &bound_symbols) {
|
||||
@ -94,38 +73,8 @@ auto GenCreate(Create &create, LogicalOperator *input_op,
|
||||
return last_op;
|
||||
}
|
||||
|
||||
// Collects symbols from identifiers found in visited AST nodes.
|
||||
class UsedSymbolsCollector : public HierarchicalTreeVisitor {
|
||||
public:
|
||||
explicit UsedSymbolsCollector(const SymbolTable &symbol_table)
|
||||
: symbol_table_(symbol_table) {}
|
||||
|
||||
using HierarchicalTreeVisitor::PostVisit;
|
||||
using HierarchicalTreeVisitor::PreVisit;
|
||||
using HierarchicalTreeVisitor::Visit;
|
||||
|
||||
bool PostVisit(All &all) override {
|
||||
// Remove the symbol which is bound by all, because we are only interested
|
||||
// in free (unbound) symbols.
|
||||
symbols_.erase(symbol_table_.at(*all.identifier_));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Visit(Identifier &ident) override {
|
||||
symbols_.insert(symbol_table_.at(ident));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Visit(PrimitiveLiteral &) override { return true; }
|
||||
bool Visit(ParameterLookup &) override { return true; }
|
||||
bool Visit(query::CreateIndex &) override { return true; }
|
||||
|
||||
std::unordered_set<Symbol> symbols_;
|
||||
const SymbolTable &symbol_table_;
|
||||
};
|
||||
|
||||
bool HasBoundFilterSymbols(const std::unordered_set<Symbol> &bound_symbols,
|
||||
const Filters::FilterInfo &filter) {
|
||||
const FilterInfo &filter) {
|
||||
for (const auto &symbol : filter.used_symbols) {
|
||||
if (bound_symbols.find(symbol) == bound_symbols.end()) {
|
||||
return false;
|
||||
@ -505,137 +454,18 @@ auto GenReturnBody(LogicalOperator *input_op, bool advance_command,
|
||||
return last_op;
|
||||
}
|
||||
|
||||
// Converts multiple Patterns to Expansions. Each Pattern can contain an
|
||||
// arbitrarily long chain of nodes and edges. The conversion to an Expansion is
|
||||
// done by splitting a pattern into triplets (node1, edge, node2). The triplets
|
||||
// conserve the semantics of the pattern. For example, in a pattern:
|
||||
// (m) -[e]- (n) -[f]- (o) the same can be achieved with:
|
||||
// (m) -[e]- (n), (n) -[f]- (o).
|
||||
// This representation makes it easier to permute from which node or edge we
|
||||
// want to start expanding.
|
||||
std::vector<Expansion> NormalizePatterns(
|
||||
const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
|
||||
std::vector<Expansion> expansions;
|
||||
auto ignore_node = [&](auto *) {};
|
||||
auto collect_expansion = [&](auto *prev_node, auto *edge,
|
||||
auto *current_node) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
// Remove symbols which are bound by variable expansions.
|
||||
if (edge->IsVariable()) {
|
||||
if (edge->lower_bound_) edge->lower_bound_->Accept(collector);
|
||||
if (edge->upper_bound_) edge->upper_bound_->Accept(collector);
|
||||
collector.symbols_.erase(symbol_table.at(*edge->inner_edge_));
|
||||
collector.symbols_.erase(symbol_table.at(*edge->inner_node_));
|
||||
if (edge->filter_expression_) edge->filter_expression_->Accept(collector);
|
||||
}
|
||||
expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, false,
|
||||
collector.symbols_, current_node});
|
||||
};
|
||||
for (const auto &pattern : patterns) {
|
||||
if (pattern->atoms_.size() == 1U) {
|
||||
auto *node = dynamic_cast<NodeAtom *>(pattern->atoms_[0]);
|
||||
DCHECK(node) << "First pattern atom is not a node";
|
||||
expansions.emplace_back(Expansion{node});
|
||||
} else {
|
||||
ForEachPattern(*pattern, ignore_node, collect_expansion);
|
||||
}
|
||||
}
|
||||
return expansions;
|
||||
}
|
||||
|
||||
// Fills the given Matching, by converting the Match patterns to normalized
|
||||
// representation as Expansions. Filters used in the Match are also collected,
|
||||
// as well as edge symbols which determine Cyphermorphism. Collecting filters
|
||||
// will lift them out of a pattern and generate new expressions (just like they
|
||||
// were in a Where clause).
|
||||
void AddMatching(const std::vector<Pattern *> &patterns, Where *where,
|
||||
SymbolTable &symbol_table, AstTreeStorage &storage,
|
||||
Matching &matching) {
|
||||
auto expansions = NormalizePatterns(symbol_table, patterns);
|
||||
std::unordered_set<Symbol> edge_symbols;
|
||||
for (const auto &expansion : expansions) {
|
||||
// Matching may already have some expansions, so offset our index.
|
||||
const int expansion_ix = matching.expansions.size();
|
||||
// Map node1 symbol to expansion
|
||||
const auto &node1_sym = symbol_table.at(*expansion.node1->identifier_);
|
||||
matching.node_symbol_to_expansions[node1_sym].insert(expansion_ix);
|
||||
// Add node1 to all symbols.
|
||||
matching.expansion_symbols.insert(node1_sym);
|
||||
if (expansion.edge) {
|
||||
const auto &edge_sym = symbol_table.at(*expansion.edge->identifier_);
|
||||
// Fill edge symbols for Cyphermorphism.
|
||||
edge_symbols.insert(edge_sym);
|
||||
// Map node2 symbol to expansion
|
||||
const auto &node2_sym = symbol_table.at(*expansion.node2->identifier_);
|
||||
matching.node_symbol_to_expansions[node2_sym].insert(expansion_ix);
|
||||
// Add edge and node2 to all symbols
|
||||
matching.expansion_symbols.insert(edge_sym);
|
||||
matching.expansion_symbols.insert(node2_sym);
|
||||
}
|
||||
matching.expansions.push_back(expansion);
|
||||
}
|
||||
if (!edge_symbols.empty()) {
|
||||
matching.edge_symbols.emplace_back(edge_symbols);
|
||||
}
|
||||
for (auto *pattern : patterns) {
|
||||
matching.filters.CollectPatternFilters(*pattern, symbol_table, storage);
|
||||
if (pattern->identifier_->user_declared_) {
|
||||
std::vector<Symbol> path_elements;
|
||||
for (auto *pattern_atom : pattern->atoms_)
|
||||
path_elements.emplace_back(symbol_table.at(*pattern_atom->identifier_));
|
||||
matching.named_paths.emplace(symbol_table.at(*pattern->identifier_),
|
||||
std::move(path_elements));
|
||||
}
|
||||
}
|
||||
if (where) {
|
||||
matching.filters.CollectWhereFilter(*where, symbol_table);
|
||||
}
|
||||
}
|
||||
void AddMatching(const Match &match, SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage, Matching &matching) {
|
||||
return AddMatching(match.patterns_, match.where_, symbol_table, storage,
|
||||
matching);
|
||||
}
|
||||
|
||||
auto SplitExpressionOnAnd(Expression *expression) {
|
||||
std::vector<Expression *> expressions;
|
||||
std::stack<Expression *> pending_expressions;
|
||||
pending_expressions.push(expression);
|
||||
while (!pending_expressions.empty()) {
|
||||
auto *current_expression = pending_expressions.top();
|
||||
pending_expressions.pop();
|
||||
if (auto *and_op = dynamic_cast<AndOperator *>(current_expression)) {
|
||||
pending_expressions.push(and_op->expression1_);
|
||||
pending_expressions.push(and_op->expression2_);
|
||||
} else {
|
||||
expressions.push_back(current_expression);
|
||||
}
|
||||
}
|
||||
return expressions;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace impl {
|
||||
|
||||
// Returns false if the symbol was already bound, otherwise binds it and
|
||||
// returns true.
|
||||
bool BindSymbol(std::unordered_set<Symbol> &bound_symbols,
|
||||
const Symbol &symbol) {
|
||||
auto insertion = bound_symbols.insert(symbol);
|
||||
return insertion.second;
|
||||
}
|
||||
|
||||
Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
|
||||
std::vector<Filters::FilterInfo> &all_filters,
|
||||
AstTreeStorage &storage) {
|
||||
Filters &filters, AstTreeStorage &storage) {
|
||||
Expression *filter_expr = nullptr;
|
||||
for (auto filters_it = all_filters.begin();
|
||||
filters_it != all_filters.end();) {
|
||||
for (auto filters_it = filters.begin(); filters_it != filters.end();) {
|
||||
if (HasBoundFilterSymbols(bound_symbols, *filters_it)) {
|
||||
filter_expr = impl::BoolJoin<AndOperator>(storage, filter_expr,
|
||||
filters_it->expression);
|
||||
filters_it = all_filters.erase(filters_it);
|
||||
filters_it = filters.erase(filters_it);
|
||||
} else {
|
||||
filters_it++;
|
||||
}
|
||||
@ -645,9 +475,8 @@ Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
|
||||
|
||||
LogicalOperator *GenFilters(LogicalOperator *last_op,
|
||||
const std::unordered_set<Symbol> &bound_symbols,
|
||||
std::vector<Filters::FilterInfo> &all_filters,
|
||||
AstTreeStorage &storage) {
|
||||
auto *filter_expr = ExtractFilters(bound_symbols, all_filters, storage);
|
||||
Filters &filters, AstTreeStorage &storage) {
|
||||
auto *filter_expr = ExtractFilters(bound_symbols, filters, storage);
|
||||
if (filter_expr) {
|
||||
last_op =
|
||||
new Filter(std::shared_ptr<LogicalOperator>(last_op), filter_expr);
|
||||
@ -700,7 +529,7 @@ LogicalOperator *GenCreateForPattern(
|
||||
const SymbolTable &symbol_table,
|
||||
std::unordered_set<Symbol> &bound_symbols) {
|
||||
auto base = [&](NodeAtom *node) -> LogicalOperator * {
|
||||
if (BindSymbol(bound_symbols, symbol_table.at(*node->identifier_)))
|
||||
if (bound_symbols.insert(symbol_table.at(*node->identifier_)).second)
|
||||
return new CreateNode(node, std::shared_ptr<LogicalOperator>(input_op));
|
||||
else
|
||||
return input_op;
|
||||
@ -713,10 +542,10 @@ LogicalOperator *GenCreateForPattern(
|
||||
// If the expand node was already bound, then we need to indicate this,
|
||||
// so that CreateExpand only creates an edge.
|
||||
bool node_existing = false;
|
||||
if (!BindSymbol(bound_symbols, symbol_table.at(*node->identifier_))) {
|
||||
if (!bound_symbols.insert(symbol_table.at(*node->identifier_)).second) {
|
||||
node_existing = true;
|
||||
}
|
||||
if (!BindSymbol(bound_symbols, symbol_table.at(*edge->identifier_))) {
|
||||
if (!bound_symbols.insert(symbol_table.at(*edge->identifier_)).second) {
|
||||
LOG(FATAL) << "Symbols used for created edges cannot be redeclared.";
|
||||
}
|
||||
return new CreateExpand(node, edge,
|
||||
@ -792,238 +621,11 @@ LogicalOperator *GenWith(With &with, LogicalOperator *input_op,
|
||||
// Reset bound symbols, so that only those in WITH are exposed.
|
||||
bound_symbols.clear();
|
||||
for (const auto &symbol : body.output_symbols()) {
|
||||
BindSymbol(bound_symbols, symbol);
|
||||
bound_symbols.insert(symbol);
|
||||
}
|
||||
return last_op;
|
||||
}
|
||||
|
||||
} // namespace impl
|
||||
|
||||
// Analyzes the filter expression by collecting information on filtering labels
|
||||
// and properties to be used with indexing. Note that `all_filters_` are never
|
||||
// updated here, but only `label_filters_` and `property_filters_` are.
|
||||
void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
|
||||
auto *&ident) {
|
||||
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
|
||||
(ident = dynamic_cast<Identifier *>(prop_lookup->expression_));
|
||||
};
|
||||
auto add_prop_equal = [&](auto *maybe_lookup, auto *val_expr) {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
val_expr->Accept(collector);
|
||||
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
|
||||
.emplace_back(PropertyFilter{collector.symbols_, val_expr});
|
||||
}
|
||||
};
|
||||
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(expr1, prop_lookup, ident)) {
|
||||
// n.prop > value
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
expr2->Accept(collector);
|
||||
auto prop_filter = PropertyFilter{collector.symbols_};
|
||||
prop_filter.lower_bound = Bound{expr2, bound_type};
|
||||
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
|
||||
.emplace_back(std::move(prop_filter));
|
||||
}
|
||||
if (get_property_lookup(expr2, prop_lookup, ident)) {
|
||||
// value > n.prop
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
expr1->Accept(collector);
|
||||
auto prop_filter = PropertyFilter{collector.symbols_};
|
||||
prop_filter.upper_bound = Bound{expr1, bound_type};
|
||||
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
|
||||
.emplace_back(std::move(prop_filter));
|
||||
}
|
||||
};
|
||||
// We are only interested to see the insides of And, because Or prevents
|
||||
// indexing since any labels and properties found there may be optional.
|
||||
if (auto *and_op = dynamic_cast<AndOperator *>(expr)) {
|
||||
AnalyzeFilter(and_op->expression1_, symbol_table);
|
||||
AnalyzeFilter(and_op->expression2_, symbol_table);
|
||||
} else if (auto *labels_test = dynamic_cast<LabelsTest *>(expr)) {
|
||||
// Since LabelsTest may contain any expression, we can only use the
|
||||
// simplest test on an identifier.
|
||||
if (auto *ident = dynamic_cast<Identifier *>(labels_test->expression_)) {
|
||||
const auto &symbol = symbol_table.at(*ident);
|
||||
label_filters_[symbol].insert(labels_test->labels_.begin(),
|
||||
labels_test->labels_.end());
|
||||
}
|
||||
} else if (auto *eq = dynamic_cast<EqualOperator *>(expr)) {
|
||||
// Try to get property equality test from the top expressions.
|
||||
// Unfortunately, we cannot go deeper inside Equal, because chained equals
|
||||
// need not correspond to And. For example, `(n.prop = value) = false)`:
|
||||
// EQ
|
||||
// / \
|
||||
// EQ false -- top expressions
|
||||
// / \
|
||||
// n.prop value
|
||||
// Here the `prop` may be different than `value` resulting in `false`. This
|
||||
// would compare with the top level `false`, producing `true`. Therefore, it
|
||||
// is incorrect to pick up `n.prop = value` for scanning by property index.
|
||||
add_prop_equal(eq->expression1_, eq->expression2_);
|
||||
// And reversed.
|
||||
add_prop_equal(eq->expression2_, eq->expression1_);
|
||||
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
|
||||
add_prop_greater(gt->expression1_, gt->expression2_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
|
||||
add_prop_greater(ge->expression1_, ge->expression2_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
|
||||
// Like greater, but in reverse.
|
||||
add_prop_greater(lt->expression2_, lt->expression1_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
|
||||
// Like greater equal, but in reverse.
|
||||
add_prop_greater(le->expression2_, le->expression1_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
}
|
||||
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
|
||||
// indexing by range. Note, that the generated Ast uses AND for chained
|
||||
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
|
||||
// as `expr1 < n.prop AND n.prop < expr2`.
|
||||
}
|
||||
|
||||
void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
|
||||
auto add_properties_variable = [&](EdgeAtom *atom) {
|
||||
const auto &symbol = symbol_table.at(*atom->identifier_);
|
||||
for (auto &prop_pair : atom->properties_) {
|
||||
// We need to store two property-lookup filters in all_filters. One is
|
||||
// used for inlining property filters into variable expansion, and
|
||||
// utilizes the inner_edge symbol. The other is used for post-expansion
|
||||
// filtering and does not use the inner_edge symbol, but the edge symbol
|
||||
// (a list of edges).
|
||||
{
|
||||
collector.symbols_.clear();
|
||||
prop_pair.second->Accept(collector);
|
||||
collector.symbols_.emplace(symbol_table.at(*atom->inner_node_));
|
||||
collector.symbols_.emplace(symbol_table.at(*atom->inner_edge_));
|
||||
// First handle the inline property filter.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(atom->inner_edge_, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
all_filters_.emplace_back(FilterInfo{prop_equal, collector.symbols_});
|
||||
}
|
||||
{
|
||||
collector.symbols_.clear();
|
||||
prop_pair.second->Accept(collector);
|
||||
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
|
||||
// Now handle the post-expansion filter.
|
||||
// Create a new identifier and a symbol which will be filled in All.
|
||||
auto *identifier = atom->identifier_->Clone(storage);
|
||||
symbol_table[*identifier] =
|
||||
symbol_table.CreateSymbol(identifier->name_, false);
|
||||
// Create an equality expression and store it in all_filters_.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(identifier, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
all_filters_.emplace_back(
|
||||
FilterInfo{storage.Create<All>(identifier, atom->identifier_,
|
||||
storage.Create<Where>(prop_equal)),
|
||||
collector.symbols_});
|
||||
}
|
||||
}
|
||||
};
|
||||
auto add_properties = [&](auto *atom) {
|
||||
const auto &symbol = symbol_table.at(*atom->identifier_);
|
||||
for (auto &prop_pair : atom->properties_) {
|
||||
collector.symbols_.clear();
|
||||
prop_pair.second->Accept(collector);
|
||||
// Store a PropertyFilter on the value of the property.
|
||||
property_filters_[symbol][prop_pair.first.second].emplace_back(
|
||||
PropertyFilter{collector.symbols_, prop_pair.second});
|
||||
// Create an equality expression and store it in all_filters_.
|
||||
auto *property_lookup =
|
||||
storage.Create<PropertyLookup>(atom->identifier_, prop_pair.first);
|
||||
auto *prop_equal =
|
||||
storage.Create<EqualOperator>(property_lookup, prop_pair.second);
|
||||
collector.symbols_.insert(symbol); // PropertyLookup uses the symbol.
|
||||
all_filters_.emplace_back(FilterInfo{prop_equal, collector.symbols_});
|
||||
}
|
||||
};
|
||||
auto add_node_filter = [&](NodeAtom *node) {
|
||||
const auto &node_symbol = symbol_table.at(*node->identifier_);
|
||||
if (!node->labels_.empty()) {
|
||||
// Store the filtered labels.
|
||||
label_filters_[node_symbol].insert(node->labels_.begin(),
|
||||
node->labels_.end());
|
||||
// Create a LabelsTest and store it in all_filters_.
|
||||
all_filters_.emplace_back(FilterInfo{
|
||||
storage.Create<LabelsTest>(node->identifier_, node->labels_),
|
||||
std::unordered_set<Symbol>{node_symbol}});
|
||||
}
|
||||
add_properties(node);
|
||||
};
|
||||
auto add_expand_filter = [&](NodeAtom *, EdgeAtom *edge, NodeAtom *node) {
|
||||
if (edge->IsVariable())
|
||||
add_properties_variable(edge);
|
||||
else
|
||||
add_properties(edge);
|
||||
add_node_filter(node);
|
||||
};
|
||||
ForEachPattern(pattern, add_node_filter, add_expand_filter);
|
||||
}
|
||||
|
||||
// Adds the where filter expression to `all_filters_` and collects additional
|
||||
// information for potential property and label indexing.
|
||||
void Filters::CollectWhereFilter(Where &where,
|
||||
const SymbolTable &symbol_table) {
|
||||
auto where_filters = SplitExpressionOnAnd(where.expression_);
|
||||
for (const auto &filter : where_filters) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
filter->Accept(collector);
|
||||
all_filters_.emplace_back(FilterInfo{filter, collector.symbols_});
|
||||
AnalyzeFilter(filter, symbol_table);
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a Query to multiple QueryParts. In the process new Ast nodes may be
|
||||
// created, e.g. filter expressions.
|
||||
std::vector<QueryPart> CollectQueryParts(SymbolTable &symbol_table,
|
||||
AstTreeStorage &storage) {
|
||||
auto query = storage.query();
|
||||
std::vector<QueryPart> query_parts(1);
|
||||
auto *query_part = &query_parts.back();
|
||||
for (auto &clause : query->clauses_) {
|
||||
if (auto *match = dynamic_cast<Match *>(clause)) {
|
||||
if (match->optional_) {
|
||||
query_part->optional_matching.emplace_back(Matching{});
|
||||
AddMatching(*match, symbol_table, storage,
|
||||
query_part->optional_matching.back());
|
||||
} else {
|
||||
DCHECK(query_part->optional_matching.empty())
|
||||
<< "Match clause cannot follow optional match.";
|
||||
AddMatching(*match, symbol_table, storage, query_part->matching);
|
||||
}
|
||||
} else {
|
||||
query_part->remaining_clauses.push_back(clause);
|
||||
if (auto *merge = dynamic_cast<query::Merge *>(clause)) {
|
||||
query_part->merge_matching.emplace_back(Matching{});
|
||||
AddMatching({merge->pattern_}, nullptr, symbol_table, storage,
|
||||
query_part->merge_matching.back());
|
||||
} else if (dynamic_cast<With *>(clause) ||
|
||||
dynamic_cast<query::Unwind *>(clause)) {
|
||||
// This query part is done, continue with a new one.
|
||||
query_parts.emplace_back(QueryPart{});
|
||||
query_part = &query_parts.back();
|
||||
} else if (dynamic_cast<Return *>(clause)) {
|
||||
// TODO: Support RETURN UNION ...
|
||||
return query_parts;
|
||||
}
|
||||
}
|
||||
}
|
||||
return query_parts;
|
||||
}
|
||||
|
||||
} // namespace query::plan
|
||||
|
@ -7,159 +7,12 @@
|
||||
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/plan/preprocess.hpp"
|
||||
|
||||
DECLARE_int64(query_vertex_count_to_expand_existing);
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
/// Normalized representation of a pattern that needs to be matched.
|
||||
struct Expansion {
|
||||
/// The first node in the expansion, it can be a single node.
|
||||
NodeAtom *node1 = nullptr;
|
||||
/// Optional edge which connects the 2 nodes.
|
||||
EdgeAtom *edge = nullptr;
|
||||
/// Direction of the edge, it may be flipped compared to original
|
||||
/// @c EdgeAtom during plan generation.
|
||||
EdgeAtom::Direction direction = EdgeAtom::Direction::BOTH;
|
||||
/// True if the direction and nodes were flipped.
|
||||
bool is_flipped = false;
|
||||
/// Set of symbols found inside the range expressions of a variable path edge.
|
||||
std::unordered_set<Symbol> symbols_in_range{};
|
||||
/// Optional node at the other end of an edge. If the expansion
|
||||
/// contains an edge, then this node is required.
|
||||
NodeAtom *node2 = nullptr;
|
||||
};
|
||||
|
||||
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
|
||||
class Filters {
|
||||
public:
|
||||
/// Stores the symbols and expression used to filter a property.
|
||||
struct PropertyFilter {
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
/// Set of used symbols in the @c expression.
|
||||
std::unordered_set<Symbol> used_symbols;
|
||||
/// Expression which when evaluated produces the value a property must
|
||||
/// equal.
|
||||
Expression *expression = nullptr;
|
||||
std::experimental::optional<Bound> lower_bound{};
|
||||
std::experimental::optional<Bound> upper_bound{};
|
||||
};
|
||||
|
||||
/// Stores additional information for a filter expression.
|
||||
struct FilterInfo {
|
||||
/// The filter expression which must be satisfied.
|
||||
Expression *expression;
|
||||
/// Set of used symbols by the filter @c expression.
|
||||
std::unordered_set<Symbol> used_symbols;
|
||||
};
|
||||
|
||||
/// List of FilterInfo objects corresponding to all filter expressions that
|
||||
/// should be generated.
|
||||
auto &all_filters() { return all_filters_; }
|
||||
const auto &all_filters() const { return all_filters_; }
|
||||
/// Mapping from a symbol to labels that are filtered on it. These should be
|
||||
/// used only for generating indexed scans.
|
||||
const auto &label_filters() const { return label_filters_; }
|
||||
/// Mapping from a symbol to edge types that are filtered on it. These should
|
||||
/// be used for generating indexed expansions.
|
||||
const auto &edge_type_filters() const { return edge_type_filters_; }
|
||||
/// Mapping from a symbol to properties that are filtered on it. These should
|
||||
/// be used only for generating indexed scans.
|
||||
const auto &property_filters() const { return property_filters_; }
|
||||
|
||||
/// Collects filtering information from a pattern.
|
||||
///
|
||||
/// Goes through all the atoms in a pattern and generates filter expressions
|
||||
/// for found labels, properties and edge types. The generated expressions are
|
||||
/// stored in @c all_filters. Also, @c label_filters and @c property_filters
|
||||
/// are populated.
|
||||
void CollectPatternFilters(Pattern &, SymbolTable &, AstTreeStorage &);
|
||||
/// Collects filtering information from a where expression.
|
||||
///
|
||||
/// Takes the where expression and stores it in @c all_filters, then analyzes
|
||||
/// the expression for additional information. The additional information is
|
||||
/// used to populate @c label_filters and @c property_filters, so that indexed
|
||||
/// scanning can use it.
|
||||
void CollectWhereFilter(Where &, const SymbolTable &);
|
||||
|
||||
private:
|
||||
void AnalyzeFilter(Expression *, const SymbolTable &);
|
||||
|
||||
std::vector<FilterInfo> all_filters_;
|
||||
std::unordered_map<Symbol, std::unordered_set<GraphDbTypes::Label>>
|
||||
label_filters_;
|
||||
std::unordered_map<Symbol, std::unordered_set<GraphDbTypes::EdgeType>>
|
||||
edge_type_filters_;
|
||||
std::unordered_map<Symbol, std::unordered_map<GraphDbTypes::Property,
|
||||
std::vector<PropertyFilter>>>
|
||||
property_filters_;
|
||||
};
|
||||
|
||||
/// Normalized representation of a single or multiple Match clauses.
|
||||
///
|
||||
/// For example, `MATCH (a :Label) -[e1]- (b) -[e2]- (c) MATCH (n) -[e3]- (m)
|
||||
/// WHERE c.prop < 42` will produce the following.
|
||||
/// Expansions will store `(a) -[e1]-(b)`, `(b) -[e2]- (c)` and
|
||||
/// `(n) -[e3]- (m)`.
|
||||
/// Edge symbols for Cyphermorphism will only contain the set `{e1, e2}` for the
|
||||
/// first `MATCH` and the set `{e3}` for the second.
|
||||
/// Filters will contain 2 pairs. One for testing `:Label` on symbol `a` and the
|
||||
/// other obtained from `WHERE` on symbol `c`.
|
||||
struct Matching {
|
||||
/// All expansions that need to be performed across @c Match clauses.
|
||||
std::vector<Expansion> expansions;
|
||||
/// Symbols for edges established in match, used to ensure Cyphermorphism.
|
||||
///
|
||||
/// There are multiple sets, because each Match clause determines a single
|
||||
/// set.
|
||||
std::vector<std::unordered_set<Symbol>> edge_symbols;
|
||||
/// Information on used filter expressions while matching.
|
||||
Filters filters;
|
||||
/// Maps node symbols to expansions which bind them.
|
||||
std::unordered_map<Symbol, std::set<int>> node_symbol_to_expansions{};
|
||||
/// Maps named path symbols to a vector of Symbols that define its pattern.
|
||||
std::unordered_map<Symbol, std::vector<Symbol>> named_paths{};
|
||||
/// All node and edge symbols across all expansions (from all matches).
|
||||
std::unordered_set<Symbol> expansion_symbols{};
|
||||
};
|
||||
|
||||
/// @brief Represents a read (+ write) part of a query. Parts are split on
|
||||
/// `WITH` clauses.
|
||||
///
|
||||
/// Each part ends with either:
|
||||
///
|
||||
/// * `RETURN` clause;
|
||||
/// * `WITH` clause or
|
||||
/// * any of the write clauses.
|
||||
///
|
||||
/// For a query `MATCH (n) MERGE (n) -[e]- (m) SET n.x = 42 MERGE (l)` the
|
||||
/// generated QueryPart will have `matching` generated for the `MATCH`.
|
||||
/// `remaining_clauses` will contain `Merge`, `SetProperty` and `Merge` clauses
|
||||
/// in that exact order. The pattern inside the first `MERGE` will be used to
|
||||
/// generate the first `merge_matching` element, and the second `MERGE` pattern
|
||||
/// will produce the second `merge_matching` element. This way, if someone
|
||||
/// traverses `remaining_clauses`, the order of appearance of `Merge` clauses is
|
||||
/// in the same order as their respective `merge_matching` elements.
|
||||
struct QueryPart {
|
||||
/// @brief All `MATCH` clauses merged into one @c Matching.
|
||||
Matching matching;
|
||||
/// @brief Each `OPTIONAL MATCH` converted to @c Matching.
|
||||
std::vector<Matching> optional_matching{};
|
||||
/// @brief @c Matching for each `MERGE` clause.
|
||||
///
|
||||
/// Storing the normalized pattern of a @c Merge does not preclude storing the
|
||||
/// @c Merge clause itself inside `remaining_clauses`. The reason is that we
|
||||
/// need to have access to other parts of the clause, such as `SET` clauses
|
||||
/// which need to be run.
|
||||
///
|
||||
/// Since @c Merge is contained in `remaining_clauses`, this vector contains
|
||||
/// matching in the same order as @c Merge appears.
|
||||
std::vector<Matching> merge_matching{};
|
||||
/// @brief All the remaining clauses (without @c Match).
|
||||
std::vector<Clause *> remaining_clauses{};
|
||||
};
|
||||
|
||||
/// @brief Context which contains variables commonly used during planning.
|
||||
template <class TDbAccessor>
|
||||
struct PlanningContext {
|
||||
@ -205,40 +58,26 @@ struct MatchContext {
|
||||
std::vector<Symbol> new_symbols{};
|
||||
};
|
||||
|
||||
/// @brief Convert the AST to multiple @c QueryParts.
|
||||
///
|
||||
/// This function will normalize patterns inside @c Match and @c Merge clauses
|
||||
/// and do some other preprocessing in order to generate multiple @c QueryPart
|
||||
/// structures. @c AstTreeStorage and @c SymbolTable may be used to create new
|
||||
/// AST nodes.
|
||||
std::vector<QueryPart> CollectQueryParts(SymbolTable &, AstTreeStorage &);
|
||||
|
||||
namespace impl {
|
||||
|
||||
// These functions are an internal implementation of RuleBasedPlanner. To avoid
|
||||
// writing the whole code inline in this header file, they are declared here and
|
||||
// defined in the cpp file.
|
||||
|
||||
bool BindSymbol(std::unordered_set<Symbol> &bound_symbols,
|
||||
const Symbol &symbol);
|
||||
// Iterates over `Filters` joining them in one expression via
|
||||
// `AndOperator` if symbols they use are bound.. All the joined filters are
|
||||
// removed from `Filters`.
|
||||
Expression *ExtractFilters(const std::unordered_set<Symbol> &, Filters &,
|
||||
AstTreeStorage &);
|
||||
|
||||
// Iterates over `all_filters` joining them in one expression via
|
||||
// `AndOperator`. Filters which use unbound symbols are skipped
|
||||
// The function takes a single argument, `FilterInfo`. All the joined filters
|
||||
// are removed from `all_filters`.
|
||||
Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols,
|
||||
std::vector<Filters::FilterInfo> &all_filters,
|
||||
AstTreeStorage &storage);
|
||||
LogicalOperator *GenFilters(LogicalOperator *,
|
||||
const std::unordered_set<Symbol> &, Filters &,
|
||||
AstTreeStorage &);
|
||||
|
||||
LogicalOperator *GenFilters(LogicalOperator *last_op,
|
||||
const std::unordered_set<Symbol> &bound_symbols,
|
||||
std::vector<Filters::FilterInfo> &all_filters,
|
||||
AstTreeStorage &storage);
|
||||
//
|
||||
/// For all given `named_paths` checks if the all it's symbols have been bound.
|
||||
/// If so it creates a logical operator for named path generation, binds it's
|
||||
/// symbol, removes that path from the collection of unhandled ones and returns
|
||||
/// the new op. Otherwise it returns nullptr.
|
||||
// For all given `named_paths` checks if all its symbols have been bound.
|
||||
// If so, it creates a logical operator for named path generation, binds its
|
||||
// symbol, removes that path from the collection of unhandled ones and returns
|
||||
// the new op. Otherwise, returns `nullptr`.
|
||||
LogicalOperator *GenNamedPaths(
|
||||
LogicalOperator *last_op, std::unordered_set<Symbol> &bound_symbols,
|
||||
std::unordered_map<Symbol, std::vector<Symbol>> &named_paths);
|
||||
@ -306,7 +145,7 @@ class RuleBasedPlanner {
|
||||
}
|
||||
int merge_id = 0;
|
||||
for (auto &clause : query_part.remaining_clauses) {
|
||||
DCHECK(dynamic_cast<Match *>(clause) == nullptr)
|
||||
DCHECK(!dynamic_cast<Match *>(clause))
|
||||
<< "Unexpected Match in remaining clauses";
|
||||
if (auto *ret = dynamic_cast<Return *>(clause)) {
|
||||
input_op =
|
||||
@ -332,7 +171,7 @@ class RuleBasedPlanner {
|
||||
} else if (auto *unwind = dynamic_cast<query::Unwind *>(clause)) {
|
||||
const auto &symbol =
|
||||
context.symbol_table.at(*unwind->named_expression_);
|
||||
impl::BindSymbol(context.bound_symbols, symbol);
|
||||
context.bound_symbols.insert(symbol);
|
||||
input_op =
|
||||
new plan::Unwind(std::shared_ptr<LogicalOperator>(input_op),
|
||||
unwind->named_expression_->expression_, symbol);
|
||||
@ -352,59 +191,49 @@ class RuleBasedPlanner {
|
||||
private:
|
||||
TPlanningContext &context_;
|
||||
|
||||
struct LabelPropertyIndex {
|
||||
GraphDbTypes::Label label;
|
||||
// FilterInfo with PropertyFilter.
|
||||
FilterInfo filter;
|
||||
int64_t vertex_count;
|
||||
};
|
||||
|
||||
// Finds the label-property combination which has indexed the lowest amount of
|
||||
// vertices. `best_label` and `best_property` will be set to that combination
|
||||
// and the function will return (`true`, vertex count in index). If the index
|
||||
// cannot be found, the function will return (`false`, maximum int64_t), while
|
||||
// leaving `best_label` and `best_property` unchanged.
|
||||
std::pair<bool, int64_t> FindBestLabelPropertyIndex(
|
||||
const std::unordered_set<GraphDbTypes::Label> &labels,
|
||||
const std::unordered_map<GraphDbTypes::Property,
|
||||
std::vector<Filters::PropertyFilter>>
|
||||
&property_filters,
|
||||
const Symbol &symbol, const std::unordered_set<Symbol> &bound_symbols,
|
||||
GraphDbTypes::Label &best_label,
|
||||
std::pair<GraphDbTypes::Property, Filters::PropertyFilter>
|
||||
&best_property) {
|
||||
// vertices. If the index cannot be found, nullopt is returned.
|
||||
std::experimental::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(
|
||||
const Symbol &symbol, const Filters &filters,
|
||||
const std::unordered_set<Symbol> &bound_symbols) {
|
||||
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
||||
for (const auto &used_symbol : used_symbols) {
|
||||
if (bound_symbols.find(used_symbol) == bound_symbols.end()) {
|
||||
if (!utils::Contains(bound_symbols, used_symbol)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
bool found = false;
|
||||
int64_t min_count = std::numeric_limits<int64_t>::max();
|
||||
for (const auto &label : labels) {
|
||||
for (const auto &prop_pair : property_filters) {
|
||||
const auto &property = prop_pair.first;
|
||||
std::experimental::optional<LabelPropertyIndex> found;
|
||||
for (const auto &label : filters.FilteredLabels(symbol)) {
|
||||
for (const auto &filter : filters.PropertyFilters(symbol)) {
|
||||
const auto &property = filter.property_filter->property_;
|
||||
if (context_.db.LabelPropertyIndexExists(label, property)) {
|
||||
int64_t vertices_count = context_.db.VerticesCount(label, property);
|
||||
if (vertices_count < min_count) {
|
||||
for (const auto &prop_filter : prop_pair.second) {
|
||||
if (prop_filter.used_symbols.find(symbol) !=
|
||||
prop_filter.used_symbols.end()) {
|
||||
// Skip filter expressions which use the symbol whose property
|
||||
// we are looking up. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
|
||||
// so we cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
if (are_bound(prop_filter.used_symbols)) {
|
||||
// Take the first property filter which uses bound symbols.
|
||||
best_label = label;
|
||||
best_property = {property, prop_filter};
|
||||
min_count = vertices_count;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
int64_t vertex_count = context_.db.VerticesCount(label, property);
|
||||
if (!found || vertex_count < found->vertex_count) {
|
||||
if (filter.property_filter->is_symbol_in_value_) {
|
||||
// Skip filter expressions which use the symbol whose property
|
||||
// we are looking up. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
|
||||
// so we cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
if (are_bound(filter.used_symbols)) {
|
||||
// Take the property filter which uses bound symbols.
|
||||
found = LabelPropertyIndex{label, filter, vertex_count};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return {found, min_count};
|
||||
return found;
|
||||
}
|
||||
|
||||
const GraphDbTypes::Label &FindBestLabelIndex(
|
||||
@ -426,45 +255,37 @@ class RuleBasedPlanner {
|
||||
// vertex count in the best index exceeds this number. In such a case,
|
||||
// `nullptr` is returned and `last_op` is not chained.
|
||||
ScanAll *GenScanByIndex(LogicalOperator *last_op, const Symbol &node_symbol,
|
||||
const MatchContext &match_ctx,
|
||||
const MatchContext &match_ctx, Filters &filters,
|
||||
const std::experimental::optional<int64_t>
|
||||
&max_vertex_count = std::experimental::nullopt) {
|
||||
const auto labels =
|
||||
utils::FindOr(match_ctx.matching.filters.label_filters(), node_symbol,
|
||||
std::unordered_set<GraphDbTypes::Label>())
|
||||
.first;
|
||||
const auto labels = filters.FilteredLabels(node_symbol);
|
||||
if (labels.empty()) {
|
||||
// Without labels, we cannot generated any indexed ScanAll.
|
||||
return nullptr;
|
||||
}
|
||||
const auto properties =
|
||||
utils::FindOr(
|
||||
match_ctx.matching.filters.property_filters(), node_symbol,
|
||||
std::unordered_map<GraphDbTypes::Property,
|
||||
std::vector<Filters::PropertyFilter>>())
|
||||
.first;
|
||||
// First, try to see if we can use label+property index. If not, use just
|
||||
// the label index (which ought to exist).
|
||||
GraphDbTypes::Label best_label;
|
||||
std::pair<GraphDbTypes::Property, Filters::PropertyFilter> best_property;
|
||||
auto found_index = FindBestLabelPropertyIndex(
|
||||
labels, properties, node_symbol, match_ctx.bound_symbols, best_label,
|
||||
best_property);
|
||||
if (found_index.first &&
|
||||
auto found_index = FindBestLabelPropertyIndex(node_symbol, filters,
|
||||
match_ctx.bound_symbols);
|
||||
if (found_index &&
|
||||
// Use label+property index if we satisfy max_vertex_count.
|
||||
(!max_vertex_count || *max_vertex_count >= found_index.second)) {
|
||||
const auto &prop_filter = best_property.second;
|
||||
if (prop_filter.lower_bound || prop_filter.upper_bound) {
|
||||
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
|
||||
// Copy the property filter and then erase it from filters.
|
||||
const auto prop_filter = *found_index->filter.property_filter;
|
||||
filters.EraseFilter(found_index->filter);
|
||||
filters.EraseLabelFilter(node_symbol, found_index->label);
|
||||
if (prop_filter.lower_bound_ || prop_filter.upper_bound_) {
|
||||
return new ScanAllByLabelPropertyRange(
|
||||
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
|
||||
best_property.first, prop_filter.lower_bound,
|
||||
prop_filter.upper_bound, match_ctx.graph_view);
|
||||
std::shared_ptr<LogicalOperator>(last_op), node_symbol,
|
||||
found_index->label, prop_filter.property_, prop_filter.lower_bound_,
|
||||
prop_filter.upper_bound_, match_ctx.graph_view);
|
||||
} else {
|
||||
DCHECK(prop_filter.expression)
|
||||
<< "Property filter should either have bounds or an expression.";
|
||||
DCHECK(prop_filter.value_) << "Property filter should either have "
|
||||
"bounds or a value expression.";
|
||||
return new ScanAllByLabelPropertyValue(
|
||||
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
|
||||
best_property.first, prop_filter.expression, match_ctx.graph_view);
|
||||
std::shared_ptr<LogicalOperator>(last_op), node_symbol,
|
||||
found_index->label, prop_filter.property_, prop_filter.value_,
|
||||
match_ctx.graph_view);
|
||||
}
|
||||
}
|
||||
auto label = FindBestLabelIndex(labels);
|
||||
@ -474,6 +295,7 @@ class RuleBasedPlanner {
|
||||
// than the allowed count.
|
||||
return nullptr;
|
||||
}
|
||||
filters.EraseLabelFilter(node_symbol, label);
|
||||
return new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
|
||||
node_symbol, label, match_ctx.graph_view);
|
||||
}
|
||||
@ -484,21 +306,20 @@ class RuleBasedPlanner {
|
||||
auto &storage = context_.ast_storage;
|
||||
const auto &symbol_table = match_context.symbol_table;
|
||||
const auto &matching = match_context.matching;
|
||||
// Copy all_filters, because we will modify the list as we generate Filters.
|
||||
auto all_filters = matching.filters.all_filters();
|
||||
// Copy filters, because we will modify them as we generate Filters.
|
||||
auto filters = matching.filters;
|
||||
// Copy the named_paths for the same reason.
|
||||
auto named_paths = matching.named_paths;
|
||||
// Try to generate any filters even before the 1st match operator. This
|
||||
// optimizes the optional match which filters only on symbols bound in
|
||||
// regular match.
|
||||
auto *last_op =
|
||||
impl::GenFilters(input_op, bound_symbols, all_filters, storage);
|
||||
auto *last_op = impl::GenFilters(input_op, bound_symbols, filters, storage);
|
||||
for (const auto &expansion : matching.expansions) {
|
||||
const auto &node1_symbol = symbol_table.at(*expansion.node1->identifier_);
|
||||
if (impl::BindSymbol(bound_symbols, node1_symbol)) {
|
||||
if (bound_symbols.insert(node1_symbol).second) {
|
||||
// We have just bound this symbol, so generate ScanAll which fills it.
|
||||
if (auto *indexed_scan =
|
||||
GenScanByIndex(last_op, node1_symbol, match_context)) {
|
||||
GenScanByIndex(last_op, node1_symbol, match_context, filters)) {
|
||||
// First, try to get an indexed scan.
|
||||
last_op = indexed_scan;
|
||||
} else {
|
||||
@ -508,11 +329,9 @@ class RuleBasedPlanner {
|
||||
node1_symbol, match_context.graph_view);
|
||||
}
|
||||
match_context.new_symbols.emplace_back(node1_symbol);
|
||||
last_op =
|
||||
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
|
||||
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
|
||||
last_op = impl::GenNamedPaths(last_op, bound_symbols, named_paths);
|
||||
last_op =
|
||||
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
|
||||
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
|
||||
}
|
||||
// We have an edge, so generate Expand.
|
||||
if (expansion.edge) {
|
||||
@ -533,28 +352,30 @@ class RuleBasedPlanner {
|
||||
// Bind the inner edge and node symbols so they're available for
|
||||
// inline filtering in ExpandVariable.
|
||||
bool inner_edge_bound =
|
||||
impl::BindSymbol(bound_symbols, inner_edge_symbol);
|
||||
bound_symbols.insert(inner_edge_symbol).second;
|
||||
bool inner_node_bound =
|
||||
impl::BindSymbol(bound_symbols, inner_node_symbol);
|
||||
bound_symbols.insert(inner_node_symbol).second;
|
||||
DCHECK(inner_edge_bound && inner_node_bound)
|
||||
<< "An inner edge and node can't be bound from before";
|
||||
}
|
||||
// Join regular filters with lambda filter expression, so that they
|
||||
// are done inline together. Semantic analysis should guarantee that
|
||||
// lambda filtering uses bound symbols.
|
||||
auto *filter_expr = impl::BoolJoin<AndOperator>(
|
||||
storage,
|
||||
impl::ExtractFilters(bound_symbols, all_filters, storage),
|
||||
storage, impl::ExtractFilters(bound_symbols, filters, storage),
|
||||
edge->filter_expression_);
|
||||
// At this point it's possible we have leftover filters for inline
|
||||
// filtering (they use the inner symbols. If they were not collected,
|
||||
// we have to remove them manually because no other filter-extraction
|
||||
// will ever bind them again.
|
||||
all_filters.erase(
|
||||
std::remove_if(all_filters.begin(), all_filters.end(),
|
||||
[ e = inner_edge_symbol, n = inner_node_symbol ](
|
||||
Filters::FilterInfo & fi) {
|
||||
filters.erase(
|
||||
std::remove_if(filters.begin(), filters.end(),
|
||||
[ e = inner_edge_symbol,
|
||||
n = inner_node_symbol ](FilterInfo & fi) {
|
||||
return utils::Contains(fi.used_symbols, e) ||
|
||||
utils::Contains(fi.used_symbols, n);
|
||||
}),
|
||||
all_filters.end());
|
||||
filters.end());
|
||||
|
||||
last_op = new ExpandVariable(
|
||||
node_symbol, edge_symbol, edge->type_, expansion.direction,
|
||||
@ -572,7 +393,7 @@ class RuleBasedPlanner {
|
||||
// It would be better to somehow test whether the input vertex
|
||||
// degree is larger than the destination vertex index count.
|
||||
auto *indexed_scan =
|
||||
GenScanByIndex(last_op, node_symbol, match_context,
|
||||
GenScanByIndex(last_op, node_symbol, match_context, filters,
|
||||
FLAGS_query_vertex_count_to_expand_existing);
|
||||
if (indexed_scan) {
|
||||
last_op = indexed_scan;
|
||||
@ -586,9 +407,9 @@ class RuleBasedPlanner {
|
||||
}
|
||||
|
||||
// Bind the expanded edge and node.
|
||||
impl::BindSymbol(bound_symbols, edge_symbol);
|
||||
bound_symbols.insert(edge_symbol);
|
||||
match_context.new_symbols.emplace_back(edge_symbol);
|
||||
if (impl::BindSymbol(bound_symbols, node_symbol)) {
|
||||
if (bound_symbols.insert(node_symbol).second) {
|
||||
match_context.new_symbols.emplace_back(node_symbol);
|
||||
}
|
||||
|
||||
@ -612,14 +433,12 @@ class RuleBasedPlanner {
|
||||
other_symbols);
|
||||
}
|
||||
}
|
||||
last_op =
|
||||
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
|
||||
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
|
||||
last_op = impl::GenNamedPaths(last_op, bound_symbols, named_paths);
|
||||
last_op =
|
||||
impl::GenFilters(last_op, bound_symbols, all_filters, storage);
|
||||
last_op = impl::GenFilters(last_op, bound_symbols, filters, storage);
|
||||
}
|
||||
}
|
||||
DCHECK(all_filters.empty()) << "Expected to generate all filters";
|
||||
DCHECK(filters.empty()) << "Expected to generate all filters";
|
||||
return last_op;
|
||||
}
|
||||
|
||||
|
@ -410,7 +410,7 @@ TEST(TestLogicalPlanner, MatchLabeledNodes) {
|
||||
auto dba = dbms.active();
|
||||
auto label = dba->Label("label");
|
||||
QUERY(MATCH(PATTERN(NODE("n", label))), RETURN("n"));
|
||||
CheckPlan(storage, ExpectScanAllByLabel(), ExpectFilter(), ExpectProduce());
|
||||
CheckPlan(storage, ExpectScanAllByLabel(), ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, MatchPathReturn) {
|
||||
@ -1132,7 +1132,7 @@ TEST(TestLogicalPlanner, WhereIndexedLabelProperty) {
|
||||
auto plan = MakeLogicalPlan<RuleBasedPlanner>(planning_context);
|
||||
CheckPlan(*plan, symbol_table,
|
||||
ExpectScanAllByLabelPropertyValue(label, property, lit_42),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, BestPropertyIndexed) {
|
||||
@ -1194,9 +1194,8 @@ TEST(TestLogicalPlanner, MultiPropertyIndexScan) {
|
||||
auto plan = MakeLogicalPlan<RuleBasedPlanner>(planning_context);
|
||||
CheckPlan(*plan, symbol_table,
|
||||
ExpectScanAllByLabelPropertyValue(label1, prop1, lit_1),
|
||||
ExpectFilter(),
|
||||
ExpectScanAllByLabelPropertyValue(label2, prop2, lit_2),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
|
||||
@ -1223,7 +1222,7 @@ TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
|
||||
CheckPlan(*plan, symbol_table,
|
||||
ExpectScanAllByLabelPropertyRange(label, property, lower_bound,
|
||||
upper_bound),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
ExpectProduce());
|
||||
};
|
||||
{
|
||||
// Test relation operators which form an upper bound for range.
|
||||
@ -1373,7 +1372,7 @@ TEST(TestLogicalPlanner, MatchDoubleScanToExpandExisting) {
|
||||
// We expect 2x ScanAll and then Expand, since we are guessing that is
|
||||
// faster (due to low label index vertex count).
|
||||
CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectScanAllByLabel(),
|
||||
ExpectExpand(), ExpectFilter(), ExpectProduce());
|
||||
ExpectExpand(), ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, MatchScanToExpand) {
|
||||
|
Loading…
Reference in New Issue
Block a user