memgraph/src/query/plan/preprocess.cpp
Gareth Andrew Lloyd 9f7118d893
Performance tuning based on stress test (#1572)
Minor changes that speedup the large stress test.
Also now uses a stop token for a more productive shutdown. No need to wait for expensive GC runs.
2024-01-25 17:14:58 +00:00

727 lines
34 KiB
C++

// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include <algorithm>
#include <functional>
#include <stack>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <variant>
#include "query/exceptions.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/ast_visitor.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/preprocess.hpp"
#include "utils/typeinfo.hpp"
namespace memgraph::query::plan {
namespace {
// Walks `pattern` as the sequence NodeAtom (EdgeAtom NodeAtom)*.
// `base` is called once with the leading node; `collect` is then called for
// every (previous node, edge, next node) triplet, in pattern order.
void ForEachPattern(Pattern &pattern, std::function<void(NodeAtom *)> base,
                    std::function<void(NodeAtom *, EdgeAtom *, NodeAtom *)> collect) {
  DMG_ASSERT(!pattern.atoms_.empty(), "Missing atoms in pattern");
  auto cursor = pattern.atoms_.begin();

  auto *tail_node = utils::Downcast<NodeAtom>(*cursor);
  ++cursor;
  DMG_ASSERT(tail_node, "First pattern atom is not a node");
  base(tail_node);

  // Everything after the first node must come in (edge, node) pairs.
  for (; cursor != pattern.atoms_.end();) {
    auto *edge = utils::Downcast<EdgeAtom>(*cursor);
    ++cursor;
    DMG_ASSERT(edge, "Expected an edge atom in pattern.");
    DMG_ASSERT(cursor != pattern.atoms_.end(), "Edge atom should not end the pattern.");

    auto *head_node = utils::Downcast<NodeAtom>(*cursor);
    ++cursor;
    DMG_ASSERT(head_node, "Expected a node atom in pattern.");

    collect(tail_node, edge, head_node);
    // The node just reached becomes the start of the next triplet.
    tail_node = head_node;
  }
}
// Converts multiple Patterns to Expansions. Each Pattern can contain an
// arbitrarily long chain of nodes and edges. The conversion to an Expansion is
// done by splitting a pattern into triplets (node1, edge, node2). The triplets
// conserve the semantics of the pattern. For example, in a pattern:
// (m) -[e]- (n) -[f]- (o) the same can be achieved with:
// (m) -[e]- (n), (n) -[f]- (o).
// This representation makes it easier to permute from which node or edge we
// want to start expanding.
std::vector<Expansion> NormalizePatterns(const SymbolTable &symbol_table, const std::vector<Pattern *> &patterns) {
std::vector<Expansion> expansions;
ExpansionGroupId unknown_expansion_group_id = ExpansionGroupId::FromInt(-1);
auto ignore_node = [&](auto *) {};
for (const auto &pattern : patterns) {
if (pattern->atoms_.size() == 1U) {
auto *node = utils::Downcast<NodeAtom>(pattern->atoms_[0]);
DMG_ASSERT(node, "First pattern atom is not a node");
expansions.emplace_back(Expansion{.node1 = node, .expansion_group_id = unknown_expansion_group_id});
} else {
auto collect_expansion = [&](auto *prev_node, auto *edge, auto *current_node) {
UsedSymbolsCollector collector(symbol_table);
if (edge->IsVariable()) {
if (edge->lower_bound_) edge->lower_bound_->Accept(collector);
if (edge->upper_bound_) edge->upper_bound_->Accept(collector);
if (edge->filter_lambda_.expression) edge->filter_lambda_.expression->Accept(collector);
// Remove symbols which are bound by lambda arguments.
collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.inner_edge));
collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.inner_node));
if (edge->filter_lambda_.accumulated_path) {
collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.accumulated_path));
if (edge->filter_lambda_.accumulated_weight) {
collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.accumulated_weight));
}
}
if (edge->type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH ||
edge->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) {
collector.symbols_.erase(symbol_table.at(*edge->weight_lambda_.inner_edge));
collector.symbols_.erase(symbol_table.at(*edge->weight_lambda_.inner_node));
}
}
expansions.emplace_back(Expansion{prev_node, edge, edge->direction_, false, collector.symbols_, current_node,
unknown_expansion_group_id});
};
ForEachPattern(*pattern, ignore_node, collect_expansion);
}
}
return expansions;
}
void AssignExpansionGroupIds(std::vector<Expansion> &expansions, Matching &matching, const SymbolTable &symbol_table) {
ExpansionGroupId next_expansion_group_id = ExpansionGroupId::FromUint(matching.number_of_expansion_groups + 1);
auto assign_expansion_group_id = [&matching, &next_expansion_group_id](Symbol symbol, Expansion &expansion) {
ExpansionGroupId expansion_group_id_to_assign = next_expansion_group_id;
if (matching.node_symbol_to_expansion_group_id.contains(symbol)) {
expansion_group_id_to_assign = matching.node_symbol_to_expansion_group_id[symbol];
}
if (expansion.expansion_group_id.AsInt() == -1 ||
expansion_group_id_to_assign.AsInt() < expansion.expansion_group_id.AsInt()) {
expansion.expansion_group_id = expansion_group_id_to_assign;
}
matching.node_symbol_to_expansion_group_id[symbol] = expansion.expansion_group_id;
};
for (auto &expansion : expansions) {
const auto &node1_sym = symbol_table.at(*expansion.node1->identifier_);
assign_expansion_group_id(node1_sym, expansion);
if (expansion.edge) {
const auto &edge_sym = symbol_table.at(*expansion.edge->identifier_);
const auto &node2_sym = symbol_table.at(*expansion.node2->identifier_);
assign_expansion_group_id(edge_sym, expansion);
assign_expansion_group_id(node2_sym, expansion);
}
matching.number_of_expansion_groups = matching.number_of_expansion_groups < expansion.expansion_group_id.AsUint()
? expansion.expansion_group_id.AsUint()
: matching.number_of_expansion_groups;
next_expansion_group_id = ExpansionGroupId::FromUint(matching.number_of_expansion_groups + 1);
}
// By the time we finished assigning expansions, no expansion should have its expansion group ID unassigned
for (const auto &expansion : matching.expansions) {
MG_ASSERT(expansion.expansion_group_id.AsInt() != -1, "Expansion group ID is not assigned to the pattern!");
}
}
// Gathers the symbols of all edges found in `expansions` into one set and, if
// that set is non-empty, appends it to `matching.edge_symbols`. Each appended
// set is later used for Cyphermorphism (edge uniqueness).
void CollectEdgeSymbols(std::vector<Expansion> &expansions, Matching &matching, const SymbolTable &symbol_table) {
  std::unordered_set<Symbol> edge_symbols;
  for (auto &expansion : expansions) {
    if (!expansion.edge) continue;  // single-node expansion, nothing to record
    edge_symbols.insert(symbol_table.at(*expansion.edge->identifier_));
  }
  if (!edge_symbols.empty()) {
    // The local set is not used afterwards, so hand it over instead of copying.
    matching.edge_symbols.emplace_back(std::move(edge_symbols));
  }
}
// Records in `matching.expansion_symbols` the symbol of every node and edge
// that appears in `expansions`.
void CollectExpansionSymbols(std::vector<Expansion> &expansions, Matching &matching, const SymbolTable &symbol_table) {
  // Looks up the symbol bound to the atom's identifier and stores it.
  auto record_symbol_of = [&](const auto *atom) {
    matching.expansion_symbols.insert(symbol_table.at(*atom->identifier_));
  };

  for (auto &expansion : expansions) {
    record_symbol_of(expansion.node1);
    if (!expansion.edge) continue;
    // `node2` is only looked at when the expansion has an edge.
    record_symbol_of(expansion.edge);
    record_symbol_of(expansion.node2);
  }
}
// Appends `expansions` to `matching.expansions` and indexes each of them in
// `matching.node_symbol_to_expansions` under the symbols of its node(s).
void AddExpansionsToMatching(std::vector<Expansion> &expansions, Matching &matching, const SymbolTable &symbol_table) {
  auto &expansions_of_node = matching.node_symbol_to_expansions;
  for (auto &pending : expansions) {
    // The matching may already hold expansions, so the index of the one being
    // added is the current size, not the position within `expansions`.
    const size_t slot = matching.expansions.size();

    expansions_of_node[symbol_table.at(*pending.node1->identifier_)].insert(slot);
    if (pending.edge) {
      expansions_of_node[symbol_table.at(*pending.node2->identifier_)].insert(slot);
    }
    matching.expansions.push_back(pending);
  }
}
// Breaks `expression` apart at every AndOperator and returns the operands that
// are not themselves AndOperators. Operands are processed last-in-first-out,
// so for `a AND b` the result lists `b` before `a`.
auto SplitExpressionOnAnd(Expression *expression) {
  // TODO: Think about converting all filtering expression into CNF to improve
  // the granularity of filters which can be stand alone.
  std::vector<Expression *> conjuncts;
  std::vector<Expression *> worklist;  // used strictly as a stack
  worklist.push_back(expression);

  while (!worklist.empty()) {
    auto *candidate = worklist.back();
    worklist.pop_back();

    auto *conjunction = utils::Downcast<AndOperator>(candidate);
    if (!conjunction) {
      conjuncts.push_back(candidate);
      continue;
    }
    worklist.push_back(conjunction->expression1_);
    worklist.push_back(conjunction->expression2_);
  }
  return conjuncts;
}
} // namespace
// Builds a single-value property filter (any type except RANGE) for
// `symbol`.`property` compared against `value`. Also records whether `value`
// itself refers to `symbol`.
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table, const Symbol &symbol, PropertyIx property,
                               Expression *value, Type type)
    : symbol_(symbol),
      property_(std::move(property)),
      type_(type),
      value_(value) {
  // Range filters must go through the bound-taking constructor.
  MG_ASSERT(type != Type::RANGE);

  UsedSymbolsCollector value_symbols(symbol_table);
  value->Accept(value_symbols);
  is_symbol_in_value_ = utils::Contains(value_symbols.symbols_, symbol);
}
// Builds a RANGE property filter for `symbol`.`property` with optional lower
// and upper bounds. Also records whether either bound expression refers to
// `symbol`.
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table, const Symbol &symbol, PropertyIx property,
                               const std::optional<PropertyFilter::Bound> &lower_bound,
                               const std::optional<PropertyFilter::Bound> &upper_bound)
    : symbol_(symbol),
      property_(std::move(property)),
      type_(Type::RANGE),
      lower_bound_(lower_bound),
      upper_bound_(upper_bound) {
  UsedSymbolsCollector bound_symbols(symbol_table);
  // Visits the expression of a bound, if that bound is present.
  auto visit_if_present = [&bound_symbols](const std::optional<PropertyFilter::Bound> &bound) {
    if (bound) {
      bound->value()->Accept(bound_symbols);
    }
  };
  visit_if_present(lower_bound);
  visit_if_present(upper_bound);

  is_symbol_in_value_ = utils::Contains(bound_symbols.symbols_, symbol);
}
// Builds a property filter that carries no value expression.
PropertyFilter::PropertyFilter(Symbol symbol, PropertyIx property, Type type)
    : symbol_(std::move(symbol)),
      property_(std::move(property)),
      type_(type) {
  // This constructor serves property filters whose filter expression never
  // has to be evaluated. `is_symbol_in_value_` is therefore not computed here
  // and is meant to be false, even though the filter expression may in fact
  // mention the symbol whose property is being looked up.
  // NOTE(review): nothing in this constructor assigns the flag; presumably an
  // in-class default initializer provides `false` -- confirm in the header.
}
// Builds an id filter comparing the id of `symbol` against `value`, which must
// not be null. Also records whether `value` itself refers to `symbol`.
IdFilter::IdFilter(const SymbolTable &symbol_table, const Symbol &symbol, Expression *value)
    : symbol_(symbol),
      value_(value) {
  MG_ASSERT(value);

  UsedSymbolsCollector value_symbols(symbol_table);
  value->Accept(value_symbols);
  is_symbol_in_value_ = utils::Contains(value_symbols.symbols_, symbol);
}
void Filters::EraseFilter(const FilterInfo &filter) {
// TODO: Ideally, we want to determine the equality of both expression trees,
// instead of a simple pointer compare.
all_filters_.erase(std::remove_if(all_filters_.begin(), all_filters_.end(),
[&filter](const auto &f) { return f.expression == filter.expression; }),
all_filters_.end());
}
// Removes `label` from every Label-type filter that uses `symbol`. A filter
// left without any labels is erased from `all_filters_` entirely; when
// `removed_filters` is non-null, the expression of each such erased filter is
// appended to it.
void Filters::EraseLabelFilter(const Symbol &symbol, const LabelIx &label, std::vector<Expression *> *removed_filters) {
  auto cursor = all_filters_.begin();
  while (cursor != all_filters_.end()) {
    auto &info = *cursor;

    // Only label filters on the requested symbol are of interest.
    const bool is_candidate =
        info.type == FilterInfo::Type::Label && utils::Contains(info.used_symbols, symbol);
    const auto labels_end = info.labels.end();
    const auto found = is_candidate ? std::find(info.labels.begin(), labels_end, label) : labels_end;
    if (found == labels_end) {
      ++cursor;
      continue;
    }

    info.labels.erase(found);
    DMG_ASSERT(!utils::Contains(info.labels, label), "Didn't expect duplicated labels");

    if (!info.labels.empty()) {
      ++cursor;
      continue;
    }

    // If there are no labels to filter, then erase the whole FilterInfo.
    if (removed_filters) {
      removed_filters->push_back(info.expression);
    }
    cursor = all_filters_.erase(cursor);
  }
}
// Lifts the labels and property maps written inline in `pattern` into
// standalone filter expressions appended to `all_filters_`. New AST nodes are
// created through `storage`. `symbol_table` is taken by non-const reference
// because the variable-length edge case creates a new symbol in it.
// Throws SemanticException when an atom's `properties_` does not hold a
// property -> expression map.
void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table, AstStorage &storage) {
// One collector is shared by all helpers below; each of them clears
// `collector.symbols_` before reusing it.
UsedSymbolsCollector collector(symbol_table);
// Handles the property map of a variable-length edge. Two Generic filters are
// stored per property (see the comment inside the loop).
auto add_properties_variable = [&](EdgeAtom *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
if (auto *properties = std::get_if<std::unordered_map<PropertyIx, Expression *>>(&atom->properties_)) {
for (auto &prop_pair : *properties) {
// We need to store two property-lookup filters in all_filters. One is
// used for inlining property filters into variable expansion, and
// utilizes the inner_edge symbol. The other is used for post-expansion
// filtering and does not use the inner_edge symbol, but the edge symbol
// (a list of edges).
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
// The inline filter is expressed over the filter lambda's own symbols, so
// they are added to the set of symbols the filter uses.
collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.inner_node));
collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.inner_edge));
if (atom->filter_lambda_.accumulated_path) {
collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.accumulated_path));
// The accumulated weight is only considered when an accumulated path exists.
if (atom->filter_lambda_.accumulated_weight) {
collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.accumulated_weight));
}
}
// First handle the inline property filter.
auto *property_lookup = storage.Create<PropertyLookup>(atom->filter_lambda_.inner_edge, prop_pair.first);
auto *prop_equal = storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// Currently, variable expand has no gains if we set PropertyFilter.
all_filters_.emplace_back(FilterInfo{FilterInfo::Type::Generic, prop_equal, collector.symbols_});
}
{
collector.symbols_.clear();
prop_pair.second->Accept(collector);
collector.symbols_.insert(symbol);  // PropertyLookup uses the symbol.
// Now handle the post-expansion filter.
// Create a new identifier and a symbol which will be filled in All.
auto *identifier = storage.Create<Identifier>(atom->identifier_->name_, atom->identifier_->user_declared_)
->MapTo(symbol_table.CreateSymbol(atom->identifier_->name_, false));
// Create an equality expression and store it in all_filters_.
auto *property_lookup = storage.Create<PropertyLookup>(identifier, prop_pair.first);
auto *prop_equal = storage.Create<EqualOperator>(property_lookup, prop_pair.second);
// Currently, variable expand has no gains if we set PropertyFilter.
// The equality is wrapped in All(identifier IN <edge list> WHERE ...).
all_filters_.emplace_back(
FilterInfo{FilterInfo::Type::Generic,
storage.Create<All>(identifier, atom->identifier_, storage.Create<Where>(prop_equal)),
collector.symbols_});
}
}
return;
}
// Reached only when `properties_` holds something other than a property map.
throw SemanticException("Property map matching not supported in MATCH/MERGE clause!");
};
// Handles the property map of a node or a non-variable edge: one Property
// filter with an EQUAL PropertyFilter per property.
auto add_properties = [&](auto *atom) {
const auto &symbol = symbol_table.at(*atom->identifier_);
if (auto *properties = std::get_if<std::unordered_map<PropertyIx, Expression *>>(&atom->properties_)) {
for (auto &prop_pair : *properties) {
// Create an equality expression and store it in all_filters_.
auto *property_lookup = storage.Create<PropertyLookup>(atom->identifier_, prop_pair.first);
auto *prop_equal = storage.Create<EqualOperator>(property_lookup, prop_pair.second);
collector.symbols_.clear();
prop_equal->Accept(collector);
FilterInfo filter_info{FilterInfo::Type::Property, prop_equal, collector.symbols_};
// Store a PropertyFilter on the value of the property.
filter_info.property_filter.emplace(symbol_table, symbol, prop_pair.first, prop_pair.second,
PropertyFilter::Type::EQUAL);
all_filters_.emplace_back(filter_info);
}
return;
}
// Reached only when `properties_` holds something other than a property map.
throw SemanticException("Property map matching not supported in MATCH/MERGE clause!");
};
// Stores a Label filter for the node's labels (if it has any) followed by its
// property filters.
auto add_node_filter = [&](NodeAtom *node) {
const auto &node_symbol = symbol_table.at(*node->identifier_);
if (!node->labels_.empty()) {
// Create a LabelsTest and store it.
auto *labels_test = storage.Create<LabelsTest>(node->identifier_, node->labels_);
auto label_filter = FilterInfo{FilterInfo::Type::Label, labels_test, std::unordered_set<Symbol>{node_symbol}};
label_filter.labels = node->labels_;
all_filters_.emplace_back(label_filter);
}
add_properties(node);
};
// Called for each (node, edge, node) triplet; the first node was already
// handled by the previous callback invocation, so it is ignored here.
auto add_expand_filter = [&](NodeAtom *, EdgeAtom *edge, NodeAtom *node) {
if (edge->IsVariable())
add_properties_variable(edge);
else
add_properties(edge);
add_node_filter(node);
};
ForEachPattern(pattern, add_node_filter, add_expand_filter);
}
// Stores the expression of `where` in `all_filters_`, gathering along the way
// the extra information used for potential property and label indexing.
void Filters::CollectWhereFilter(Where &where, const SymbolTable &symbol_table) {
  auto *condition = where.expression_;
  CollectFilterExpression(condition, symbol_table);
}
// Splits `expr` on AND and stores every resulting operand in `all_filters_`,
// gathering the extra information used for potential property and label
// indexing.
void Filters::CollectFilterExpression(Expression *expr, const SymbolTable &symbol_table) {
  for (auto *conjunct : SplitExpressionOnAnd(expr)) {
    AnalyzeAndStoreFilter(conjunct, symbol_table);
  }
}
// Analyzes the filter expression by collecting information on filtering labels
// and properties to be used with indexing.
//
// `expr` must not be an AndOperator (callers split on AND beforehand). At
// least one FilterInfo referring to `expr` is appended to `all_filters_`:
// a specialised one (Label, Property, Id, Pattern) when the shape of `expr` is
// recognised, otherwise a Generic one. Equality and comparison expressions may
// append more than one FilterInfo for the same `expr`.
void Filters::AnalyzeAndStoreFilter(Expression *expr, const SymbolTable &symbol_table) {
using Bound = PropertyFilter::Bound;
// Symbols used anywhere in `expr`; every FilterInfo created below gets a copy.
UsedSymbolsCollector collector(symbol_table);
expr->Accept(collector);
// Creates a FilterInfo of the given type for `expr` with the collected symbols.
auto make_filter = [&collector, &expr](FilterInfo::Type type) { return FilterInfo{type, expr, collector.symbols_}; };
// Returns true when `maybe_lookup` is a PropertyLookup directly on an
// Identifier; on success `prop_lookup` and `ident` point at the two nodes.
// Both out-parameters are overwritten even when the check fails.
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup, auto *&ident) -> bool {
return (prop_lookup = utils::Downcast<PropertyLookup>(maybe_lookup)) &&
(ident = utils::Downcast<Identifier>(prop_lookup->expression_));
};
// Checks if maybe_lookup is a property lookup, stores it as a
// PropertyFilter and returns true. If it isn't, returns false.
auto add_prop_equal = [&](auto *maybe_lookup, auto *val_expr) -> bool {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter = PropertyFilter(symbol_table, symbol_table.at(*ident), prop_lookup->property_, val_expr,
PropertyFilter::Type::EQUAL);
all_filters_.emplace_back(filter);
return true;
}
return false;
};
// Like add_prop_equal, but for adding regex match property filter.
auto add_prop_regex_match = [&](auto *maybe_lookup, auto *val_expr) -> bool {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter = PropertyFilter(symbol_table, symbol_table.at(*ident), prop_lookup->property_, val_expr,
PropertyFilter::Type::REGEX_MATCH);
all_filters_.emplace_back(filter);
return true;
}
return false;
};
// Handles `expr1 > expr2` (or `>=`, depending on bound_type). For each side
// that is a property lookup a range PropertyFilter is stored: a lookup on the
// left gets expr2 as its lower bound, a lookup on the right gets expr1 as its
// upper bound. Returns true if at least one filter was stored.
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) -> bool {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
bool is_prop_filter = false;
if (get_property_lookup(expr1, prop_lookup, ident)) {
// n.prop > value
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter.emplace(symbol_table, symbol_table.at(*ident), prop_lookup->property_,
Bound(expr2, bound_type), std::nullopt);
all_filters_.emplace_back(filter);
is_prop_filter = true;
}
if (get_property_lookup(expr2, prop_lookup, ident)) {
// value > n.prop
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter.emplace(symbol_table, symbol_table.at(*ident), prop_lookup->property_, std::nullopt,
Bound(expr1, bound_type));
all_filters_.emplace_back(filter);
is_prop_filter = true;
}
return is_prop_filter;
};
// Checks if maybe_id_fun is an ID invocation on an identifier (a Function
// named kId with exactly one Identifier argument) and adds it as IdFilter.
auto add_id_equal = [&](auto *maybe_id_fun, auto *val_expr) -> bool {
auto *id_fun = utils::Downcast<Function>(maybe_id_fun);
if (!id_fun) return false;
if (id_fun->function_name_ != kId) return false;
if (id_fun->arguments_.size() != 1U) return false;
auto *ident = utils::Downcast<Identifier>(id_fun->arguments_.front());
if (!ident) return false;
auto filter = make_filter(FilterInfo::Type::Id);
filter.id_filter.emplace(symbol_table, symbol_table.at(*ident), val_expr);
all_filters_.emplace_back(filter);
return true;
};
// Like add_prop_equal, but for `n.prop IN list`: the filter is stored as an
// IN PropertyFilter only when val_expr is a ListLiteral and maybe_lookup is a
// property lookup. Returns false otherwise.
auto add_prop_in_list = [&](auto *maybe_lookup, auto *val_expr) -> bool {
if (!utils::Downcast<ListLiteral>(val_expr)) return false;
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter = PropertyFilter(symbol_table, symbol_table.at(*ident), prop_lookup->property_, val_expr,
PropertyFilter::Type::IN);
all_filters_.emplace_back(filter);
return true;
}
return false;
};
// Checks whether maybe_is_not_null_check is the null check on a property,
// ("prop IS NOT NULL"), stores it as a PropertyFilter if it is, and returns
// true. If it isn't, returns false.
auto add_prop_is_not_null_check = [&](auto *maybe_is_not_null_check) -> bool {
// Strip away the outer NOT operator, and figure out
// whether the inner expression is of the form "prop IS NULL"
if (!maybe_is_not_null_check) {
return false;
}
auto *maybe_is_null_check = utils::Downcast<IsNullOperator>(maybe_is_not_null_check->expression_);
if (!maybe_is_null_check) {
return false;
}
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (!get_property_lookup(maybe_is_null_check->expression_, prop_lookup, ident)) {
return false;
}
auto filter = make_filter(FilterInfo::Type::Property);
// Uses the value-less PropertyFilter constructor: there is no expression to
// evaluate for IS NOT NULL.
filter.property_filter =
PropertyFilter(symbol_table.at(*ident), prop_lookup->property_, PropertyFilter::Type::IS_NOT_NULL);
all_filters_.emplace_back(filter);
return true;
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
DMG_ASSERT(!utils::IsSubtype(*expr, AndOperator::kType), "Expected AndOperators have been split.");
if (auto *labels_test = utils::Downcast<LabelsTest>(expr)) {
// Since LabelsTest may contain any expression, we can only use the
// simplest test on an identifier.
if (utils::Downcast<Identifier>(labels_test->expression_)) {
auto filter = make_filter(FilterInfo::Type::Label);
filter.labels = labels_test->labels_;
all_filters_.emplace_back(filter);
} else {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *eq = utils::Downcast<EqualOperator>(expr)) {
// Try to get property equality test from the top expressions.
// Unfortunately, we cannot go deeper inside Equal, because chained equals
// need not correspond to And. For example, `(n.prop = value) = false)`:
// EQ
// / \
// EQ false -- top expressions
// / \
// n.prop value
// Here the `prop` may be different than `value` resulting in `false`. This
// would compare with the top level `false`, producing `true`. Therefore, it
// is incorrect to pick up `n.prop = value` for scanning by property index.
bool is_prop_filter = add_prop_equal(eq->expression1_, eq->expression2_);
// And reversed.
is_prop_filter |= add_prop_equal(eq->expression2_, eq->expression1_);
// Try to get ID equality filter.
// All four attempts run unconditionally, so one equality can yield several
// FilterInfo entries (e.g. when both sides are property lookups).
bool is_id_filter = add_id_equal(eq->expression1_, eq->expression2_);
is_id_filter |= add_id_equal(eq->expression2_, eq->expression1_);
if (!is_prop_filter && !is_id_filter) {
// No special filter was added, so just store a generic filter.
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *regex_match = utils::Downcast<RegexMatch>(expr)) {
if (!add_prop_regex_match(regex_match->string_expr_, regex_match->regex_)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *gt = utils::Downcast<GreaterOperator>(expr)) {
if (!add_prop_greater(gt->expression1_, gt->expression2_, Bound::Type::EXCLUSIVE)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *ge = utils::Downcast<GreaterEqualOperator>(expr)) {
if (!add_prop_greater(ge->expression1_, ge->expression2_, Bound::Type::INCLUSIVE)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *lt = utils::Downcast<LessOperator>(expr)) {
// Like greater, but in reverse.
if (!add_prop_greater(lt->expression2_, lt->expression1_, Bound::Type::EXCLUSIVE)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *le = utils::Downcast<LessEqualOperator>(expr)) {
// Like greater equal, but in reverse.
if (!add_prop_greater(le->expression2_, le->expression1_, Bound::Type::INCLUSIVE)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *in = utils::Downcast<InListOperator>(expr)) {
// IN isn't equivalent to Equal because IN isn't a symmetric operator. The
// IN filter is captured here only if the property lookup occurs on the
// left side of the operator. In that case, it's valid to do the IN list
// optimization during the index lookup rewrite phase.
if (!add_prop_in_list(in->expression1_, in->expression2_)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *is_not_null = utils::Downcast<NotOperator>(expr)) {
if (!add_prop_is_not_null_check(is_not_null)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *exists = utils::Downcast<Exists>(expr)) {
// NOTE(review): `exists` is only used for the type test and never read
// afterwards; the binding could be dropped to avoid an unused-variable warning.
all_filters_.emplace_back(make_filter(FilterInfo::Type::Pattern));
} else {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
// indexing by range. Note, that the generated Ast uses AND for chained
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
// as `expr1 < n.prop AND n.prop < expr2`.
}
// Populates `matching` from the given patterns: the patterns are normalized
// into Expansions, the filters written inside the patterns (and in `where`, if
// any) are collected, and so are the edge symbols which determine
// Cyphermorphism. Collecting pattern filters lifts them out of the pattern and
// creates new expressions for them, as if they had been written in a Where
// clause. Named (user-declared) patterns are additionally registered in
// `matching.named_paths`.
void AddMatching(const std::vector<Pattern *> &patterns, Where *where, SymbolTable &symbol_table, AstStorage &storage,
                 Matching &matching) {
  // Fresh expansions all start with expansion group id -1 ...
  auto expansions = NormalizePatterns(symbol_table, patterns);
  // ... and get their real group id here.
  AssignExpansionGroupIds(expansions, matching, symbol_table);
  // Edge symbols of these expansions are needed to enforce edge uniqueness.
  CollectEdgeSymbols(expansions, matching, symbol_table);
  // Remember every symbol these expansions mention.
  CollectExpansionSymbols(expansions, matching, symbol_table);
  // Finally append the expansions to whatever the matching already holds.
  AddExpansionsToMatching(expansions, matching, symbol_table);

  for (auto *const pattern : patterns) {
    matching.filters.CollectPatternFilters(*pattern, symbol_table, storage);

    // Only patterns the user gave a name to become named paths.
    if (!pattern->identifier_->user_declared_) continue;

    std::vector<Symbol> path_elements;
    for (auto *const atom : pattern->atoms_) {
      path_elements.push_back(symbol_table.at(*atom->identifier_));
    }
    matching.named_paths.emplace(symbol_table.at(*pattern->identifier_), std::move(path_elements));
  }

  if (where) {
    matching.filters.CollectWhereFilter(*where, symbol_table);
  }
}
void AddMatching(const Match &match, SymbolTable &symbol_table, AstStorage &storage, Matching &matching) {
AddMatching(match.patterns_, match.where_, symbol_table, storage, matching);
// If there are any pattern filters, we add those as well
for (auto &filter : matching.filters) {
PatternFilterVisitor visitor(symbol_table, storage);
filter.expression->Accept(visitor);
filter.matchings = visitor.getMatchings();
}
}
// Builds a FilterMatching for the pattern of an EXISTS expression, tags it with
// the EXISTS type and the symbol bound to `op`, and appends it to `matchings_`.
void PatternFilterVisitor::Visit(Exists &op) {
  // The pattern of `op` is matched on its own, without a WHERE clause.
  const std::vector<Pattern *> exists_patterns{op.pattern_};

  FilterMatching exists_matching;
  AddMatching(exists_patterns, nullptr, symbol_table_, storage_, exists_matching);

  exists_matching.type = PatternFilterType::EXISTS;
  exists_matching.symbol = std::make_optional<Symbol>(symbol_table_.at(op));

  matchings_.push_back(std::move(exists_matching));
}
// Walks the clauses of a FOREACH, descending into nested FOREACH clauses, and
// adds one entry to `query_part.merge_matching` for every MERGE clause found.
// Clauses of any other kind are ignored.
static void ParseForeach(query::Foreach &foreach, SingleQueryPart &query_part, AstStorage &storage,
                         SymbolTable &symbol_table) {
  for (auto *clause : foreach.clauses_) {
    if (auto *nested_merge = utils::Downcast<query::Merge>(clause)) {
      // Each MERGE gets its own Matching built from its single pattern.
      query_part.merge_matching.emplace_back(Matching{});
      const std::vector<Pattern *> merge_patterns{nested_merge->pattern_};
      AddMatching(merge_patterns, nullptr, symbol_table, storage, query_part.merge_matching.back());
      continue;
    }
    if (auto *nested_foreach = utils::Downcast<query::Foreach>(clause)) {
      ParseForeach(*nested_foreach, query_part, storage, symbol_table);
    }
  }
}
// Splits a single query into SingleQueryParts. New Ast nodes (e.g. filter
// expressions) may be created in the process. A new part is started after each
// WITH, UNWIND, CALL procedure or LOAD CSV clause; processing stops at the
// first RETURN clause.
std::vector<SingleQueryPart> CollectSingleQueryParts(SymbolTable &symbol_table, AstStorage &storage,
                                                     SingleQuery *single_query) {
  std::vector<SingleQueryPart> parts(1);
  auto *current = &parts.back();

  // Clauses after which the current query part is considered complete.
  const auto closes_query_part = [](const auto &clause) {
    return utils::IsSubtype(*clause, With::kType) || utils::IsSubtype(*clause, query::Unwind::kType) ||
           utils::IsSubtype(*clause, query::CallProcedure::kType) ||
           utils::IsSubtype(*clause, query::LoadCsv::kType);
  };

  for (auto &clause : single_query->clauses_) {
    if (auto *match = utils::Downcast<Match>(clause)) {
      if (!match->optional_) {
        DMG_ASSERT(current->optional_matching.empty(), "Match clause cannot follow optional match.");
        AddMatching(*match, symbol_table, storage, current->matching);
      } else {
        // Every OPTIONAL MATCH gets a Matching of its own.
        current->optional_matching.emplace_back(Matching{});
        AddMatching(*match, symbol_table, storage, current->optional_matching.back());
      }
      continue;
    }

    // Anything that is not a MATCH is kept as a remaining clause of the part.
    current->remaining_clauses.push_back(clause);

    if (auto *merge = utils::Downcast<query::Merge>(clause)) {
      current->merge_matching.emplace_back(Matching{});
      const std::vector<Pattern *> merge_patterns{merge->pattern_};
      AddMatching(merge_patterns, nullptr, symbol_table, storage, current->merge_matching.back());
    } else if (auto *call_subquery = utils::Downcast<query::CallSubquery>(clause)) {
      current->subqueries.emplace_back(
          std::make_shared<QueryParts>(CollectQueryParts(symbol_table, storage, call_subquery->cypher_query_)));
    } else if (auto *foreach = utils::Downcast<query::Foreach>(clause)) {
      ParseForeach(*foreach, *current, storage, symbol_table);
    } else if (closes_query_part(clause)) {
      // This query part is done, continue with a new one. The pointer has to be
      // refreshed because growing the vector may relocate its elements.
      parts.emplace_back(SingleQueryPart{});
      current = &parts.back();
    } else if (utils::IsSubtype(*clause, Return::kType)) {
      return parts;
    }
  }
  return parts;
}
// Converts a whole Cypher query into QueryParts: one QueryPart for the leading
// single query followed by one per UNION, each of the latter tagged with its
// union node. The result is flagged distinct as soon as any of the unions is
// distinct.
QueryParts CollectQueryParts(SymbolTable &symbol_table, AstStorage &storage, CypherQuery *query) {
  auto *leading_query = query->single_query_;
  MG_ASSERT(leading_query, "Expected at least a single query");

  std::vector<QueryPart> query_parts;
  query_parts.push_back(QueryPart{CollectSingleQueryParts(symbol_table, storage, leading_query)});

  bool distinct = false;
  for (auto *cypher_union : query->cypher_unions_) {
    distinct = distinct || cypher_union->distinct_;

    auto *union_query = cypher_union->single_query_;
    MG_ASSERT(union_query, "Expected UNION to have a query");
    query_parts.push_back(QueryPart{CollectSingleQueryParts(symbol_table, storage, union_query), cypher_union});
  }
  return QueryParts{query_parts, distinct};
}
} // namespace memgraph::query::plan