// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
|
|
|
|
/// @file
/// This file provides a plan rewriter which replaces `Filter` and `ScanAll`
/// operations with `ScanAllBy<Index>` if possible. The public entrypoint is
/// `RewriteWithIndexLookup`.
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <optional>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include <gflags/gflags.h>
|
|
|
|
#include "query/plan/operator.hpp"
|
|
#include "query/plan/preprocess.hpp"
|
|
#include "storage/v2/indices.hpp"
|
|
|
|
DECLARE_int64(query_vertex_count_to_expand_existing);
|
|
|
|
namespace memgraph::query::plan {
|
|
|
|
namespace impl {
|
|
|
|
// Removes the expressions listed in `exprs_to_remove` from the expression tree
// rooted at `expr` and returns the new root of that tree.
|
|
Expression *RemoveAndExpressions(Expression *expr, const std::unordered_set<Expression *> &exprs_to_remove);
|
|
|
|
template <class TDbAccessor>
|
|
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|
public:
|
|
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db)
|
|
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
|
|
|
|
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
|
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
|
using HierarchicalLogicalOperatorVisitor::Visit;
|
|
|
|
bool Visit(Once &) override { return true; }
|
|
|
|
bool PreVisit(Filter &op) override {
|
|
prev_ops_.push_back(&op);
|
|
filters_.CollectFilterExpression(op.expression_, *symbol_table_);
|
|
return true;
|
|
}
|
|
|
|
// Remove no longer needed Filter in PostVisit, this should be the last thing
|
|
// Filter::Accept does, so it should be safe to remove the last reference and
|
|
// free the memory.
|
|
bool PostVisit(Filter &op) override {
|
|
prev_ops_.pop_back();
|
|
op.expression_ = RemoveAndExpressions(op.expression_, filter_exprs_for_removal_);
|
|
if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) {
|
|
SetOnParent(op.input());
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAll &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
|
|
// Replace ScanAll with ScanAllBy<Index> in PostVisit, because removal of
|
|
// ScanAll may remove the last reference and thus free the memory. PostVisit
|
|
// should be the last thing ScanAll::Accept does, so it should be safe.
|
|
bool PostVisit(ScanAll &scan) override {
|
|
prev_ops_.pop_back();
|
|
auto indexed_scan = GenScanByIndex(scan);
|
|
if (indexed_scan) {
|
|
SetOnParent(std::move(indexed_scan));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Expand &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
|
|
// See if it might be better to do ScanAllBy<Index> of the destination and
|
|
// then do Expand to existing.
|
|
bool PostVisit(Expand &expand) override {
|
|
prev_ops_.pop_back();
|
|
if (expand.common_.existing_node) {
|
|
return true;
|
|
}
|
|
ScanAll dst_scan(expand.input(), expand.common_.node_symbol, expand.view_);
|
|
auto indexed_scan = GenScanByIndex(dst_scan, FLAGS_query_vertex_count_to_expand_existing);
|
|
if (indexed_scan) {
|
|
expand.set_input(std::move(indexed_scan));
|
|
expand.common_.existing_node = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ExpandVariable &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
|
|
// See if it might be better to do ScanAllBy<Index> of the destination and
|
|
// then do ExpandVariable to existing.
|
|
bool PostVisit(ExpandVariable &expand) override {
|
|
prev_ops_.pop_back();
|
|
if (expand.common_.existing_node) {
|
|
return true;
|
|
}
|
|
std::unique_ptr<ScanAll> indexed_scan;
|
|
ScanAll dst_scan(expand.input(), expand.common_.node_symbol, storage::View::OLD);
|
|
// With expand to existing we only get real gains with BFS, because we use a
|
|
// different algorithm then, so prefer expand to existing.
|
|
if (expand.type_ == EdgeAtom::Type::BREADTH_FIRST) {
|
|
// TODO: Perhaps take average node degree into consideration, instead of
|
|
// unconditionally creating an indexed scan.
|
|
indexed_scan = GenScanByIndex(dst_scan);
|
|
} else {
|
|
indexed_scan = GenScanByIndex(dst_scan, FLAGS_query_vertex_count_to_expand_existing);
|
|
}
|
|
if (indexed_scan) {
|
|
expand.set_input(std::move(indexed_scan));
|
|
expand.common_.existing_node = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// The following operators may only use index lookup in filters inside of
|
|
// their own branches. So we handle them all the same.
|
|
// * Input operator is visited with the current visitor.
|
|
// * Custom operator branches are visited with a new visitor.
|
|
|
|
bool PreVisit(Merge &op) override {
|
|
prev_ops_.push_back(&op);
|
|
op.input()->Accept(*this);
|
|
RewriteBranch(&op.merge_match_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Merge &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Optional &op) override {
|
|
prev_ops_.push_back(&op);
|
|
op.input()->Accept(*this);
|
|
RewriteBranch(&op.optional_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Optional &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
// Rewriting Cartesian assumes that the input plan will have Filter operations
|
|
// as soon as they are possible. Therefore we do not track filters above
|
|
// Cartesian because they should be irrelevant.
|
|
//
|
|
// For example, the following plan is not expected to be an input to
|
|
// IndexLookupRewriter.
|
|
//
|
|
// Filter n.prop = 16
|
|
// |
|
|
// Cartesian
|
|
// |
|
|
// |\
|
|
// | ScanAll (n)
|
|
// |
|
|
// ScanAll (m)
|
|
//
|
|
// Instead, the equivalent set of operations should be done this way:
|
|
//
|
|
// Cartesian
|
|
// |
|
|
// |\
|
|
// | Filter n.prop = 16
|
|
// | |
|
|
// | ScanAll (n)
|
|
// |
|
|
// ScanAll (m)
|
|
bool PreVisit(Cartesian &op) override {
|
|
prev_ops_.push_back(&op);
|
|
RewriteBranch(&op.left_op_);
|
|
RewriteBranch(&op.right_op_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Cartesian &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Union &op) override {
|
|
prev_ops_.push_back(&op);
|
|
RewriteBranch(&op.left_op_);
|
|
RewriteBranch(&op.right_op_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Union &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
// The remaining operators should work by just traversing into their input.
|
|
|
|
bool PreVisit(CreateNode &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(CreateNode &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(CreateExpand &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(CreateExpand &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAllByLabel &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ScanAllByLabel &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAllByLabelPropertyRange &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ScanAllByLabelPropertyRange &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAllByLabelPropertyValue &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ScanAllByLabelPropertyValue &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAllByLabelProperty &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ScanAllByLabelProperty &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ScanAllById &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ScanAllById &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(ConstructNamedPath &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(ConstructNamedPath &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Produce &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Produce &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(EmptyResult &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(EmptyResult &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Delete &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Delete &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(SetProperty &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(SetProperty &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(SetProperties &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(SetProperties &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(SetLabels &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(SetLabels &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(RemoveProperty &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(RemoveProperty &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(RemoveLabels &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(RemoveLabels &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(EdgeUniquenessFilter &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(EdgeUniquenessFilter &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Accumulate &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Accumulate &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Aggregate &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Aggregate &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Skip &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Skip &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Limit &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Limit &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(OrderBy &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(OrderBy &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Unwind &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Unwind &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Distinct &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(Distinct &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(CallProcedure &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
bool PostVisit(CallProcedure &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Foreach &op) override {
|
|
prev_ops_.push_back(&op);
|
|
op.input()->Accept(*this);
|
|
RewriteBranch(&op.update_clauses_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Foreach &) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(EvaluatePatternFilter &op) override {
|
|
prev_ops_.push_back(&op);
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(EvaluatePatternFilter & /*op*/) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
bool PreVisit(Apply &op) override {
|
|
prev_ops_.push_back(&op);
|
|
op.input()->Accept(*this);
|
|
RewriteBranch(&op.subquery_);
|
|
return false;
|
|
}
|
|
|
|
bool PostVisit(Apply & /*op*/) override {
|
|
prev_ops_.pop_back();
|
|
return true;
|
|
}
|
|
|
|
std::shared_ptr<LogicalOperator> new_root_;
|
|
|
|
private:
|
|
SymbolTable *symbol_table_;
|
|
AstStorage *ast_storage_;
|
|
TDbAccessor *db_;
|
|
// Collected filters, pending for examination if they can be used for advanced
|
|
// lookup operations (by index, node ID, ...).
|
|
Filters filters_;
|
|
// Expressions which no longer need a plain Filter operator.
|
|
std::unordered_set<Expression *> filter_exprs_for_removal_;
|
|
std::vector<LogicalOperator *> prev_ops_;
|
|
|
|
struct LabelPropertyIndex {
|
|
LabelIx label;
|
|
// FilterInfo with PropertyFilter.
|
|
FilterInfo filter;
|
|
int64_t vertex_count;
|
|
std::optional<storage::IndexStats> index_stats;
|
|
};
|
|
|
|
bool DefaultPreVisit() override { throw utils::NotYetImplemented("optimizing index lookup"); }
|
|
|
|
void SetOnParent(const std::shared_ptr<LogicalOperator> &input) {
|
|
MG_ASSERT(input);
|
|
if (prev_ops_.empty()) {
|
|
MG_ASSERT(!new_root_);
|
|
new_root_ = input;
|
|
return;
|
|
}
|
|
prev_ops_.back()->set_input(input);
|
|
}
|
|
|
|
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
|
|
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_);
|
|
(*branch)->Accept(rewriter);
|
|
if (rewriter.new_root_) {
|
|
*branch = rewriter.new_root_;
|
|
}
|
|
}
|
|
|
|
storage::LabelId GetLabel(LabelIx label) { return db_->NameToLabel(label.name); }
|
|
|
|
storage::PropertyId GetProperty(PropertyIx prop) { return db_->NameToProperty(prop.name); }
|
|
|
|
std::optional<LabelIx> FindBestLabelIndex(const std::unordered_set<LabelIx> &labels) {
|
|
MG_ASSERT(!labels.empty(), "Trying to find the best label without any labels.");
|
|
std::optional<LabelIx> best_label;
|
|
for (const auto &label : labels) {
|
|
if (!db_->LabelIndexExists(GetLabel(label))) continue;
|
|
if (!best_label) {
|
|
best_label = label;
|
|
continue;
|
|
}
|
|
if (db_->VerticesCount(GetLabel(label)) < db_->VerticesCount(GetLabel(*best_label))) best_label = label;
|
|
}
|
|
return best_label;
|
|
}
|
|
|
|
// Finds the label-property combination. The first criteria based on number of vertices indexed -> if one index has
|
|
// 10x less than the other one, always choose the smaller one. Otherwise, choose the index with smallest average group
|
|
// size based on key distribution. If average group size is equal, choose the index that has distribution closer to
|
|
// uniform distribution. Conditions based on average group size and key distribution can be only taken into account if
|
|
// the user has run `ANALYZE GRAPH` query before If the index cannot be found, nullopt is returned.
|
|
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
|
|
const std::unordered_set<Symbol> &bound_symbols) {
|
|
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
|
for (const auto &used_symbol : used_symbols) {
|
|
if (!utils::Contains(bound_symbols, used_symbol)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
|
|
/*
|
|
* Comparator function between two indices. If new index has >= 10x vertices than the existing, it cannot be better.
|
|
* If it is <= 10x in number of vertices, check average group size of property values. The index with smaller
|
|
* average group size is better. If the average group size is the same, choose the one closer to the uniform
|
|
* distribution
|
|
* @param found: Current best label-property index.
|
|
* @param new_stats: Label-property index candidate.
|
|
* @param vertex_count: New index's number of vertices.
|
|
* @return -1 if the new index is better, 0 if they are equal and 1 if the existing one is better.
|
|
*/
|
|
auto compare_indices = [](std::optional<LabelPropertyIndex> &found, std::optional<storage::IndexStats> &new_stats,
|
|
int vertex_count) {
|
|
if (!new_stats.has_value() || vertex_count / 10.0 > found->vertex_count) {
|
|
return 1;
|
|
}
|
|
int cmp_avg_group = utils::CompareDecimal(new_stats->avg_group_size, found->index_stats->avg_group_size);
|
|
if (cmp_avg_group != 0) return cmp_avg_group;
|
|
return utils::CompareDecimal(new_stats->statistic, found->index_stats->statistic);
|
|
};
|
|
|
|
std::optional<LabelPropertyIndex> found;
|
|
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
|
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
|
if (filter.property_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) {
|
|
// Skip filter expressions which use the symbol whose property we are
|
|
// looking up or aren't bound. We cannot scan by such expressions. For
|
|
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
|
|
// cannot scan `n` by property index.
|
|
continue;
|
|
}
|
|
const auto &property = filter.property_filter->property_;
|
|
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
|
|
continue;
|
|
}
|
|
auto is_better_type = [&found](PropertyFilter::Type type) {
|
|
// Order the types by the most preferred index lookup type.
|
|
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
|
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, PropertyFilter::Type::REGEX_MATCH};
|
|
auto *found_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, found->filter.property_filter->type_);
|
|
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
|
return type_sort_ix < found_sort_ix;
|
|
};
|
|
|
|
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
|
std::optional<storage::IndexStats> new_stats = db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
|
|
|
// Conditions, from more to less important:
|
|
// the index with 10x less vertices is better.
|
|
// the index with smaller average group size is better.
|
|
// the index with equal avg group size and distribution closer to the uniform is better.
|
|
// the index with less vertices is better.
|
|
// the index with same number of vertices but more optimized filter is better.
|
|
if (!found || vertex_count * 10 < found->vertex_count) {
|
|
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
|
continue;
|
|
}
|
|
|
|
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
|
|
cmp_res == -1 ||
|
|
cmp_res == 0 && (found->vertex_count > vertex_count ||
|
|
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
|
|
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
|
}
|
|
}
|
|
}
|
|
return found;
|
|
}
|
|
// Creates a ScanAll by the best possible index for the `node_symbol`. If the node
|
|
// does not have at least a label, no indexed lookup can be created and
|
|
// `nullptr` is returned. The operator is chained after `input`. Optional
|
|
// `max_vertex_count` controls, whether no operator should be created if the
|
|
// vertex count in the best index exceeds this number. In such a case,
|
|
// `nullptr` is returned and `input` is not chained.
|
|
std::unique_ptr<ScanAll> GenScanByIndex(const ScanAll &scan,
|
|
const std::optional<int64_t> &max_vertex_count = std::nullopt) {
|
|
const auto &input = scan.input();
|
|
const auto &node_symbol = scan.output_symbol_;
|
|
const auto &view = scan.view_;
|
|
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
|
|
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(), modified_symbols.end());
|
|
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
|
for (const auto &used_symbol : used_symbols) {
|
|
if (!utils::Contains(bound_symbols, used_symbol)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
// First, try to see if we can find a vertex by ID.
|
|
if (!max_vertex_count || *max_vertex_count >= 1) {
|
|
for (const auto &filter : filters_.IdFilters(node_symbol)) {
|
|
if (filter.id_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) continue;
|
|
auto *value = filter.id_filter->value_;
|
|
filter_exprs_for_removal_.insert(filter.expression);
|
|
filters_.EraseFilter(filter);
|
|
return std::make_unique<ScanAllById>(input, node_symbol, value, view);
|
|
}
|
|
}
|
|
// Now try to see if we can use label+property index. If not, try to use
|
|
// just the label index.
|
|
const auto labels = filters_.FilteredLabels(node_symbol);
|
|
if (labels.empty()) {
|
|
// Without labels, we cannot generate any indexed ScanAll.
|
|
return nullptr;
|
|
}
|
|
auto found_index = FindBestLabelPropertyIndex(node_symbol, bound_symbols);
|
|
if (found_index &&
|
|
// Use label+property index if we satisfy max_vertex_count.
|
|
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
|
|
// Copy the property filter and then erase it from filters.
|
|
const auto prop_filter = *found_index->filter.property_filter;
|
|
if (prop_filter.type_ != PropertyFilter::Type::REGEX_MATCH) {
|
|
// Remove the original expression from Filter operation only if it's not
|
|
// a regex match. In such a case we need to perform the matching even
|
|
// after we've scanned the index.
|
|
filter_exprs_for_removal_.insert(found_index->filter.expression);
|
|
}
|
|
filters_.EraseFilter(found_index->filter);
|
|
std::vector<Expression *> removed_expressions;
|
|
filters_.EraseLabelFilter(node_symbol, found_index->label, &removed_expressions);
|
|
filter_exprs_for_removal_.insert(removed_expressions.begin(), removed_expressions.end());
|
|
if (prop_filter.lower_bound_ || prop_filter.upper_bound_) {
|
|
return std::make_unique<ScanAllByLabelPropertyRange>(
|
|
input, node_symbol, GetLabel(found_index->label), GetProperty(prop_filter.property_),
|
|
prop_filter.property_.name, prop_filter.lower_bound_, prop_filter.upper_bound_, view);
|
|
} else if (prop_filter.type_ == PropertyFilter::Type::REGEX_MATCH) {
|
|
// Generate index scan using the empty string as a lower bound.
|
|
Expression *empty_string = ast_storage_->Create<PrimitiveLiteral>("");
|
|
auto lower_bound = utils::MakeBoundInclusive(empty_string);
|
|
return std::make_unique<ScanAllByLabelPropertyRange>(
|
|
input, node_symbol, GetLabel(found_index->label), GetProperty(prop_filter.property_),
|
|
prop_filter.property_.name, std::make_optional(lower_bound), std::nullopt, view);
|
|
} else if (prop_filter.type_ == PropertyFilter::Type::IN) {
|
|
// TODO(buda): ScanAllByLabelProperty + Filter should be considered
|
|
// here once the operator and the right cardinality estimation exist.
|
|
auto const &symbol = symbol_table_->CreateAnonymousSymbol();
|
|
auto *expression = ast_storage_->Create<Identifier>(symbol.name_);
|
|
expression->MapTo(symbol);
|
|
auto unwind_operator = std::make_unique<Unwind>(input, prop_filter.value_, symbol);
|
|
return std::make_unique<ScanAllByLabelPropertyValue>(
|
|
std::move(unwind_operator), node_symbol, GetLabel(found_index->label), GetProperty(prop_filter.property_),
|
|
prop_filter.property_.name, expression, view);
|
|
} else if (prop_filter.type_ == PropertyFilter::Type::IS_NOT_NULL) {
|
|
return std::make_unique<ScanAllByLabelProperty>(input, node_symbol, GetLabel(found_index->label),
|
|
GetProperty(prop_filter.property_), prop_filter.property_.name,
|
|
view);
|
|
} else {
|
|
MG_ASSERT(prop_filter.value_, "Property filter should either have bounds or a value expression.");
|
|
return std::make_unique<ScanAllByLabelPropertyValue>(input, node_symbol, GetLabel(found_index->label),
|
|
GetProperty(prop_filter.property_),
|
|
prop_filter.property_.name, prop_filter.value_, view);
|
|
}
|
|
}
|
|
auto maybe_label = FindBestLabelIndex(labels);
|
|
if (!maybe_label) return nullptr;
|
|
const auto &label = *maybe_label;
|
|
if (max_vertex_count && db_->VerticesCount(GetLabel(label)) > *max_vertex_count) {
|
|
// Don't create an indexed lookup, since we have more labeled vertices
|
|
// than the allowed count.
|
|
return nullptr;
|
|
}
|
|
std::vector<Expression *> removed_expressions;
|
|
filters_.EraseLabelFilter(node_symbol, label, &removed_expressions);
|
|
filter_exprs_for_removal_.insert(removed_expressions.begin(), removed_expressions.end());
|
|
return std::make_unique<ScanAllByLabel>(input, node_symbol, GetLabel(label), view);
|
|
}
|
|
};
|
|
|
|
} // namespace impl
|
|
|
|
template <class TDbAccessor>
|
|
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(std::unique_ptr<LogicalOperator> root_op,
|
|
SymbolTable *symbol_table, AstStorage *ast_storage,
|
|
TDbAccessor *db) {
|
|
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db);
|
|
root_op->Accept(rewriter);
|
|
if (rewriter.new_root_) {
|
|
// This shouldn't happen in real use case, because IndexLookupRewriter
|
|
// removes Filter operations and they cannot be the root op. In case we
|
|
// somehow missed this, raise NotYetImplemented instead of MG_ASSERT
|
|
// crashing the application.
|
|
throw utils::NotYetImplemented("optimizing index lookup");
|
|
}
|
|
return root_op;
|
|
}
|
|
|
|
} // namespace memgraph::query::plan
|