Move creating indexed lookup to a rewrite pass

Summary:
RuleBasedPlanner now generates only the regular ScanAll operations, and
Filter operations are appended as soon as possible. The newly added
Rewrite step, takes this operator tree and replaces viable Filter &
ScanAll operators with appropriate ScanAllBy<Index> operator. This
change ought to simplify the behaviour of DistributedPlanner when that
stage is moved before the indexed lookup rewrite.

Showing unoptimized plan in interactive planner is also supported.

Reviewers: mtomic, llugovic

Reviewed By: mtomic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1839
This commit is contained in:
Teon Banek 2019-02-05 16:12:02 +01:00
parent 8a508a1cb6
commit e461a08340
14 changed files with 709 additions and 192 deletions

View File

@ -38,6 +38,7 @@ set(mg_single_node_sources
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rewrite/index_lookup.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp
@ -144,6 +145,7 @@ set(mg_distributed_sources
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rewrite/index_lookup.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp
@ -274,6 +276,7 @@ set(mg_single_node_ha_sources
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rewrite/index_lookup.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp

View File

@ -73,7 +73,8 @@ class DistributedPostProcessor final {
template <class TPlanningContext>
plan::DistributedPlan Rewrite(std::unique_ptr<plan::LogicalOperator> plan,
TPlanningContext *context) {
original_plan_ = std::move(plan);
plan::PostProcessor post_processor(parameters_);
original_plan_ = post_processor.Rewrite(std::move(plan), context);
const auto &property_names = context->ast_storage->properties_;
std::vector<storage::Property> properties_by_ix;
properties_by_ix.reserve(property_names.size());

View File

@ -10,6 +10,7 @@
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
#include "query/plan/pretty_print.hpp"
#include "query/plan/rewrite/index_lookup.hpp"
#include "query/plan/rule_based_planner.hpp"
#include "query/plan/variable_start_planner.hpp"
#include "query/plan/vertex_count_cache.hpp"
@ -33,7 +34,8 @@ class PostProcessor final {
template <class TPlanningContext>
std::unique_ptr<LogicalOperator> Rewrite(
std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
return plan;
return RewriteWithIndexLookup(std::move(plan), *context->symbol_table,
context->db);
}
template <class TVertexCounts>

View File

@ -188,7 +188,8 @@ void Filters::EraseFilter(const FilterInfo &filter) {
all_filters_.end());
}
void Filters::EraseLabelFilter(const Symbol &symbol, LabelIx label) {
void Filters::EraseLabelFilter(const Symbol &symbol, LabelIx label,
std::vector<Expression *> *removed_filters) {
for (auto filter_it = all_filters_.begin();
filter_it != all_filters_.end();) {
if (filter_it->type != FilterInfo::Type::Label) {
@ -210,6 +211,9 @@ void Filters::EraseLabelFilter(const Symbol &symbol, LabelIx label) {
<< "Didn't expect duplicated labels";
if (filter_it->labels.empty()) {
// If there are no labels to filter, then erase the whole FilterInfo.
if (removed_filters) {
removed_filters->push_back(filter_it->expression);
}
filter_it = all_filters_.erase(filter_it);
} else {
++filter_it;
@ -315,8 +319,15 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
// information for potential property and label indexing.
void Filters::CollectWhereFilter(Where &where,
const SymbolTable &symbol_table) {
auto where_filters = SplitExpressionOnAnd(where.expression_);
for (const auto &filter : where_filters) {
CollectFilterExpression(where.expression_, symbol_table);
}
// Adds the expression to `all_filters_` and collects additional
// information for potential property and label indexing.
void Filters::CollectFilterExpression(Expression *expr,
const SymbolTable &symbol_table) {
auto filters = SplitExpressionOnAnd(expr);
for (const auto &filter : filters) {
AnalyzeAndStoreFilter(filter, symbol_table);
}
}

View File

@ -121,7 +121,7 @@ struct FilterInfo {
///
/// Info is stored as a list of FilterInfo objects corresponding to all filter
/// expressions that should be generated.
class Filters {
class Filters final {
public:
using iterator = std::vector<FilterInfo>::iterator;
using const_iterator = std::vector<FilterInfo>::const_iterator;
@ -147,7 +147,7 @@ class Filters {
for (const auto &filter : all_filters_) {
if (filter.type == FilterInfo::Type::Label &&
utils::Contains(filter.used_symbols, symbol)) {
DCHECK(filter.used_symbols.size() == 1U)
CHECK(filter.used_symbols.size() == 1U)
<< "Expected a single used symbol for label filter";
labels.insert(filter.labels.begin(), filter.labels.end());
}
@ -155,15 +155,18 @@ class Filters {
return labels;
}
// Remove a filter; may invalidate iterators.
// Removal is done by comparing only the expression, so that multiple
// FilterInfo objects using the same original expression are removed.
/// Remove a filter; may invalidate iterators.
/// Removal is done by comparing only the expression, so that multiple
/// FilterInfo objects using the same original expression are removed.
void EraseFilter(const FilterInfo &);
// Remove a label filter for symbol; may invalidate iterators.
void EraseLabelFilter(const Symbol &, LabelIx);
/// Remove a label filter for symbol; may invalidate iterators.
/// If removed_filters is not nullptr, fills the vector with original
/// `Expression *` which are now completely removed.
void EraseLabelFilter(const Symbol &, LabelIx,
std::vector<Expression *> *removed_filters = nullptr);
// Returns a vector of FilterInfo for properties.
/// Returns a vector of FilterInfo for properties.
auto PropertyFilters(const Symbol &symbol) const {
std::vector<FilterInfo> filters;
for (const auto &filter : all_filters_) {
@ -181,6 +184,7 @@ class Filters {
/// for found labels, properties and edge types. The generated expressions are
/// stored.
void CollectPatternFilters(Pattern &, SymbolTable &, AstStorage &);
/// Collects filtering information from a where expression.
///
/// Takes the where expression and stores it, then analyzes the expression for
@ -188,6 +192,13 @@ class Filters {
/// label filters and property filters, so that indexed scanning can use it.
void CollectWhereFilter(Where &, const SymbolTable &);
/// Collects filtering information from an expression.
///
/// Takes the where expression and stores it, then analyzes the expression for
/// additional information. The additional information is used to populate
/// label filters and property filters, so that indexed scanning can use it.
void CollectFilterExpression(Expression *, const SymbolTable &);
private:
void AnalyzeAndStoreFilter(Expression *, const SymbolTable &);

View File

@ -0,0 +1,43 @@
#include "query/plan/rewrite/index_lookup.hpp"
#include "utils/flag_validation.hpp"
DEFINE_VALIDATED_HIDDEN_int64(
query_vertex_count_to_expand_existing, 10,
"Maximum count of indexed vertices which provoke "
"indexed lookup and then expand to existing, instead of "
"a regular expand. Default is 10, to turn off use -1.",
FLAG_IN_RANGE(-1, std::numeric_limits<std::int64_t>::max()));
namespace query::plan::impl {
Expression *RemoveAndExpressions(
Expression *expr, const std::unordered_set<Expression *> &exprs_to_remove) {
auto *and_op = utils::Downcast<AndOperator>(expr);
if (!and_op) return expr;
if (utils::Contains(exprs_to_remove, and_op)) {
return nullptr;
}
if (utils::Contains(exprs_to_remove, and_op->expression1_)) {
and_op->expression1_ = nullptr;
}
if (utils::Contains(exprs_to_remove, and_op->expression2_)) {
and_op->expression2_ = nullptr;
}
and_op->expression1_ =
RemoveAndExpressions(and_op->expression1_, exprs_to_remove);
and_op->expression2_ =
RemoveAndExpressions(and_op->expression2_, exprs_to_remove);
if (!and_op->expression1_ && !and_op->expression2_) {
return nullptr;
}
if (and_op->expression1_ && !and_op->expression2_) {
return and_op->expression1_;
}
if (and_op->expression2_ && !and_op->expression1_) {
return and_op->expression2_;
}
return and_op;
}
} // namespace query::plan::impl

View File

@ -0,0 +1,560 @@
/// @file
/// This file provides a plan rewriter which replaces `Filter` and `ScanAll`
/// operations with `ScanAllBy<Index>` if possible. The public entrypoint is
/// `RewriteWithIndexLookup`.
#pragma once
#include <algorithm>
#include <experimental/optional>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <gflags/gflags.h>
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
DECLARE_int64(query_vertex_count_to_expand_existing);
namespace query::plan {
namespace impl {
// Return the new root expression after removing the given expressions from the
// given expression tree.
Expression *RemoveAndExpressions(
Expression *expr, const std::unordered_set<Expression *> &exprs_to_remove);
template <class TDbAccessor>
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
public:
IndexLookupRewriter(const SymbolTable *symbol_table, TDbAccessor *db)
: symbol_table_(symbol_table), db_(db) {}
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
using HierarchicalLogicalOperatorVisitor::Visit;
bool Visit(Once &) override { return true; }
bool PreVisit(Filter &op) override {
prev_ops_.push_back(&op);
filters_.CollectFilterExpression(op.expression_, *symbol_table_);
return true;
}
// Remove no longer needed Filter in PostVisit, this should be the last thing
// Filter::Accept does, so it should be safe to remove the last reference and
// free the memory.
bool PostVisit(Filter &op) override {
prev_ops_.pop_back();
op.expression_ =
RemoveAndExpressions(op.expression_, filter_exprs_for_removal_);
if (!op.expression_ ||
utils::Contains(filter_exprs_for_removal_, op.expression_)) {
SetOnParent(op.input());
}
return true;
}
bool PreVisit(ScanAll &op) override {
prev_ops_.push_back(&op);
return true;
}
// Replace ScanAll with ScanAllBy<Index> in PostVisit, because removal of
// ScanAll may remove the last reference and thus free the memory. PostVisit
// should be the last thing ScanAll::Accept does, so it should be safe.
bool PostVisit(ScanAll &scan) override {
prev_ops_.pop_back();
auto indexed_scan = GenScanByIndex(scan);
if (indexed_scan) {
SetOnParent(std::move(indexed_scan));
}
return true;
}
bool PreVisit(Expand &op) override {
prev_ops_.push_back(&op);
return true;
}
// See if it might be better to do ScanAllBy<Index> of the destination and
// then do Expand to existing.
bool PostVisit(Expand &expand) override {
prev_ops_.pop_back();
if (expand.common_.existing_node) {
return true;
}
ScanAll dst_scan(expand.input(), expand.common_.node_symbol,
expand.graph_view_);
auto indexed_scan =
GenScanByIndex(dst_scan, FLAGS_query_vertex_count_to_expand_existing);
if (indexed_scan) {
expand.set_input(std::move(indexed_scan));
expand.common_.existing_node = true;
}
return true;
}
// The following operators may only use index lookup in filters inside of
// their own branches. So we handle them all the same.
// * Input operator is visited with the current visitor.
// * Custom operator branches are visited with a new visitor.
bool PreVisit(Merge &op) override {
prev_ops_.push_back(&op);
op.input()->Accept(*this);
RewriteBranch(&op.merge_match_);
return false;
}
bool PostVisit(Merge &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Optional &op) override {
prev_ops_.push_back(&op);
op.input()->Accept(*this);
RewriteBranch(&op.optional_);
return false;
}
bool PostVisit(Optional &) override {
prev_ops_.pop_back();
return true;
}
// Rewriting Cartesian assumes that the input plan will have Filter operations
// as soon as they are possible. Therefore we do not track filters above
// Cartesian because they should be irrelevant.
//
// For example, the following plan is not expected to be an input to
// IndexLookupRewriter.
//
// Filter n.prop = 16
// |
// Cartesian
// |
// |\
// | ScanAll (n)
// |
// ScanAll (m)
//
// Instead, the equivalent set of operations should be done this way:
//
// Cartesian
// |
// |\
// | Filter n.prop = 16
// | |
// | ScanAll (n)
// |
// ScanAll (m)
bool PreVisit(Cartesian &op) override {
prev_ops_.push_back(&op);
RewriteBranch(&op.left_op_);
RewriteBranch(&op.right_op_);
return false;
}
bool PostVisit(Cartesian &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Union &op) override {
prev_ops_.push_back(&op);
RewriteBranch(&op.left_op_);
RewriteBranch(&op.right_op_);
return false;
}
bool PostVisit(Union &) override {
prev_ops_.pop_back();
return true;
}
// The remaining operators should work by just traversing into their input.
bool PreVisit(CreateNode &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(CreateNode &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(CreateExpand &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(CreateExpand &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ScanAllByLabel &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(ScanAllByLabel &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ScanAllByLabelPropertyRange &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(ScanAllByLabelPropertyRange &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ScanAllByLabelPropertyValue &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(ScanAllByLabelPropertyValue &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ExpandVariable &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(ExpandVariable &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ConstructNamedPath &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(ConstructNamedPath &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Produce &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Produce &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Delete &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Delete &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(SetProperty &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(SetProperty &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(SetProperties &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(SetProperties &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(SetLabels &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(SetLabels &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(RemoveProperty &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(RemoveProperty &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(RemoveLabels &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(RemoveLabels &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(EdgeUniquenessFilter &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(EdgeUniquenessFilter &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Accumulate &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Accumulate &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Aggregate &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Aggregate &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Skip &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Skip &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Limit &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Limit &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(OrderBy &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(OrderBy &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Unwind &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Unwind &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(Distinct &op) override {
prev_ops_.push_back(&op);
return true;
}
bool PostVisit(Distinct &) override {
prev_ops_.pop_back();
return true;
}
std::shared_ptr<LogicalOperator> new_root_;
private:
const SymbolTable *symbol_table_;
TDbAccessor *db_;
Filters filters_;
std::unordered_set<Expression *> filter_exprs_for_removal_;
std::vector<LogicalOperator *> prev_ops_;
struct LabelPropertyIndex {
LabelIx label;
// FilterInfo with PropertyFilter.
FilterInfo filter;
int64_t vertex_count;
};
bool DefaultPreVisit() override {
throw utils::NotYetImplemented("optimizing index lookup");
}
void SetOnParent(const std::shared_ptr<LogicalOperator> &input) {
CHECK(input);
if (prev_ops_.empty()) {
CHECK(!new_root_);
new_root_ = input;
return;
}
prev_ops_.back()->set_input(input);
}
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, db_);
(*branch)->Accept(rewriter);
if (rewriter.new_root_) {
*branch = rewriter.new_root_;
}
}
storage::Label GetLabel(LabelIx label) { return db_->Label(label.name); }
storage::Property GetProperty(PropertyIx prop) {
return db_->Property(prop.name);
}
LabelIx FindBestLabelIndex(const std::unordered_set<LabelIx> &labels) {
CHECK(!labels.empty())
<< "Trying to find the best label without any labels.";
return *std::min_element(labels.begin(), labels.end(),
[this](const auto &label1, const auto &label2) {
return db_->VerticesCount(GetLabel(label1)) <
db_->VerticesCount(GetLabel(label2));
});
}
// Finds the label-property combination which has indexed the lowest amount of
// vertices. If the index cannot be found, nullopt is returned.
std::experimental::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(
const Symbol &symbol, const std::unordered_set<Symbol> &bound_symbols) {
auto are_bound = [&bound_symbols](const auto &used_symbols) {
for (const auto &used_symbol : used_symbols) {
if (!utils::Contains(bound_symbols, used_symbol)) {
return false;
}
}
return true;
};
std::experimental::optional<LabelPropertyIndex> found;
for (const auto &label : filters_.FilteredLabels(symbol)) {
for (const auto &filter : filters_.PropertyFilters(symbol)) {
const auto &property = filter.property_filter->property_;
if (db_->LabelPropertyIndexExists(GetLabel(label),
GetProperty(property))) {
int64_t vertex_count =
db_->VerticesCount(GetLabel(label), GetProperty(property));
if (!found || vertex_count < found->vertex_count) {
if (filter.property_filter->is_symbol_in_value_) {
// Skip filter expressions which use the symbol whose property
// we are looking up. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
// so we cannot scan `n` by property index.
continue;
}
if (are_bound(filter.used_symbols)) {
// Take the property filter which uses bound symbols.
found = LabelPropertyIndex{label, filter, vertex_count};
}
}
}
}
}
return found;
}
// Creates a ScanAll by the best possible index for the `node_symbol`. Best
// index is defined as the index with least number of vertices. If the node
// does not have at least a label, no indexed lookup can be created and
// `nullptr` is returned. The operator is chained after `input`. Optional
// `max_vertex_count` controls, whether no operator should be created if the
// vertex count in the best index exceeds this number. In such a case,
// `nullptr` is returned and `input` is not chained.
std::unique_ptr<ScanAll> GenScanByIndex(
const ScanAll &scan,
const std::experimental::optional<int64_t> &max_vertex_count =
std::experimental::nullopt) {
const auto &input = scan.input();
const auto &node_symbol = scan.output_symbol_;
const auto &graph_view = scan.graph_view_;
const auto labels = filters_.FilteredLabels(node_symbol);
if (labels.empty()) {
// Without labels, we cannot generate any indexed ScanAll.
return nullptr;
}
// First, try to see if we can use label+property index. If not, use just
// the label index (which ought to exist).
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(),
modified_symbols.end());
auto found_index = FindBestLabelPropertyIndex(node_symbol, bound_symbols);
if (found_index &&
// Use label+property index if we satisfy max_vertex_count.
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
// Copy the property filter and then erase it from filters.
const auto prop_filter = *found_index->filter.property_filter;
filter_exprs_for_removal_.insert(found_index->filter.expression);
filters_.EraseFilter(found_index->filter);
std::vector<Expression *> removed_expressions;
filters_.EraseLabelFilter(node_symbol, found_index->label,
&removed_expressions);
filter_exprs_for_removal_.insert(removed_expressions.begin(),
removed_expressions.end());
if (prop_filter.lower_bound_ || prop_filter.upper_bound_) {
return std::make_unique<ScanAllByLabelPropertyRange>(
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
prop_filter.lower_bound_, prop_filter.upper_bound_, graph_view);
} else {
CHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";
return std::make_unique<ScanAllByLabelPropertyValue>(
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
prop_filter.value_, graph_view);
}
}
auto label = FindBestLabelIndex(labels);
if (max_vertex_count &&
db_->VerticesCount(GetLabel(label)) > *max_vertex_count) {
// Don't create an indexed lookup, since we have more labeled vertices
// than the allowed count.
return nullptr;
}
std::vector<Expression *> removed_expressions;
filters_.EraseLabelFilter(node_symbol, label, &removed_expressions);
filter_exprs_for_removal_.insert(removed_expressions.begin(),
removed_expressions.end());
return std::make_unique<ScanAllByLabel>(input, node_symbol, GetLabel(label),
graph_view);
}
};
} // namespace impl
template <class TDbAccessor>
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(
std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table,
TDbAccessor *db) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, db);
root_op->Accept(rewriter);
if (rewriter.new_root_) {
// This shouldn't happen in real use case, because IndexLookupRewriter
// removes Filter operations and they cannot be the root op. In case we
// somehow missed this, raise NotYetImplemented instead of CHECK crashing
// the application.
throw utils::NotYetImplemented("optimizing index lookup");
}
return root_op;
}
} // namespace query::plan

View File

@ -8,14 +8,6 @@
#include "utils/algorithm.hpp"
#include "utils/exceptions.hpp"
#include "utils/flag_validation.hpp"
DEFINE_VALIDATED_HIDDEN_int64(
query_vertex_count_to_expand_existing, 10,
"Maximum count of indexed vertices which provoke "
"indexed lookup and then expand to existing, instead of "
"a regular expand. Default is 10, to turn off use -1.",
FLAG_IN_RANGE(-1, std::numeric_limits<std::int64_t>::max()));
namespace query::plan {

View File

@ -9,8 +9,6 @@
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
DECLARE_int64(query_vertex_count_to_expand_existing);
namespace query::plan {
/// @brief Context which contains variables commonly used during planning.
@ -222,24 +220,16 @@ class RuleBasedPlanner {
private:
TPlanningContext *context_;
struct LabelPropertyIndex {
LabelIx label;
// FilterInfo with PropertyFilter.
FilterInfo filter;
int64_t vertex_count;
};
storage::Label GetLabel(LabelIx label) {
return context_->db->Label(context_->ast_storage->labels_[label.ix]);
return context_->db->Label(label.name);
}
storage::Property GetProperty(PropertyIx prop) {
return context_->db->Property(context_->ast_storage->properties_[prop.ix]);
return context_->db->Property(prop.name);
}
storage::EdgeType GetEdgeType(EdgeTypeIx edge_type) {
return context_->db->EdgeType(
context_->ast_storage->edge_types_[edge_type.ix]);
return context_->db->EdgeType(edge_type.name);
}
std::unique_ptr<LogicalOperator> GenCreate(
@ -378,112 +368,6 @@ class RuleBasedPlanner {
return nullptr;
}
// Finds the label-property combination which has indexed the lowest amount of
// vertices. If the index cannot be found, nullopt is returned.
std::experimental::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(
const Symbol &symbol, const Filters &filters,
const std::unordered_set<Symbol> &bound_symbols) {
auto are_bound = [&bound_symbols](const auto &used_symbols) {
for (const auto &used_symbol : used_symbols) {
if (!utils::Contains(bound_symbols, used_symbol)) {
return false;
}
}
return true;
};
std::experimental::optional<LabelPropertyIndex> found;
for (const auto &label : filters.FilteredLabels(symbol)) {
for (const auto &filter : filters.PropertyFilters(symbol)) {
const auto &property = filter.property_filter->property_;
if (context_->db->LabelPropertyIndexExists(GetLabel(label),
GetProperty(property))) {
int64_t vertex_count = context_->db->VerticesCount(
GetLabel(label), GetProperty(property));
if (!found || vertex_count < found->vertex_count) {
if (filter.property_filter->is_symbol_in_value_) {
// Skip filter expressions which use the symbol whose property
// we are looking up. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
// so we cannot scan `n` by property index.
continue;
}
if (are_bound(filter.used_symbols)) {
// Take the property filter which uses bound symbols.
found = LabelPropertyIndex{label, filter, vertex_count};
}
}
}
}
}
return found;
}
LabelIx FindBestLabelIndex(const std::unordered_set<LabelIx> &labels) {
CHECK(!labels.empty())
<< "Trying to find the best label without any labels.";
return *std::min_element(
labels.begin(), labels.end(),
[this](const auto &label1, const auto &label2) {
return context_->db->VerticesCount(GetLabel(label1)) <
context_->db->VerticesCount(GetLabel(label2));
});
}
// Creates a ScanAll by the best possible index for the `node_symbol`. Best
// index is defined as the index with least number of vertices. If the node
// does not have at least a label, no indexed lookup can be created and
// `nullptr` is returned. The operator is chained after `last_op`. Optional
// `max_vertex_count` controls, whether no operator should be created if the
// vertex count in the best index exceeds this number. In such a case,
// `nullptr` is returned and `last_op` is not chained.
std::unique_ptr<ScanAll> GenScanByIndex(
std::unique_ptr<LogicalOperator> &last_op, const Symbol &node_symbol,
const MatchContext &match_ctx, Filters &filters,
const std::experimental::optional<int64_t> &max_vertex_count =
std::experimental::nullopt) {
const auto labels = filters.FilteredLabels(node_symbol);
if (labels.empty()) {
// Without labels, we cannot generated any indexed ScanAll.
return nullptr;
}
// First, try to see if we can use label+property index. If not, use just
// the label index (which ought to exist).
auto found_index = FindBestLabelPropertyIndex(node_symbol, filters,
match_ctx.bound_symbols);
if (found_index &&
// Use label+property index if we satisfy max_vertex_count.
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
// Copy the property filter and then erase it from filters.
const auto prop_filter = *found_index->filter.property_filter;
filters.EraseFilter(found_index->filter);
filters.EraseLabelFilter(node_symbol, found_index->label);
if (prop_filter.lower_bound_ || prop_filter.upper_bound_) {
return std::make_unique<ScanAllByLabelPropertyRange>(
std::move(last_op), node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
prop_filter.lower_bound_, prop_filter.upper_bound_,
match_ctx.graph_view);
} else {
DCHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";
return std::make_unique<ScanAllByLabelPropertyValue>(
std::move(last_op), node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
prop_filter.value_, match_ctx.graph_view);
}
}
auto label = FindBestLabelIndex(labels);
if (max_vertex_count &&
context_->db->VerticesCount(GetLabel(label)) > *max_vertex_count) {
// Don't create an indexed lookup, since we have more labeled vertices
// than the allowed count.
return nullptr;
}
filters.EraseLabelFilter(node_symbol, label);
return std::make_unique<ScanAllByLabel>(
std::move(last_op), node_symbol, GetLabel(label), match_ctx.graph_view);
}
std::unique_ptr<LogicalOperator> PlanMatching(
MatchContext &match_context, std::unique_ptr<LogicalOperator> input_op) {
auto &bound_symbols = match_context.bound_symbols;
@ -503,16 +387,8 @@ class RuleBasedPlanner {
const auto &node1_symbol = symbol_table.at(*expansion.node1->identifier_);
if (bound_symbols.insert(node1_symbol).second) {
// We have just bound this symbol, so generate ScanAll which fills it.
if (auto indexed_scan =
GenScanByIndex(last_op, node1_symbol, match_context, filters)) {
// First, try to get an indexed scan.
last_op = std::move(indexed_scan);
} else {
// If indexed scan is not possible, we can only generate ScanAll of
// everything.
last_op = std::make_unique<ScanAll>(std::move(last_op), node1_symbol,
match_context.graph_view);
}
last_op = std::make_unique<ScanAll>(std::move(last_op), node1_symbol,
match_context.graph_view);
match_context.new_symbols.emplace_back(node1_symbol);
last_op = impl::GenFilters(std::move(last_op), bound_symbols, filters,
storage);
@ -602,22 +478,6 @@ class RuleBasedPlanner {
expansion.is_flipped, edge->lower_bound_, edge->upper_bound_,
existing_node, filter_lambda, weight_lambda, total_weight);
} else {
if (!existing_node) {
// Try to get better behaviour by creating an indexed scan and then
// expanding into existing, instead of letting the Expand iterate
// over all the edges.
// Currently, just use the maximum vertex count flag, below which we
// want to replace Expand with index ScanAll + Expand into existing.
// It would be better to somehow test whether the input vertex
// degree is larger than the destination vertex index count.
auto indexed_scan =
GenScanByIndex(last_op, node_symbol, match_context, filters,
FLAGS_query_vertex_count_to_expand_existing);
if (indexed_scan) {
last_op = std::move(indexed_scan);
existing_node = true;
}
}
last_op = std::make_unique<Expand>(
std::move(last_op), node1_symbol, node_symbol, edge_symbol,
expansion.direction, edge_types, existing_node,

View File

@ -15,7 +15,7 @@ DEFCOMMAND(ShowDistributed) {
std::stringstream ss(args[0]);
ss >> plan_ix;
if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return;
const auto &plan = plans[plan_ix].first;
const auto &plan = plans[plan_ix].final_plan;
std::atomic<int64_t> plan_id{0};
std::vector<storage::Property> properties_by_ix =
query::NamesToProperties(ast_storage.properties_, &dba);

View File

@ -365,10 +365,9 @@ DEFCOMMAND(Top) {
if (ss.fail() || !ss.eof()) return;
n_plans = std::min(static_cast<int64_t>(plans.size()), n_plans);
for (int64_t i = 0; i < n_plans; ++i) {
auto &plan_pair = plans[i];
std::cout << "---- Plan #" << i << " ---- " << std::endl;
std::cout << "cost: " << plan_pair.second << std::endl;
query::plan::PrettyPrint(dba, plan_pair.first.get());
std::cout << "cost: " << plans[i].cost << std::endl;
query::plan::PrettyPrint(dba, plans[i].final_plan.get());
std::cout << std::endl;
}
}
@ -378,17 +377,29 @@ DEFCOMMAND(Show) {
std::stringstream ss(args[0]);
ss >> plan_ix;
if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return;
const auto &plan = plans[plan_ix].first;
auto cost = plans[plan_ix].second;
const auto &plan = plans[plan_ix].final_plan;
auto cost = plans[plan_ix].cost;
std::cout << "Plan cost: " << cost << std::endl;
query::plan::PrettyPrint(dba, plan.get());
}
DEFCOMMAND(ShowUnoptimized) {
int64_t plan_ix = 0;
std::stringstream ss(args[0]);
ss >> plan_ix;
if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return;
const auto &plan = plans[plan_ix].unoptimized_plan;
query::plan::PrettyPrint(dba, plan.get());
}
DEFCOMMAND(Help);
std::map<std::string, Command> commands = {
{"top", {TopCommand, 1, "Show top N plans"}},
{"show", {ShowCommand, 1, "Show the Nth plan"}},
{"show-unoptimized",
{ShowUnoptimizedCommand, 1,
"Show the Nth plan in its original, unoptimized form"}},
{"help", {HelpCommand, 0, "Show available commands"}},
};
@ -407,11 +418,10 @@ DEFCOMMAND(Help) {
}
}
void ExaminePlans(
database::GraphDbAccessor &dba, const query::SymbolTable &symbol_table,
std::vector<std::pair<std::unique_ptr<query::plan::LogicalOperator>,
double>> &plans,
const query::AstStorage &ast) {
void ExaminePlans(database::GraphDbAccessor &dba,
const query::SymbolTable &symbol_table,
std::vector<InteractivePlan> &plans,
const query::AstStorage &ast) {
while (true) {
auto line = ReadLine("plan? ");
if (!line || *line == "quit") break;
@ -446,31 +456,36 @@ query::Query *MakeAst(const std::string &query, query::AstStorage *storage) {
return visitor.query();
}
// Returns a list of pairs (plan, estimated cost), sorted in the ascending
// order by cost.
// Returns a list of InteractivePlan instances, sorted in the ascending order by
// cost.
auto MakeLogicalPlans(query::CypherQuery *query, query::AstStorage &ast,
query::SymbolTable &symbol_table,
InteractiveDbAccessor *dba) {
auto query_parts = query::plan::CollectQueryParts(symbol_table, ast, query);
std::vector<std::pair<std::unique_ptr<query::plan::LogicalOperator>, double>>
plans_with_cost;
std::vector<InteractivePlan> interactive_plans;
auto ctx = query::plan::MakePlanningContext(&ast, &symbol_table, query, dba);
if (query_parts.query_parts.size() <= 0) {
std::cerr << "Failed to extract query parts" << std::endl;
std::exit(EXIT_FAILURE);
}
query::Parameters parameters;
query::plan::PostProcessor post_process(parameters);
auto plans = query::plan::MakeLogicalPlanForSingleQuery<
query::plan::VariableStartPlanner>(
query_parts.query_parts.at(0).single_query_parts, &ctx);
query::Parameters parameters;
for (auto plan : plans) {
auto cost = query::plan::EstimatePlanCost(dba, parameters, *plan);
plans_with_cost.emplace_back(std::move(plan), cost);
query::AstStorage ast_copy;
auto unoptimized_plan = plan->Clone(&ast_copy);
auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx);
double cost = post_process.EstimatePlanCost(rewritten_plan, dba);
interactive_plans.push_back(
InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy),
std::move(rewritten_plan), cost});
}
std::stable_sort(
plans_with_cost.begin(), plans_with_cost.end(),
[](const auto &a, const auto &b) { return a.second < b.second; });
return plans_with_cost;
interactive_plans.begin(), interactive_plans.end(),
[](const auto &a, const auto &b) { return a.cost < b.cost; });
return interactive_plans;
}
void RunInteractivePlanning(database::GraphDbAccessor *dba) {

View File

@ -13,10 +13,18 @@ namespace database {
class GraphDbAccessor;
}
// Shorthand for a vector of pairs (logical_plan, cost).
typedef std::vector<
std::pair<std::unique_ptr<query::plan::LogicalOperator>, double>>
PlansWithCost;
struct InteractivePlan {
// Original plan after going only through the RuleBasedPlanner.
std::unique_ptr<query::plan::LogicalOperator> unoptimized_plan;
// Storage for the AST used in unoptimized_plan
query::AstStorage ast_storage;
// Final plan after being rewritten and optimized.
std::unique_ptr<query::plan::LogicalOperator> final_plan;
// Cost of the final plan.
double cost;
};
typedef std::vector<InteractivePlan> PlansWithCost;
// Encapsulates a consoles command function.
struct Command {

View File

@ -594,8 +594,12 @@ class CapnpPlanner {
PlanningContext<TDbAccessor> context) {
::capnp::MallocMessageBuilder message;
{
query::Parameters parameters;
PostProcessor post_processor(parameters);
auto original_plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(
single_query_parts, &context);
original_plan =
post_processor.Rewrite(std::move(original_plan), &context);
SavePlan(*original_plan, &message);
}
{
@ -710,7 +714,11 @@ class Planner {
Planner(std::vector<SingleQueryPart> single_query_parts,
PlanningContext<TDbAccessor> context)
: plan_(MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(
single_query_parts, &context)) {}
single_query_parts, &context)) {
query::Parameters parameters;
PostProcessor post_processor(parameters);
plan_ = post_processor.Rewrite(std::move(plan_), &context);
}
auto &plan() { return *plan_; }

View File

@ -44,8 +44,11 @@ class Planner {
template <class TDbAccessor>
Planner(std::vector<SingleQueryPart> single_query_parts,
PlanningContext<TDbAccessor> context) {
query::Parameters parameters;
PostProcessor post_processor(parameters);
plan_ = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(single_query_parts,
&context);
plan_ = post_processor.Rewrite(std::move(plan_), &context);
}
auto &plan() { return *plan_; }