269 lines
10 KiB
C++
269 lines
10 KiB
C++
// Copyright 2022 Memgraph Ltd.
|
|
//
|
|
// Use of this software is governed by the Business Source License
|
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
|
// License, and you may not use this file except in compliance with the Business Source License.
|
|
//
|
|
// As of the Change Date specified in that file, in accordance with
|
|
// the Business Source License, use of this software will be governed
|
|
// by the Apache License, Version 2.0, included in the file
|
|
// licenses/APL.txt.
|
|
|
|
#pragma once
|
|
|
|
#include "query/v2/bindings/typed_value.hpp"
|
|
#include "query/v2/frontend/ast/ast.hpp"
|
|
#include "query/v2/parameters.hpp"
|
|
#include "query/v2/plan/operator.hpp"
|
|
#include "storage/v3/conversions.hpp"
|
|
|
|
namespace memgraph::query::v2::plan {
|
|
|
|
/**
|
|
* Query plan execution time cost estimator, for comparing and choosing optimal
|
|
* execution plans.
|
|
*
|
|
* In Cypher the write part of the query always executes in the same
|
|
* cardinality. It is not allowed to execute a write operation before all the
|
|
* expansion for that query part (WITH splits a query into parts) have executed.
|
|
* For that reason cost estimation comes down to cardinality estimation for the
|
|
* read parts of the query, and their expansion. We want to compare different
|
|
* plans and try to figure out which has the optimal organization of scans,
|
|
* expansions and filters.
|
|
*
|
|
* Note that expansions and filtering can also happen during Merge, which is a
|
|
* write operation. We let that get evaluated like all other cardinality
|
|
* influencing ops. Also, Merge cardinality modification should be contained (it
|
|
* can never reduce it's input cardinality), but since Merge always happens
|
|
* after the read part, and can't be reoredered, we can ignore that.
|
|
*
|
|
* Limiting and accumulating (Aggregate, OrderBy, Accumulate) operations are
|
|
* cardinality modifiers that always execute at the end of the query part. Their
|
|
* cardinality influence is irrelevant because they execute the same
|
|
* for all plans for a single query part, and query part reordering is not
|
|
* allowed.
|
|
*
|
|
* This kind of cost estimation can only be used for comparing logical plans.
|
|
* It's aim is to estimate cost(A) to be less then cost(B) in every case where
|
|
* actual query execution for plan A is less then that of plan B. It can NOT be
|
|
* used to estimate how MUCH execution between A and B will differ.
|
|
*/
|
|
template <class TDbAccessor>
|
|
class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|
public:
|
|
struct CostParam {
|
|
static constexpr double kScanAll{1.0};
|
|
static constexpr double kScanAllByLabel{1.1};
|
|
static constexpr double MakeScanAllByLabelPropertyValue{1.1};
|
|
static constexpr double MakeScanAllByLabelPropertyRange{1.1};
|
|
static constexpr double MakeScanAllByLabelProperty{1.1};
|
|
static constexpr double kExpand{2.0};
|
|
static constexpr double kExpandVariable{3.0};
|
|
static constexpr double kFilter{1.5};
|
|
static constexpr double kEdgeUniquenessFilter{1.5};
|
|
static constexpr double kUnwind{1.3};
|
|
static constexpr double kForeach{1.0};
|
|
};
|
|
|
|
struct CardParam {
|
|
static constexpr double kExpand{3.0};
|
|
static constexpr double kExpandVariable{9.0};
|
|
static constexpr double kFilter{0.25};
|
|
static constexpr double kEdgeUniquenessFilter{0.95};
|
|
};
|
|
|
|
struct MiscParam {
|
|
static constexpr double kUnwindNoLiteral{10.0};
|
|
static constexpr double kForeachNoLiteral{10.0};
|
|
};
|
|
|
|
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
|
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
|
|
|
CostEstimator(TDbAccessor *db_accessor, const Parameters ¶meters)
|
|
: db_accessor_(db_accessor), parameters(parameters) {}
|
|
|
|
bool PostVisit(ScanAll &) override {
|
|
cardinality_ *= db_accessor_->VerticesCount();
|
|
// ScanAll performs some work for every element that is produced
|
|
IncrementCost(CostParam::kScanAll);
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(ScanAllByLabel &scan_all_by_label) override {
|
|
cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_);
|
|
// ScanAll performs some work for every element that is produced
|
|
IncrementCost(CostParam::kScanAllByLabel);
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(ScanAllByLabelPropertyValue &logical_op) override {
|
|
// This cardinality estimation depends on the property value (expression).
|
|
// If it's a constant, we can evaluate cardinality exactly, otherwise
|
|
// we estimate
|
|
auto property_value = ConstPropertyValue(logical_op.expression_);
|
|
double factor = 1.0;
|
|
if (property_value)
|
|
// get the exact influence based on ScanAll(label, property, value)
|
|
factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_, property_value.value());
|
|
else
|
|
// estimate the influence as ScanAll(label, property) * filtering
|
|
factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_) * CardParam::kFilter;
|
|
|
|
cardinality_ *= factor;
|
|
|
|
// ScanAll performs some work for every element that is produced
|
|
IncrementCost(CostParam::MakeScanAllByLabelPropertyValue);
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(ScanAllByLabelPropertyRange &logical_op) override {
|
|
// this cardinality estimation depends on Bound expressions.
|
|
// if they are literals we can evaluate cardinality properly
|
|
auto lower = BoundToPropertyValue(logical_op.lower_bound_);
|
|
auto upper = BoundToPropertyValue(logical_op.upper_bound_);
|
|
|
|
int64_t factor = 1;
|
|
if (upper || lower)
|
|
// if we have either Bound<PropertyValue>, use the value index
|
|
factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_, lower, upper);
|
|
else
|
|
// no values, but we still have the label
|
|
factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
|
|
|
|
// if we failed to take either bound from the op into account, then apply
|
|
// the filtering constant to the factor
|
|
if ((logical_op.upper_bound_ && !upper) || (logical_op.lower_bound_ && !lower)) factor *= CardParam::kFilter;
|
|
|
|
cardinality_ *= factor;
|
|
|
|
// ScanAll performs some work for every element that is produced
|
|
IncrementCost(CostParam::MakeScanAllByLabelPropertyRange);
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(ScanAllByLabelProperty &logical_op) override {
|
|
const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
|
|
cardinality_ *= factor;
|
|
IncrementCost(CostParam::MakeScanAllByLabelProperty);
|
|
return true;
|
|
}
|
|
|
|
// TODO: Cost estimate ScanAllById?
|
|
|
|
// For the given op first increments the cardinality and then cost.
|
|
#define POST_VISIT_CARD_FIRST(NAME) \
|
|
bool PostVisit(NAME &) override { \
|
|
cardinality_ *= CardParam::k##NAME; \
|
|
IncrementCost(CostParam::k##NAME); \
|
|
return true; \
|
|
}
|
|
|
|
POST_VISIT_CARD_FIRST(Expand);
|
|
POST_VISIT_CARD_FIRST(ExpandVariable);
|
|
|
|
#undef POST_VISIT_CARD_FIRST
|
|
|
|
// For the given op first increments the cost and then cardinality.
|
|
#define POST_VISIT_COST_FIRST(LOGICAL_OP, PARAM_NAME) \
|
|
bool PostVisit(LOGICAL_OP &) override { \
|
|
IncrementCost(CostParam::PARAM_NAME); \
|
|
cardinality_ *= CardParam::PARAM_NAME; \
|
|
return true; \
|
|
}
|
|
|
|
POST_VISIT_COST_FIRST(Filter, kFilter)
|
|
POST_VISIT_COST_FIRST(EdgeUniquenessFilter, kEdgeUniquenessFilter);
|
|
|
|
#undef POST_VISIT_COST_FIRST
|
|
|
|
bool PostVisit(Unwind &unwind) override {
|
|
// Unwind cost depends more on the number of lists that get unwound
|
|
// much less on the number of outputs
|
|
// for that reason first increment cost, then modify cardinality
|
|
IncrementCost(CostParam::kUnwind);
|
|
|
|
// try to determine how many values will be yielded by Unwind
|
|
// if the Unwind expression is a list literal, we can deduce cardinality
|
|
// exactly, otherwise we approximate
|
|
double unwind_value;
|
|
if (auto *literal = utils::Downcast<query::v2::ListLiteral>(unwind.input_expression_))
|
|
unwind_value = literal->elements_.size();
|
|
else
|
|
unwind_value = MiscParam::kUnwindNoLiteral;
|
|
|
|
cardinality_ *= unwind_value;
|
|
return true;
|
|
}
|
|
|
|
bool PostVisit(Foreach &foreach) override {
|
|
// Foreach cost depends both on the number elements in the list that get unwound
|
|
// as well as the total clauses that get called for each unwounded element.
|
|
// First estimate cardinality and then increment the cost.
|
|
|
|
double foreach_elements{0};
|
|
if (auto *literal = utils::Downcast<query::v2::ListLiteral>(foreach.expression_)) {
|
|
foreach_elements = literal->elements_.size();
|
|
} else {
|
|
foreach_elements = MiscParam::kForeachNoLiteral;
|
|
}
|
|
|
|
cardinality_ *= foreach_elements;
|
|
IncrementCost(CostParam::kForeach);
|
|
return true;
|
|
}
|
|
|
|
bool Visit(Once &) override { return true; }
|
|
|
|
auto cost() const { return cost_; }
|
|
auto cardinality() const { return cardinality_; }
|
|
|
|
private:
|
|
// cost estimation that gets accumulated as the visitor
|
|
// tours the logical plan
|
|
double cost_{0};
|
|
|
|
// cardinality estimation (how many times an operator gets executed)
|
|
// cardinality is a double to make it easier to work with
|
|
double cardinality_{1};
|
|
|
|
// accessor used for cardinality estimates in ScanAll and ScanAllByLabel
|
|
TDbAccessor *db_accessor_;
|
|
const Parameters ¶meters;
|
|
|
|
void IncrementCost(double param) { cost_ += param * cardinality_; }
|
|
|
|
// converts an optional ScanAll range bound into a property value
|
|
// if the bound is present and is a constant expression convertible to
|
|
// a property value. otherwise returns nullopt
|
|
std::optional<utils::Bound<storage::v3::PropertyValue>> BoundToPropertyValue(
|
|
std::optional<ScanAllByLabelPropertyRange::Bound> bound) {
|
|
if (bound) {
|
|
auto property_value = ConstPropertyValue(bound->value());
|
|
if (property_value) return utils::Bound<storage::v3::PropertyValue>(*property_value, bound->type());
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
// If the expression is a constant property value, it is returned. Otherwise,
|
|
// return nullopt.
|
|
std::optional<storage::v3::PropertyValue> ConstPropertyValue(const Expression *expression) {
|
|
if (auto *literal = utils::Downcast<const PrimitiveLiteral>(expression)) {
|
|
return storage::v3::TypedToPropertyValue(literal->value_);
|
|
} else if (auto *param_lookup = utils::Downcast<const ParameterLookup>(expression)) {
|
|
return parameters.AtTokenPosition(param_lookup->token_position_);
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
};
|
|
|
|
/** Returns the estimated cost of the given plan. */
|
|
template <class TDbAccessor>
|
|
double EstimatePlanCost(TDbAccessor *db, const Parameters ¶meters, LogicalOperator &plan) {
|
|
CostEstimator<TDbAccessor> estimator(db, parameters);
|
|
plan.Accept(estimator);
|
|
return estimator.cost();
|
|
}
|
|
|
|
} // namespace memgraph::query::v2::plan
|