Fix index hints (#1606)
This commit is contained in:
parent
b3d0c2ccc2
commit
31f15b3651
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -14,6 +14,7 @@
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/parameters.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/plan/rewrite/index_lookup.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
#include "utils/algorithm.hpp"
|
||||
#include "utils/math.hpp"
|
||||
@ -46,6 +47,11 @@ struct CostEstimation {
|
||||
double cardinality;
|
||||
};
|
||||
|
||||
struct PlanCost {
|
||||
double cost;
|
||||
bool use_index_hints;
|
||||
};
|
||||
|
||||
/**
|
||||
* Query plan execution time cost estimator, for comparing and choosing optimal
|
||||
* execution plans.
|
||||
@ -109,11 +115,13 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
||||
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
||||
|
||||
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters)
|
||||
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()} {}
|
||||
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters,
|
||||
const IndexHints &index_hints)
|
||||
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()}, index_hints_(index_hints) {}
|
||||
|
||||
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope)
|
||||
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope} {}
|
||||
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope,
|
||||
const IndexHints &index_hints)
|
||||
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope}, index_hints_(index_hints) {}
|
||||
|
||||
bool PostVisit(ScanAll &) override {
|
||||
cardinality_ *= db_accessor_->VerticesCount();
|
||||
@ -129,7 +137,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
}
|
||||
|
||||
cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_);
|
||||
// ScanAll performs some work for every element that is produced
|
||||
if (index_hints_.HasLabelIndex(db_accessor_, scan_all_by_label.label_)) {
|
||||
use_index_hints_ = true;
|
||||
}
|
||||
|
||||
IncrementCost(CostParam::kScanAllByLabel);
|
||||
return true;
|
||||
}
|
||||
@ -154,6 +165,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
cardinality_ *= factor;
|
||||
|
||||
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
|
||||
use_index_hints_ = true;
|
||||
}
|
||||
|
||||
// ScanAll performs some work for every element that is produced
|
||||
IncrementCost(CostParam::MakeScanAllByLabelPropertyValue);
|
||||
return true;
|
||||
@ -184,6 +199,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
cardinality_ *= factor;
|
||||
|
||||
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
|
||||
use_index_hints_ = true;
|
||||
}
|
||||
|
||||
// ScanAll performs some work for every element that is produced
|
||||
IncrementCost(CostParam::MakeScanAllByLabelPropertyRange);
|
||||
return true;
|
||||
@ -197,6 +216,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
|
||||
cardinality_ *= factor;
|
||||
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
|
||||
use_index_hints_ = true;
|
||||
}
|
||||
|
||||
IncrementCost(CostParam::MakeScanAllByLabelProperty);
|
||||
return true;
|
||||
}
|
||||
@ -375,6 +398,7 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
auto cost() const { return cost_; }
|
||||
auto cardinality() const { return cardinality_; }
|
||||
auto use_index_hints() const { return use_index_hints_; }
|
||||
|
||||
private:
|
||||
// cost estimation that gets accumulated as the visitor
|
||||
@ -390,17 +414,19 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
const SymbolTable &table_;
|
||||
const Parameters ¶meters;
|
||||
std::vector<Scope> scopes_;
|
||||
IndexHints index_hints_;
|
||||
bool use_index_hints_{false};
|
||||
|
||||
void IncrementCost(double param) { cost_ += param * cardinality_; }
|
||||
|
||||
CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch) {
|
||||
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters);
|
||||
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, index_hints_);
|
||||
(*branch)->Accept(cost_estimator);
|
||||
return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()};
|
||||
}
|
||||
|
||||
CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch, Scope scope) {
|
||||
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope);
|
||||
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope, index_hints_);
|
||||
(*branch)->Accept(cost_estimator);
|
||||
return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()};
|
||||
}
|
||||
@ -450,11 +476,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
/** Returns the estimated cost of the given plan. */
|
||||
template <class TDbAccessor>
|
||||
double EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters,
|
||||
LogicalOperator &plan) {
|
||||
CostEstimator<TDbAccessor> estimator(db, table, parameters);
|
||||
PlanCost EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters,
|
||||
LogicalOperator &plan, const IndexHints &index_hints) {
|
||||
CostEstimator<TDbAccessor> estimator(db, table, parameters, index_hints);
|
||||
plan.Accept(estimator);
|
||||
return estimator.cost();
|
||||
return PlanCost{.cost = estimator.cost(), .use_index_hints = estimator.use_index_hints()};
|
||||
}
|
||||
|
||||
} // namespace memgraph::query::plan
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -59,9 +59,9 @@ class PostProcessor final {
|
||||
}
|
||||
|
||||
template <class TVertexCounts>
|
||||
double EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts,
|
||||
const SymbolTable &table) {
|
||||
return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan);
|
||||
PlanCost EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts,
|
||||
const SymbolTable &table) {
|
||||
return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan, index_hints_);
|
||||
}
|
||||
};
|
||||
|
||||
@ -99,6 +99,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
|
||||
auto query_parts = CollectQueryParts(*context->symbol_table, *context->ast_storage, context->query);
|
||||
auto &vertex_counts = *context->db;
|
||||
double total_cost = std::numeric_limits<double>::max();
|
||||
bool curr_uses_index_hint = false;
|
||||
|
||||
using ProcessedPlan = typename TPlanPostProcess::ProcessedPlan;
|
||||
ProcessedPlan plan_with_least_cost;
|
||||
@ -110,16 +111,28 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
|
||||
// Plans are generated lazily and the current plan will disappear, so
|
||||
// it's ok to move it.
|
||||
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
||||
double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
|
||||
if (!curr_plan || cost < total_cost) {
|
||||
auto plan_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
|
||||
// if we have a plan that uses index hints, we reject all the plans that don't use index hinting because we want
|
||||
// to force the plan using the index hints to be executed
|
||||
if (curr_uses_index_hint && !plan_cost.use_index_hints) continue;
|
||||
// if a plan uses index hints, and there is currently not yet a plan that utilizes it, we will take it regardless
|
||||
if (plan_cost.use_index_hints && !curr_uses_index_hint) {
|
||||
curr_uses_index_hint = plan_cost.use_index_hints;
|
||||
curr_plan.emplace(std::move(rewritten_plan));
|
||||
total_cost = cost;
|
||||
total_cost = plan_cost.cost;
|
||||
continue;
|
||||
}
|
||||
// if both plans either use or don't use index hints, we want to use the one with the least cost
|
||||
if (!curr_plan || plan_cost.cost < total_cost) {
|
||||
curr_uses_index_hint = plan_cost.use_index_hints;
|
||||
curr_plan.emplace(std::move(rewritten_plan));
|
||||
total_cost = plan_cost.cost;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(query_parts, context);
|
||||
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
||||
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
|
||||
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table).cost;
|
||||
curr_plan.emplace(std::move(rewritten_plan));
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/plan/preprocess.hpp"
|
||||
#include "storage/v2/id_types.hpp"
|
||||
|
||||
DECLARE_int64(query_vertex_count_to_expand_existing);
|
||||
|
||||
@ -59,6 +60,29 @@ struct IndexHints {
|
||||
}
|
||||
}
|
||||
|
||||
template <class TDbAccessor>
|
||||
bool HasLabelIndex(TDbAccessor *db, storage::LabelId label) const {
|
||||
for (const auto &[index_type, label_hint, _] : label_index_hints_) {
|
||||
auto label_id = db->NameToLabel(label_hint.name);
|
||||
if (label_id == label) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class TDbAccessor>
|
||||
bool HasLabelPropertyIndex(TDbAccessor *db, storage::LabelId label, storage::PropertyId property) const {
|
||||
for (const auto &[index_type, label_hint, property_hint] : label_property_index_hints_) {
|
||||
auto label_id = db->NameToLabel(label_hint.name);
|
||||
auto property_id = db->NameToProperty(property_hint->name);
|
||||
if (label_id == label && property_id == property) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<IndexHint> label_index_hints_{};
|
||||
std::vector<IndexHint> label_property_index_hints_{};
|
||||
};
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/rewrite/index_lookup.hpp"
|
||||
#include "query/plan/vertex_count_cache.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
|
||||
@ -136,7 +137,8 @@ static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
|
||||
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
||||
query_parts, &ctx);
|
||||
for (auto plan : plans) {
|
||||
memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan);
|
||||
memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan,
|
||||
memgraph::query::plan::IndexHints());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -166,7 +168,8 @@ static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State &
|
||||
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
||||
query_parts, &ctx);
|
||||
for (auto plan : plans) {
|
||||
memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan);
|
||||
memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan,
|
||||
memgraph::query::plan::IndexHints());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -478,5 +478,44 @@ def test_nonexistent_label_property_index(memgraph):
|
||||
assert False
|
||||
|
||||
|
||||
def test_index_hint_on_expand(memgraph):
|
||||
# Prefer expanding from the node with the given hint even if estimator estimates higher cost for that plan
|
||||
|
||||
memgraph.execute("FOREACH (i IN range(1, 1000) | CREATE (n:Label1 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_without_hint = [
|
||||
" * Produce {n, m}",
|
||||
" * Filter (n :Label1)",
|
||||
" * Expand (m)<-[anon1:rel]-(n)",
|
||||
" * ScanAllByLabel (m :Label2)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n, m}",
|
||||
" * Filter (m :Label2)",
|
||||
" * Expand (n)-[anon1:rel]->(m)",
|
||||
" * ScanAllByLabel (n :Label1)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_without_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;")
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label1 MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_without_hint == expected_explain_without_hint and explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -463,7 +463,7 @@ auto MakeLogicalPlans(memgraph::query::CypherQuery *query, memgraph::query::AstS
|
||||
memgraph::query::AstStorage ast_copy;
|
||||
auto unoptimized_plan = plan->Clone(&ast_copy);
|
||||
auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx);
|
||||
double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table);
|
||||
double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table).cost;
|
||||
interactive_plans.push_back(
|
||||
InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost});
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "query/frontend/semantic/symbol_table.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/plan/rewrite/index_lookup.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "storage/v2/storage.hpp"
|
||||
|
||||
@ -83,7 +84,8 @@ class QueryCostEstimator : public ::testing::Test {
|
||||
}
|
||||
|
||||
auto Cost() {
|
||||
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_);
|
||||
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_,
|
||||
memgraph::query::plan::IndexHints());
|
||||
last_op_->Accept(cost_estimator);
|
||||
return cost_estimator.cost();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user