Fix index hints (#1606)

This commit is contained in:
DavIvek 2024-01-11 10:10:06 +01:00 committed by GitHub
parent b3d0c2ccc2
commit 31f15b3651
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 132 additions and 25 deletions

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -14,6 +14,7 @@
#include "query/frontend/ast/ast.hpp"
#include "query/parameters.hpp"
#include "query/plan/operator.hpp"
#include "query/plan/rewrite/index_lookup.hpp"
#include "query/typed_value.hpp"
#include "utils/algorithm.hpp"
#include "utils/math.hpp"
@ -46,6 +47,11 @@ struct CostEstimation {
double cardinality;
};
struct PlanCost {
double cost;
bool use_index_hints;
};
/**
* Query plan execution time cost estimator, for comparing and choosing optimal
* execution plans.
@ -109,11 +115,13 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters)
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()} {}
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters,
const IndexHints &index_hints)
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()}, index_hints_(index_hints) {}
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters, Scope scope)
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope} {}
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters, Scope scope,
const IndexHints &index_hints)
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope}, index_hints_(index_hints) {}
bool PostVisit(ScanAll &) override {
cardinality_ *= db_accessor_->VerticesCount();
@ -129,7 +137,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
}
cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_);
// ScanAll performs some work for every element that is produced
if (index_hints_.HasLabelIndex(db_accessor_, scan_all_by_label.label_)) {
use_index_hints_ = true;
}
IncrementCost(CostParam::kScanAllByLabel);
return true;
}
@ -154,6 +165,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
cardinality_ *= factor;
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
use_index_hints_ = true;
}
// ScanAll performs some work for every element that is produced
IncrementCost(CostParam::MakeScanAllByLabelPropertyValue);
return true;
@ -184,6 +199,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
cardinality_ *= factor;
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
use_index_hints_ = true;
}
// ScanAll performs some work for every element that is produced
IncrementCost(CostParam::MakeScanAllByLabelPropertyRange);
return true;
@ -197,6 +216,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
cardinality_ *= factor;
if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) {
use_index_hints_ = true;
}
IncrementCost(CostParam::MakeScanAllByLabelProperty);
return true;
}
@ -375,6 +398,7 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
auto cost() const { return cost_; }
auto cardinality() const { return cardinality_; }
auto use_index_hints() const { return use_index_hints_; }
private:
// cost estimation that gets accumulated as the visitor
@ -390,17 +414,19 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
const SymbolTable &table_;
const Parameters &parameters;
std::vector<Scope> scopes_;
IndexHints index_hints_;
bool use_index_hints_{false};
void IncrementCost(double param) { cost_ += param * cardinality_; }
CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch) {
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters);
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, index_hints_);
(*branch)->Accept(cost_estimator);
return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()};
}
CostEstimation EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch, Scope scope) {
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope);
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope, index_hints_);
(*branch)->Accept(cost_estimator);
return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()};
}
@ -450,11 +476,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
/** Returns the estimated cost of the given plan. */
template <class TDbAccessor>
double EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters &parameters,
LogicalOperator &plan) {
CostEstimator<TDbAccessor> estimator(db, table, parameters);
PlanCost EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters &parameters,
LogicalOperator &plan, const IndexHints &index_hints) {
CostEstimator<TDbAccessor> estimator(db, table, parameters, index_hints);
plan.Accept(estimator);
return estimator.cost();
return PlanCost{.cost = estimator.cost(), .use_index_hints = estimator.use_index_hints()};
}
} // namespace memgraph::query::plan

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -59,9 +59,9 @@ class PostProcessor final {
}
template <class TVertexCounts>
double EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts,
const SymbolTable &table) {
return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan);
PlanCost EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts,
const SymbolTable &table) {
return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan, index_hints_);
}
};
@ -99,6 +99,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
auto query_parts = CollectQueryParts(*context->symbol_table, *context->ast_storage, context->query);
auto &vertex_counts = *context->db;
double total_cost = std::numeric_limits<double>::max();
bool curr_uses_index_hint = false;
using ProcessedPlan = typename TPlanPostProcess::ProcessedPlan;
ProcessedPlan plan_with_least_cost;
@ -110,16 +111,28 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
// Plans are generated lazily and the current plan will disappear, so
// it's ok to move it.
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
if (!curr_plan || cost < total_cost) {
auto plan_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
// if we have a plan that uses index hints, we reject all the plans that don't use index hinting because we want
// to force the plan using the index hints to be executed
if (curr_uses_index_hint && !plan_cost.use_index_hints) continue;
// if a plan uses index hints, and there is currently not yet a plan that utilizes it, we will take it regardless
if (plan_cost.use_index_hints && !curr_uses_index_hint) {
curr_uses_index_hint = plan_cost.use_index_hints;
curr_plan.emplace(std::move(rewritten_plan));
total_cost = cost;
total_cost = plan_cost.cost;
continue;
}
// if both plans either use or don't use index hints, we want to use the one with the least cost
if (!curr_plan || plan_cost.cost < total_cost) {
curr_uses_index_hint = plan_cost.use_index_hints;
curr_plan.emplace(std::move(rewritten_plan));
total_cost = plan_cost.cost;
}
}
} else {
auto plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(query_parts, context);
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table).cost;
curr_plan.emplace(std::move(rewritten_plan));
}

View File

@ -28,6 +28,7 @@
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
#include "storage/v2/id_types.hpp"
DECLARE_int64(query_vertex_count_to_expand_existing);
@ -59,6 +60,29 @@ struct IndexHints {
}
}
template <class TDbAccessor>
bool HasLabelIndex(TDbAccessor *db, storage::LabelId label) const {
for (const auto &[index_type, label_hint, _] : label_index_hints_) {
auto label_id = db->NameToLabel(label_hint.name);
if (label_id == label) {
return true;
}
}
return false;
}
template <class TDbAccessor>
bool HasLabelPropertyIndex(TDbAccessor *db, storage::LabelId label, storage::PropertyId property) const {
for (const auto &[index_type, label_hint, property_hint] : label_property_index_hints_) {
auto label_id = db->NameToLabel(label_hint.name);
auto property_id = db->NameToProperty(property_hint->name);
if (label_id == label && property_id == property) {
return true;
}
}
return false;
}
std::vector<IndexHint> label_index_hints_{};
std::vector<IndexHint> label_property_index_hints_{};
};

View File

@ -16,6 +16,7 @@
#include "query/frontend/semantic/symbol_generator.hpp"
#include "query/plan/cost_estimator.hpp"
#include "query/plan/planner.hpp"
#include "query/plan/rewrite/index_lookup.hpp"
#include "query/plan/vertex_count_cache.hpp"
#include "storage/v2/inmemory/storage.hpp"
@ -136,7 +137,8 @@ static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
query_parts, &ctx);
for (auto plan : plans) {
memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan);
memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan,
memgraph::query::plan::IndexHints());
}
}
}
@ -166,7 +168,8 @@ static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State &
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
query_parts, &ctx);
for (auto plan : plans) {
memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan);
memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan,
memgraph::query::plan::IndexHints());
}
}
}

View File

@ -478,5 +478,44 @@ def test_nonexistent_label_property_index(memgraph):
assert False
def test_index_hint_on_expand(memgraph):
# Prefer expanding from the node with the given hint even if estimator estimates higher cost for that plan
memgraph.execute("FOREACH (i IN range(1, 1000) | CREATE (n:Label1 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i}));")
memgraph.execute("CREATE INDEX ON :Label1;")
memgraph.execute("CREATE INDEX ON :Label2;")
expected_explain_without_hint = [
" * Produce {n, m}",
" * Filter (n :Label1)",
" * Expand (m)<-[anon1:rel]-(n)",
" * ScanAllByLabel (m :Label2)",
" * Once",
]
expected_explain_with_hint = [
" * Produce {n, m}",
" * Filter (m :Label2)",
" * Expand (n)-[anon1:rel]->(m)",
" * ScanAllByLabel (n :Label1)",
" * Once",
]
explain_without_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;")
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label1 MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;"
)
]
assert explain_without_hint == expected_explain_without_hint and explain_with_hint == expected_explain_with_hint
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -463,7 +463,7 @@ auto MakeLogicalPlans(memgraph::query::CypherQuery *query, memgraph::query::AstS
memgraph::query::AstStorage ast_copy;
auto unoptimized_plan = plan->Clone(&ast_copy);
auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx);
double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table);
double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table).cost;
interactive_plans.push_back(
InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost});
}

View File

@ -17,6 +17,7 @@
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/cost_estimator.hpp"
#include "query/plan/operator.hpp"
#include "query/plan/rewrite/index_lookup.hpp"
#include "storage/v2/inmemory/storage.hpp"
#include "storage/v2/storage.hpp"
@ -83,7 +84,8 @@ class QueryCostEstimator : public ::testing::Test {
}
auto Cost() {
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_);
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_,
memgraph::query::plan::IndexHints());
last_op_->Accept(cost_estimator);
return cost_estimator.cost();
}