From 31f15b36514e81d0bf0386268707bfe62284b2da Mon Sep 17 00:00:00 2001 From: DavIvek Date: Thu, 11 Jan 2024 10:10:06 +0100 Subject: [PATCH] Fix index hints (#1606) --- src/query/plan/cost_estimator.hpp | 50 +++++++++++++++++++------ src/query/plan/planner.hpp | 29 ++++++++++---- src/query/plan/rewrite/index_lookup.hpp | 24 ++++++++++++ tests/benchmark/query/planner.cpp | 7 +++- tests/e2e/index_hints/index_hints.py | 39 +++++++++++++++++++ tests/manual/interactive_planning.cpp | 4 +- tests/unit/query_cost_estimator.cpp | 4 +- 7 files changed, 132 insertions(+), 25 deletions(-) diff --git a/src/query/plan/cost_estimator.hpp b/src/query/plan/cost_estimator.hpp index 47da0a23b..ede4a89fc 100644 --- a/src/query/plan/cost_estimator.hpp +++ b/src/query/plan/cost_estimator.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -14,6 +14,7 @@ #include "query/frontend/ast/ast.hpp" #include "query/parameters.hpp" #include "query/plan/operator.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "query/typed_value.hpp" #include "utils/algorithm.hpp" #include "utils/math.hpp" @@ -46,6 +47,11 @@ struct CostEstimation { double cardinality; }; +struct PlanCost { + double cost; + bool use_index_hints; +}; + /** * Query plan execution time cost estimator, for comparing and choosing optimal * execution plans. @@ -109,11 +115,13 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { using HierarchicalLogicalOperatorVisitor::PostVisit; using HierarchicalLogicalOperatorVisitor::PreVisit; - CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters) - : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()} {} + CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, + const IndexHints &index_hints) + : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()}, index_hints_(index_hints) {} - CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope) - : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope} {} + CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters ¶meters, Scope scope, + const IndexHints &index_hints) + : db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope}, index_hints_(index_hints) {} bool PostVisit(ScanAll &) override { cardinality_ *= db_accessor_->VerticesCount(); @@ -129,7 +137,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { } cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_); - // ScanAll performs some work for every element that is produced + if (index_hints_.HasLabelIndex(db_accessor_, scan_all_by_label.label_)) { + use_index_hints_ = true; + } + IncrementCost(CostParam::kScanAllByLabel); return true; } @@ -154,6 +165,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + // ScanAll performs some work for every element that is produced IncrementCost(CostParam::MakeScanAllByLabelPropertyValue); return true; @@ -184,6 +199,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + // ScanAll performs some work for every element that is produced IncrementCost(CostParam::MakeScanAllByLabelPropertyRange); return true; @@ -197,6 +216,10 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_); cardinality_ *= factor; + if (index_hints_.HasLabelPropertyIndex(db_accessor_, logical_op.label_, logical_op.property_)) { + use_index_hints_ = true; + } + IncrementCost(CostParam::MakeScanAllByLabelProperty); return true; } @@ -375,6 +398,7 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { auto cost() const { return cost_; } auto cardinality() const { return cardinality_; } + auto use_index_hints() const { return use_index_hints_; } private: // cost estimation that gets accumulated as the visitor @@ -390,17 +414,19 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { const SymbolTable &table_; const Parameters ¶meters; std::vector scopes_; + IndexHints index_hints_; + bool use_index_hints_{false}; void IncrementCost(double param) { cost_ += param * cardinality_; } CostEstimation EstimateCostOnBranch(std::shared_ptr *branch) { - CostEstimator cost_estimator(db_accessor_, table_, parameters); + CostEstimator cost_estimator(db_accessor_, table_, parameters, index_hints_); (*branch)->Accept(cost_estimator); return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()}; } CostEstimation EstimateCostOnBranch(std::shared_ptr *branch, Scope scope) { - CostEstimator cost_estimator(db_accessor_, table_, parameters, scope); + CostEstimator cost_estimator(db_accessor_, table_, parameters, scope, index_hints_); (*branch)->Accept(cost_estimator); return CostEstimation{.cost = cost_estimator.cost(), .cardinality = cost_estimator.cardinality()}; } @@ -450,11 +476,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor { /** Returns the estimated cost of the given plan. */ template -double EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters, - LogicalOperator &plan) { - CostEstimator estimator(db, table, parameters); +PlanCost EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters, + LogicalOperator &plan, const IndexHints &index_hints) { + CostEstimator estimator(db, table, parameters, index_hints); plan.Accept(estimator); - return estimator.cost(); + return PlanCost{.cost = estimator.cost(), .use_index_hints = estimator.use_index_hints()}; } } // namespace memgraph::query::plan diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index 10318e6b9..e8ca80e39 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -59,9 +59,9 @@ class PostProcessor final { } template - double EstimatePlanCost(const std::unique_ptr &plan, TVertexCounts *vertex_counts, - const SymbolTable &table) { - return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan); + PlanCost EstimatePlanCost(const std::unique_ptr &plan, TVertexCounts *vertex_counts, + const SymbolTable &table) { + return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan, index_hints_); } }; @@ -99,6 +99,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process, auto query_parts = CollectQueryParts(*context->symbol_table, *context->ast_storage, context->query); auto &vertex_counts = *context->db; double total_cost = std::numeric_limits::max(); + bool curr_uses_index_hint = false; using ProcessedPlan = typename TPlanPostProcess::ProcessedPlan; ProcessedPlan plan_with_least_cost; @@ -110,16 +111,28 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process, // Plans are generated lazily and the current plan will disappear, so // it's ok to move it. auto rewritten_plan = post_process->Rewrite(std::move(plan), context); - double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); - if (!curr_plan || cost < total_cost) { + auto plan_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); + // if we have a plan that uses index hints, we reject all the plans that don't use index hinting because we want + // to force the plan using the index hints to be executed + if (curr_uses_index_hint && !plan_cost.use_index_hints) continue; + // if a plan uses index hints, and there is currently not yet a plan that utilizes it, we will take it regardless + if (plan_cost.use_index_hints && !curr_uses_index_hint) { + curr_uses_index_hint = plan_cost.use_index_hints; curr_plan.emplace(std::move(rewritten_plan)); - total_cost = cost; + total_cost = plan_cost.cost; + continue; + } + // if both plans either use or don't use index hints, we want to use the one with the least cost + if (!curr_plan || plan_cost.cost < total_cost) { + curr_uses_index_hint = plan_cost.use_index_hints; + curr_plan.emplace(std::move(rewritten_plan)); + total_cost = plan_cost.cost; } } } else { auto plan = MakeLogicalPlanForSingleQuery(query_parts, context); auto rewritten_plan = post_process->Rewrite(std::move(plan), context); - total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table); + total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table).cost; curr_plan.emplace(std::move(rewritten_plan)); } diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 590bad5f4..407c32ba0 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -28,6 +28,7 @@ #include "query/plan/operator.hpp" #include "query/plan/preprocess.hpp" +#include "storage/v2/id_types.hpp" DECLARE_int64(query_vertex_count_to_expand_existing); @@ -59,6 +60,29 @@ struct IndexHints { } } + template + bool HasLabelIndex(TDbAccessor *db, storage::LabelId label) const { + for (const auto &[index_type, label_hint, _] : label_index_hints_) { + auto label_id = db->NameToLabel(label_hint.name); + if (label_id == label) { + return true; + } + } + return false; + } + + template + bool HasLabelPropertyIndex(TDbAccessor *db, storage::LabelId label, storage::PropertyId property) const { + for (const auto &[index_type, label_hint, property_hint] : label_property_index_hints_) { + auto label_id = db->NameToLabel(label_hint.name); + auto property_id = db->NameToProperty(property_hint->name); + if (label_id == label && property_id == property) { + return true; + } + } + return false; + } + std::vector label_index_hints_{}; std::vector label_property_index_hints_{}; }; diff --git a/tests/benchmark/query/planner.cpp b/tests/benchmark/query/planner.cpp index 52cb44490..b64c4c39f 100644 --- a/tests/benchmark/query/planner.cpp +++ b/tests/benchmark/query/planner.cpp @@ -16,6 +16,7 @@ #include "query/frontend/semantic/symbol_generator.hpp" #include "query/plan/cost_estimator.hpp" #include "query/plan/planner.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "query/plan/vertex_count_cache.hpp" #include "storage/v2/inmemory/storage.hpp" @@ -136,7 +137,8 @@ static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) { auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery( query_parts, &ctx); for (auto plan : plans) { - memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan); + memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan, + memgraph::query::plan::IndexHints()); } } } @@ -166,7 +168,8 @@ static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State & auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery( query_parts, &ctx); for (auto plan : plans) { - memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan); + memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan, + memgraph::query::plan::IndexHints()); } } } diff --git a/tests/e2e/index_hints/index_hints.py b/tests/e2e/index_hints/index_hints.py index 70d3ce6b6..b59d60103 100644 --- a/tests/e2e/index_hints/index_hints.py +++ b/tests/e2e/index_hints/index_hints.py @@ -478,5 +478,44 @@ def test_nonexistent_label_property_index(memgraph): assert False +def test_index_hint_on_expand(memgraph): + # Prefer expanding from the node with the given hint even if estimator estimates higher cost for that plan + + memgraph.execute("FOREACH (i IN range(1, 1000) | CREATE (n:Label1 {id: i}));") + memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i}));") + memgraph.execute("CREATE INDEX ON :Label1;") + memgraph.execute("CREATE INDEX ON :Label2;") + + expected_explain_without_hint = [ + " * Produce {n, m}", + " * Filter (n :Label1)", + " * Expand (m)<-[anon1:rel]-(n)", + " * ScanAllByLabel (m :Label2)", + " * Once", + ] + + expected_explain_with_hint = [ + " * Produce {n, m}", + " * Filter (m :Label2)", + " * Expand (n)-[anon1:rel]->(m)", + " * ScanAllByLabel (n :Label1)", + " * Once", + ] + + explain_without_hint = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;") + ] + + explain_with_hint = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch( + "EXPLAIN USING INDEX :Label1 MATCH (n:Label1)-[:rel]->(m:Label2) RETURN n, m;" + ) + ] + + assert explain_without_hint == expected_explain_without_hint and explain_with_hint == expected_explain_with_hint + + if __name__ == "__main__": sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/manual/interactive_planning.cpp b/tests/manual/interactive_planning.cpp index 76ae4cc1a..f550b9724 100644 --- a/tests/manual/interactive_planning.cpp +++ b/tests/manual/interactive_planning.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -463,7 +463,7 @@ auto MakeLogicalPlans(memgraph::query::CypherQuery *query, memgraph::query::AstS memgraph::query::AstStorage ast_copy; auto unoptimized_plan = plan->Clone(&ast_copy); auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx); - double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table); + double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table).cost; interactive_plans.push_back( InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost}); } diff --git a/tests/unit/query_cost_estimator.cpp b/tests/unit/query_cost_estimator.cpp index ff9525cdc..631d17414 100644 --- a/tests/unit/query_cost_estimator.cpp +++ b/tests/unit/query_cost_estimator.cpp @@ -17,6 +17,7 @@ #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/cost_estimator.hpp" #include "query/plan/operator.hpp" +#include "query/plan/rewrite/index_lookup.hpp" #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/storage.hpp" @@ -83,7 +84,8 @@ class QueryCostEstimator : public ::testing::Test { } auto Cost() { - CostEstimator cost_estimator(&*dba, symbol_table_, parameters_); + CostEstimator cost_estimator(&*dba, symbol_table_, parameters_, + memgraph::query::plan::IndexHints()); last_op_->Accept(cost_estimator); return cost_estimator.cost(); }