Add caching of VerticesCount during planning and estimation
Summary: Benchmark planning and estimating indexed ScanAll. According to the benchmark, caching speeds up the whole process of planning and estimation by a factor of 2. Most of the performance gain is in the `CostEstimator` itself, due to plenty of calls to `VerticesCount` when estimating all of the generated plans. Reviewers: mislav.bradac, florijan Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D765
This commit is contained in:
parent
f0b1f24006
commit
0aee18544d
@ -16,6 +16,7 @@
|
||||
#include "query/interpret/frame.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/vertex_count_cache.hpp"
|
||||
#include "threading/sync/spinlock.hpp"
|
||||
#include "utils/timer.hpp"
|
||||
|
||||
@ -111,15 +112,14 @@ class Interpreter {
|
||||
|
||||
// high level tree -> logical plan
|
||||
std::unique_ptr<plan::LogicalOperator> logical_plan;
|
||||
auto vertex_counts = plan::MakeVertexCountCache(db_accessor);
|
||||
double query_plan_cost_estimation = 0.0;
|
||||
if (FLAGS_query_cost_planner) {
|
||||
auto plans = plan::MakeLogicalPlan<plan::VariableStartPlanner>(
|
||||
ast_storage, symbol_table, db_accessor);
|
||||
ast_storage, symbol_table, vertex_counts);
|
||||
double min_cost = std::numeric_limits<double>::max();
|
||||
for (auto &plan : plans) {
|
||||
plan::CostEstimator<GraphDbAccessor> estimator(db_accessor);
|
||||
plan->Accept(estimator);
|
||||
auto cost = estimator.cost();
|
||||
auto cost = EstimatePlanCost(vertex_counts, *plan);
|
||||
if (!logical_plan || cost < min_cost) {
|
||||
// We won't be iterating over plans anymore, so it's ok to invalidate
|
||||
// unique_ptrs inside.
|
||||
@ -130,10 +130,9 @@ class Interpreter {
|
||||
query_plan_cost_estimation = min_cost;
|
||||
} else {
|
||||
logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
|
||||
ast_storage, symbol_table, db_accessor);
|
||||
plan::CostEstimator<GraphDbAccessor> cost_estimator(db_accessor);
|
||||
logical_plan->Accept(cost_estimator);
|
||||
query_plan_cost_estimation = cost_estimator.cost();
|
||||
ast_storage, symbol_table, vertex_counts);
|
||||
query_plan_cost_estimation =
|
||||
EstimatePlanCost(vertex_counts, *logical_plan);
|
||||
}
|
||||
|
||||
// generate frame based on symbol table max_position
|
||||
|
@ -223,4 +223,12 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
}
|
||||
};
|
||||
|
||||
/** Returns the estimated cost of the given plan. */
|
||||
template <class TDbAccessor>
|
||||
double EstimatePlanCost(TDbAccessor &db, LogicalOperator &plan) {
|
||||
CostEstimator<TDbAccessor> estimator(db);
|
||||
plan.Accept(estimator);
|
||||
return estimator.cost();
|
||||
}
|
||||
|
||||
} // namespace query::plan
|
||||
|
147
src/query/plan/vertex_count_cache.hpp
Normal file
147
src/query/plan/vertex_count_cache.hpp
Normal file
@ -0,0 +1,147 @@
|
||||
/// @file
|
||||
#pragma once
|
||||
|
||||
#include "utils/hashing/fnv.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
/// A stand in class for `TDbAccessor` which provides memoized calls to
|
||||
/// `VerticesCount`.
|
||||
template <class TDbAccessor>
|
||||
class VertexCountCache {
|
||||
public:
|
||||
VertexCountCache(const TDbAccessor &db) : db_(db) {}
|
||||
|
||||
int64_t VerticesCount() const {
|
||||
auto non_const_this = const_cast<VertexCountCache *>(this);
|
||||
if (!vertices_count_) non_const_this->vertices_count_ = db_.VerticesCount();
|
||||
return *vertices_count_;
|
||||
}
|
||||
|
||||
int64_t VerticesCount(const GraphDbTypes::Label &label) const {
|
||||
if (label_vertex_count_.find(label) == label_vertex_count_.end()) {
|
||||
// DbAccessor API needs to be const. Since we know that
|
||||
// InteractiveDbAccessor should never be const in this file, we use
|
||||
// const_cast.
|
||||
auto non_const_this = const_cast<VertexCountCache *>(this);
|
||||
non_const_this->label_vertex_count_[label] = db_.VerticesCount(label);
|
||||
}
|
||||
return label_vertex_count_.at(label);
|
||||
}
|
||||
|
||||
int64_t VerticesCount(const GraphDbTypes::Label &label,
|
||||
const GraphDbTypes::Property &property) const {
|
||||
auto key = std::make_pair(label, property);
|
||||
if (label_property_vertex_count_.find(key) ==
|
||||
label_property_vertex_count_.end()) {
|
||||
auto non_const_this = const_cast<VertexCountCache *>(this);
|
||||
non_const_this->label_property_vertex_count_[key] =
|
||||
db_.VerticesCount(label, property);
|
||||
}
|
||||
return label_property_vertex_count_.at(key);
|
||||
}
|
||||
|
||||
int64_t VerticesCount(const GraphDbTypes::Label &label,
|
||||
const GraphDbTypes::Property &property,
|
||||
const PropertyValue &value) const {
|
||||
auto label_prop = std::make_pair(label, property);
|
||||
auto non_const_this = const_cast<VertexCountCache *>(this);
|
||||
auto &value_vertex_count =
|
||||
non_const_this->property_value_vertex_count_[label_prop];
|
||||
if (value_vertex_count.find(value) == value_vertex_count.end()) {
|
||||
value_vertex_count[value] = db_.VerticesCount(label, property, value);
|
||||
}
|
||||
return value_vertex_count.at(value);
|
||||
}
|
||||
|
||||
int64_t VerticesCount(
|
||||
const GraphDbTypes::Label &label, const GraphDbTypes::Property &property,
|
||||
const std::experimental::optional<utils::Bound<PropertyValue>> &lower,
|
||||
const std::experimental::optional<utils::Bound<PropertyValue>> &upper)
|
||||
const {
|
||||
auto label_prop = std::make_pair(label, property);
|
||||
auto non_const_this = const_cast<VertexCountCache *>(this);
|
||||
auto &bounds_vertex_count =
|
||||
non_const_this->property_bounds_vertex_count_[label_prop];
|
||||
BoundsKey bounds = std::make_pair(lower, upper);
|
||||
if (bounds_vertex_count.find(bounds) == bounds_vertex_count.end()) {
|
||||
bounds_vertex_count[bounds] =
|
||||
db_.VerticesCount(label, property, lower, upper);
|
||||
}
|
||||
return bounds_vertex_count.at(bounds);
|
||||
}
|
||||
|
||||
bool LabelPropertyIndexExists(const GraphDbTypes::Label &label,
|
||||
const GraphDbTypes::Property &property) const {
|
||||
return db_.LabelPropertyIndexExists(label, property);
|
||||
}
|
||||
|
||||
private:
|
||||
typedef std::pair<GraphDbTypes::Label, GraphDbTypes::Property>
|
||||
LabelPropertyKey;
|
||||
|
||||
struct LabelPropertyHash {
|
||||
size_t operator()(const LabelPropertyKey &key) const {
|
||||
return HashCombine<GraphDbTypes::Label, GraphDbTypes::Property>{}(
|
||||
key.first, key.second);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::pair<std::experimental::optional<utils::Bound<PropertyValue>>,
|
||||
std::experimental::optional<utils::Bound<PropertyValue>>>
|
||||
BoundsKey;
|
||||
|
||||
struct BoundsHash {
|
||||
size_t operator()(const BoundsKey &key) const {
|
||||
const auto &maybe_lower = key.first;
|
||||
const auto &maybe_upper = key.second;
|
||||
query::TypedValue lower(query::TypedValue::Null);
|
||||
query::TypedValue upper(query::TypedValue::Null);
|
||||
if (maybe_lower) lower = maybe_lower->value();
|
||||
if (maybe_upper) upper = maybe_upper->value();
|
||||
query::TypedValue::Hash hash;
|
||||
return HashCombine<size_t, size_t>{}(hash(lower), hash(upper));
|
||||
}
|
||||
};
|
||||
|
||||
struct BoundsEqual {
|
||||
bool operator()(const BoundsKey &a, const BoundsKey &b) const {
|
||||
auto bound_equal = [](const auto &maybe_bound_a,
|
||||
const auto &maybe_bound_b) {
|
||||
if (maybe_bound_a && maybe_bound_b &&
|
||||
maybe_bound_a->type() != maybe_bound_b->type())
|
||||
return false;
|
||||
query::TypedValue bound_a(query::TypedValue::Null);
|
||||
query::TypedValue bound_b(query::TypedValue::Null);
|
||||
if (maybe_bound_a) bound_a = maybe_bound_a->value();
|
||||
if (maybe_bound_b) bound_b = maybe_bound_b->value();
|
||||
return query::TypedValue::BoolEqual{}(bound_a, bound_b);
|
||||
};
|
||||
return bound_equal(a.first, b.first) && bound_equal(a.second, b.second);
|
||||
}
|
||||
};
|
||||
|
||||
const TDbAccessor &db_;
|
||||
std::experimental::optional<int64_t> vertices_count_;
|
||||
std::unordered_map<GraphDbTypes::Label, int64_t> label_vertex_count_;
|
||||
std::unordered_map<LabelPropertyKey, int64_t, LabelPropertyHash>
|
||||
label_property_vertex_count_;
|
||||
std::unordered_map<
|
||||
LabelPropertyKey,
|
||||
std::unordered_map<query::TypedValue, int64_t, query::TypedValue::Hash,
|
||||
query::TypedValue::BoolEqual>,
|
||||
LabelPropertyHash>
|
||||
property_value_vertex_count_;
|
||||
std::unordered_map<
|
||||
LabelPropertyKey,
|
||||
std::unordered_map<BoundsKey, int64_t, BoundsHash, BoundsEqual>,
|
||||
LabelPropertyHash>
|
||||
property_bounds_vertex_count_;
|
||||
};
|
||||
|
||||
template <class TDbAccessor>
|
||||
auto MakeVertexCountCache(const TDbAccessor &db) {
|
||||
return VertexCountCache<TDbAccessor>(db);
|
||||
}
|
||||
|
||||
} // namespace query::plan
|
@ -4,10 +4,12 @@
|
||||
|
||||
#include "database/dbms.hpp"
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/vertex_count_cache.hpp"
|
||||
|
||||
// Add chained MATCH (node1) -- (node2), MATCH (node2) -- (node3) ... clauses.
|
||||
static void AddMatches(int num_matches, query::AstTreeStorage &storage) {
|
||||
static void AddChainedMatches(int num_matches, query::AstTreeStorage &storage) {
|
||||
for (int i = 0; i < num_matches; ++i) {
|
||||
auto *match = storage.Create<query::Match>();
|
||||
auto *pattern = storage.Create<query::Pattern>();
|
||||
@ -25,14 +27,14 @@ static void AddMatches(int num_matches, query::AstTreeStorage &storage) {
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_MakeLogicalPlan(benchmark::State &state) {
|
||||
static void BM_PlanChainedMatches(benchmark::State &state) {
|
||||
Dbms dbms;
|
||||
auto dba = dbms.active();
|
||||
while (state.KeepRunning()) {
|
||||
state.PauseTiming();
|
||||
Dbms dbms;
|
||||
auto dba = dbms.active();
|
||||
query::AstTreeStorage storage;
|
||||
int num_matches = state.range(0);
|
||||
AddMatches(num_matches, storage);
|
||||
AddChainedMatches(num_matches, storage);
|
||||
query::SymbolTable symbol_table;
|
||||
query::SymbolGenerator symbol_generator(symbol_table);
|
||||
storage.query()->Accept(symbol_generator);
|
||||
@ -40,11 +42,114 @@ static void BM_MakeLogicalPlan(benchmark::State &state) {
|
||||
query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
|
||||
storage, symbol_table, *dba);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
BENCHMARK(BM_MakeLogicalPlan)
|
||||
BENCHMARK(BM_PlanChainedMatches)
|
||||
->RangeMultiplier(2)
|
||||
->Range(50, 400)
|
||||
->Unit(benchmark::kMillisecond);
|
||||
|
||||
static void AddIndexedMatches(
|
||||
int num_matches, const GraphDbTypes::Label &label,
|
||||
const std::pair<std::string, GraphDbTypes::Property> &property,
|
||||
query::AstTreeStorage &storage) {
|
||||
for (int i = 0; i < num_matches; ++i) {
|
||||
auto *match = storage.Create<query::Match>();
|
||||
auto *pattern = storage.Create<query::Pattern>();
|
||||
pattern->identifier_ = storage.Create<query::Identifier>("path");
|
||||
match->patterns_.emplace_back(pattern);
|
||||
std::string node1_name = "node" + std::to_string(i - 1);
|
||||
auto *node = storage.Create<query::NodeAtom>(
|
||||
storage.Create<query::Identifier>(node1_name));
|
||||
node->labels_.emplace_back(label);
|
||||
node->properties_[property] = storage.Create<query::PrimitiveLiteral>(i);
|
||||
pattern->atoms_.emplace_back(node);
|
||||
storage.query()->clauses_.emplace_back(match);
|
||||
}
|
||||
}
|
||||
|
||||
static auto CreateIndexedVertices(int index_count, int vertex_count,
|
||||
Dbms &dbms) {
|
||||
auto dba = dbms.active();
|
||||
auto label = dba->Label("label");
|
||||
auto prop = dba->Property("prop");
|
||||
dba->BuildIndex(label, prop);
|
||||
dba = dbms.active();
|
||||
for (int vi = 0; vi < vertex_count; ++vi) {
|
||||
for (int index = 0; index < index_count; ++index) {
|
||||
auto vertex = dba->InsertVertex();
|
||||
vertex.add_label(label);
|
||||
vertex.PropsSet(prop, index);
|
||||
}
|
||||
}
|
||||
dba->Commit();
|
||||
return std::make_pair(label, prop);
|
||||
}
|
||||
|
||||
// Benchmark planning and cost estimation of indexed matches when the planner
// and the estimator talk to the database accessor directly.
// state.range(0) is the index count, state.range(1) is the vertex count.
static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
  Dbms dbms;
  int index_count = state.range(0);
  int vertex_count = state.range(1);
  const auto label_and_prop =
      CreateIndexedVertices(index_count, vertex_count, dbms);
  const auto &label = label_and_prop.first;
  const auto &prop = label_and_prop.second;
  auto accessor = dbms.active();
  while (state.KeepRunning()) {
    // Building the AST and the symbol table is setup, keep it untimed.
    state.PauseTiming();
    query::AstTreeStorage ast;
    AddIndexedMatches(index_count, label, std::make_pair("prop", prop), ast);
    query::SymbolTable symbols;
    query::SymbolGenerator generator(symbols);
    ast.query()->Accept(generator);
    state.ResumeTiming();
    // Timed part: generate all plans and estimate each of them.
    auto candidate_plans =
        query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
            ast, symbols, *accessor);
    for (auto &candidate : candidate_plans) {
      query::plan::EstimatePlanCost(*accessor, *candidate);
    }
  }
}
|
||||
|
||||
// Same as BM_PlanAndEstimateIndexedMatching, but the planner and the
// estimator are given a vertex count cache instead of the accessor itself.
// state.range(0) is the index count, state.range(1) is the vertex count.
// NOTE(review): the cache is created once, before the timed loop, so counts
// stored in one iteration are still available in later iterations.
static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(
    benchmark::State &state) {
  Dbms dbms;
  int index_count = state.range(0);
  int vertex_count = state.range(1);
  const auto label_and_prop =
      CreateIndexedVertices(index_count, vertex_count, dbms);
  const auto &label = label_and_prop.first;
  const auto &prop = label_and_prop.second;
  auto accessor = dbms.active();
  auto cached_counts = query::plan::MakeVertexCountCache(*accessor);
  while (state.KeepRunning()) {
    // Building the AST and the symbol table is setup, keep it untimed.
    state.PauseTiming();
    query::AstTreeStorage ast;
    AddIndexedMatches(index_count, label, std::make_pair("prop", prop), ast);
    query::SymbolTable symbols;
    query::SymbolGenerator generator(symbols);
    ast.query()->Accept(generator);
    state.ResumeTiming();
    // Timed part: generate all plans and estimate each of them.
    auto candidate_plans =
        query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
            ast, symbols, cached_counts);
    for (auto &candidate : candidate_plans) {
      query::plan::EstimatePlanCost(cached_counts, *candidate);
    }
  }
}
|
||||
|
||||
// Register the uncached benchmark. The first {lo, hi} pair feeds
// state.range(0) (index count), the second feeds state.range(1) (vertex
// count); times are reported in microseconds.
BENCHMARK(BM_PlanAndEstimateIndexedMatching)
|
||||
->RangeMultiplier(4)
|
||||
->Ranges({{1, 100}, {100, 1000}})
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
|
||||
// Register the cached variant with the same arguments so that the two
// benchmarks can be compared.
BENCHMARK(BM_PlanAndEstimateIndexedMatchingWithCachedCounts)
|
||||
->RangeMultiplier(4)
|
||||
->Ranges({{1, 100}, {100, 1000}})
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
|
||||
// Expands to the benchmark executable's entry point.
BENCHMARK_MAIN();
|
||||
|
Loading…
Reference in New Issue
Block a user