Add caching of VerticesCount during planning and estimation

Summary: Benchmark planning and estimating indexed ScanAll. According to the benchmark, caching speeds up the whole process of planning and estimation by a factor of 2. Most of the performance gain is in the `CostEstimator` itself, due to plenty of calls to `VerticesCount` when estimating all of the generated plans. Reviewers: mislav.bradac, florijan Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D765
2017-09-07 16:23:59 +02:00 · 2017-09-07 16:23:59 +02:00 · 0aee18544d
commit 0aee18544d
parent f0b1f24006
4 changed files with 274 additions and 15 deletions
--- a/src/query/interpreter.hpp
+++ b/src/query/interpreter.hpp
@ -16,6 +16,7 @@
 #include "query/interpret/frame.hpp"
 #include "query/plan/cost_estimator.hpp"
 #include "query/plan/planner.hpp"
+#include "query/plan/vertex_count_cache.hpp"
 #include "threading/sync/spinlock.hpp"
 #include "utils/timer.hpp"

@ -111,15 +112,14 @@ class Interpreter {

    // high level tree -> logical plan
    std::unique_ptr<plan::LogicalOperator> logical_plan;
+    auto vertex_counts = plan::MakeVertexCountCache(db_accessor);
    double query_plan_cost_estimation = 0.0;
    if (FLAGS_query_cost_planner) {
      auto plans = plan::MakeLogicalPlan<plan::VariableStartPlanner>(
-          ast_storage, symbol_table, db_accessor);
+          ast_storage, symbol_table, vertex_counts);
      double min_cost = std::numeric_limits<double>::max();
      for (auto &plan : plans) {
-        plan::CostEstimator<GraphDbAccessor> estimator(db_accessor);
-        plan->Accept(estimator);
-        auto cost = estimator.cost();
+        auto cost = EstimatePlanCost(vertex_counts, *plan);
        if (!logical_plan || cost < min_cost) {
          // We won't be iterating over plans anymore, so it's ok to invalidate
          // unique_ptrs inside.
@ -130,10 +130,9 @@ class Interpreter {
      query_plan_cost_estimation = min_cost;
    } else {
      logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
-          ast_storage, symbol_table, db_accessor);
-      plan::CostEstimator<GraphDbAccessor> cost_estimator(db_accessor);
-      logical_plan->Accept(cost_estimator);
-      query_plan_cost_estimation = cost_estimator.cost();
+          ast_storage, symbol_table, vertex_counts);
+      query_plan_cost_estimation =
+          EstimatePlanCost(vertex_counts, *logical_plan);
    }

    // generate frame based on symbol table max_position
--- a/src/query/plan/cost_estimator.hpp
+++ b/src/query/plan/cost_estimator.hpp
@ -223,4 +223,12 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
  }
 };

+/**
+ * Computes the estimated cost of a logical plan.
+ *
+ * A `CostEstimator<TDbAccessor>` is constructed over @p db, handed to the plan
+ * as a visitor, and its accumulated `cost()` is returned.
+ *
+ * @tparam TDbAccessor Accessor type the `CostEstimator` is instantiated with.
+ * @param db Accessor passed to the estimator's constructor.
+ * @param plan Root operator of the plan that accepts the estimator.
+ * @return The value reported by the estimator after visiting @p plan.
+ */
+template <class TDbAccessor>
+double EstimatePlanCost(TDbAccessor &db, LogicalOperator &plan) {
+  CostEstimator<TDbAccessor> plan_visitor(db);
+  plan.Accept(plan_visitor);
+  const double estimated_cost = plan_visitor.cost();
+  return estimated_cost;
+}
+
 }  // namespace query::plan
--- a/src/query/plan/vertex_count_cache.hpp
+++ b/src/query/plan/vertex_count_cache.hpp
@ -0,0 +1,147 @@
+/// @file
+#pragma once
+
+#include "utils/hashing/fnv.hpp"
+
+namespace query::plan {
+
+/// A stand-in class for `TDbAccessor` which provides memoized calls to
+/// `VerticesCount`.
+///
+/// Every `VerticesCount` overload forwards to the wrapped accessor the first
+/// time it is called with a given set of arguments and stores the result;
+/// later calls with equal arguments return the stored number without touching
+/// the accessor. `LabelPropertyIndexExists` is forwarded without caching.
+///
+/// The cache only holds a reference to the accessor, so the accessor must
+/// outlive the cache. Cached entries are never invalidated. No synchronization
+/// is performed when the cache is filled.
+template <class TDbAccessor>
+class VertexCountCache {
+ public:
+  /// Wraps `db`; only a reference is stored, nothing is queried yet.
+  VertexCountCache(const TDbAccessor &db) : db_(db) {}
+
+  /// Returns the (memoized) result of `db.VerticesCount()`.
+  int64_t VerticesCount() const {
+    if (!vertices_count_) vertices_count_ = db_.VerticesCount();
+    return *vertices_count_;
+  }
+
+  /// Returns the (memoized) result of `db.VerticesCount(label)`.
+  int64_t VerticesCount(const GraphDbTypes::Label &label) const {
+    return Memoized(label_vertex_count_, label,
+                    [&] { return db_.VerticesCount(label); });
+  }
+
+  /// Returns the (memoized) result of `db.VerticesCount(label, property)`.
+  int64_t VerticesCount(const GraphDbTypes::Label &label,
+                        const GraphDbTypes::Property &property) const {
+    return Memoized(label_property_vertex_count_,
+                    std::make_pair(label, property),
+                    [&] { return db_.VerticesCount(label, property); });
+  }
+
+  /// Returns the (memoized) result of
+  /// `db.VerticesCount(label, property, value)`.
+  int64_t VerticesCount(const GraphDbTypes::Label &label,
+                        const GraphDbTypes::Property &property,
+                        const PropertyValue &value) const {
+    auto &value_vertex_count =
+        property_value_vertex_count_[std::make_pair(label, property)];
+    // The inner map is keyed by `TypedValue`; convert the value once instead
+    // of on every lookup.
+    return Memoized(value_vertex_count, query::TypedValue(value),
+                    [&] { return db_.VerticesCount(label, property, value); });
+  }
+
+  /// Returns the (memoized) result of
+  /// `db.VerticesCount(label, property, lower, upper)`.
+  int64_t VerticesCount(
+      const GraphDbTypes::Label &label, const GraphDbTypes::Property &property,
+      const std::experimental::optional<utils::Bound<PropertyValue>> &lower,
+      const std::experimental::optional<utils::Bound<PropertyValue>> &upper)
+      const {
+    auto &bounds_vertex_count =
+        property_bounds_vertex_count_[std::make_pair(label, property)];
+    return Memoized(bounds_vertex_count, BoundsKey(lower, upper), [&] {
+      return db_.VerticesCount(label, property, lower, upper);
+    });
+  }
+
+  /// Forwards to `db.LabelPropertyIndexExists(label, property)`; not cached.
+  bool LabelPropertyIndexExists(const GraphDbTypes::Label &label,
+                                const GraphDbTypes::Property &property) const {
+    return db_.LabelPropertyIndexExists(label, property);
+  }
+
+ private:
+  typedef std::pair<GraphDbTypes::Label, GraphDbTypes::Property>
+      LabelPropertyKey;
+
+  /// Hashes a (label, property) pair by combining the hashes of both parts.
+  struct LabelPropertyHash {
+    size_t operator()(const LabelPropertyKey &key) const {
+      return HashCombine<GraphDbTypes::Label, GraphDbTypes::Property>{}(
+          key.first, key.second);
+    }
+  };
+
+  typedef std::pair<std::experimental::optional<utils::Bound<PropertyValue>>,
+                    std::experimental::optional<utils::Bound<PropertyValue>>>
+      BoundsKey;
+
+  /// Hashes a (lower, upper) bounds pair. A missing bound is hashed as a Null
+  /// `TypedValue`; the bound type is not part of the hash, `BoundsEqual`
+  /// tells such keys apart.
+  struct BoundsHash {
+    size_t operator()(const BoundsKey &key) const {
+      const auto &maybe_lower = key.first;
+      const auto &maybe_upper = key.second;
+      query::TypedValue lower(query::TypedValue::Null);
+      query::TypedValue upper(query::TypedValue::Null);
+      if (maybe_lower) lower = maybe_lower->value();
+      if (maybe_upper) upper = maybe_upper->value();
+      query::TypedValue::Hash hash;
+      return HashCombine<size_t, size_t>{}(hash(lower), hash(upper));
+    }
+  };
+
+  /// Compares two (lower, upper) bounds pairs. Two present bounds of different
+  /// bound type are never equal; otherwise the bound values are compared with
+  /// `TypedValue::BoolEqual`, a missing bound being represented by Null.
+  struct BoundsEqual {
+    bool operator()(const BoundsKey &a, const BoundsKey &b) const {
+      auto bound_equal = [](const auto &maybe_bound_a,
+                            const auto &maybe_bound_b) {
+        if (maybe_bound_a && maybe_bound_b &&
+            maybe_bound_a->type() != maybe_bound_b->type())
+          return false;
+        query::TypedValue bound_a(query::TypedValue::Null);
+        query::TypedValue bound_b(query::TypedValue::Null);
+        if (maybe_bound_a) bound_a = maybe_bound_a->value();
+        if (maybe_bound_b) bound_b = maybe_bound_b->value();
+        return query::TypedValue::BoolEqual{}(bound_a, bound_b);
+      };
+      return bound_equal(a.first, b.first) && bound_equal(a.second, b.second);
+    }
+  };
+
+  /// Returns the count stored in `counts` under `key`. On a miss, `compute` is
+  /// invoked and its result is stored first. The count is computed before
+  /// anything is inserted, so a throwing `compute` leaves `counts` untouched.
+  template <class TMap, class TKey, class TCompute>
+  static int64_t Memoized(TMap &counts, const TKey &key, TCompute compute) {
+    auto found = counts.find(key);
+    if (found == counts.end()) found = counts.emplace(key, compute()).first;
+    return found->second;
+  }
+
+  const TDbAccessor &db_;
+  // The accessor API this class stands in for is const, so the caches are
+  // filled from const methods. They are `mutable` for that reason, which keeps
+  // filling them well defined even on a const `VertexCountCache`.
+  mutable std::experimental::optional<int64_t> vertices_count_;
+  mutable std::unordered_map<GraphDbTypes::Label, int64_t> label_vertex_count_;
+  mutable std::unordered_map<LabelPropertyKey, int64_t, LabelPropertyHash>
+      label_property_vertex_count_;
+  mutable std::unordered_map<
+      LabelPropertyKey,
+      std::unordered_map<query::TypedValue, int64_t, query::TypedValue::Hash,
+                         query::TypedValue::BoolEqual>,
+      LabelPropertyHash>
+      property_value_vertex_count_;
+  mutable std::unordered_map<
+      LabelPropertyKey,
+      std::unordered_map<BoundsKey, int64_t, BoundsHash, BoundsEqual>,
+      LabelPropertyHash>
+      property_bounds_vertex_count_;
+};
+
+/// Creates a `VertexCountCache` over `db`, deducing the accessor type from the
+/// argument. The returned cache refers to `db`, it does not copy it.
+template <class TDbAccessor>
+auto MakeVertexCountCache(const TDbAccessor &db) {
+  VertexCountCache<TDbAccessor> cache(db);
+  return cache;
+}
+
+}  // namespace query::plan
--- a/tests/benchmark/query/planner.cpp
+++ b/tests/benchmark/query/planner.cpp
@ -4,10 +4,12 @@

 #include "database/dbms.hpp"
 #include "query/frontend/semantic/symbol_generator.hpp"
+#include "query/plan/cost_estimator.hpp"
 #include "query/plan/planner.hpp"
+#include "query/plan/vertex_count_cache.hpp"

 // Add chained MATCH (node1) -- (node2), MATCH (node2) -- (node3) ... clauses.
-static void AddMatches(int num_matches, query::AstTreeStorage &storage) {
+static void AddChainedMatches(int num_matches, query::AstTreeStorage &storage) {
  for (int i = 0; i < num_matches; ++i) {
    auto *match = storage.Create<query::Match>();
    auto *pattern = storage.Create<query::Pattern>();
@ -25,14 +27,14 @@ static void AddMatches(int num_matches, query::AstTreeStorage &storage) {
  }
 }

-static void BM_MakeLogicalPlan(benchmark::State &state) {
+static void BM_PlanChainedMatches(benchmark::State &state) {
+  Dbms dbms;
+  auto dba = dbms.active();
  while (state.KeepRunning()) {
    state.PauseTiming();
-    Dbms dbms;
-    auto dba = dbms.active();
    query::AstTreeStorage storage;
    int num_matches = state.range(0);
-    AddMatches(num_matches, storage);
+    AddChainedMatches(num_matches, storage);
    query::SymbolTable symbol_table;
    query::SymbolGenerator symbol_generator(symbol_table);
    storage.query()->Accept(symbol_generator);
@ -40,11 +42,114 @@ static void BM_MakeLogicalPlan(benchmark::State &state) {
    query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
        storage, symbol_table, *dba);
  }
-};
+}

-BENCHMARK(BM_MakeLogicalPlan)
+BENCHMARK(BM_PlanChainedMatches)
    ->RangeMultiplier(2)
    ->Range(50, 400)
    ->Unit(benchmark::kMillisecond);

+// Appends `num_matches` MATCH clauses to the query in `storage`. Each clause
+// has a single pattern, identified as "path", containing one node atom which
+// carries `label` and has `property` set to the literal `i`, the zero-based
+// position of the clause.
+static void AddIndexedMatches(
+    int num_matches, const GraphDbTypes::Label &label,
+    const std::pair<std::string, GraphDbTypes::Property> &property,
+    query::AstTreeStorage &storage) {
+  int clause_index = 0;
+  while (clause_index < num_matches) {
+    auto *match_clause = storage.Create<query::Match>();
+    auto *match_pattern = storage.Create<query::Pattern>();
+    match_pattern->identifier_ = storage.Create<query::Identifier>("path");
+    match_clause->patterns_.emplace_back(match_pattern);
+    // Node names are offset by one, i.e. the first node is named "node-1".
+    std::string node_name = "node" + std::to_string(clause_index - 1);
+    auto *node_identifier = storage.Create<query::Identifier>(node_name);
+    auto *node_atom = storage.Create<query::NodeAtom>(node_identifier);
+    node_atom->labels_.emplace_back(label);
+    node_atom->properties_[property] =
+        storage.Create<query::PrimitiveLiteral>(clause_index);
+    match_pattern->atoms_.emplace_back(node_atom);
+    storage.query()->clauses_.emplace_back(match_clause);
+    ++clause_index;
+  }
+}
+
+// Builds an index on ("label", "prop") and then inserts
+// `vertex_count * index_count` vertices with that label, where the property
+// takes each of the values 0..index_count-1 exactly `vertex_count` times.
+// The insertions are committed before returning the (label, property) pair.
+static auto CreateIndexedVertices(int index_count, int vertex_count,
+                                  Dbms &dbms) {
+  auto accessor = dbms.active();
+  auto indexed_label = accessor->Label("label");
+  auto indexed_property = accessor->Property("prop");
+  accessor->BuildIndex(indexed_label, indexed_property);
+  // Continue with a fresh accessor once the index has been built.
+  accessor = dbms.active();
+  for (int round = 0; round < vertex_count; ++round) {
+    for (int value = 0; value < index_count; ++value) {
+      auto new_vertex = accessor->InsertVertex();
+      new_vertex.add_label(indexed_label);
+      new_vertex.PropsSet(indexed_property, value);
+    }
+  }
+  accessor->Commit();
+  return std::make_pair(indexed_label, indexed_property);
+}
+
+// Benchmarks generating all plans of an indexed-match query and estimating the
+// cost of each one, with `VerticesCount` served directly by the database
+// accessor (no memoization).
+//
+// state.range(0) is used both as the number of distinct indexed property
+// values in the database and as the number of MATCH clauses in the query;
+// state.range(1) is the number of vertices created per property value.
+static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
+  Dbms dbms;
+  GraphDbTypes::Label label;
+  GraphDbTypes::Property prop;
+  int index_count = state.range(0);
+  int vertex_count = state.range(1);
+  // The database is populated once, before the measured loop.
+  std::tie(label, prop) =
+      CreateIndexedVertices(index_count, vertex_count, dbms);
+  auto dba = dbms.active();
+  while (state.KeepRunning()) {
+    // Building the AST and the symbol table is excluded from the timing.
+    state.PauseTiming();
+    query::AstTreeStorage storage;
+    AddIndexedMatches(index_count, label, std::make_pair("prop", prop),
+                      storage);
+    query::SymbolTable symbol_table;
+    query::SymbolGenerator symbol_generator(symbol_table);
+    storage.query()->Accept(symbol_generator);
+    state.ResumeTiming();
+    // Timed part: plan generation followed by cost estimation of every plan.
+    auto plans =
+        query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
+            storage, symbol_table, *dba);
+    for (auto &plan : plans) {
+      query::plan::EstimatePlanCost(*dba, *plan);
+    }
+  }
+}
+
+// Benchmarks generating all plans of an indexed-match query and estimating the
+// cost of each one, with both the planner and the estimator reading vertex
+// counts through a `VertexCountCache` instead of the database accessor.
+//
+// state.range(0) is used both as the number of distinct indexed property
+// values in the database and as the number of MATCH clauses in the query;
+// state.range(1) is the number of vertices created per property value.
+static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(
+    benchmark::State &state) {
+  Dbms dbms;
+  GraphDbTypes::Label label;
+  GraphDbTypes::Property prop;
+  int index_count = state.range(0);
+  int vertex_count = state.range(1);
+  // The database is populated once, before the measured loop.
+  std::tie(label, prop) =
+      CreateIndexedVertices(index_count, vertex_count, dbms);
+  auto dba = dbms.active();
+  // NOTE(review): the cache is created once, outside the measured loop, so it
+  // is shared by all iterations and only the first one starts with an empty
+  // cache. Confirm this is the intended measurement; if the cost of a cold,
+  // per-query cache is of interest, the cache has to be created inside the
+  // loop.
+  auto vertex_counts = query::plan::MakeVertexCountCache(*dba);
+  while (state.KeepRunning()) {
+    // Building the AST and the symbol table is excluded from the timing.
+    state.PauseTiming();
+    query::AstTreeStorage storage;
+    AddIndexedMatches(index_count, label, std::make_pair("prop", prop),
+                      storage);
+    query::SymbolTable symbol_table;
+    query::SymbolGenerator symbol_generator(symbol_table);
+    storage.query()->Accept(symbol_generator);
+    state.ResumeTiming();
+    // Timed part: plan generation followed by cost estimation of every plan,
+    // both going through the cache.
+    auto plans =
+        query::plan::MakeLogicalPlan<query::plan::VariableStartPlanner>(
+            storage, symbol_table, vertex_counts);
+    for (auto &plan : plans) {
+      query::plan::EstimatePlanCost(vertex_counts, *plan);
+    }
+  }
+}
+
+// Both benchmarks are registered with identical argument ranges so that their
+// results can be compared directly. The first range feeds state.range(0)
+// (read as `index_count`), the second feeds state.range(1) (read as
+// `vertex_count`).
+BENCHMARK(BM_PlanAndEstimateIndexedMatching)
+    ->RangeMultiplier(4)
+    ->Ranges({{1, 100}, {100, 1000}})
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_PlanAndEstimateIndexedMatchingWithCachedCounts)
+    ->RangeMultiplier(4)
+    ->Ranges({{1, 100}, {100, 1000}})
+    ->Unit(benchmark::kMicrosecond);
+
 BENCHMARK_MAIN();