From 394039a05ec4e2062e184e7c4570e71ebdf1fe89 Mon Sep 17 00:00:00 2001
From: Teon Banek <teon.banek@memgraph.io>
Date: Thu, 25 Apr 2019 14:56:12 +0200
Subject: [PATCH] Use and bench custom allocator in Distinct

Summary:
According to the written benchmark, using MonotonicBufferResource yields
significant improvements to performance of Distinct. The setup fills the
database with vertices depending on the benchmark state. No edges are
created. Then we run DISTINCT on that. Since each vertex is unique, we
will store everything in the `DistinctCursor::seen_rows_`, which is
backed by a MemoryResource. This setup, on my machine, yields 10 times
better performance when run with MonotonicBufferResource.

Reviewers: mferencevic, mtomic, msantl

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1894
---
 src/query/plan/operator.cpp         |  34 +++++++--
 tests/benchmark/CMakeLists.txt      |   3 +
 tests/benchmark/query/execution.cpp | 110 ++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 8 deletions(-)
 create mode 100644 tests/benchmark/query/execution.cpp

diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp
index fb622f3af..31ec91f30 100644
--- a/src/query/plan/operator.cpp
+++ b/src/query/plan/operator.cpp
@@ -3042,11 +3042,26 @@ void Unwind::UnwindCursor::Reset() {
   input_value_it_ = input_value_.end();
 }
 
+// Equality for rows stored in vectors that use utils::Allocator. Rows are
+// compared element-wise with TypedValue::BoolEqual; rows of different sizes
+// are never equal.
+struct TypedValueVectorAllocatorEqual {
+  bool operator()(
+      const std::vector<TypedValue, utils::Allocator<TypedValue>> &left,
+      const std::vector<TypedValue, utils::Allocator<TypedValue>> &right)
+      const {
+    return std::equal(left.begin(), left.end(), right.begin(), right.end(),
+                      TypedValue::BoolEqual());
+  }
+};
+
 class DistinctCursor : public Cursor {
  public:
   DistinctCursor(const Distinct &self, database::GraphDbAccessor *db,
                  utils::MemoryResource *mem)
-      : self_(self), input_cursor_(self.input_->MakeCursor(db, mem)) {}
+      : self_(self),
+        input_cursor_(self.input_->MakeCursor(db, mem)),
+        seen_rows_(mem) {}
 
   bool Pull(Frame &frame, ExecutionContext &context) override {
     SCOPED_PROFILE_OP("Distinct");
@@ -3054,7 +3069,8 @@ class DistinctCursor : public Cursor {
     while (true) {
       if (!input_cursor_->Pull(frame, context)) return false;
 
-      std::vector<TypedValue> row;
+      std::vector<TypedValue, utils::Allocator<TypedValue>> row(
+          seen_rows_.get_allocator().GetMemoryResource());
       row.reserve(self_.value_symbols_.size());
       for (const auto &symbol : self_.value_symbols_)
         row.emplace_back(frame[symbol]);
@@ -3073,12 +3089,14 @@ class DistinctCursor : public Cursor {
   const Distinct &self_;
   const std::unique_ptr<Cursor> input_cursor_;
   // a set of already seen rows
-  std::unordered_set<std::vector<TypedValue>,
-                     // use FNV collection hashing specialized for a vector of
-                     // TypedValues
-                     utils::FnvCollection<std::vector<TypedValue>, TypedValue,
-                                          TypedValue::Hash>,
-                     TypedValueVectorEqual>
+  std::unordered_set<
+      std::vector<TypedValue, utils::Allocator<TypedValue>>,
+      // use FNV collection hashing specialized for a vector of TypedValue
+      utils::FnvCollection<
+          std::vector<TypedValue, utils::Allocator<TypedValue>>, TypedValue,
+          TypedValue::Hash>,
+      TypedValueVectorAllocatorEqual,
+      utils::Allocator<std::vector<TypedValue, utils::Allocator<TypedValue>>>>
       seen_rows_;
 };
 
diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt
index a11aa5204..256b8843d 100644
--- a/tests/benchmark/CMakeLists.txt
+++ b/tests/benchmark/CMakeLists.txt
@@ -36,6 +36,9 @@ target_link_libraries(${test_prefix}map_concurrent mg-single-node kvstore_dummy_
 add_benchmark(data_structures/ring_buffer.cpp)
 target_link_libraries(${test_prefix}ring_buffer mg-single-node kvstore_dummy_lib)
 
+add_benchmark(query/execution.cpp)
+target_link_libraries(${test_prefix}execution mg-single-node kvstore_dummy_lib)
+
 add_benchmark(query/planner.cpp)
 target_link_libraries(${test_prefix}planner mg-single-node kvstore_dummy_lib)
 
diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp
new file mode 100644
index 000000000..b0fc61d5b
--- /dev/null
+++ b/tests/benchmark/query/execution.cpp
@@ -0,0 +1,110 @@
+#include <string>
+
+#include <benchmark/benchmark.h>
+
+#include "communication/result_stream_faker.hpp"
+#include "database/graph_db.hpp"
+#include "database/graph_db_accessor.hpp"
+#include "query/frontend/opencypher/parser.hpp"
+#include "query/frontend/semantic/required_privileges.hpp"
+#include "query/frontend/semantic/symbol_generator.hpp"
+#include "query/interpreter.hpp"
+#include "query/plan/planner.hpp"
+
+// Inserts `vertex_count` vertices through one accessor and commits them.
+static void AddVertices(database::GraphDb *db, int vertex_count) {
+  auto dba = db->Access();
+  for (int i = 0; i < vertex_count; i++) dba.InsertVertex();
+  dba.Commit();
+}
+
+// Parses `query_string` and returns the CypherQuery produced by the
+// CypherMainVisitor, which is given `ast` as its storage argument.
+static query::CypherQuery *ParseCypherQuery(const std::string &query_string,
+                                            query::AstStorage *ast) {
+  query::frontend::ParsingContext parsing_context;
+  parsing_context.is_query_cached = false;
+  query::frontend::opencypher::Parser parser(query_string);
+  // Convert antlr4 AST into Memgraph AST.
+  query::frontend::CypherMainVisitor cypher_visitor(parsing_context, ast);
+  cypher_visitor.visit(parser.tree());
+  query::Interpreter::ParsedQuery parsed_query{
+      cypher_visitor.query(),
+      query::GetRequiredPrivileges(cypher_visitor.query())};
+  return utils::Downcast<query::CypherQuery>(parsed_query.query);
+}
+
+// Runs `MATCH (s) RETURN DISTINCT s` over `state.range(0)` vertices, creating
+// the cursor with utils::NewDeleteResource() as its memory resource.
+// NOLINTNEXTLINE(google-runtime-references)
+static void DistinctDefaultAllocator(benchmark::State &state) {
+  query::AstStorage ast;
+  query::Parameters parameters;
+  database::GraphDb db;
+  // Commit the vertices before opening the accessor that runs the query.
+  AddVertices(&db, state.range(0));
+  auto dba = db.Access();
+  auto query_string = "MATCH (s) RETURN DISTINCT s";
+  auto *cypher_query = ParseCypherQuery(query_string, &ast);
+  auto symbol_table = query::MakeSymbolTable(cypher_query);
+  auto context =
+      query::plan::MakePlanningContext(&ast, &symbol_table, cypher_query, &dba);
+  auto plan_and_cost =
+      query::plan::MakeLogicalPlan(&context, parameters, false);
+  ResultStreamFaker<query::TypedValue> results;
+  query::Frame frame(symbol_table.max_position());
+  // Nothing should be used from the EvaluationContext, so leave it empty.
+  query::EvaluationContext evaluation_context;
+  while (state.KeepRunning()) {
+    query::ExecutionContext execution_context{&dba, symbol_table,
+                                              evaluation_context};
+    auto cursor =
+        plan_and_cost.first->MakeCursor(&dba, utils::NewDeleteResource());
+    while (cursor->Pull(frame, execution_context))
+      ;
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK(DistinctDefaultAllocator)
+    ->Range(1024, 1U << 21U)
+    ->Unit(benchmark::kMicrosecond);
+
+// Same workload as DistinctDefaultAllocator, but each iteration creates the
+// cursor on a fresh utils::MonotonicBufferResource.
+// NOLINTNEXTLINE(google-runtime-references)
+static void DistinctLinearAllocator(benchmark::State &state) {
+  query::AstStorage ast;
+  query::Parameters parameters;
+  database::GraphDb db;
+  // Commit the vertices before opening the accessor that runs the query, as
+  // DistinctDefaultAllocator does; an earlier accessor may not see them.
+  AddVertices(&db, state.range(0));
+  auto dba = db.Access();
+  auto query_string = "MATCH (s) RETURN DISTINCT s";
+  auto *cypher_query = ParseCypherQuery(query_string, &ast);
+  auto symbol_table = query::MakeSymbolTable(cypher_query);
+  auto context =
+      query::plan::MakePlanningContext(&ast, &symbol_table, cypher_query, &dba);
+  auto plan_and_cost =
+      query::plan::MakeLogicalPlan(&context, parameters, false);
+  ResultStreamFaker<query::TypedValue> results;
+  query::Frame frame(symbol_table.max_position());
+  // Nothing should be used from the EvaluationContext, so leave it empty.
+  query::EvaluationContext evaluation_context;
+  while (state.KeepRunning()) {
+    query::ExecutionContext execution_context{&dba, symbol_table,
+                                              evaluation_context};
+    utils::MonotonicBufferResource memory(1 * 1024 * 1024);
+    auto cursor = plan_and_cost.first->MakeCursor(&dba, &memory);
+    while (cursor->Pull(frame, execution_context))
+      ;
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK(DistinctLinearAllocator)
+    ->Range(1024, 1U << 21U)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_MAIN();