Use and bench custom allocator in Distinct
Summary: According to the benchmark added in this commit, using MonotonicBufferResource significantly improves the performance of Distinct. The benchmark setup fills the database with a number of vertices determined by the benchmark state; no edges are created. DISTINCT is then run over those vertices. Since each vertex is unique, every row ends up stored in `DistinctCursor::seen_rows_`, which is backed by a MemoryResource. On my machine, this setup runs 10 times faster with MonotonicBufferResource. Reviewers: mferencevic, mtomic, msantl Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1894
This commit is contained in:
parent
b6ca42176a
commit
394039a05e
@ -3042,11 +3042,23 @@ void Unwind::UnwindCursor::Reset() {
|
||||
input_value_it_ = input_value_.end();
|
||||
}
|
||||
|
||||
struct TypedValueVectorAllocatorEqual {
|
||||
bool operator()(
|
||||
const std::vector<TypedValue, utils::Allocator<TypedValue>> &left,
|
||||
const std::vector<TypedValue, utils::Allocator<TypedValue>> &right)
|
||||
const {
|
||||
return std::equal(left.begin(), left.end(), right.begin(), right.end(),
|
||||
TypedValue::BoolEqual());
|
||||
}
|
||||
};
|
||||
|
||||
class DistinctCursor : public Cursor {
|
||||
public:
|
||||
DistinctCursor(const Distinct &self, database::GraphDbAccessor *db,
|
||||
utils::MemoryResource *mem)
|
||||
: self_(self), input_cursor_(self.input_->MakeCursor(db, mem)) {}
|
||||
: self_(self),
|
||||
input_cursor_(self.input_->MakeCursor(db, mem)),
|
||||
seen_rows_(mem) {}
|
||||
|
||||
bool Pull(Frame &frame, ExecutionContext &context) override {
|
||||
SCOPED_PROFILE_OP("Distinct");
|
||||
@ -3054,7 +3066,8 @@ class DistinctCursor : public Cursor {
|
||||
while (true) {
|
||||
if (!input_cursor_->Pull(frame, context)) return false;
|
||||
|
||||
std::vector<TypedValue> row;
|
||||
std::vector<TypedValue, utils::Allocator<TypedValue>> row(
|
||||
seen_rows_.get_allocator().GetMemoryResource());
|
||||
row.reserve(self_.value_symbols_.size());
|
||||
for (const auto &symbol : self_.value_symbols_)
|
||||
row.emplace_back(frame[symbol]);
|
||||
@ -3073,12 +3086,14 @@ class DistinctCursor : public Cursor {
|
||||
const Distinct &self_;
|
||||
const std::unique_ptr<Cursor> input_cursor_;
|
||||
// a set of already seen rows
|
||||
std::unordered_set<std::vector<TypedValue>,
|
||||
// use FNV collection hashing specialized for a vector of
|
||||
// TypedValues
|
||||
utils::FnvCollection<std::vector<TypedValue>, TypedValue,
|
||||
TypedValue::Hash>,
|
||||
TypedValueVectorEqual>
|
||||
std::unordered_set<
|
||||
std::vector<TypedValue, utils::Allocator<TypedValue>>,
|
||||
// use FNV collection hashing specialized for a vector of TypedValue
|
||||
utils::FnvCollection<
|
||||
std::vector<TypedValue, utils::Allocator<TypedValue>>, TypedValue,
|
||||
TypedValue::Hash>,
|
||||
TypedValueVectorAllocatorEqual,
|
||||
utils::Allocator<std::vector<TypedValue, utils::Allocator<TypedValue>>>>
|
||||
seen_rows_;
|
||||
};
|
||||
|
||||
|
@ -36,6 +36,9 @@ target_link_libraries(${test_prefix}map_concurrent mg-single-node kvstore_dummy_
|
||||
add_benchmark(data_structures/ring_buffer.cpp)
|
||||
target_link_libraries(${test_prefix}ring_buffer mg-single-node kvstore_dummy_lib)
|
||||
|
||||
add_benchmark(query/execution.cpp)
|
||||
target_link_libraries(${test_prefix}execution mg-single-node kvstore_dummy_lib)
|
||||
|
||||
add_benchmark(query/planner.cpp)
|
||||
target_link_libraries(${test_prefix}planner mg-single-node kvstore_dummy_lib)
|
||||
|
||||
|
99
tests/benchmark/query/execution.cpp
Normal file
99
tests/benchmark/query/execution.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
#include <string>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#include "communication/result_stream_faker.hpp"
|
||||
#include "database/graph_db.hpp"
|
||||
#include "database/graph_db_accessor.hpp"
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
#include "query/frontend/semantic/required_privileges.hpp"
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/interpreter.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
|
||||
// Inserts `vertex_count` vertices into `db` through a single accessor and
// commits them. A non-positive `vertex_count` inserts nothing but still
// commits.
static void AddVertices(database::GraphDb *db, int vertex_count) {
  auto accessor = db->Access();
  for (int remaining = vertex_count; remaining > 0; --remaining) {
    accessor.InsertVertex();
  }
  accessor.Commit();
}
|
||||
|
||||
// Parses `query_string` and converts the resulting antlr4 tree into a
// Memgraph AST whose nodes are stored in `ast`. Query caching is disabled in
// the parsing context. Returns the root of the AST downcast to
// query::CypherQuery.
static query::CypherQuery *ParseCypherQuery(const std::string &query_string,
                                            query::AstStorage *ast) {
  query::frontend::opencypher::Parser antlr_parser(query_string);

  query::frontend::ParsingContext parsing_context;
  parsing_context.is_query_cached = false;

  // Walk the antlr4 tree to build the Memgraph AST inside `ast`.
  query::frontend::CypherMainVisitor ast_builder(parsing_context, ast);
  ast_builder.visit(antlr_parser.tree());

  query::Interpreter::ParsedQuery parsed{
      ast_builder.query(), query::GetRequiredPrivileges(ast_builder.query())};
  return utils::Downcast<query::CypherQuery>(parsed.query);
}
|
||||
|
||||
// NOLINTNEXTLINE(google-runtime-references)
|
||||
static void DistinctDefaultAllocator(benchmark::State &state) {
|
||||
query::AstStorage ast;
|
||||
query::Parameters parameters;
|
||||
database::GraphDb db;
|
||||
AddVertices(&db, state.range(0));
|
||||
auto dba = db.Access();
|
||||
auto query_string = "MATCH (s) RETURN DISTINCT s";
|
||||
auto *cypher_query = ParseCypherQuery(query_string, &ast);
|
||||
auto symbol_table = query::MakeSymbolTable(cypher_query);
|
||||
auto context =
|
||||
query::plan::MakePlanningContext(&ast, &symbol_table, cypher_query, &dba);
|
||||
auto plan_and_cost =
|
||||
query::plan::MakeLogicalPlan(&context, parameters, false);
|
||||
ResultStreamFaker<query::TypedValue> results;
|
||||
query::Frame frame(symbol_table.max_position());
|
||||
// Nothing should be used from the EvaluationContext, so leave it empty.
|
||||
query::EvaluationContext evaluation_context;
|
||||
while (state.KeepRunning()) {
|
||||
query::ExecutionContext execution_context{&dba, symbol_table,
|
||||
evaluation_context};
|
||||
auto cursor = plan_and_cost.first->MakeCursor(dba);
|
||||
while (cursor->Pull(frame, execution_context))
|
||||
;
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
BENCHMARK(DistinctDefaultAllocator)
|
||||
->Range(1024, 1U << 21U)
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
|
||||
// NOLINTNEXTLINE(google-runtime-references)
|
||||
static void DistinctLinearAllocator(benchmark::State &state) {
|
||||
query::AstStorage ast;
|
||||
query::Parameters parameters;
|
||||
database::GraphDb db;
|
||||
auto dba = db.Access();
|
||||
AddVertices(&db, state.range(0));
|
||||
auto query_string = "MATCH (s) RETURN DISTINCT s";
|
||||
auto *cypher_query = ParseCypherQuery(query_string, &ast);
|
||||
auto symbol_table = query::MakeSymbolTable(cypher_query);
|
||||
auto context =
|
||||
query::plan::MakePlanningContext(&ast, &symbol_table, cypher_query, &dba);
|
||||
auto plan_and_cost =
|
||||
query::plan::MakeLogicalPlan(&context, parameters, false);
|
||||
ResultStreamFaker<query::TypedValue> results;
|
||||
query::Frame frame(symbol_table.max_position());
|
||||
// Nothing should be used from the EvaluationContext, so leave it empty.
|
||||
query::EvaluationContext evaluation_context;
|
||||
while (state.KeepRunning()) {
|
||||
query::ExecutionContext execution_context{&dba, symbol_table,
|
||||
evaluation_context};
|
||||
utils::MonotonicBufferResource memory(1 * 1024 * 1024);
|
||||
auto cursor = plan_and_cost.first->MakeCursor(&dba, &memory);
|
||||
while (cursor->Pull(frame, execution_context))
|
||||
;
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
BENCHMARK(DistinctLinearAllocator)
|
||||
->Range(1024, 1U << 21U)
|
||||
->Unit(benchmark::kMicrosecond);
|
||||
|
||||
BENCHMARK_MAIN();
|
Loading…
Reference in New Issue
Block a user