2024-01-06 00:42:54 +08:00
|
|
|
// Copyright 2024 Memgraph Ltd.
|
2021-10-26 14:53:56 +08:00
|
|
|
//
|
|
|
|
// Use of this software is governed by the Business Source License
|
|
|
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
|
|
|
// License, and you may not use this file except in compliance with the Business Source License.
|
|
|
|
//
|
|
|
|
// As of the Change Date specified in that file, in accordance with
|
|
|
|
// the Business Source License, use of this software will be governed
|
|
|
|
// by the Apache License, Version 2.0, included in the file
|
|
|
|
// licenses/APL.txt.
|
|
|
|
|
2022-01-18 19:51:27 +08:00
|
|
|
#include <benchmark/benchmark.h>
|
2021-09-09 18:39:13 +08:00
|
|
|
#include <string>
|
|
|
|
#include <variant>
|
2017-08-22 22:24:40 +08:00
|
|
|
|
|
|
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
2017-09-07 22:23:59 +08:00
|
|
|
#include "query/plan/cost_estimator.hpp"
|
2017-08-22 22:24:40 +08:00
|
|
|
#include "query/plan/planner.hpp"
|
2017-09-07 22:23:59 +08:00
|
|
|
#include "query/plan/vertex_count_cache.hpp"
|
2023-06-29 17:44:55 +08:00
|
|
|
#include "storage/v2/inmemory/storage.hpp"
|
2017-08-22 22:24:40 +08:00
|
|
|
|
2024-01-06 00:42:54 +08:00
|
|
|
using memgraph::replication::ReplicationRole;
|
|
|
|
|
2017-08-22 22:24:40 +08:00
|
|
|
// Add chained MATCH (node1) -- (node2), MATCH (node2) -- (node3) ... clauses.
|
2022-02-22 20:33:45 +08:00
|
|
|
static memgraph::query::CypherQuery *AddChainedMatches(int num_matches, memgraph::query::AstStorage &storage) {
|
|
|
|
auto *query = storage.Create<memgraph::query::CypherQuery>();
|
2017-08-22 22:24:40 +08:00
|
|
|
for (int i = 0; i < num_matches; ++i) {
|
2022-02-22 20:33:45 +08:00
|
|
|
auto *match = storage.Create<memgraph::query::Match>();
|
|
|
|
auto *pattern = storage.Create<memgraph::query::Pattern>();
|
|
|
|
auto *single_query = storage.Create<memgraph::query::SingleQuery>();
|
|
|
|
pattern->identifier_ = storage.Create<memgraph::query::Identifier>("path");
|
2017-08-22 22:24:40 +08:00
|
|
|
match->patterns_.emplace_back(pattern);
|
|
|
|
std::string node1_name = "node" + std::to_string(i - 1);
|
2021-02-18 22:32:43 +08:00
|
|
|
pattern->atoms_.emplace_back(
|
2022-02-22 20:33:45 +08:00
|
|
|
storage.Create<memgraph::query::NodeAtom>(storage.Create<memgraph::query::Identifier>(node1_name)));
|
|
|
|
pattern->atoms_.emplace_back(storage.Create<memgraph::query::EdgeAtom>(
|
|
|
|
storage.Create<memgraph::query::Identifier>("edge" + std::to_string(i)),
|
|
|
|
memgraph::query::EdgeAtom::Type::SINGLE, memgraph::query::EdgeAtom::Direction::BOTH));
|
|
|
|
pattern->atoms_.emplace_back(storage.Create<memgraph::query::NodeAtom>(
|
|
|
|
storage.Create<memgraph::query::Identifier>("node" + std::to_string(i))));
|
2017-11-29 20:55:02 +08:00
|
|
|
single_query->clauses_.emplace_back(match);
|
2018-10-10 21:19:34 +08:00
|
|
|
query->single_query_ = single_query;
|
2017-08-22 22:24:40 +08:00
|
|
|
}
|
2018-10-10 21:19:34 +08:00
|
|
|
return query;
|
2017-08-22 22:24:40 +08:00
|
|
|
}
|
|
|
|
|
2017-09-07 22:23:59 +08:00
|
|
|
// Benchmark: collect query parts and generate every logical plan (using
// VariableStartPlanner) for the query built by AddChainedMatches, where
// state.range(0) is the requested number of matches.
//
// Building the AST, the symbol table and the planning context happens while
// the timer is paused, so only query part collection and plan generation are
// measured.
static void BM_PlanChainedMatches(benchmark::State &state) {
  namespace query = memgraph::query;
  namespace plan = memgraph::query::plan;

  std::unique_ptr<memgraph::storage::Storage> db = std::make_unique<memgraph::storage::InMemoryStorage>();
  auto storage_accessor = db->Access(ReplicationRole::MAIN);
  query::DbAccessor dba(storage_accessor.get());

  while (state.KeepRunning()) {
    // --- untimed setup ---
    state.PauseTiming();
    query::AstStorage ast_storage;
    const auto num_matches = static_cast<int>(state.range(0));
    auto *cypher_query = AddChainedMatches(num_matches, ast_storage);
    auto symbol_table = query::MakeSymbolTable(cypher_query);
    auto planning_ctx = plan::MakePlanningContext(&ast_storage, &symbol_table, cypher_query, &dba);
    state.ResumeTiming();

    // --- timed section ---
    auto parts = plan::CollectQueryParts(symbol_table, ast_storage, cypher_query);
    if (parts.query_parts.empty()) {
      // Nothing to plan; abort the whole benchmark binary.
      std::exit(EXIT_FAILURE);
    }
    auto generated_plans = plan::MakeLogicalPlanForSingleQuery<plan::VariableStartPlanner>(parts, &planning_ctx);
    for (const auto &generated_plan : generated_plans) {
      // Exhaust through all generated plans, since they are lazily generated.
      benchmark::DoNotOptimize(generated_plan.get());
    }
  }
}
|
2017-08-22 22:24:40 +08:00
|
|
|
|
2021-02-18 22:32:43 +08:00
|
|
|
// Run BM_PlanChainedMatches for match counts from 50 to 400, multiplying by 2
// between steps; results are reported in milliseconds.
BENCHMARK(BM_PlanChainedMatches)
    ->Unit(benchmark::kMillisecond)
    ->RangeMultiplier(2)
    ->Range(50, 400);
|
2017-08-22 22:24:40 +08:00
|
|
|
|
2022-02-22 20:33:45 +08:00
|
|
|
// Build one query consisting of `num_matches` MATCH clauses, each matching a
// single node. Clause `i` matches the node named "node<i-1>", gives it the
// label `label` and sets its `property` entry to the literal `i`. Every
// pattern is also given the identifier "path".
//
// All AST nodes are created through `storage`. Returns the created query.
static memgraph::query::CypherQuery *AddIndexedMatches(int num_matches, const std::string &label,
                                                       const std::string &property,
                                                       memgraph::query::AstStorage &storage) {
  auto *query = storage.Create<memgraph::query::CypherQuery>();
  // A single SingleQuery collects every MATCH clause. Previously a fresh
  // SingleQuery was created on each iteration and assigned to
  // `query->single_query_`, so only the last MATCH survived and `num_matches`
  // had no influence on the query being planned.
  auto *single_query = storage.Create<memgraph::query::SingleQuery>();
  query->single_query_ = single_query;
  for (int i = 0; i < num_matches; ++i) {
    auto *match = storage.Create<memgraph::query::Match>();
    auto *pattern = storage.Create<memgraph::query::Pattern>();
    pattern->identifier_ = storage.Create<memgraph::query::Identifier>("path");
    match->patterns_.emplace_back(pattern);

    const std::string node_name = "node" + std::to_string(i - 1);
    auto *node = storage.Create<memgraph::query::NodeAtom>(storage.Create<memgraph::query::Identifier>(node_name));
    node->labels_.emplace_back(storage.GetLabelIx(label));
    // `properties_` is a variant-like member; alternative 0 is used here as a
    // map from property index to the expression the property must equal.
    std::get<0>(node->properties_)[storage.GetPropertyIx(property)] =
        storage.Create<memgraph::query::PrimitiveLiteral>(i);
    pattern->atoms_.emplace_back(node);

    single_query->clauses_.emplace_back(match);
  }
  return query;
}
|
|
|
|
|
2022-02-22 20:33:45 +08:00
|
|
|
static auto CreateIndexedVertices(int index_count, int vertex_count, memgraph::storage::Storage *db) {
|
2019-11-22 00:24:01 +08:00
|
|
|
auto label = db->NameToLabel("label");
|
|
|
|
auto prop = db->NameToProperty("prop");
|
2023-10-05 22:58:39 +08:00
|
|
|
{
|
2024-01-06 00:42:54 +08:00
|
|
|
auto unique_acc = db->UniqueAccess(ReplicationRole::MAIN);
|
2023-10-05 22:58:39 +08:00
|
|
|
[[maybe_unused]] auto _ = unique_acc->CreateIndex(label, prop);
|
|
|
|
}
|
2024-01-06 00:42:54 +08:00
|
|
|
auto dba = db->Access(ReplicationRole::MAIN);
|
2017-09-07 22:23:59 +08:00
|
|
|
for (int vi = 0; vi < vertex_count; ++vi) {
|
|
|
|
for (int index = 0; index < index_count; ++index) {
|
2023-06-29 17:44:55 +08:00
|
|
|
auto vertex = dba->CreateVertex();
|
2021-01-21 22:47:56 +08:00
|
|
|
MG_ASSERT(vertex.AddLabel(label).HasValue());
|
2022-02-22 20:33:45 +08:00
|
|
|
MG_ASSERT(vertex.SetProperty(prop, memgraph::storage::PropertyValue(index)).HasValue());
|
2017-09-07 22:23:59 +08:00
|
|
|
}
|
|
|
|
}
|
2023-06-29 17:44:55 +08:00
|
|
|
MG_ASSERT(!dba->Commit().HasError());
|
Remove GraphDbAccessor and storage types from Ast
Summary:
This diff removes the need for a database when parsing a query and
creating an Ast. Instead of storing storage::{Label,Property,EdgeType}
in Ast nodes, we store the name and an index into all of the names. This
allows for easy creation of a map from {Label,Property,EdgeType} index
into the concrete storage type. Obviously, this comes with a performance
penalty during execution, but it should be minor. The upside is that the
query/frontend minimally depends on storage (PropertyValue), which makes
writing tests easier as well as running them a lot faster (there is no
database setup). This is most noticeable in the ast_serialization test
which took a long time due to start up of a distributed database.
Reviewers: mtomic, llugovic
Reviewed By: mtomic
Subscribers: mferencevic, pullbot
Differential Revision: https://phabricator.memgraph.io/D1774
2019-01-14 21:41:37 +08:00
|
|
|
return std::make_pair("label", "prop");
|
2017-09-07 22:23:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Benchmark: plan a query built by AddIndexedMatches and estimate the cost of
// every generated plan, reading counts through the database accessor.
//
// state.range(0) is the number of indexed matches / distinct property values,
// state.range(1) is the number of vertices created per property value.
// Building the AST and the symbol table is excluded from the measurement;
// creating the planning context, collecting query parts, generating plans and
// estimating their cost are all timed.
static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
  namespace query = memgraph::query;
  namespace plan = memgraph::query::plan;

  std::unique_ptr<memgraph::storage::Storage> db = std::make_unique<memgraph::storage::InMemoryStorage>();

  const auto index_count = static_cast<int>(state.range(0));
  const auto vertex_count = static_cast<int>(state.range(1));

  const auto names = CreateIndexedVertices(index_count, vertex_count, db.get());
  const std::string label = names.first;
  const std::string prop = names.second;

  auto storage_accessor = db->Access(ReplicationRole::MAIN);
  query::DbAccessor dba(storage_accessor.get());
  query::Parameters parameters;

  while (state.KeepRunning()) {
    // --- untimed setup ---
    state.PauseTiming();
    query::AstStorage ast_storage;
    auto *cypher_query = AddIndexedMatches(index_count, label, prop, ast_storage);
    auto symbol_table = query::MakeSymbolTable(cypher_query);
    state.ResumeTiming();

    // --- timed section ---
    auto planning_ctx = plan::MakePlanningContext(&ast_storage, &symbol_table, cypher_query, &dba);
    auto parts = plan::CollectQueryParts(symbol_table, ast_storage, cypher_query);
    if (parts.query_parts.empty()) {
      // Nothing to plan; abort the whole benchmark binary.
      std::exit(EXIT_FAILURE);
    }
    auto generated_plans = plan::MakeLogicalPlanForSingleQuery<plan::VariableStartPlanner>(parts, &planning_ctx);
    for (const auto &generated_plan : generated_plans) {
      plan::EstimatePlanCost(&dba, symbol_table, parameters, *generated_plan);
    }
  }
}
|
|
|
|
|
2021-02-18 22:32:43 +08:00
|
|
|
// Benchmark: same workload as BM_PlanAndEstimateIndexedMatching, but planning
// and cost estimation go through the object returned by MakeVertexCountCache
// (created once, outside the benchmark loop) instead of the database accessor
// directly.
//
// state.range(0) is the number of indexed matches / distinct property values,
// state.range(1) is the number of vertices created per property value.
// Building the AST and the symbol table is excluded from the measurement.
static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State &state) {
  namespace query = memgraph::query;
  namespace plan = memgraph::query::plan;

  std::unique_ptr<memgraph::storage::Storage> db = std::make_unique<memgraph::storage::InMemoryStorage>();

  const auto index_count = static_cast<int>(state.range(0));
  const auto vertex_count = static_cast<int>(state.range(1));

  const auto names = CreateIndexedVertices(index_count, vertex_count, db.get());
  const std::string label = names.first;
  const std::string prop = names.second;

  auto storage_accessor = db->Access(ReplicationRole::MAIN);
  query::DbAccessor dba(storage_accessor.get());
  auto vertex_counts = plan::MakeVertexCountCache(&dba);
  query::Parameters parameters;

  while (state.KeepRunning()) {
    // --- untimed setup ---
    state.PauseTiming();
    query::AstStorage ast_storage;
    auto *cypher_query = AddIndexedMatches(index_count, label, prop, ast_storage);
    auto symbol_table = query::MakeSymbolTable(cypher_query);
    state.ResumeTiming();

    // --- timed section ---
    auto planning_ctx = plan::MakePlanningContext(&ast_storage, &symbol_table, cypher_query, &vertex_counts);
    auto parts = plan::CollectQueryParts(symbol_table, ast_storage, cypher_query);
    if (parts.query_parts.empty()) {
      // Nothing to plan; abort the whole benchmark binary.
      std::exit(EXIT_FAILURE);
    }
    auto generated_plans = plan::MakeLogicalPlanForSingleQuery<plan::VariableStartPlanner>(parts, &planning_ctx);
    for (const auto &generated_plan : generated_plans) {
      plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *generated_plan);
    }
  }
}
|
|
|
|
|
|
|
|
BENCHMARK(BM_PlanAndEstimateIndexedMatching)
|
|
|
|
->RangeMultiplier(4)
|
|
|
|
->Ranges({{1, 100}, {100, 1000}})
|
|
|
|
->Unit(benchmark::kMicrosecond);
|
|
|
|
|
|
|
|
BENCHMARK(BM_PlanAndEstimateIndexedMatchingWithCachedCounts)
|
|
|
|
->RangeMultiplier(4)
|
|
|
|
->Ranges({{1, 100}, {100, 1000}})
|
|
|
|
->Unit(benchmark::kMicrosecond);
|
|
|
|
|
2017-08-22 22:24:40 +08:00
|
|
|
BENCHMARK_MAIN();
|