2023-03-07 07:28:41 +08:00
|
|
|
// Copyright 2023 Memgraph Ltd.
|
2021-10-26 14:53:56 +08:00
|
|
|
//
|
|
|
|
// Use of this software is governed by the Business Source License
|
|
|
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
|
|
|
// License, and you may not use this file except in compliance with the Business Source License.
|
|
|
|
//
|
|
|
|
// As of the Change Date specified in that file, in accordance with
|
|
|
|
// the Business Source License, use of this software will be governed
|
|
|
|
// by the Apache License, Version 2.0, included in the file
|
|
|
|
// licenses/APL.txt.
|
|
|
|
|
2017-06-01 18:09:18 +08:00
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <memory>
|
|
|
|
|
2019-11-22 00:24:01 +08:00
|
|
|
#include "query/db_accessor.hpp"
|
2017-06-01 18:09:18 +08:00
|
|
|
#include "query/frontend/ast/ast.hpp"
|
|
|
|
#include "query/frontend/semantic/symbol_table.hpp"
|
|
|
|
#include "query/plan/cost_estimator.hpp"
|
|
|
|
#include "query/plan/operator.hpp"
|
2023-06-29 17:44:55 +08:00
|
|
|
#include "storage/v2/inmemory/storage.hpp"
|
2019-11-22 00:24:01 +08:00
|
|
|
#include "storage/v2/storage.hpp"
|
2017-06-01 18:09:18 +08:00
|
|
|
|
2022-02-22 20:33:45 +08:00
|
|
|
using namespace memgraph::query;
|
|
|
|
using namespace memgraph::query::plan;
|
2017-06-01 18:09:18 +08:00
|
|
|
|
2022-02-22 20:33:45 +08:00
|
|
|
using CardParam = CostEstimator<memgraph::query::DbAccessor>::CardParam;
|
|
|
|
using CostParam = CostEstimator<memgraph::query::DbAccessor>::CostParam;
|
|
|
|
using MiscParam = CostEstimator<memgraph::query::DbAccessor>::MiscParam;
|
2017-06-01 18:09:18 +08:00
|
|
|
|
|
|
|
/** A fixture for cost estimation. Sets up the database
|
|
|
|
* and accessor (adds some vertices). Provides convenience
|
|
|
|
* functions for creating the logical plan. Note that the
|
|
|
|
* resulting plan is NOT fit for execution, only for cost
|
|
|
|
* estimation testing. */
|
|
|
|
class QueryCostEstimator : public ::testing::Test {
|
|
|
|
protected:
|
2023-06-29 17:44:55 +08:00
|
|
|
std::unique_ptr<memgraph::storage::Storage> db = std::make_unique<memgraph::storage::InMemoryStorage>();
|
|
|
|
std::optional<std::unique_ptr<memgraph::storage::Storage::Accessor>> storage_dba;
|
2022-02-22 20:33:45 +08:00
|
|
|
std::optional<memgraph::query::DbAccessor> dba;
|
2023-06-29 17:44:55 +08:00
|
|
|
memgraph::storage::LabelId label = db->NameToLabel("label");
|
|
|
|
memgraph::storage::PropertyId property = db->NameToProperty("property");
|
2017-06-01 18:09:18 +08:00
|
|
|
|
|
|
|
// we incrementally build the logical operator plan
|
|
|
|
// start it off with Once
|
|
|
|
std::shared_ptr<LogicalOperator> last_op_ = std::make_shared<Once>();
|
|
|
|
|
2018-05-22 22:45:52 +08:00
|
|
|
AstStorage storage_;
|
2017-06-01 18:09:18 +08:00
|
|
|
SymbolTable symbol_table_;
|
2017-09-15 20:52:02 +08:00
|
|
|
Parameters parameters_;
|
2017-06-01 18:09:18 +08:00
|
|
|
int symbol_count = 0;
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
void SetUp() {
|
2023-10-05 22:58:39 +08:00
|
|
|
{
|
|
|
|
auto unique_acc = db->UniqueAccess();
|
|
|
|
ASSERT_FALSE(unique_acc->CreateIndex(label).HasError());
|
|
|
|
ASSERT_FALSE(unique_acc->Commit().HasError());
|
|
|
|
}
|
|
|
|
{
|
|
|
|
auto unique_acc = db->UniqueAccess();
|
|
|
|
ASSERT_FALSE(unique_acc->CreateIndex(label, property).HasError());
|
|
|
|
ASSERT_FALSE(unique_acc->Commit().HasError());
|
|
|
|
}
|
2023-06-29 17:44:55 +08:00
|
|
|
storage_dba.emplace(db->Access());
|
|
|
|
dba.emplace(storage_dba->get());
|
2017-08-03 20:18:19 +08:00
|
|
|
}
|
|
|
|
|
2021-02-18 22:32:43 +08:00
|
|
|
Symbol NextSymbol() { return symbol_table_.CreateSymbol("Symbol" + std::to_string(symbol_count++), true); }
|
2017-06-01 18:09:18 +08:00
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
/** Adds the given number of vertices to the DB, of which
|
|
|
|
* the given numbers are labeled and have a property set. */
|
2021-02-18 22:32:43 +08:00
|
|
|
void AddVertices(int vertex_count, int labeled_count, int property_count = 0) {
|
2017-06-01 18:09:18 +08:00
|
|
|
for (int i = 0; i < vertex_count; i++) {
|
2019-11-22 00:24:01 +08:00
|
|
|
auto vertex = dba->InsertVertex();
|
|
|
|
if (i < labeled_count) {
|
|
|
|
ASSERT_TRUE(vertex.AddLabel(label).HasValue());
|
|
|
|
}
|
|
|
|
if (i < property_count) {
|
2022-02-22 20:33:45 +08:00
|
|
|
ASSERT_TRUE(vertex.SetProperty(property, memgraph::storage::PropertyValue(i)).HasValue());
|
2019-11-22 00:24:01 +08:00
|
|
|
}
|
2017-06-01 18:09:18 +08:00
|
|
|
}
|
|
|
|
|
2019-11-22 00:24:01 +08:00
|
|
|
dba->AdvanceCommand();
|
2017-06-01 18:09:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
auto Cost() {
|
2023-06-28 00:06:20 +08:00
|
|
|
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_);
|
2017-06-01 18:09:18 +08:00
|
|
|
last_op_->Accept(cost_estimator);
|
|
|
|
return cost_estimator.cost();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TLogicalOperator, typename... TArgs>
|
|
|
|
void MakeOp(TArgs... args) {
|
|
|
|
last_op_ = std::make_shared<TLogicalOperator>(args...);
|
|
|
|
}
|
2017-08-03 20:18:19 +08:00
|
|
|
|
|
|
|
template <typename TValue>
|
|
|
|
Expression *Literal(TValue value) {
|
|
|
|
return storage_.Create<PrimitiveLiteral>(value);
|
|
|
|
}
|
|
|
|
|
2019-08-28 19:10:27 +08:00
|
|
|
template <typename TValue>
|
|
|
|
Expression *Parameter(TValue value) {
|
2017-09-15 20:52:02 +08:00
|
|
|
int token_position = parameters_.size();
|
2022-02-22 20:33:45 +08:00
|
|
|
parameters_.Add(token_position, memgraph::storage::PropertyValue(value));
|
2017-09-15 20:52:02 +08:00
|
|
|
return storage_.Create<ParameterLookup>(token_position);
|
|
|
|
}
|
|
|
|
|
2022-02-22 20:33:45 +08:00
|
|
|
auto InclusiveBound(Expression *expression) {
|
|
|
|
return std::make_optional(memgraph::utils::MakeBoundInclusive(expression));
|
|
|
|
};
|
2017-08-03 20:18:19 +08:00
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
const std::nullopt_t nullopt = std::nullopt;
|
2017-06-01 18:09:18 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Expects that the estimated cost of the current plan equals COST (compared
// as floats). COST is multiplied by 1 to avoid a linker error (possibly fixed
// in Clang >= 3.81) - presumably because the cost parameters are static
// constants that would otherwise be bound by reference. The argument is
// parenthesized so the multiplication applies to the whole expression.
#define EXPECT_COST(COST) EXPECT_FLOAT_EQ(Cost(), 1 * (COST))
|
|
|
|
|
|
|
|
// The fixture's initial plan consists of a single Once operator.
TEST_F(QueryCostEstimator, Once) {
  EXPECT_COST(0);
}
|
|
|
|
|
|
|
|
// Scanning all vertices is expected to cost kScanAll per vertex in the DB.
TEST_F(QueryCostEstimator, ScanAll) {
  // 100 vertices, 30 of them labeled, 20 of them with the property set.
  AddVertices(100, 30, 20);

  const auto scanned = NextSymbol();
  MakeOp<ScanAll>(last_op_, scanned);

  EXPECT_COST(100 * CostParam::kScanAll);
}
|
|
|
|
|
|
|
|
// Scanning by label is expected to cost kScanAllByLabel per labeled vertex.
TEST_F(QueryCostEstimator, ScanAllByLabelCardinality) {
  // 100 vertices, 30 of them labeled, 20 of them with the property set.
  AddVertices(100, 30, 20);

  const auto scanned = NextSymbol();
  MakeOp<ScanAllByLabel>(last_op_, scanned, label);

  EXPECT_COST(30 * CostParam::kScanAllByLabel);
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// A label+property lookup by a constant value (given either as a literal or
// as a query parameter) is expected to match exactly one vertex here.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyValueConstant) {
  AddVertices(100, 30, 20);

  Expression *const lookup_values[] = {Literal(12), Parameter(12)};
  for (Expression *lookup_value : lookup_values) {
    MakeOp<ScanAllByLabelPropertyValue>(nullptr, NextSymbol(), label, property, "property", lookup_value);
    EXPECT_COST(1 * CostParam::MakeScanAllByLabelPropertyValue);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// A label+property lookup by a non-trivial (unary plus) expression is
// expected to be estimated with the generic filter cardinality.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyValueConstExpr) {
  AddVertices(100, 30, 20);

  Expression *const operands[] = {Literal(12), Parameter(12)};
  for (Expression *operand : operands) {
    // once we make expression const-folding this test case will fail
    auto *unary_plus = storage_.Create<UnaryPlusOperator>(operand);
    MakeOp<ScanAllByLabelPropertyValue>(nullptr, NextSymbol(), label, property, "property", unary_plus);
    EXPECT_COST(20 * CardParam::kFilter * CostParam::MakeScanAllByLabelPropertyValue);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// A range scan with only an inclusive upper bound of 12 over property values
// 0..19 is expected to be estimated at 13 vertices.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeUpperConstant) {
  AddVertices(100, 30, 20);

  Expression *const bound_values[] = {Literal(12), Parameter(12)};
  for (Expression *bound_value : bound_values) {
    const auto upper_bound = InclusiveBound(bound_value);
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, NextSymbol(), label, property, "property", nullopt, upper_bound);
    // cardinality estimation is exact for very small indexes
    EXPECT_COST(13 * CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// A range scan with only an inclusive lower bound of 17 over property values
// 0..19 is expected to be estimated at 3 vertices.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeLowerConstant) {
  AddVertices(100, 30, 20);

  Expression *const bound_values[] = {Literal(17), Parameter(17)};
  for (Expression *bound_value : bound_values) {
    const auto lower_bound = InclusiveBound(bound_value);
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, NextSymbol(), label, property, "property", lower_bound, nullopt);
    // cardinality estimation is exact for very small indexes
    EXPECT_COST(3 * CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// A range scan whose lower bound is a non-trivial (unary plus) expression is
// expected to be estimated with the generic filter cardinality.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeConstExpr) {
  AddVertices(100, 30, 20);

  Expression *const operands[] = {Literal(12), Parameter(12)};
  for (Expression *operand : operands) {
    // The bound has to be built over the base Expression pointer type.
    Expression *unary_plus = storage_.Create<UnaryPlusOperator>(operand);
    auto lower_bound = InclusiveBound(unary_plus);
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, NextSymbol(), label, property, "property", lower_bound, nullopt);
    EXPECT_COST(20 * CardParam::kFilter * CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
|
|
|
// A single Expand on top of Once is expected to cost the expand cardinality
// multiplied by the expand cost.
TEST_F(QueryCostEstimator, Expand) {
  const std::vector<memgraph::storage::EdgeTypeId> no_edge_types{};

  MakeOp<Expand>(last_op_, NextSymbol(), NextSymbol(), NextSymbol(), EdgeAtom::Direction::IN, no_edge_types, false,
                 memgraph::storage::View::OLD);

  EXPECT_COST(CardParam::kExpand * CostParam::kExpand);
}
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
// A single ExpandVariable on top of Once is expected to cost the variable
// expand cardinality multiplied by the variable expand cost.
TEST_F(QueryCostEstimator, ExpandVariable) {
  const std::vector<memgraph::storage::EdgeTypeId> no_edge_types{};
  const ExpansionLambda expansion_lambda{NextSymbol(), NextSymbol(), nullptr};

  MakeOp<ExpandVariable>(last_op_, NextSymbol(), NextSymbol(), NextSymbol(), EdgeAtom::Type::DEPTH_FIRST,
                         EdgeAtom::Direction::IN, no_edge_types, false, nullptr, nullptr, false, expansion_lambda,
                         std::nullopt, std::nullopt);

  EXPECT_COST(CardParam::kExpandVariable * CostParam::kExpandVariable);
}
|
|
|
|
|
2022-04-11 18:55:34 +08:00
|
|
|
// Foreach over a list literal is expected to cost kForeach per list element.
TEST_F(QueryCostEstimator, ForeachListLiteral) {
  constexpr size_t element_count = 10;

  // The operator run by Foreach: a node creation fed by Once.
  std::shared_ptr<LogicalOperator> create_node =
      std::make_shared<CreateNode>(std::make_shared<Once>(), NodeCreationInfo{});
  auto *list_literal = storage_.Create<ListLiteral>(std::vector<Expression *>(element_count, nullptr));

  MakeOp<memgraph::query::plan::Foreach>(last_op_, create_node, list_literal, NextSymbol());

  EXPECT_COST(CostParam::kForeach * element_count);
}
|
|
|
|
|
|
|
|
// Foreach over an expression that is not a list literal is expected to fall
// back to the kForeachNoLiteral size.
TEST_F(QueryCostEstimator, Foreach) {
  // The operator run by Foreach: a node creation fed by Once.
  std::shared_ptr<LogicalOperator> create_node =
      std::make_shared<CreateNode>(std::make_shared<Once>(), NodeCreationInfo{});
  auto *identifier = storage_.Create<Identifier>();

  MakeOp<memgraph::query::plan::Foreach>(last_op_, create_node, identifier, NextSymbol());

  EXPECT_COST(CostParam::kForeach * MiscParam::kForeachNoLiteral);
}
|
2023-03-31 21:24:02 +08:00
|
|
|
|
|
|
|
// Apply with a ScanAll on both the input and the subquery branch.
TEST_F(QueryCostEstimator, SubqueryCartesian) {
  const int vertex_count = 4;
  AddVertices(vertex_count, 0, 0);

  std::shared_ptr<LogicalOperator> outer_scan = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
  std::shared_ptr<LogicalOperator> inner_scan = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
  MakeOp<memgraph::query::plan::Apply>(outer_scan, inner_scan, true);

  EXPECT_COST(CostParam::kSubquery * vertex_count * vertex_count + vertex_count);
}
|
|
|
|
|
|
|
|
// Apply with Once as the input branch and a ScanAll as the subquery branch.
TEST_F(QueryCostEstimator, UnitSubquery) {
  const int vertex_count = 4;
  AddVertices(vertex_count, 0, 0);

  std::shared_ptr<LogicalOperator> unit_input = std::make_shared<Once>();
  std::shared_ptr<LogicalOperator> inner_scan = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
  MakeOp<memgraph::query::plan::Apply>(unit_input, inner_scan, true);

  EXPECT_COST(CostParam::kSubquery * vertex_count);
}
|
|
|
|
|
|
|
|
// Union of two ScanAll branches is expected to cost kUnion per row produced
// by either branch.
TEST_F(QueryCostEstimator, Union) {
  const int vertex_count = 4;
  AddVertices(vertex_count, 0, 0);

  std::vector<Symbol> union_symbols{NextSymbol()};
  std::shared_ptr<LogicalOperator> left_scan = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
  std::shared_ptr<LogicalOperator> right_scan = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());

  const auto left_symbols = left_scan->OutputSymbols(symbol_table_);
  const auto right_symbols = right_scan->OutputSymbols(symbol_table_);
  MakeOp<memgraph::query::plan::Union>(left_scan, right_scan, union_symbols, left_symbols, right_symbols);

  EXPECT_COST(CostParam::kUnion * (vertex_count + vertex_count));
}
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
// Helper for testing an operation's cost and cardinality.
// Only for operations that first increment cost, then modify cardinality.
// OP is evaluated twice: after the first evaluation the plan is expected to
// cost OP_COST_PARAM, after the second one the cost of the second operator
// (scaled by the cardinality of the first) is expected to be added on top.
// Intentionally a macro (instead of function) for better test feedback.
// Wrapped in do/while so that it behaves as a single statement.
#define TEST_OP(OP, OP_COST_PARAM, OP_CARD_PARAM)                               \
  do {                                                                          \
    OP;                                                                         \
    EXPECT_COST(OP_COST_PARAM);                                                 \
    OP;                                                                         \
    EXPECT_COST((OP_COST_PARAM) + (OP_CARD_PARAM) * (OP_COST_PARAM));           \
  } while (0)
|
|
|
|
|
|
|
|
// Checks the cost and cardinality contribution of stacked Filter operators.
TEST_F(QueryCostEstimator, Filter) {
  using OperatorList = std::vector<std::shared_ptr<LogicalOperator>>;

  TEST_OP(MakeOp<Filter>(last_op_, OperatorList{}, Literal(true)), CostParam::kFilter, CardParam::kFilter);
}
|
|
|
|
|
2018-11-19 17:58:49 +08:00
|
|
|
// Checks the cost and cardinality contribution of stacked
// EdgeUniquenessFilter operators.
TEST_F(QueryCostEstimator, EdgeUniquenessFilter) {
  using SymbolList = std::vector<Symbol>;

  TEST_OP(MakeOp<EdgeUniquenessFilter>(last_op_, NextSymbol(), SymbolList()), CostParam::kEdgeUniquenessFilter,
          CardParam::kEdgeUniquenessFilter);
}
|
|
|
|
|
|
|
|
// Unwinding a list literal is expected to use the literal's element count as
// the cardinality.
TEST_F(QueryCostEstimator, UnwindLiteral) {
  using ExpressionList = std::vector<Expression *>;

  TEST_OP(MakeOp<memgraph::query::plan::Unwind>(last_op_, storage_.Create<ListLiteral>(ExpressionList(7, nullptr)),
                                                NextSymbol()),
          CostParam::kUnwind, 7);
}
|
|
|
|
|
|
|
|
// Unwinding something that is not a list literal (here: no expression at all)
// is expected to fall back to the kUnwindNoLiteral cardinality.
TEST_F(QueryCostEstimator, UnwindNoLiteral) {
  TEST_OP(
      MakeOp<memgraph::query::plan::Unwind>(last_op_, nullptr, NextSymbol()),
      CostParam::kUnwind,
      MiscParam::kUnwindNoLiteral);
}
|
|
|
|
|
|
|
|
// The helper macros are only meant for the tests above.
#undef EXPECT_COST
#undef TEST_OP
|
|
|
|
//
// TODO: test the cost of a plan with ScanAll, Expand, Accumulate, Limit
// vs. the cost of a plan with ScanAll, Expand, Limit
|