2017-06-01 18:09:18 +08:00
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <memory>
|
|
|
|
|
2018-10-05 18:37:23 +08:00
|
|
|
#include "database/single_node/graph_db.hpp"
|
|
|
|
#include "database/single_node/graph_db_accessor.hpp"
|
2017-06-01 18:09:18 +08:00
|
|
|
#include "query/frontend/ast/ast.hpp"
|
|
|
|
#include "query/frontend/semantic/symbol_table.hpp"
|
|
|
|
#include "query/plan/cost_estimator.hpp"
|
|
|
|
#include "query/plan/operator.hpp"
|
|
|
|
#include "storage/vertex_accessor.hpp"
|
|
|
|
|
|
|
|
using namespace query;
|
|
|
|
using namespace query::plan;
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
using CardParam = CostEstimator<database::GraphDbAccessor>::CardParam;
|
|
|
|
using CostParam = CostEstimator<database::GraphDbAccessor>::CostParam;
|
|
|
|
using MiscParam = CostEstimator<database::GraphDbAccessor>::MiscParam;
|
2017-06-01 18:09:18 +08:00
|
|
|
|
|
|
|
/** A fixture for cost estimation. Sets up the database
|
|
|
|
* and accessor (adds some vertices). Provides convenience
|
|
|
|
* functions for creating the logical plan. Note that the
|
|
|
|
* resulting plan is NOT fit for execution, only for cost
|
|
|
|
* estimation testing. */
|
|
|
|
class QueryCostEstimator : public ::testing::Test {
|
|
|
|
protected:
|
2018-10-09 17:09:10 +08:00
|
|
|
database::GraphDb db;
|
2019-04-15 17:36:43 +08:00
|
|
|
database::GraphDbAccessor dba{db.Access()};
|
|
|
|
storage::Label label = dba.Label("label");
|
|
|
|
storage::Property property = dba.Property("property");
|
2017-06-01 18:09:18 +08:00
|
|
|
|
|
|
|
// we incrementally build the logical operator plan
|
|
|
|
// start it off with Once
|
|
|
|
std::shared_ptr<LogicalOperator> last_op_ = std::make_shared<Once>();
|
|
|
|
|
2018-05-22 22:45:52 +08:00
|
|
|
AstStorage storage_;
|
2017-06-01 18:09:18 +08:00
|
|
|
SymbolTable symbol_table_;
|
2017-09-15 20:52:02 +08:00
|
|
|
Parameters parameters_;
|
2017-06-01 18:09:18 +08:00
|
|
|
int symbol_count = 0;
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
void SetUp() {
|
|
|
|
// create the index in the current db accessor and then swap it to a new one
|
2019-05-13 23:06:16 +08:00
|
|
|
dba.BuildIndex(label, property);
|
2018-07-26 15:08:21 +08:00
|
|
|
dba = db.Access();
|
2017-08-03 20:18:19 +08:00
|
|
|
}
|
|
|
|
|
2017-06-01 18:09:18 +08:00
|
|
|
Symbol NextSymbol() {
|
|
|
|
return symbol_table_.CreateSymbol("Symbol" + std::to_string(symbol_count++),
|
|
|
|
true);
|
|
|
|
}
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
/** Adds the given number of vertices to the DB, of which
|
|
|
|
* the given numbers are labeled and have a property set. */
|
|
|
|
void AddVertices(int vertex_count, int labeled_count,
|
|
|
|
int property_count = 0) {
|
2017-06-01 18:09:18 +08:00
|
|
|
for (int i = 0; i < vertex_count; i++) {
|
2019-04-15 17:36:43 +08:00
|
|
|
auto vertex = dba.InsertVertex();
|
2017-06-01 18:09:18 +08:00
|
|
|
if (i < labeled_count) vertex.add_label(label);
|
2017-08-03 20:18:19 +08:00
|
|
|
if (i < property_count) vertex.PropsSet(property, i);
|
2017-06-01 18:09:18 +08:00
|
|
|
}
|
|
|
|
|
2019-04-15 17:36:43 +08:00
|
|
|
dba.AdvanceCommand();
|
2017-06-01 18:09:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
auto Cost() {
|
2019-04-15 17:36:43 +08:00
|
|
|
CostEstimator<database::GraphDbAccessor> cost_estimator(&dba, parameters_);
|
2017-06-01 18:09:18 +08:00
|
|
|
last_op_->Accept(cost_estimator);
|
|
|
|
return cost_estimator.cost();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename TLogicalOperator, typename... TArgs>
|
|
|
|
void MakeOp(TArgs... args) {
|
|
|
|
last_op_ = std::make_shared<TLogicalOperator>(args...);
|
|
|
|
}
|
2017-08-03 20:18:19 +08:00
|
|
|
|
|
|
|
template <typename TValue>
|
|
|
|
Expression *Literal(TValue value) {
|
|
|
|
return storage_.Create<PrimitiveLiteral>(value);
|
|
|
|
}
|
|
|
|
|
Clean-up TypedValue misuse
Summary:
In a bunch of places `TypedValue` was used where `PropertyValue` should be. A lot of times it was only because `TypedValue` serialization code could be reused for `PropertyValue`, only without providing callbacks for `VERTEX`, `EDGE` and `PATH`. So first I wrote separate serialization code for `PropertyValue` and put it into storage folder. Then I fixed all the places where `TypedValue` was incorrectly used instead of `PropertyValue`. I also disabled implicit `TypedValue` to `PropertyValue` conversion in hopes of preventing misuse in the future.
After that, I wrote code for `VertexAccessor` and `EdgeAccessor` serialization and put it into `storage` folder because it was almost duplicated in distributed BFS and pull produce RPC messages. On the sender side, some subset of records (old or new or both) is serialized, and on the reciever side, records are deserialized and immediately put into transaction cache.
Then I rewrote the `TypedValue` serialization functions (`SaveCapnpTypedValue` and `LoadCapnpTypedValue`) to not take callbacks for `VERTEX`, `EDGE` and `PATH`, but use accessor serialization functions instead. That means that any code that wants to use `TypedValue` serialization must hold a reference to `GraphDbAccessor` and `DataManager`, so that should make clients reconsider if they really want to use `TypedValue` instead of `PropertyValue`.
Reviewers: teon.banek, msantl
Reviewed By: teon.banek
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D1598
2018-09-13 18:12:07 +08:00
|
|
|
Expression *Parameter(const PropertyValue &value) {
|
2017-09-15 20:52:02 +08:00
|
|
|
int token_position = parameters_.size();
|
|
|
|
parameters_.Add(token_position, value);
|
|
|
|
return storage_.Create<ParameterLookup>(token_position);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto InclusiveBound(Expression *expression) {
|
2019-04-23 17:00:49 +08:00
|
|
|
return std::make_optional(utils::MakeBoundInclusive(expression));
|
2017-08-03 20:18:19 +08:00
|
|
|
};
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
const std::nullopt_t nullopt = std::nullopt;
|
2017-06-01 18:09:18 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Checks that the estimated cost of the current plan equals COST.
// multiply with 1 to avoid linker error (possibly fixed in CLang >= 3.81)
// NOTE(review): presumably the multiplication turns the static constexpr
// parameter into a temporary so it is not bound by reference -- confirm.
// COST is parenthesized so that compound expressions stay intact.
#define EXPECT_COST(COST) EXPECT_FLOAT_EQ(Cost(), 1 * (COST))
|
|
|
|
|
|
|
|
// The initial plan consists only of Once, which is expected to cost nothing.
TEST_F(QueryCostEstimator, Once) {
  EXPECT_COST(0);
}
|
|
|
|
|
|
|
|
// A full scan is expected to cost kScanAll for each vertex in the database.
TEST_F(QueryCostEstimator, ScanAll) {
  constexpr int kVertexCount = 100;
  AddVertices(kVertexCount, /*labeled_count=*/30, /*property_count=*/20);

  const Symbol output_symbol = NextSymbol();
  MakeOp<ScanAll>(last_op_, output_symbol);

  EXPECT_COST(kVertexCount * CostParam::kScanAll);
}
|
|
|
|
|
|
|
|
// A label scan is expected to cost kScanAllByLabel for each labeled vertex.
TEST_F(QueryCostEstimator, ScanAllByLabelCardinality) {
  constexpr int kLabeledCount = 30;
  AddVertices(/*vertex_count=*/100, kLabeledCount, /*property_count=*/20);

  const Symbol output_symbol = NextSymbol();
  MakeOp<ScanAllByLabel>(last_op_, output_symbol, label);

  EXPECT_COST(kLabeledCount * CostParam::kScanAllByLabel);
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// Looking up a constant value (given as a literal or as a parameter) is
// expected to match exactly one vertex, since the property values are unique.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyValueConstant) {
  AddVertices(100, 30, 20);

  Expression *const lookup_values[] = {Literal(12), Parameter(12)};
  for (Expression *lookup_value : lookup_values) {
    const Symbol output_symbol = NextSymbol();
    MakeOp<ScanAllByLabelPropertyValue>(nullptr, output_symbol, label,
                                        property, "property", lookup_value);
    EXPECT_COST(1 * CostParam::MakeScanAllByLabelPropertyValue);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// When the looked-up value is wrapped in an operator, the expected cost is
// the 20 vertices that have the property set, scaled by the filter
// cardinality factor.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyValueConstExpr) {
  AddVertices(100, 30, 20);

  Expression *const wrapped_values[] = {Literal(12), Parameter(12)};
  for (Expression *wrapped_value : wrapped_values) {
    // once we make expression const-folding this test case will fail
    auto *unary_plus = storage_.Create<UnaryPlusOperator>(wrapped_value);
    const Symbol output_symbol = NextSymbol();
    MakeOp<ScanAllByLabelPropertyValue>(nullptr, output_symbol, label,
                                        property, "property", unary_plus);
    EXPECT_COST(20 * CardParam::kFilter *
                CostParam::MakeScanAllByLabelPropertyValue);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// Range scan with only an inclusive upper bound of 12: the property values
// 0..12 fall into the range, so 13 vertices are expected.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeUpperConstant) {
  AddVertices(100, 30, 20);

  Expression *const upper_values[] = {Literal(12), Parameter(12)};
  for (Expression *upper_value : upper_values) {
    const Symbol output_symbol = NextSymbol();
    const auto upper_bound = InclusiveBound(upper_value);
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, output_symbol, label,
                                        property, "property", nullopt,
                                        upper_bound);
    // cardinality estimation is exact for very small indexes
    EXPECT_COST(13 * CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// Range scan with only an inclusive lower bound of 17: the property values
// 17, 18 and 19 fall into the range, so 3 vertices are expected.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeLowerConstant) {
  AddVertices(100, 30, 20);

  Expression *const lower_values[] = {Literal(17), Parameter(17)};
  for (Expression *lower_value : lower_values) {
    const Symbol output_symbol = NextSymbol();
    const auto lower_bound = InclusiveBound(lower_value);
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, output_symbol, label,
                                        property, "property", lower_bound,
                                        nullopt);
    // cardinality estimation is exact for very small indexes
    EXPECT_COST(3 * CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
2017-09-15 20:52:02 +08:00
|
|
|
// When the lower bound is wrapped in an operator, the expected cost is the
// 20 vertices that have the property set, scaled by the filter cardinality
// factor.
TEST_F(QueryCostEstimator, ScanAllByLabelPropertyRangeConstExpr) {
  AddVertices(100, 30, 20);

  Expression *const wrapped_values[] = {Literal(12), Parameter(12)};
  for (Expression *wrapped_value : wrapped_values) {
    auto *unary_plus = static_cast<Expression *>(
        storage_.Create<UnaryPlusOperator>(wrapped_value));
    auto bound = std::make_optional(utils::MakeBoundInclusive(unary_plus));

    const Symbol output_symbol = NextSymbol();
    MakeOp<ScanAllByLabelPropertyRange>(nullptr, output_symbol, label,
                                        property, "property", bound, nullopt);
    EXPECT_COST(20 * CardParam::kFilter *
                CostParam::MakeScanAllByLabelPropertyRange);
  }
}
|
|
|
|
|
|
|
|
// A single Expand on top of Once is expected to cost kExpand, scaled by the
// expand cardinality factor.
TEST_F(QueryCostEstimator, Expand) {
  const Symbol first_symbol = NextSymbol();
  const Symbol second_symbol = NextSymbol();
  const Symbol third_symbol = NextSymbol();
  const std::vector<storage::EdgeType> no_edge_types{};

  MakeOp<Expand>(last_op_, first_symbol, second_symbol, third_symbol,
                 EdgeAtom::Direction::IN, no_edge_types, false,
                 GraphView::OLD);

  EXPECT_COST(CardParam::kExpand * CostParam::kExpand);
}
|
|
|
|
|
2017-08-03 20:18:19 +08:00
|
|
|
// A single ExpandVariable on top of Once is expected to cost kExpandVariable,
// scaled by the variable-expand cardinality factor.
TEST_F(QueryCostEstimator, ExpandVariable) {
  const Symbol first_symbol = NextSymbol();
  const Symbol second_symbol = NextSymbol();
  const Symbol third_symbol = NextSymbol();
  const std::vector<storage::EdgeType> no_edge_types{};
  const ExpansionLambda expansion_lambda{NextSymbol(), NextSymbol(), nullptr};

  MakeOp<ExpandVariable>(last_op_, first_symbol, second_symbol, third_symbol,
                         EdgeAtom::Type::DEPTH_FIRST, EdgeAtom::Direction::IN,
                         no_edge_types, false, nullptr, nullptr, false,
                         expansion_lambda, std::nullopt, std::nullopt);

  EXPECT_COST(CardParam::kExpandVariable * CostParam::kExpandVariable);
}
|
|
|
|
|
|
|
|
// Helper for testing an operation's cost and cardinality.
// Only for operations that first increment cost, then modify cardinality.
// Intentionally a macro (instead of a function) for better test feedback.
//
// OP is executed twice. After the first execution the cost must equal
// OP_COST_PARAM; after the second it must equal
// OP_COST_PARAM + OP_CARD_PARAM * OP_COST_PARAM.
// The parameters are parenthesized so that compound expressions (e.g. `1 + 1`)
// are not torn apart by operator precedence, and the statements are wrapped
// in do/while so that `TEST_OP(...);` behaves as a single statement.
#define TEST_OP(OP, OP_COST_PARAM, OP_CARD_PARAM)                     \
  do {                                                                \
    OP;                                                               \
    EXPECT_COST(OP_COST_PARAM);                                       \
    OP;                                                               \
    EXPECT_COST((OP_COST_PARAM) + (OP_CARD_PARAM) * (OP_COST_PARAM)); \
  } while (0)
|
|
|
|
|
|
|
|
// Checks the cost and cardinality parameters used for Filter.
TEST_F(QueryCostEstimator, Filter) {
  // Stacks one more Filter (with a fresh `true` literal) onto the plan.
  const auto add_filter = [this] { MakeOp<Filter>(last_op_, Literal(true)); };

  TEST_OP(add_filter(), CostParam::kFilter, CardParam::kFilter);
}
|
|
|
|
|
2018-11-19 17:58:49 +08:00
|
|
|
// Checks the cost and cardinality parameters used for EdgeUniquenessFilter.
TEST_F(QueryCostEstimator, EdgeUniquenessFilter) {
  // Stacks one more EdgeUniquenessFilter onto the plan, with a fresh symbol
  // and an empty symbol list.
  const auto add_uniqueness_filter = [this] {
    MakeOp<EdgeUniquenessFilter>(last_op_, NextSymbol(),
                                 std::vector<Symbol>());
  };

  TEST_OP(add_uniqueness_filter(), CostParam::kEdgeUniquenessFilter,
          CardParam::kEdgeUniquenessFilter);
}
|
|
|
|
|
|
|
|
// Unwinding a list literal: the expected cardinality factor is the number of
// list elements (7 here).
TEST_F(QueryCostEstimator, UnwindLiteral) {
  // Stacks one more Unwind over a fresh 7-element list literal; the elements
  // themselves are null expressions.
  const auto add_unwind = [this] {
    auto *seven_elements =
        storage_.Create<ListLiteral>(std::vector<Expression *>(7, nullptr));
    MakeOp<query::plan::Unwind>(last_op_, seven_elements, NextSymbol());
  };

  TEST_OP(add_unwind(), CostParam::kUnwind, 7);
}
|
|
|
|
|
|
|
|
// Unwinding something that is not a list literal: the expected cardinality
// factor is MiscParam::kUnwindNoLiteral.
TEST_F(QueryCostEstimator, UnwindNoLiteral) {
  // Stacks one more Unwind whose input expression is null.
  const auto add_unwind = [this] {
    MakeOp<query::plan::Unwind>(last_op_, nullptr, NextSymbol());
  };

  TEST_OP(add_unwind(), CostParam::kUnwind, MiscParam::kUnwindNoLiteral);
}
|
|
|
|
|
|
|
|
// The helper macros are only meant for the tests above.
#undef EXPECT_COST
#undef TEST_OP
|
|
|
|
//
|
|
|
|
// TODO test cost when ScanAll, Expand, Accumulate, Limit
|
|
|
|
// vs cost for ScanAll, Expand, Limit
|