Query - plan - cost estimator basic. REPL mods
Reviewers: teon.banek, buda, mislav.bradac Reviewed By: teon.banek, buda Subscribers: pullbot, teon.banek Differential Revision: https://phabricator.memgraph.io/D399
This commit is contained in:
parent
29041eb4d1
commit
509d5db67a
@ -362,6 +362,7 @@ set(memgraph_src_files
|
||||
${src_dir}/query/plan/operator.cpp
|
||||
${src_dir}/query/plan/rule_based_planner.cpp
|
||||
${src_dir}/query/plan/variable_start_planner.cpp
|
||||
${src_dir}/query/plan/cost_estimator.cpp
|
||||
${src_dir}/query/frontend/semantic/symbol_generator.cpp
|
||||
)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/interpret/frame.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
|
||||
namespace query {
|
||||
|
||||
@ -42,6 +43,11 @@ void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
|
||||
auto logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
|
||||
visitor.storage(), symbol_table, &db_accessor);
|
||||
|
||||
// cost estimation
|
||||
plan::CostEstimator cost_estimator(db_accessor);
|
||||
logical_plan->Accept(cost_estimator);
|
||||
double query_plan_cost_estimation = cost_estimator.cost();
|
||||
|
||||
// generate frame based on symbol table max_position
|
||||
Frame frame(symbol_table.max_position());
|
||||
|
||||
@ -93,6 +99,7 @@ void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
|
||||
time_second(antlr_end_time, planning_end_time);
|
||||
summary["query_plan_execution_time"] =
|
||||
time_second(planning_end_time, execution_end_time);
|
||||
summary["query_cost_estimate"] = query_plan_cost_estimation;
|
||||
//
|
||||
// TODO set summary['type'] based on transaction metadata
|
||||
// the type can't be determined based only on top level LogicalOp
|
||||
|
62
src/query/plan/cost_estimator.cpp
Normal file
62
src/query/plan/cost_estimator.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include "cost_estimator.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
// Accounts for a ScanAll operator: the running cardinality is scaled by the
// accessor's total vertex count, and the scan cost is then charged at that
// updated cardinality (work is done for every produced element).
bool CostEstimator::PostVisit(ScanAll &) {
  const auto vertex_count = db_accessor_.vertices_count();
  cardinality_ *= vertex_count;
  IncrementCost(CostParam::kScanAll);
  return true;
}
|
||||
|
||||
// Accounts for a ScanAllByLabel operator: the running cardinality is scaled
// by the number of vertices the accessor reports for the operator's label,
// and the scan cost is then charged at that updated cardinality.
bool CostEstimator::PostVisit(ScanAllByLabel &scan_all_by_label) {
  const auto label = scan_all_by_label.label();
  const auto labeled_vertex_count = db_accessor_.vertices_count(label);
  cardinality_ *= labeled_vertex_count;
  IncrementCost(CostParam::kScanAllByLabel);
  return true;
}
|
||||
|
||||
// Accounts for an Expand operator: cardinality grows by the fixed
// CardParam::kExpand factor first, then the expansion cost is charged once
// per resulting expansion.
bool CostEstimator::PostVisit(Expand &) {
  cardinality_ = cardinality_ * CardParam::kExpand;
  IncrementCost(CostParam::kExpand);
  return true;
}
|
||||
|
||||
// for the given op first increments the cost and then cardinality
|
||||
#define POST_VISIT(LOGICAL_OP, PARAM_NAME) \
|
||||
bool CostEstimator::PostVisit(LOGICAL_OP &) { \
|
||||
IncrementCost(CostParam::PARAM_NAME); \
|
||||
cardinality_ *= CardParam::PARAM_NAME; \
|
||||
return true; \
|
||||
}
|
||||
|
||||
POST_VISIT(Filter, kFilter)
|
||||
POST_VISIT(ExpandUniquenessFilter<VertexAccessor>, kExpandUniquenessFilter);
|
||||
POST_VISIT(ExpandUniquenessFilter<EdgeAccessor>, kExpandUniquenessFilter);
|
||||
|
||||
#undef POST_VISIT
|
||||
|
||||
// Accounts for an Unwind operator.
//
// Unwind cost depends more on the number of lists that get unwound than on
// the number of values produced, so the cost is charged at the incoming
// cardinality and the cardinality is modified afterwards.
//
// The cardinality factor is the element count of the input expression when
// that expression is a list literal (exact), and MiscParam::kUnwindNoLiteral
// otherwise (approximation). The factor is kept as a double so that neither
// the size_t element count nor a fractional kUnwindNoLiteral is narrowed.
bool CostEstimator::PostVisit(Unwind &unwind) {
  IncrementCost(CostParam::kUnwind);

  double unwind_value = MiscParam::kUnwindNoLiteral;
  // dynamic_cast yields nullptr for a null or non-list-literal expression,
  // in which case the approximation above is kept
  if (const auto *literal =
          dynamic_cast<query::ListLiteral *>(unwind.input_expression()))
    unwind_value = static_cast<double>(literal->elements_.size());

  cardinality_ *= unwind_value;
  return true;
}
|
||||
|
||||
// Once leaves both cost_ and cardinality_ untouched; the visit just
// returns true.
bool CostEstimator::Visit(Once &) {
  return true;
}
|
||||
|
||||
} // namespace query::plan
|
87
src/query/plan/cost_estimator.hpp
Normal file
87
src/query/plan/cost_estimator.hpp
Normal file
@ -0,0 +1,87 @@
|
||||
#include "logging/loggable.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
/**
|
||||
* @brief: Query plan execution time cost estimator,
|
||||
* for comparing and choosing optimal execution plans.
|
||||
*
|
||||
* In Cypher the write part of the query always executes in
|
||||
* the same cardinality. It is not allowed to execute a write
|
||||
* operation before all the expansion for that query part
|
||||
* (WITH splits a query into parts) have executed.
|
||||
*
|
||||
* Note that expansions and filtering can also happen during
|
||||
* Merge, which is a write operation. We let that get evaluated
|
||||
* like all other cardinality influencing ops. Also, Merge
|
||||
* cardinality modification should be contained (it can never
|
||||
* reduce its input cardinality), but since Merge always happens
|
||||
* after the read part, and can't be reordered, we can ignore
|
||||
* that.
|
||||
*
|
||||
* Limiting and accumulating (Aggregate, OrderBy, Accumulate)
|
||||
* operations are cardinality
|
||||
* modifiers that always execute at the end of the
|
||||
* query part. Their cardinality influence is irrelevant
|
||||
* because they generally execute the same for all plans
|
||||
* for a single query part, and query part reordering is
|
||||
* not allowed.
|
||||
*/
|
||||
class CostEstimator : public HierarchicalLogicalOperatorVisitor, Loggable {
|
||||
public:
|
||||
struct CostParam {
|
||||
static constexpr double kScanAll{1.0};
|
||||
static constexpr double kScanAllByLabel{1.1};
|
||||
static constexpr double kExpand{2.0};
|
||||
static constexpr double kFilter{1.5};
|
||||
static constexpr double kExpandUniquenessFilter{1.5};
|
||||
static constexpr double kUnwind{1.3};
|
||||
};
|
||||
|
||||
struct CardParam {
|
||||
static constexpr double kExpand{3.0};
|
||||
static constexpr double kFilter{0.25};
|
||||
static constexpr double kExpandUniquenessFilter{0.95};
|
||||
};
|
||||
|
||||
struct MiscParam {
|
||||
static constexpr double kUnwindNoLiteral{10.0};
|
||||
};
|
||||
|
||||
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
||||
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
||||
|
||||
CostEstimator(const GraphDbAccessor &db_accessor)
|
||||
: Loggable("QueryCostEstimator"), db_accessor_(db_accessor) {}
|
||||
|
||||
bool PostVisit(ScanAll &) override;
|
||||
bool PostVisit(ScanAllByLabel &scan_all_by_label) override;
|
||||
bool PostVisit(Expand &) override;
|
||||
bool PostVisit(Filter &) override;
|
||||
bool PostVisit(ExpandUniquenessFilter<VertexAccessor> &) override;
|
||||
bool PostVisit(ExpandUniquenessFilter<EdgeAccessor> &) override;
|
||||
bool PostVisit(Unwind &unwind) override;
|
||||
bool Visit(Once &) override;
|
||||
|
||||
auto cost() const { return cost_; }
|
||||
auto cardinality() const { return cardinality_; }
|
||||
|
||||
private:
|
||||
// cost estimation that gets accumulated as the visitor
|
||||
// tours the logical plan
|
||||
double cost_{0};
|
||||
|
||||
// cardinality estimation (how many times an operator gets executed)
|
||||
// cardinality is a double to make it easier to work with
|
||||
double cardinality_{1};
|
||||
//
|
||||
// accessor used for cardinality estimates in ScanAll and ScanAllByLabel
|
||||
const GraphDbAccessor &db_accessor_;
|
||||
|
||||
void IncrementCost(double param) { cost_ += param * cardinality_; }
|
||||
};
|
||||
|
||||
} // namespace query::plan
|
@ -320,6 +320,8 @@ class ScanAllByLabel : public ScanAll {
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
std::unique_ptr<Cursor> MakeCursor(GraphDbAccessor &db) override;
|
||||
|
||||
GraphDbTypes::Label label() const { return label_; }
|
||||
|
||||
private:
|
||||
const GraphDbTypes::Label label_;
|
||||
};
|
||||
@ -1221,6 +1223,8 @@ class Unwind : public LogicalOperator {
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
std::unique_ptr<Cursor> MakeCursor(GraphDbAccessor &db) override;
|
||||
|
||||
Expression *input_expression() const { return input_expression_; }
|
||||
|
||||
private:
|
||||
const std::shared_ptr<LogicalOperator> input_;
|
||||
Expression *input_expression_;
|
||||
|
@ -5,6 +5,9 @@
|
||||
#include "query/interpreter.hpp"
|
||||
#include "utils/random_graph_generator.hpp"
|
||||
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
|
||||
void random_generate(Dbms &dbms, uint node_count, int edge_factor = 5) {
|
||||
auto dba = dbms.active();
|
||||
utils::RandomGraphGenerator generator(*dba);
|
||||
@ -20,72 +23,23 @@ void random_generate(Dbms &dbms, uint node_count, int edge_factor = 5) {
|
||||
generator.Commit();
|
||||
}
|
||||
|
||||
// Populates the active database with a small hand-written graph: one
// Company, three Person and one Device vertex, connected by MEMBER_OF,
// FRIEND_OF, OWNS and USES edges, and commits the accessor.
void fill_db(Dbms &dbms) {
  auto accessor = dbms.active();

  // labels
  auto company_label = accessor->label("Company");
  auto person_label = accessor->label("Person");
  auto device_label = accessor->label("Device");

  // properties
  auto name_prop = accessor->property("name");
  auto age_prop = accessor->property("age");
  auto type_prop = accessor->property("type");

  // vertices
  auto memgraph = accessor->insert_vertex();
  memgraph.PropsSet(name_prop, "Memgraph");
  memgraph.add_label(company_label);

  auto teon = accessor->insert_vertex();
  teon.PropsSet(name_prop, "Teon");
  teon.PropsSet(age_prop, 26);
  teon.add_label(person_label);

  auto mislav = accessor->insert_vertex();
  mislav.PropsSet(name_prop, "Mislav");
  mislav.PropsSet(age_prop, 22);
  mislav.add_label(person_label);

  auto florijan = accessor->insert_vertex();
  florijan.PropsSet(name_prop, "Florijan");
  florijan.PropsSet(age_prop, 31);
  florijan.add_label(person_label);

  auto xps_15 = accessor->insert_vertex();
  xps_15.PropsSet(type_prop, "PC");
  xps_15.PropsSet(name_prop, "Dell XPS 15");
  xps_15.add_label(device_label);

  // edges: the edge type is looked up by name for every edge, as before
  auto connect = [&accessor](auto &from, auto &to,
                             const char *edge_type_name) {
    accessor->insert_edge(from, to, accessor->edge_type(edge_type_name));
  };

  connect(teon, memgraph, "MEMBER_OF");
  connect(mislav, memgraph, "MEMBER_OF");
  connect(florijan, memgraph, "MEMBER_OF");

  connect(teon, mislav, "FRIEND_OF");
  connect(mislav, teon, "FRIEND_OF");
  connect(florijan, mislav, "FRIEND_OF");
  connect(mislav, florijan, "FRIEND_OF");
  connect(florijan, teon, "FRIEND_OF");
  connect(teon, florijan, "FRIEND_OF");

  connect(memgraph, xps_15, "OWNS");

  connect(teon, xps_15, "USES");
  connect(mislav, xps_15, "USES");
  connect(florijan, xps_15, "USES");

  accessor->commit();
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
REGISTER_ARGS(argc, argv);
|
||||
|
||||
// parse the first cmd line argument as the count of nodes to randomly create
|
||||
uint node_count = 100000;
|
||||
uint node_count = 0;
|
||||
if (argc > 1) {
|
||||
node_count = (uint) std::stoul(argv[1]);
|
||||
permanent_assert(node_count < 10000000,
|
||||
"More then 10M nodes requested, that's too much");
|
||||
}
|
||||
|
||||
// TODO switch to GFlags, once finally available
|
||||
if (argc > 2) {
|
||||
logging::init_sync();
|
||||
logging::log->pipe(std::make_unique<Stdout>());
|
||||
}
|
||||
|
||||
Dbms dbms;
|
||||
std::cout << "Generating graph..." << std::endl;
|
||||
// fill_db(dbms);
|
||||
|
127
tests/unit/query_cost_estimator.cpp
Normal file
127
tests/unit/query_cost_estimator.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <memory>
|
||||
|
||||
#include "dbms/dbms.hpp"
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/frontend/semantic/symbol_table.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "storage/vertex_accessor.hpp"
|
||||
|
||||
using namespace query;
|
||||
using namespace query::plan;
|
||||
|
||||
using CardParam = CostEstimator::CardParam;
|
||||
using CostParam = CostEstimator::CostParam;
|
||||
using MiscParam = CostEstimator::MiscParam;
|
||||
|
||||
/** A fixture for cost estimation. Sets up the database
|
||||
* and accessor (adds some vertices). Provides convenience
|
||||
* functions for creating the logical plan. Note that the
|
||||
* resulting plan is NOT fit for execution, only for cost
|
||||
* estimation testing. */
|
||||
class QueryCostEstimator : public ::testing::Test {
|
||||
protected:
|
||||
Dbms dbms;
|
||||
std::unique_ptr<GraphDbAccessor> dba = dbms.active();
|
||||
|
||||
// we incrementally build the logical operator plan
|
||||
// start it off with Once
|
||||
std::shared_ptr<LogicalOperator> last_op_ = std::make_shared<Once>();
|
||||
|
||||
AstTreeStorage storage_;
|
||||
SymbolTable symbol_table_;
|
||||
int symbol_count = 0;
|
||||
|
||||
Symbol NextSymbol() {
|
||||
return symbol_table_.CreateSymbol("Symbol" + std::to_string(symbol_count++),
|
||||
true);
|
||||
}
|
||||
|
||||
/** Adds the given number of vertices to the DB, which
|
||||
* the given number is labeled with the given label */
|
||||
void AddVertices(int vertex_count, GraphDbTypes::Label label,
|
||||
int labeled_count) {
|
||||
for (int i = 0; i < vertex_count; i++) {
|
||||
auto vertex = dba->insert_vertex();
|
||||
if (i < labeled_count) vertex.add_label(label);
|
||||
}
|
||||
|
||||
dba->advance_command();
|
||||
}
|
||||
|
||||
auto Cost() {
|
||||
CostEstimator cost_estimator(*dba);
|
||||
last_op_->Accept(cost_estimator);
|
||||
return cost_estimator.cost();
|
||||
}
|
||||
|
||||
template <typename TLogicalOperator, typename... TArgs>
|
||||
void MakeOp(TArgs... args) {
|
||||
last_op_ = std::make_shared<TLogicalOperator>(args...);
|
||||
}
|
||||
};
|
||||
|
||||
// Expects the estimated cost of the current plan to equal COST.
// The multiplication with 1 makes the expected value a temporary, which
// avoids a linker error (possibly fixed in Clang >= 3.81).
// COST is parenthesized so that any expression can be passed safely.
#define EXPECT_COST(COST) EXPECT_FLOAT_EQ(Cost(), 1 * (COST))
|
||||
|
||||
// The fixture's initial plan is a lone Once, which must cost nothing.
TEST_F(QueryCostEstimator, Once) { EXPECT_COST(0); }
|
||||
|
||||
// ScanAll over 100 vertices (30 of them labeled) costs kScanAll per vertex,
// regardless of labels.
TEST_F(QueryCostEstimator, ScanAll) {
  constexpr int kVertexCount = 100;
  AddVertices(kVertexCount, dba->label("Label"), 30);
  MakeOp<ScanAll>(last_op_, NextSymbol());
  EXPECT_COST(kVertexCount * CostParam::kScanAll);
}
|
||||
|
||||
// ScanAllByLabel is charged only for the 30 labeled vertices out of 100.
TEST_F(QueryCostEstimator, ScanAllByLabelCardinality) {
  constexpr int kLabeledCount = 30;
  GraphDbTypes::Label label = dba->label("Label");
  AddVertices(100, label, kLabeledCount);
  MakeOp<ScanAllByLabel>(last_op_, NextSymbol(), label);
  EXPECT_COST(kLabeledCount * CostParam::kScanAllByLabel);
}
|
||||
|
||||
// A single Expand on top of Once: cardinality becomes CardParam::kExpand
// and the expand cost is charged at that cardinality.
TEST_F(QueryCostEstimator, ExpandCardinality) {
  MakeOp<Expand>(NextSymbol(), NextSymbol(), EdgeAtom::Direction::IN, last_op_,
                 NextSymbol(), false, false);
  const double expected_cost = CardParam::kExpand * CostParam::kExpand;
  EXPECT_COST(expected_cost);
}
|
||||
|
||||
// Helper for testing an operation's cost and cardinality.
// Only for operations that first increment cost, then modify cardinality.
// OP is evaluated twice: after the first evaluation the cost must be
// OP_COST_PARAM, after the second (stacked on the first) it must be
// OP_COST_PARAM + OP_CARD_PARAM * OP_COST_PARAM.
// Intentionally a macro (instead of a function) for better test feedback.
// Wrapped in do/while so it behaves as a single statement, with the
// parameters parenthesized so arbitrary expressions can be passed.
#define TEST_OP(OP, OP_COST_PARAM, OP_CARD_PARAM)                     \
  do {                                                                \
    OP;                                                               \
    EXPECT_COST(OP_COST_PARAM);                                       \
    OP;                                                               \
    EXPECT_COST((OP_COST_PARAM) + (OP_CARD_PARAM) * (OP_COST_PARAM)); \
  } while (false)
|
||||
|
||||
// Stacks two Filter operators (each over a literal `true`) via TEST_OP and
// checks the cost after each one against the Filter cost/cardinality params.
TEST_F(QueryCostEstimator, Filter) {
|
||||
TEST_OP(MakeOp<Filter>(last_op_, storage_.Create<PrimitiveLiteral>(true)),
|
||||
CostParam::kFilter, CardParam::kFilter);
|
||||
}
|
||||
|
||||
// Stacks two vertex ExpandUniquenessFilter operators (with no previous
// symbols) via TEST_OP and checks the cost after each one.
TEST_F(QueryCostEstimator, ExpandUniquenessFilter) {
|
||||
TEST_OP(MakeOp<ExpandUniquenessFilter<VertexAccessor>>(last_op_, NextSymbol(),
|
||||
std::vector<Symbol>()),
|
||||
CostParam::kExpandUniquenessFilter,
|
||||
CardParam::kExpandUniquenessFilter);
|
||||
}
|
||||
|
||||
// Unwind over a list literal of 7 (null) elements: the estimator should use
// the literal's element count, 7, as the cardinality factor.
TEST_F(QueryCostEstimator, UnwindLiteral) {
|
||||
TEST_OP(MakeOp<query::plan::Unwind>(
|
||||
last_op_, storage_.Create<ListLiteral>(
|
||||
std::vector<Expression *>(7, nullptr)),
|
||||
NextSymbol()),
|
||||
CostParam::kUnwind, 7);
|
||||
}
|
||||
|
||||
// Unwind whose input expression is not a list literal (nullptr here): the
// estimator should fall back to MiscParam::kUnwindNoLiteral as the
// cardinality factor.
TEST_F(QueryCostEstimator, UnwindNoLiteral) {
|
||||
TEST_OP(MakeOp<query::plan::Unwind>(last_op_, nullptr, NextSymbol()),
|
||||
CostParam::kUnwind, MiscParam::kUnwindNoLiteral);
|
||||
}
|
||||
|
||||
#undef TEST_OP
|
||||
#undef EXPECT_COST
|
||||
//
|
||||
// TODO test cost when ScanAll, Expand, Accumulate, Limit
|
||||
// vs cost for SA, Expand, Limit
|
Loading…
Reference in New Issue
Block a user