2017-06-15 00:53:02 +08:00
|
|
|
#include "query/interpreter.hpp"
|
|
|
|
|
2017-10-05 00:38:17 +08:00
|
|
|
#include <glog/logging.h>
|
2018-03-13 17:35:14 +08:00
|
|
|
#include <limits>
|
2017-10-05 00:38:17 +08:00
|
|
|
|
2018-01-26 23:22:59 +08:00
|
|
|
#include "distributed/plan_dispatcher.hpp"
|
2017-12-22 20:39:31 +08:00
|
|
|
#include "query/exceptions.hpp"
|
|
|
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
|
|
|
#include "query/frontend/opencypher/parser.hpp"
|
|
|
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
2017-09-19 22:58:22 +08:00
|
|
|
#include "query/plan/planner.hpp"
|
|
|
|
#include "query/plan/vertex_count_cache.hpp"
|
|
|
|
#include "utils/flag_validation.hpp"
|
|
|
|
|
2017-10-07 20:41:59 +08:00
|
|
|
// Planner selection flag (defaults to true). Its value is forwarded to
// plan::MakeLogicalPlan by Interpreter::MakeLogicalPlan below.
DEFINE_HIDDEN_bool(query_cost_planner, true,
                   "Use the cost-estimating query planner.");
|
Flags cleanup and QueryEngine removal
Summary:
I started by cleaning up flags (removing unused ones, documenting undocumented ones). There were some flags to remove in `QueryEngine`. Seeing how we never use hardcoded queries (AFAIK Mislav's latest testing also indicated they aren't faster than interpretation), removing those unused flags makes the `QueryEngine` obsolete. That means that a bunch of other stuff becomes obsolete, along with the hardcoded queries. So I removed it all (this has been discussed and approved on the daily).
Some flags that were previously undocumented in `docs/user_technical/installation` are now documented. The following flags are NOT documented and in my opinion should not be displayed when starting `./memgraph --help` (@mferencevic):
```
query_vertex_count_to_expand_existsing (from rule_based_planner.cpp)
query_max_plans (rule_based_planner.cpp)
```
If you think that another organization is needed w.r.t. flag visibility, comment.
@teon.banek: I had to remove some stuff from CMakeLists to make it buildable. Please review what I removed and clean up if necessary if/when this lands. If the needed changes are minor, you can also comment.
Reviewers: buda, mislav.bradac, teon.banek, mferencevic
Reviewed By: buda, mislav.bradac
Subscribers: pullbot, mferencevic, teon.banek
Differential Revision: https://phabricator.memgraph.io/D825
2017-09-22 22:17:09 +08:00
|
|
|
// Time to live of a cached query plan, in seconds (default 60). The value is
// validated with FLAG_IN_RANGE against 0 .. max int32.
DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60,
                       "Time to live for cached query plans, in seconds.",
                       FLAG_IN_RANGE(0, std::numeric_limits<int32_t>::max()));
|
2017-09-19 22:58:22 +08:00
|
|
|
|
|
|
|
namespace query {
|
|
|
|
|
2018-03-15 22:00:43 +08:00
|
|
|
/// Takes ownership of `distributed_plan` and remembers its cost estimate.
///
/// When `plan_dispatcher` is non-null, every entry of the plan's
/// `worker_plans` is handed to `DispatchPlan` together with the plan's symbol
/// table. With a null dispatcher nothing is dispatched.
Interpreter::CachedPlan::CachedPlan(
    plan::DistributedPlan distributed_plan, double cost,
    distributed::PlanDispatcher *plan_dispatcher)
    : distributed_plan_(std::move(distributed_plan)),
      cost_(cost),
      plan_dispatcher_(plan_dispatcher) {
  // No dispatcher, nothing to send out.
  if (!plan_dispatcher_) return;

  // Each entry is (plan id, worker plan).
  for (const auto &worker_entry : distributed_plan_.worker_plans) {
    plan_dispatcher_->DispatchPlan(worker_entry.first, worker_entry.second,
                                   distributed_plan_.symbol_table);
  }
}
|
|
|
|
|
2018-03-15 22:00:43 +08:00
|
|
|
/// Mirrors the constructor: when a plan dispatcher is present, calls
/// `RemovePlan` for the id of every worker plan held by this cached plan.
Interpreter::CachedPlan::~CachedPlan() {
  // Nothing was dispatched without a dispatcher, so nothing to remove.
  if (!plan_dispatcher_) return;

  for (const auto &worker_entry : distributed_plan_.worker_plans) {
    plan_dispatcher_->RemovePlan(worker_entry.first);
  }
}
|
|
|
|
|
|
|
|
/// Constructs an interpreter for `db`. The plan dispatcher pointer is taken
/// from `db` only when it is a DISTRIBUTED_MASTER; for every other database
/// type it stays null.
Interpreter::Interpreter(database::GraphDb &db)
    : plan_dispatcher_(db.type() != database::GraphDb::Type::DISTRIBUTED_MASTER
                           ? nullptr
                           : &db.plan_dispatcher()) {}
|
2018-03-13 17:35:14 +08:00
|
|
|
|
2017-12-22 20:39:31 +08:00
|
|
|
/// Interprets `query` using `db_accessor` and returns a Results object that
/// holds the execution context, the (possibly cached) plan, a cursor over it,
/// the output symbols, the result header and a summary map.
///
/// @param query the query text; it is stripped before planning.
/// @param db_accessor accessor used to build the context and the cursor.
/// @param params user-provided parameters, keyed by parameter name.
/// @param in_explicit_transaction stored in the execution context.
/// @throws query::UnprovidedParameterError if the stripped query refers to a
///         parameter that is missing from `params`.
///
/// The summary contains "parsing_time", "planning_time", "cost_estimate" and
/// "type" (always "rw", see the TODO below).
Interpreter::Results Interpreter::operator()(
    const std::string &query, database::GraphDbAccessor &db_accessor,
    const std::map<std::string, TypedValue> &params,
    bool in_explicit_transaction) {
  utils::Timer frontend_timer;
  Context ctx(db_accessor);
  ctx.in_explicit_transaction_ = in_explicit_transaction;
  ctx.is_query_cached_ = true;
  // Milliseconds since the system clock epoch.
  ctx.timestamp_ = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
  ctx.kafka_streams_ = kafka_streams_;

  // query -> stripped query
  StrippedQuery stripped(query);

  // Update context with provided parameters.
  ctx.parameters_ = stripped.literals();
  for (const auto &param_pair : stripped.parameters()) {
    auto param_it = params.find(param_pair.second);
    if (param_it == params.end()) {
      throw query::UnprovidedParameterError(
          fmt::format("Parameter ${} not provided", param_pair.second));
    }
    ctx.parameters_.Add(param_pair.first, param_it->second);
  }
  auto frontend_time = frontend_timer.Elapsed();

  // Try to get a cached plan. Note that this local shared_ptr might be the only
  // owner of the CachedPlan, so ensure it lives during the whole
  // interpretation.
  std::shared_ptr<CachedPlan> plan{nullptr};
  auto plan_cache_access = plan_cache_.access();
  auto it = plan_cache_access.find(stripped.hash());
  if (it != plan_cache_access.end()) {
    // An expired entry is dropped from the cache and planned again below.
    if (it->second->IsExpired())
      plan_cache_access.remove(stripped.hash());
    else
      plan = it->second;
  }
  utils::Timer planning_timer;
  if (!plan) {
    // Use whatever ends up in the cache under this hash after the insert.
    plan = plan_cache_access.insert(stripped.hash(), QueryToPlan(stripped, ctx))
               .first->second;
  }
  auto planning_time = planning_timer.Elapsed();

  ctx.symbol_table_ = plan->symbol_table();

  std::map<std::string, TypedValue> summary;
  summary["parsing_time"] = frontend_time.count();
  summary["planning_time"] = planning_time.count();
  summary["cost_estimate"] = plan->cost();
  // TODO: set summary['type'] based on transaction metadata
  // the type can't be determined based only on top level LogicalOp
  // (for example MATCH DELETE RETURN will have Produce as its top)
  // for now always use "rw" because something must be set, but it doesn't
  // have to be correct (for Bolt clients)
  summary["type"] = "rw";

  auto cursor = plan->plan().MakeCursor(ctx.db_accessor_);
  std::vector<std::string> header;
  std::vector<Symbol> output_symbols(
      plan->plan().OutputSymbols(ctx.symbol_table_));
  header.reserve(output_symbols.size());
  for (const auto &symbol : output_symbols) {
    // When the symbol is aliased or expanded from '*' (inside RETURN or
    // WITH), then there is no token position, so use symbol name.
    // Otherwise, find the name from stripped query.
    header.push_back(utils::FindOr(stripped.named_expressions(),
                                   symbol.token_position(), symbol.name())
                         .first);
  }

  return Results(std::move(ctx), plan, std::move(cursor), output_symbols,
                 header, summary, plan_cache_);
}
|
|
|
|
|
2018-03-13 17:35:14 +08:00
|
|
|
std::shared_ptr<Interpreter::CachedPlan> Interpreter::QueryToPlan(
|
|
|
|
const StrippedQuery &stripped, Context &ctx) {
|
2018-05-22 22:45:52 +08:00
|
|
|
AstStorage ast_storage = QueryToAst(stripped, ctx);
|
2018-03-13 17:35:14 +08:00
|
|
|
SymbolGenerator symbol_generator(ctx.symbol_table_);
|
|
|
|
ast_storage.query()->Accept(symbol_generator);
|
|
|
|
|
|
|
|
std::unique_ptr<plan::LogicalOperator> tmp_logical_plan;
|
|
|
|
double query_plan_cost_estimation = 0.0;
|
|
|
|
std::tie(tmp_logical_plan, query_plan_cost_estimation) =
|
|
|
|
MakeLogicalPlan(ast_storage, ctx);
|
|
|
|
|
|
|
|
DCHECK(ctx.db_accessor_.db().type() !=
|
|
|
|
database::GraphDb::Type::DISTRIBUTED_WORKER);
|
|
|
|
if (ctx.db_accessor_.db().type() ==
|
|
|
|
database::GraphDb::Type::DISTRIBUTED_MASTER) {
|
|
|
|
auto distributed_plan = MakeDistributedPlan(
|
|
|
|
*tmp_logical_plan, ctx.symbol_table_, next_plan_id_);
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[Interpreter] Created plan for distributed execution "
|
|
|
|
<< next_plan_id_ - 1;
|
2018-03-13 17:35:14 +08:00
|
|
|
return std::make_shared<CachedPlan>(std::move(distributed_plan),
|
2018-03-15 22:00:43 +08:00
|
|
|
query_plan_cost_estimation,
|
|
|
|
plan_dispatcher_);
|
2018-03-13 17:35:14 +08:00
|
|
|
} else {
|
|
|
|
return std::make_shared<CachedPlan>(
|
2018-03-15 22:00:43 +08:00
|
|
|
plan::DistributedPlan{0,
|
|
|
|
std::move(tmp_logical_plan),
|
|
|
|
{},
|
|
|
|
std::move(ast_storage),
|
|
|
|
ctx.symbol_table_},
|
|
|
|
query_plan_cost_estimation, plan_dispatcher_);
|
2018-03-13 17:35:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-22 22:45:52 +08:00
|
|
|
/// Produces the AST for `stripped`.
///
/// If `ctx.is_query_cached_` is false, the original query text is parsed and
/// the resulting storage is returned directly. Otherwise the AST cache is
/// consulted by the stripped query's hash; on a miss the stripped text is
/// parsed and the result inserted into the cache. In the cached case the
/// returned storage is a clone of the cached query.
///
/// Parser construction happens while holding `antlr_lock_`.
AstStorage Interpreter::QueryToAst(const StrippedQuery &stripped,
                                   Context &ctx) {
  using CypherParser = frontend::opencypher::Parser;

  if (!ctx.is_query_cached_) {
    // original query -> parse tree
    std::unique_ptr<CypherParser> cypher_parser;
    {
      // The guard releases the lock even if the parser throws.
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      cypher_parser =
          std::make_unique<CypherParser>(stripped.original_query());
    }
    auto parse_tree = cypher_parser->tree();
    // parse tree -> high level tree
    frontend::CypherMainVisitor ast_builder(ctx);
    ast_builder.visit(parse_tree);
    return std::move(ast_builder.storage());
  }

  auto cache_access = ast_cache_.access();
  auto cached_ast = cache_access.find(stripped.hash());
  if (cached_ast == cache_access.end()) {
    // stripped query -> parse tree
    std::unique_ptr<CypherParser> cypher_parser;
    {
      // The guard releases the lock even if the parser throws.
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      try {
        cypher_parser = std::make_unique<CypherParser>(stripped.query());
      } catch (const SyntaxException &) {
        // The stripped query has a syntax error. Parse the original query
        // instead so that the error message refers to what the user wrote.
        cypher_parser =
            std::make_unique<CypherParser>(stripped.original_query());
        // Reaching this point means the original parsed fine, i.e.
        // StrippedQuery messed something up.
        LOG(FATAL) << "Stripped query can't be parsed, original can";
      }
    }
    auto parse_tree = cypher_parser->tree();
    // parse tree -> high level tree
    frontend::CypherMainVisitor ast_builder(ctx);
    ast_builder.visit(parse_tree);
    // Cache it.
    auto inserted =
        cache_access.insert(stripped.hash(), std::move(ast_builder.storage()));
    cached_ast = inserted.first;
  }

  // Hand out a clone of the cached query.
  AstStorage cloned_ast;
  cached_ast->second.query()->Clone(cloned_ast);
  return cloned_ast;
}
|
|
|
|
|
|
|
|
std::pair<std::unique_ptr<plan::LogicalOperator>, double>
|
2018-05-22 22:45:52 +08:00
|
|
|
Interpreter::MakeLogicalPlan(AstStorage &ast_storage, Context &context) {
|
2017-09-19 22:58:22 +08:00
|
|
|
std::unique_ptr<plan::LogicalOperator> logical_plan;
|
2018-03-13 17:35:14 +08:00
|
|
|
auto vertex_counts = plan::MakeVertexCountCache(context.db_accessor_);
|
2017-10-13 15:46:49 +08:00
|
|
|
auto planning_context = plan::MakePlanningContext(
|
|
|
|
ast_storage, context.symbol_table_, vertex_counts);
|
2017-11-29 20:55:02 +08:00
|
|
|
return plan::MakeLogicalPlan(planning_context, context.parameters_,
|
|
|
|
FLAGS_query_cost_planner);
|
2017-09-19 22:58:22 +08:00
|
|
|
};
|
|
|
|
} // namespace query
|