2017-06-15 00:53:02 +08:00
|
|
|
#include "query/interpreter.hpp"
|
|
|
|
|
2017-10-05 00:38:17 +08:00
|
|
|
#include <glog/logging.h>
|
2018-03-13 17:35:14 +08:00
|
|
|
#include <limits>
|
2017-10-05 00:38:17 +08:00
|
|
|
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementation classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, others are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
#include "database/distributed_graph_db.hpp"
|
2018-01-26 23:22:59 +08:00
|
|
|
#include "distributed/plan_dispatcher.hpp"
|
2017-12-22 20:39:31 +08:00
|
|
|
#include "query/exceptions.hpp"
|
|
|
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
|
|
|
#include "query/frontend/opencypher/parser.hpp"
|
|
|
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
2017-09-19 22:58:22 +08:00
|
|
|
#include "query/plan/planner.hpp"
|
|
|
|
#include "query/plan/vertex_count_cache.hpp"
|
|
|
|
#include "utils/flag_validation.hpp"
|
|
|
|
|
2017-10-07 20:41:59 +08:00
|
|
|
// Selects the cost-estimating planner; when false, planning falls back to
// producing plans without cost-based selection. Hidden from --help output.
DEFINE_HIDDEN_bool(query_cost_planner, true,
                   "Use the cost-estimating query planner.");
|
Flags cleanup and QueryEngine removal
Summary:
I started with cleaning flags up (removing unused ones, documenting undocumented ones). There were some flags to remove in `QueryEngine`. Seeing how we never use hardcoded queries (AFAIK last Mislav's testing also indicated they aren't faster than interpretation), when removing those unused flags the `QueryEngine` becomes obsolete. That means that a bunch of other stuff becomes obsolete, along with the hardcoded queries. So I removed it all (this has been discussed and approved on the daily).
Some flags that were previously undocumented in `docs/user_technical/installation` are now documented. The following flags are NOT documented and in my opinion should not be displayed when starting `./memgraph --help` (@mferencevic):
```
query_vertex_count_to_expand_existsing (from rule_based_planner.cpp)
query_max_plans (rule_based_planner.cpp)
```
If you think that another organization is needed w.r.t. flag visibility, comment.
@teon.banek: I had to remove some stuff from CMakeLists to make it buildable. Please review what I removed and clean up if necessary if/when this lands. If the needed changes are minor, you can also comment.
Reviewers: buda, mislav.bradac, teon.banek, mferencevic
Reviewed By: buda, mislav.bradac
Subscribers: pullbot, mferencevic, teon.banek
Differential Revision: https://phabricator.memgraph.io/D825
2017-09-22 22:17:09 +08:00
|
|
|
// TTL after which a cached plan is treated as expired (see CachedPlan
// expiry checks in operator()); validated to be non-negative.
DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60,
                       "Time to live for cached query plans, in seconds.",
                       FLAG_IN_RANGE(0, std::numeric_limits<int32_t>::max()));
|
2017-09-19 22:58:22 +08:00
|
|
|
|
|
|
|
namespace query {
|
|
|
|
|
2018-03-15 22:00:43 +08:00
|
|
|
/// Takes ownership of the given distributed plan and, when running as a
/// distributed master (non-null dispatcher), eagerly ships each worker's
/// sub-plan to the workers. In single-node mode there is nothing to ship.
Interpreter::CachedPlan::CachedPlan(
    plan::DistributedPlan distributed_plan, double cost,
    distributed::PlanDispatcher *plan_dispatcher)
    : distributed_plan_(std::move(distributed_plan)),
      cost_(cost),
      plan_dispatcher_(plan_dispatcher) {
  // No dispatcher means single-node execution -- nothing to distribute.
  if (!plan_dispatcher_) return;
  for (const auto &id_and_plan : distributed_plan_.worker_plans) {
    plan_dispatcher_->DispatchPlan(id_and_plan.first, id_and_plan.second,
                                   distributed_plan_.symbol_table);
  }
}
|
|
|
|
|
2018-03-15 22:00:43 +08:00
|
|
|
/// Mirror of the constructor: when a dispatcher is present, retract every
/// previously dispatched worker plan before this cached plan dies.
Interpreter::CachedPlan::~CachedPlan() {
  if (!plan_dispatcher_) return;
  for (const auto &id_and_plan : distributed_plan_.worker_plans)
    plan_dispatcher_->RemovePlan(id_and_plan.first);
}
|
|
|
|
|
|
|
|
Interpreter::Interpreter(database::GraphDb &db)
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
: plan_dispatcher_(
|
|
|
|
db.type() == database::GraphDb::Type::DISTRIBUTED_MASTER
|
|
|
|
// TODO: Replace this with virtual call or some other mechanism.
|
|
|
|
? &dynamic_cast<database::Master *>(&db)->plan_dispatcher()
|
|
|
|
: nullptr) {}
|
2018-03-13 17:35:14 +08:00
|
|
|
|
2017-12-22 20:39:31 +08:00
|
|
|
// Interprets `query` against `db_accessor`, with `params` supplying values
// for query parameters, and returns Results which stream rows from the
// produced plan's cursor.
//
// Pipeline: strip the query of literals -> bind provided parameters ->
// fetch (or build and cache) a plan keyed by the stripped query's hash ->
// build the execution cursor, output header and summary.
//
// Throws query::UnprovidedParameterError if the query references a
// parameter missing from `params`.
Interpreter::Results Interpreter::operator()(
    const std::string &query, database::GraphDbAccessor &db_accessor,
    const std::map<std::string, TypedValue> &params,
    bool in_explicit_transaction) {
  utils::Timer frontend_timer;
  Context ctx(db_accessor);
  ctx.in_explicit_transaction_ = in_explicit_transaction;
  // Caching is always enabled on this path; QueryToAst consults this flag.
  ctx.is_query_cached_ = true;
  // Wall-clock timestamp in milliseconds since epoch, stored on the context
  // (presumably consumed by time-related query functions -- TODO confirm).
  ctx.timestamp_ = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
  ctx.kafka_streams_ = kafka_streams_;

  // query -> stripped query (literals and parameters pulled out, so
  // syntactically identical queries share a single cache entry).
  StrippedQuery stripped(query);

  // Update context with provided parameters.
  ctx.parameters_ = stripped.literals();
  for (const auto &param_pair : stripped.parameters()) {
    auto param_it = params.find(param_pair.second);
    if (param_it == params.end()) {
      throw query::UnprovidedParameterError(
          fmt::format("Parameter ${} not provided", param_pair.second));
    }
    ctx.parameters_.Add(param_pair.first, param_it->second);
  }
  auto frontend_time = frontend_timer.Elapsed();

  // Try to get a cached plan. Note that this local shared_ptr might be the only
  // owner of the CachedPlan, so ensure it lives during the whole
  // interpretation.
  std::shared_ptr<CachedPlan> plan{nullptr};
  auto plan_cache_access = plan_cache_.access();
  auto it = plan_cache_access.find(stripped.hash());
  if (it != plan_cache_access.end()) {
    // Evict plans older than --query-plan-cache-ttl; they are re-planned
    // below.
    if (it->second->IsExpired())
      plan_cache_access.remove(stripped.hash());
    else
      plan = it->second;
  }
  utils::Timer planning_timer;
  if (!plan) {
    // Plan the query and cache it under the stripped query's hash; `.first`
    // is the iterator to the entry that ended up in the cache.
    plan = plan_cache_access.insert(stripped.hash(), QueryToPlan(stripped, ctx))
               .first->second;
  }
  auto planning_time = planning_timer.Elapsed();

  // Symbols were generated during planning; execution uses the same table.
  ctx.symbol_table_ = plan->symbol_table();

  // Execution metadata returned to the client alongside the results.
  std::map<std::string, TypedValue> summary;
  summary["parsing_time"] = frontend_time.count();
  summary["planning_time"] = planning_time.count();
  summary["cost_estimate"] = plan->cost();
  // TODO: set summary['type'] based on transaction metadata
  // the type can't be determined based only on top level LogicalOp
  // (for example MATCH DELETE RETURN will have Produce as it's top)
  // for now always use "rw" because something must be set, but it doesn't
  // have to be correct (for Bolt clients)
  summary["type"] = "rw";

  auto cursor = plan->plan().MakeCursor(ctx.db_accessor_);
  std::vector<std::string> header;
  std::vector<Symbol> output_symbols(
      plan->plan().OutputSymbols(ctx.symbol_table_));
  for (const auto &symbol : output_symbols) {
    // When the symbol is aliased or expanded from '*' (inside RETURN or
    // WITH), then there is no token position, so use symbol name.
    // Otherwise, find the name from stripped query.
    header.push_back(utils::FindOr(stripped.named_expressions(),
                                   symbol.token_position(), symbol.name())
                         .first);
  }

  return Results(std::move(ctx), plan, std::move(cursor), output_symbols,
                 header, summary, plan_cache_);
}
|
|
|
|
|
2018-03-13 17:35:14 +08:00
|
|
|
std::shared_ptr<Interpreter::CachedPlan> Interpreter::QueryToPlan(
|
|
|
|
const StrippedQuery &stripped, Context &ctx) {
|
2018-05-22 22:45:52 +08:00
|
|
|
AstStorage ast_storage = QueryToAst(stripped, ctx);
|
2018-03-13 17:35:14 +08:00
|
|
|
SymbolGenerator symbol_generator(ctx.symbol_table_);
|
|
|
|
ast_storage.query()->Accept(symbol_generator);
|
|
|
|
|
|
|
|
std::unique_ptr<plan::LogicalOperator> tmp_logical_plan;
|
|
|
|
double query_plan_cost_estimation = 0.0;
|
|
|
|
std::tie(tmp_logical_plan, query_plan_cost_estimation) =
|
|
|
|
MakeLogicalPlan(ast_storage, ctx);
|
|
|
|
|
|
|
|
DCHECK(ctx.db_accessor_.db().type() !=
|
|
|
|
database::GraphDb::Type::DISTRIBUTED_WORKER);
|
|
|
|
if (ctx.db_accessor_.db().type() ==
|
|
|
|
database::GraphDb::Type::DISTRIBUTED_MASTER) {
|
|
|
|
auto distributed_plan = MakeDistributedPlan(
|
|
|
|
*tmp_logical_plan, ctx.symbol_table_, next_plan_id_);
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[Interpreter] Created plan for distributed execution "
|
|
|
|
<< next_plan_id_ - 1;
|
2018-03-13 17:35:14 +08:00
|
|
|
return std::make_shared<CachedPlan>(std::move(distributed_plan),
|
2018-03-15 22:00:43 +08:00
|
|
|
query_plan_cost_estimation,
|
|
|
|
plan_dispatcher_);
|
2018-03-13 17:35:14 +08:00
|
|
|
} else {
|
|
|
|
return std::make_shared<CachedPlan>(
|
2018-03-15 22:00:43 +08:00
|
|
|
plan::DistributedPlan{0,
|
|
|
|
std::move(tmp_logical_plan),
|
|
|
|
{},
|
|
|
|
std::move(ast_storage),
|
|
|
|
ctx.symbol_table_},
|
|
|
|
query_plan_cost_estimation, plan_dispatcher_);
|
2018-03-13 17:35:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-22 22:45:52 +08:00
|
|
|
// Produces an AstStorage for the stripped query.
//
// When ctx.is_query_cached_ is false, the ORIGINAL query text is parsed and
// the resulting storage is returned directly, bypassing the cache entirely.
// Otherwise the STRIPPED query is parsed, cached under stripped.hash(), and
// a clone of the cached tree is returned so the caller may mutate it freely.
AstStorage Interpreter::QueryToAst(const StrippedQuery &stripped,
                                   Context &ctx) {
  if (!ctx.is_query_cached_) {
    // stripped query -> AST
    auto parser = [&] {
      // Be careful about unlocking since parser can throw.
      // antlr_lock_ serializes parser construction (the ANTLR runtime is
      // presumably not safe for concurrent construction -- TODO confirm).
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      return std::make_unique<frontend::opencypher::Parser>(
          stripped.original_query());
    }();
    auto low_level_tree = parser->tree();
    // AST -> high level tree
    frontend::CypherMainVisitor visitor(ctx);
    visitor.visit(low_level_tree);
    return std::move(visitor.storage());
  }
  auto ast_cache_accessor = ast_cache_.access();
  auto ast_it = ast_cache_accessor.find(stripped.hash());
  if (ast_it == ast_cache_accessor.end()) {
    // stripped query -> AST
    auto parser = [&] {
      // Be careful about unlocking since parser can throw.
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      try {
        return std::make_unique<frontend::opencypher::Parser>(stripped.query());
      } catch (const SyntaxException &e) {
        // There is syntax exception in stripped query. Rerun parser with
        // original query to get appropriate error message.
        auto parser = std::make_unique<frontend::opencypher::Parser>(
            stripped.original_query());
        // If exception was not thrown here, it means StrippedQuery messed up
        // something.
        LOG(FATAL) << "Stripped query can't be parsed, original can";
        return parser;
      }
    }();
    auto low_level_tree = parser->tree();
    // AST -> high level tree
    frontend::CypherMainVisitor visitor(ctx);
    visitor.visit(low_level_tree);
    // Cache it.
    ast_it =
        ast_cache_accessor.insert(stripped.hash(), std::move(visitor.storage()))
            .first;
  }
  // Return a clone of the cached AST; the cached copy itself must remain
  // untouched since it is shared by all queries with the same hash.
  AstStorage new_ast;
  ast_it->second.query()->Clone(new_ast);
  return new_ast;
}
|
|
|
|
|
|
|
|
std::pair<std::unique_ptr<plan::LogicalOperator>, double>
|
2018-05-22 22:45:52 +08:00
|
|
|
Interpreter::MakeLogicalPlan(AstStorage &ast_storage, Context &context) {
|
2017-09-19 22:58:22 +08:00
|
|
|
std::unique_ptr<plan::LogicalOperator> logical_plan;
|
2018-03-13 17:35:14 +08:00
|
|
|
auto vertex_counts = plan::MakeVertexCountCache(context.db_accessor_);
|
2017-10-13 15:46:49 +08:00
|
|
|
auto planning_context = plan::MakePlanningContext(
|
|
|
|
ast_storage, context.symbol_table_, vertex_counts);
|
2017-11-29 20:55:02 +08:00
|
|
|
return plan::MakeLogicalPlan(planning_context, context.parameters_,
|
|
|
|
FLAGS_query_cost_planner);
|
2017-09-19 22:58:22 +08:00
|
|
|
};
|
|
|
|
} // namespace query
|