2017-06-15 00:53:02 +08:00
|
|
|
#include "query/interpreter.hpp"
|
|
|
|
|
2017-10-05 00:38:17 +08:00
|
|
|
#include <glog/logging.h>
|
2018-03-13 17:35:14 +08:00
|
|
|
#include <limits>
|
2017-10-05 00:38:17 +08:00
|
|
|
|
2017-12-22 20:39:31 +08:00
|
|
|
#include "query/exceptions.hpp"
|
|
|
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
|
|
|
#include "query/frontend/opencypher/parser.hpp"
|
2018-08-16 16:13:04 +08:00
|
|
|
#include "query/frontend/semantic/required_privileges.hpp"
|
2017-12-22 20:39:31 +08:00
|
|
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
2017-09-19 22:58:22 +08:00
|
|
|
#include "query/plan/planner.hpp"
|
|
|
|
#include "query/plan/vertex_count_cache.hpp"
|
|
|
|
#include "utils/flag_validation.hpp"
|
|
|
|
|
2017-10-07 20:41:59 +08:00
|
|
|
// Selects the planner variant; the value is forwarded to
// plan::MakeLogicalPlan by Interpreter::MakeLogicalPlan.
// NOTE(review): "HIDDEN" presumably keeps the flag out of --help output;
// confirm against utils/flag_validation.hpp / gflags wrappers.
DEFINE_HIDDEN_bool(query_cost_planner, true,
                   "Use the cost-estimating query planner.");
|
Flags cleanup and QueryEngine removal
Summary:
I started with cleaning flags up (removing unused ones, documenting undocumented ones). There were some flags to remove in `QueryEngine`. Seeing how we never use hardcoded queries (AFAIK Mislav's last testing also indicated they aren't faster than interpretation), when removing those unused flags the `QueryEngine` becomes obsolete. That means that a bunch of other stuff becomes obsolete, along with the hardcoded queries. So I removed it all (this has been discussed and approved on the daily).
Some flags that were previously undocumented in `docs/user_technical/installation` are now documented. The following flags are NOT documented and in my opinion should not be displayed when starting `./memgraph --help` (@mferencevic):
```
query_vertex_count_to_expand_existsing (from rule_based_planner.cpp)
query_max_plans (rule_based_planner.cpp)
```
If you think that another organization is needed w.r.t. flag visibility, comment.
@teon.banek: I had to remove some stuff from CMakeLists to make it buildable. Please review what I removed and clean up if necessary if/when this lands. If the needed changes are minor, you can also comment.
Reviewers: buda, mislav.bradac, teon.banek, mferencevic
Reviewed By: buda, mislav.bradac
Subscribers: pullbot, mferencevic, teon.banek
Differential Revision: https://phabricator.memgraph.io/D825
2017-09-22 22:17:09 +08:00
|
|
|
// Lifetime of a cached query plan, in seconds (default 60).
// NOTE(review): FLAG_IN_RANGE presumably accepts both bounds inclusively,
// i.e. [0, INT32_MAX]; confirm in utils/flag_validation.hpp.
DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60,
                       "Time to live for cached query plans, in seconds.",
                       FLAG_IN_RANGE(0, std::numeric_limits<int32_t>::max()));
|
2017-09-19 22:58:22 +08:00
|
|
|
|
|
|
|
namespace query {
|
|
|
|
|
2018-08-24 16:12:04 +08:00
|
|
|
/// Wraps a freshly built logical plan so it can be stored in the plan cache.
///
/// @param plan the logical plan; ownership is transferred into `plan_`.
Interpreter::CachedPlan::CachedPlan(std::unique_ptr<LogicalPlan> plan)
    : plan_(std::move(plan)) {}
|
2018-03-13 17:35:14 +08:00
|
|
|
|
2017-12-22 20:39:31 +08:00
|
|
|
Interpreter::Results Interpreter::operator()(
|
2018-01-12 22:17:04 +08:00
|
|
|
const std::string &query, database::GraphDbAccessor &db_accessor,
|
Clean-up TypedValue misuse
Summary:
In a bunch of places `TypedValue` was used where `PropertyValue` should be. A lot of times it was only because `TypedValue` serialization code could be reused for `PropertyValue`, only without providing callbacks for `VERTEX`, `EDGE` and `PATH`. So first I wrote separate serialization code for `PropertyValue` and put it into storage folder. Then I fixed all the places where `TypedValue` was incorrectly used instead of `PropertyValue`. I also disabled implicit `TypedValue` to `PropertyValue` conversion in hopes of preventing misuse in the future.
After that, I wrote code for `VertexAccessor` and `EdgeAccessor` serialization and put it into `storage` folder because it was almost duplicated in distributed BFS and pull produce RPC messages. On the sender side, some subset of records (old or new or both) is serialized, and on the reciever side, records are deserialized and immediately put into transaction cache.
Then I rewrote the `TypedValue` serialization functions (`SaveCapnpTypedValue` and `LoadCapnpTypedValue`) to not take callbacks for `VERTEX`, `EDGE` and `PATH`, but use accessor serialization functions instead. That means that any code that wants to use `TypedValue` serialization must hold a reference to `GraphDbAccessor` and `DataManager`, so that should make clients reconsider if they really want to use `TypedValue` instead of `PropertyValue`.
Reviewers: teon.banek, msantl
Reviewed By: teon.banek
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D1598
2018-09-13 18:12:07 +08:00
|
|
|
const std::map<std::string, PropertyValue> ¶ms,
|
2017-12-22 20:39:31 +08:00
|
|
|
bool in_explicit_transaction) {
|
|
|
|
utils::Timer frontend_timer;
|
2018-09-05 00:02:30 +08:00
|
|
|
|
|
|
|
EvaluationContext evaluation_context;
|
|
|
|
evaluation_context.timestamp =
|
|
|
|
std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
|
|
std::chrono::system_clock::now().time_since_epoch())
|
|
|
|
.count();
|
|
|
|
|
2017-12-22 20:39:31 +08:00
|
|
|
// query -> stripped query
|
|
|
|
StrippedQuery stripped(query);
|
|
|
|
|
|
|
|
// Update context with provided parameters.
|
2018-09-11 19:32:25 +08:00
|
|
|
evaluation_context.parameters = stripped.literals();
|
2017-12-22 20:39:31 +08:00
|
|
|
for (const auto ¶m_pair : stripped.parameters()) {
|
|
|
|
auto param_it = params.find(param_pair.second);
|
|
|
|
if (param_it == params.end()) {
|
|
|
|
throw query::UnprovidedParameterError(
|
2018-08-29 17:05:34 +08:00
|
|
|
fmt::format("Parameter ${} not provided.", param_pair.second));
|
2017-12-22 20:39:31 +08:00
|
|
|
}
|
2018-09-11 19:32:25 +08:00
|
|
|
evaluation_context.parameters.Add(param_pair.first, param_it->second);
|
2017-12-22 20:39:31 +08:00
|
|
|
}
|
2018-09-03 21:42:43 +08:00
|
|
|
|
2018-09-11 19:32:25 +08:00
|
|
|
Context ctx(db_accessor);
|
|
|
|
ctx.in_explicit_transaction_ = in_explicit_transaction;
|
|
|
|
ctx.auth_ = auth_;
|
|
|
|
ctx.kafka_streams_ = kafka_streams_;
|
|
|
|
ctx.evaluation_context_ = evaluation_context;
|
|
|
|
|
2018-09-03 21:42:43 +08:00
|
|
|
ParsingContext parsing_context;
|
|
|
|
parsing_context.is_query_cached = true;
|
|
|
|
AstStorage ast_storage = QueryToAst(stripped, parsing_context, &db_accessor);
|
2018-08-16 16:13:04 +08:00
|
|
|
// TODO: Maybe cache required privileges to improve performance on very simple
|
|
|
|
// queries.
|
|
|
|
auto required_privileges = query::GetRequiredPrivileges(ast_storage);
|
2017-12-22 20:39:31 +08:00
|
|
|
auto frontend_time = frontend_timer.Elapsed();
|
|
|
|
|
2018-03-13 17:35:14 +08:00
|
|
|
// Try to get a cached plan. Note that this local shared_ptr might be the only
|
|
|
|
// owner of the CachedPlan, so ensure it lives during the whole
|
|
|
|
// interpretation.
|
2018-03-15 22:00:43 +08:00
|
|
|
std::shared_ptr<CachedPlan> plan{nullptr};
|
|
|
|
auto plan_cache_access = plan_cache_.access();
|
|
|
|
auto it = plan_cache_access.find(stripped.hash());
|
|
|
|
if (it != plan_cache_access.end()) {
|
|
|
|
if (it->second->IsExpired())
|
|
|
|
plan_cache_access.remove(stripped.hash());
|
|
|
|
else
|
|
|
|
plan = it->second;
|
|
|
|
}
|
2017-12-22 20:39:31 +08:00
|
|
|
utils::Timer planning_timer;
|
2017-12-27 19:11:20 +08:00
|
|
|
if (!plan) {
|
2018-08-24 16:12:04 +08:00
|
|
|
plan = plan_cache_access
|
|
|
|
.insert(stripped.hash(), AstToPlan(std::move(ast_storage), &ctx))
|
|
|
|
.first->second;
|
2017-12-22 20:39:31 +08:00
|
|
|
}
|
2018-03-13 17:35:14 +08:00
|
|
|
auto planning_time = planning_timer.Elapsed();
|
2017-12-27 19:11:20 +08:00
|
|
|
|
|
|
|
ctx.symbol_table_ = plan->symbol_table();
|
2017-12-22 20:39:31 +08:00
|
|
|
|
Extract communication to static library
Summary:
Session specifics have been move out of the Bolt `executing` state, and
are accessed via pure virtual Session type. Our server is templated on
the session and we are setting the concrete type, so there should be no
virtual call overhead. Abstract Session is used to indicate the
interface, this could have also been templated, but the explicit
interface definition makes it clearer.
Specific session implementation for running Memgraph is now implemented
in memgraph_bolt, which instantiates the concrete session type. This may
not be 100% appropriate place, but Memgraph specific session isn't
needed anywhere else.
Bolt/communication tests now use a dummy session and depend only on
communication, which significantly improves test run times.
All these changes make the communication a library which doesn't depend
on storage nor the database. Only shared connection points, which aren't
part of the base communication library are:
* glue/conversion -- which converts between storage and bolt types, and
* communication/result_stream_faker -- templated, but used in tests and query/repl
Depends on D1453
Reviewers: mferencevic, buda, mtomic, msantl
Reviewed By: mferencevic, mtomic
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D1456
2018-07-10 22:18:19 +08:00
|
|
|
std::map<std::string, TypedValue> summary;
|
2017-12-22 20:39:31 +08:00
|
|
|
summary["parsing_time"] = frontend_time.count();
|
|
|
|
summary["planning_time"] = planning_time.count();
|
2017-12-27 19:11:20 +08:00
|
|
|
summary["cost_estimate"] = plan->cost();
|
2017-12-22 20:39:31 +08:00
|
|
|
// TODO: set summary['type'] based on transaction metadata
|
|
|
|
// the type can't be determined based only on top level LogicalOp
|
|
|
|
// (for example MATCH DELETE RETURN will have Produce as it's top)
|
|
|
|
// for now always use "rw" because something must be set, but it doesn't
|
|
|
|
// have to be correct (for Bolt clients)
|
|
|
|
summary["type"] = "rw";
|
|
|
|
|
2018-02-14 23:33:01 +08:00
|
|
|
auto cursor = plan->plan().MakeCursor(ctx.db_accessor_);
|
2017-12-22 20:39:31 +08:00
|
|
|
std::vector<std::string> header;
|
|
|
|
std::vector<Symbol> output_symbols(
|
2018-02-14 23:33:01 +08:00
|
|
|
plan->plan().OutputSymbols(ctx.symbol_table_));
|
2017-12-22 20:39:31 +08:00
|
|
|
for (const auto &symbol : output_symbols) {
|
|
|
|
// When the symbol is aliased or expanded from '*' (inside RETURN or
|
|
|
|
// WITH), then there is no token position, so use symbol name.
|
|
|
|
// Otherwise, find the name from stripped query.
|
|
|
|
header.push_back(utils::FindOr(stripped.named_expressions(),
|
|
|
|
symbol.token_position(), symbol.name())
|
|
|
|
.first);
|
|
|
|
}
|
|
|
|
|
2017-12-27 19:11:20 +08:00
|
|
|
return Results(std::move(ctx), plan, std::move(cursor), output_symbols,
|
2018-08-16 16:13:04 +08:00
|
|
|
header, summary, plan_cache_, required_privileges);
|
2017-12-22 20:39:31 +08:00
|
|
|
}
|
|
|
|
|
2018-08-16 16:13:04 +08:00
|
|
|
std::shared_ptr<Interpreter::CachedPlan> Interpreter::AstToPlan(
|
2018-08-24 16:12:04 +08:00
|
|
|
AstStorage ast_storage, Context *ctx) {
|
|
|
|
SymbolGenerator symbol_generator(ctx->symbol_table_);
|
2018-03-13 17:35:14 +08:00
|
|
|
ast_storage.query()->Accept(symbol_generator);
|
2018-08-24 16:12:04 +08:00
|
|
|
return std::make_shared<CachedPlan>(
|
|
|
|
MakeLogicalPlan(std::move(ast_storage), ctx));
|
2018-03-13 17:35:14 +08:00
|
|
|
}
|
|
|
|
|
2018-05-22 22:45:52 +08:00
|
|
|
/// Produces the AST for a stripped query.
///
/// When `context.is_query_cached` is false, the original query text is parsed
/// and the resulting AST is returned directly. Otherwise the AST cache is
/// consulted by the stripped query's hash; on a miss the stripped query text
/// is parsed and the result cached. In the cached mode the returned AST is a
/// clone of the cache entry.
///
/// @param stripped the stripped query (provides the hash and both query texts).
/// @param context parsing options, forwarded to the AST visitor.
/// @param db_accessor database accessor, forwarded to the AST visitor.
/// @return a new `AstStorage` holding the query's AST.
AstStorage Interpreter::QueryToAst(const StrippedQuery &stripped,
                                   const ParsingContext &context,
                                   database::GraphDbAccessor *db_accessor) {
  if (!context.is_query_cached) {
    // original query -> low level parse tree
    auto original_parser = [&] {
      // The parser can throw, so the lock is held through a scoped guard
      // that releases it on every exit path.
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      return std::make_unique<frontend::opencypher::Parser>(
          stripped.original_query());
    }();
    auto parse_tree = original_parser->tree();

    // low level parse tree -> high level AST
    frontend::CypherMainVisitor ast_builder(context, db_accessor);
    ast_builder.visit(parse_tree);
    return std::move(ast_builder.storage());
  }

  auto cache_access = ast_cache_.access();
  auto cached_ast = cache_access.find(stripped.hash());
  if (cached_ast == cache_access.end()) {
    // stripped query -> low level parse tree
    auto stripped_parser = [&] {
      // The parser can throw, so the lock is held through a scoped guard
      // that releases it on every exit path.
      std::unique_lock<utils::SpinLock> guard(antlr_lock_);
      try {
        return std::make_unique<frontend::opencypher::Parser>(stripped.query());
      } catch (const SyntaxException &) {
        // The stripped query has a syntax error. Parse the original query
        // instead so that the user gets an appropriate error message.
        auto fallback_parser = std::make_unique<frontend::opencypher::Parser>(
            stripped.original_query());
        // Reaching this point means the original parsed fine, so
        // StrippedQuery must have messed something up.
        LOG(FATAL) << "Stripped query can't be parsed, original can";
        return fallback_parser;
      }
    }();
    auto parse_tree = stripped_parser->tree();

    // low level parse tree -> high level AST
    frontend::CypherMainVisitor ast_builder(context, db_accessor);
    ast_builder.visit(parse_tree);

    // Cache it.
    cached_ast =
        cache_access.insert(stripped.hash(), std::move(ast_builder.storage()))
            .first;
  }

  // Return a clone of the cache entry rather than the entry itself.
  AstStorage cloned_ast;
  cached_ast->second.query()->Clone(cloned_ast);
  return cloned_ast;
}
|
|
|
|
|
2018-08-24 16:12:04 +08:00
|
|
|
class SingleNodeLogicalPlan final : public LogicalPlan {
|
|
|
|
public:
|
|
|
|
SingleNodeLogicalPlan(std::unique_ptr<plan::LogicalOperator> root,
|
|
|
|
double cost, AstStorage storage,
|
|
|
|
const SymbolTable &symbol_table)
|
|
|
|
: root_(std::move(root)),
|
|
|
|
cost_(cost),
|
|
|
|
storage_(std::move(storage)),
|
|
|
|
symbol_table_(symbol_table) {}
|
|
|
|
|
|
|
|
const plan::LogicalOperator &GetRoot() const override { return *root_; }
|
|
|
|
double GetCost() const override { return cost_; }
|
|
|
|
const SymbolTable &GetSymbolTable() const override { return symbol_table_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::unique_ptr<plan::LogicalOperator> root_;
|
|
|
|
double cost_;
|
|
|
|
AstStorage storage_;
|
|
|
|
SymbolTable symbol_table_;
|
2017-09-19 22:58:22 +08:00
|
|
|
};
|
2018-08-24 16:12:04 +08:00
|
|
|
|
|
|
|
/// Plans the query held in `ast_storage` and packages the result.
///
/// @param ast_storage AST to plan; moved into the returned plan afterwards.
/// @param context supplies the database accessor, the symbol table and the
///        evaluation parameters used by the planner.
/// @return a `SingleNodeLogicalPlan` owning the root operator, its cost, the
///         AST storage and a copy of the context's symbol table.
std::unique_ptr<LogicalPlan> Interpreter::MakeLogicalPlan(
    AstStorage ast_storage, Context *context) {
  auto vertex_count_cache = plan::MakeVertexCountCache(context->db_accessor_);
  auto planner_ctx = plan::MakePlanningContext(
      ast_storage, context->symbol_table_, vertex_count_cache);

  // Both values are filled in by the planner call below.
  std::unique_ptr<plan::LogicalOperator> plan_root;
  double plan_cost;
  std::tie(plan_root, plan_cost) = plan::MakeLogicalPlan(
      planner_ctx, context->evaluation_context_.parameters,
      FLAGS_query_cost_planner);

  // The AST storage is handed over only after planning has finished with it.
  return std::make_unique<SingleNodeLogicalPlan>(
      std::move(plan_root), plan_cost, std::move(ast_storage),
      context->symbol_table_);
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
} // namespace query
|