2017-03-22 23:38:43 +08:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <ctime>
|
2017-06-05 18:44:18 +08:00
|
|
|
#include <limits>
|
2017-03-22 23:38:43 +08:00
|
|
|
|
2017-06-21 17:29:13 +08:00
|
|
|
#include <gflags/gflags.h>
|
|
|
|
#include <glog/logging.h>
|
|
|
|
|
2017-03-22 23:38:43 +08:00
|
|
|
#include "database/graph_db_accessor.hpp"
|
|
|
|
#include "query/context.hpp"
|
2017-07-20 00:14:59 +08:00
|
|
|
#include "query/exceptions.hpp"
|
2017-03-22 23:38:43 +08:00
|
|
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
|
|
|
#include "query/frontend/opencypher/parser.hpp"
|
|
|
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
2017-06-15 00:53:02 +08:00
|
|
|
#include "query/frontend/stripped.hpp"
|
2017-04-13 16:01:16 +08:00
|
|
|
#include "query/interpret/frame.hpp"
|
2017-09-19 22:58:22 +08:00
|
|
|
#include "query/plan/operator.hpp"
|
2017-08-04 20:12:06 +08:00
|
|
|
#include "threading/sync/spinlock.hpp"
|
2017-07-15 01:33:45 +08:00
|
|
|
#include "utils/timer.hpp"
|
2017-03-22 23:38:43 +08:00
|
|
|
|
2017-06-16 15:22:30 +08:00
|
|
|
// Declares the `query_cost_planner` flag (defined in another translation
// unit). It is not read by the code in this header.
// NOTE(review): presumably consumed by the out-of-line planning code -- confirm.
DECLARE_bool(query_cost_planner);
|
2017-09-19 22:58:22 +08:00
|
|
|
// Declares the `query_plan_cache` flag (defined in another translation unit).
// Interpreter::Interpret reads it to decide whether generated logical plans
// are stored in, and served from, the plan cache.
DECLARE_bool(query_plan_cache);
|
Flags cleanup and QueryEngine removal
Summary:
I started with cleaning flags up (removing unused ones, documenting undocumented ones). There were some flags to remove in `QueryEngine`. Seeing how we never use hardcoded queries (AFAIK Mislav's latest testing also indicated they aren't faster than interpretation), when removing those unused flags the `QueryEngine` becomes obsolete. That means that a bunch of other stuff becomes obsolete, along with the hardcoded queries. So I removed it all (this has been discussed and approved on the daily).
Some flags that were previously undocumented in `docs/user_technical/installation` are now documented. The following flags are NOT documented and in my opinion should not be displayed when starting `./memgraph --help` (@mferencevic):
```
query_vertex_count_to_expand_existing (from rule_based_planner.cpp)
query_max_plans (rule_based_planner.cpp)
```
If you think that another organization is needed w.r.t. flag visibility, comment.
@teon.banek: I had to remove some stuff from CMakeLists to make it buildable. Please review what I removed and clean up if necessary if/when this lands. If the needed changes are minor, you can also comment.
Reviewers: buda, mislav.bradac, teon.banek, mferencevic
Reviewed By: buda, mislav.bradac
Subscribers: pullbot, mferencevic, teon.banek
Differential Revision: https://phabricator.memgraph.io/D825
2017-09-22 22:17:09 +08:00
|
|
|
// Declares the `query_plan_cache_ttl` flag (defined in another translation
// unit). Its value is interpreted as a number of seconds: a cached plan whose
// timer has run for longer than this is reported as expired.
DECLARE_int32(query_plan_cache_ttl);
|
2017-06-15 00:53:02 +08:00
|
|
|
|
2017-03-22 23:38:43 +08:00
|
|
|
namespace query {
|
|
|
|
|
2017-06-21 17:29:13 +08:00
|
|
|
class Interpreter {
|
2017-09-19 22:58:22 +08:00
|
|
|
private:
|
|
|
|
class CachedPlan {
|
|
|
|
public:
|
|
|
|
CachedPlan(std::unique_ptr<plan::LogicalOperator> plan, double cost,
|
|
|
|
SymbolTable symbol_table, AstTreeStorage storage)
|
|
|
|
: plan_(std::move(plan)),
|
|
|
|
cost_(cost),
|
|
|
|
symbol_table_(symbol_table),
|
|
|
|
ast_storage_(std::move(storage)) {}
|
|
|
|
|
|
|
|
const auto &plan() const { return *plan_; }
|
|
|
|
double cost() const { return cost_; }
|
|
|
|
const auto &symbol_table() const { return symbol_table_; }
|
|
|
|
|
|
|
|
bool IsExpired() const {
|
|
|
|
auto elapsed = cache_timer_.Elapsed();
|
|
|
|
return std::chrono::duration_cast<std::chrono::seconds>(elapsed) >
|
Flags cleanup and QueryEngine removal
Summary:
I started with cleaning flags up (removing unused ones, documenting undocumented ones). There were some flags to remove in `QueryEngine`. Seeing how we never use hardcoded queries (AFAIK last Mislav's testing also indicated they aren't faster then interpretation), when removing those unused flags the `QueryEngine` becomes obsolete. That means that a bunch of other stuff becomes obsolete, along with the hardcoded queries. So I removed it all (this has been discussed and approved on the daily).
Some flags that were previously undocumented in `docs/user_technical/installation` are now documented. The following flags are NOT documented and in my opinion should not be displayed when starting `./memgraph --help` (@mferencevic):
```
query_vertex_count_to_expand_existsing (from rule_based_planner.cpp)
query_max_plans (rule_based_planner.cpp)
```
If you think that another organization is needed w.r.t. flag visibility, comment.
@teon.banek: I had to remove some stuff from CMakeLists to make it buildable. Please review what I removed and clean up if necessary if/when this lands. If the needed changes are minor, you can also comment.
Reviewers: buda, mislav.bradac, teon.banek, mferencevic
Reviewed By: buda, mislav.bradac
Subscribers: pullbot, mferencevic, teon.banek
Differential Revision: https://phabricator.memgraph.io/D825
2017-09-22 22:17:09 +08:00
|
|
|
std::chrono::seconds(FLAGS_query_plan_cache_ttl);
|
2017-09-19 22:58:22 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::unique_ptr<plan::LogicalOperator> plan_;
|
|
|
|
double cost_;
|
|
|
|
SymbolTable symbol_table_;
|
|
|
|
AstTreeStorage ast_storage_;
|
|
|
|
utils::Timer cache_timer_;
|
|
|
|
};
|
|
|
|
|
2017-06-08 00:28:31 +08:00
|
|
|
public:
|
2017-06-21 17:29:13 +08:00
|
|
|
Interpreter() {}
|
2017-09-19 22:58:22 +08:00
|
|
|
|
2017-06-08 00:28:31 +08:00
|
|
|
template <typename Stream>
|
|
|
|
void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
|
2017-07-20 00:14:59 +08:00
|
|
|
Stream &stream,
|
|
|
|
const std::map<std::string, TypedValue> ¶ms) {
|
2017-07-15 01:33:45 +08:00
|
|
|
utils::Timer frontend_timer;
|
2017-09-13 16:27:12 +08:00
|
|
|
Context ctx(db_accessor);
|
2017-09-26 19:37:38 +08:00
|
|
|
ctx.is_query_cached_ = true;
|
2017-06-08 00:28:31 +08:00
|
|
|
std::map<std::string, TypedValue> summary;
|
|
|
|
|
2017-09-13 22:48:46 +08:00
|
|
|
// query -> stripped query
|
|
|
|
StrippedQuery stripped(query);
|
2017-06-08 00:28:31 +08:00
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
// Update context with provided parameters.
|
|
|
|
ctx.parameters_ = stripped.literals();
|
|
|
|
for (const auto ¶m_pair : stripped.parameters()) {
|
|
|
|
auto param_it = params.find(param_pair.second);
|
|
|
|
if (param_it == params.end()) {
|
|
|
|
throw query::UnprovidedParameterError(
|
|
|
|
fmt::format("Parameter$ {} not provided", param_pair.second));
|
2017-06-15 00:53:02 +08:00
|
|
|
}
|
2017-09-19 22:58:22 +08:00
|
|
|
ctx.parameters_.Add(param_pair.first, param_it->second);
|
|
|
|
}
|
2017-07-20 00:14:59 +08:00
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::shared_ptr<CachedPlan> cached_plan;
|
|
|
|
std::experimental::optional<AstTreeStorage> ast_storage;
|
|
|
|
// Check if we have a cached logical plan ready, so that we can skip the
|
|
|
|
// whole query -> AST -> logical_plan process.
|
|
|
|
auto plan_cache_accessor = plan_cache_.access();
|
|
|
|
auto plan_cache_it = plan_cache_accessor.find(stripped.hash());
|
|
|
|
if (plan_cache_it != plan_cache_accessor.end() &&
|
|
|
|
plan_cache_it->second->IsExpired()) {
|
|
|
|
// Remove the expired plan.
|
|
|
|
plan_cache_accessor.remove(stripped.hash());
|
|
|
|
plan_cache_it = plan_cache_accessor.end();
|
|
|
|
}
|
|
|
|
if (plan_cache_it == plan_cache_accessor.end()) {
|
|
|
|
// We didn't find a cached plan or it was expired.
|
|
|
|
// stripped query -> high level tree
|
|
|
|
ast_storage = QueryToAst(stripped, ctx);
|
|
|
|
} else {
|
|
|
|
cached_plan = plan_cache_it->second;
|
|
|
|
}
|
2017-07-20 00:14:59 +08:00
|
|
|
|
2017-07-15 01:33:45 +08:00
|
|
|
auto frontend_time = frontend_timer.Elapsed();
|
2017-06-08 00:28:31 +08:00
|
|
|
|
2017-07-15 01:33:45 +08:00
|
|
|
utils::Timer planning_timer;
|
2017-06-08 00:28:31 +08:00
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
auto fill_symbol_table = [](auto &ast_storage, auto &symbol_table) {
|
|
|
|
SymbolGenerator symbol_generator(symbol_table);
|
|
|
|
ast_storage.query()->Accept(symbol_generator);
|
|
|
|
};
|
|
|
|
|
|
|
|
// If the plan is not stored in the cache, `tmp_logical_plan` owns the newly
|
|
|
|
// generated plan. Otherwise, it is empty and `cached_plan` owns the plan.
|
|
|
|
// In all cases, `logical_plan` references the plan to be used.
|
|
|
|
std::unique_ptr<plan::LogicalOperator> tmp_logical_plan;
|
|
|
|
const plan::LogicalOperator *logical_plan = nullptr;
|
2017-06-08 00:28:31 +08:00
|
|
|
double query_plan_cost_estimation = 0.0;
|
2017-09-19 22:58:22 +08:00
|
|
|
if (FLAGS_query_plan_cache) {
|
|
|
|
if (!cached_plan) {
|
|
|
|
debug_assert(ast_storage, "AST is required to generate a plan");
|
|
|
|
fill_symbol_table(*ast_storage, ctx.symbol_table_);
|
|
|
|
std::tie(tmp_logical_plan, query_plan_cost_estimation) =
|
|
|
|
MakeLogicalPlan(*ast_storage, db_accessor, ctx);
|
|
|
|
// Cache the generated plan.
|
|
|
|
auto plan_cache_accessor = plan_cache_.access();
|
|
|
|
auto plan_cache_it =
|
|
|
|
plan_cache_accessor
|
|
|
|
.insert(
|
|
|
|
stripped.hash(),
|
|
|
|
std::make_shared<CachedPlan>(
|
|
|
|
std::move(tmp_logical_plan), query_plan_cost_estimation,
|
|
|
|
ctx.symbol_table_, std::move(*ast_storage)))
|
|
|
|
.first;
|
|
|
|
cached_plan = plan_cache_it->second;
|
2017-06-05 18:44:18 +08:00
|
|
|
}
|
2017-09-19 22:58:22 +08:00
|
|
|
query_plan_cost_estimation = cached_plan->cost();
|
|
|
|
ctx.symbol_table_ = cached_plan->symbol_table();
|
|
|
|
logical_plan = &cached_plan->plan();
|
2017-06-08 00:28:31 +08:00
|
|
|
} else {
|
2017-09-19 22:58:22 +08:00
|
|
|
debug_assert(ast_storage, "Without plan caching, AST must be generated.");
|
|
|
|
fill_symbol_table(*ast_storage, ctx.symbol_table_);
|
|
|
|
std::tie(tmp_logical_plan, query_plan_cost_estimation) =
|
|
|
|
MakeLogicalPlan(*ast_storage, db_accessor, ctx);
|
|
|
|
logical_plan = tmp_logical_plan.get();
|
2017-06-05 18:44:18 +08:00
|
|
|
}
|
2017-09-19 22:58:22 +08:00
|
|
|
// Below this point, ast_storage should not be used. Other than not allowing
|
|
|
|
// modifications, the ast_storage may have moved to a cache.
|
2017-05-23 17:28:41 +08:00
|
|
|
|
2017-06-08 00:28:31 +08:00
|
|
|
// generate frame based on symbol table max_position
|
2017-09-13 16:27:12 +08:00
|
|
|
Frame frame(ctx.symbol_table_.max_position());
|
2017-07-15 01:33:45 +08:00
|
|
|
auto planning_time = planning_timer.Elapsed();
|
2017-06-08 00:28:31 +08:00
|
|
|
|
2017-07-15 01:33:45 +08:00
|
|
|
utils::Timer execution_timer;
|
2017-06-08 00:28:31 +08:00
|
|
|
std::vector<std::string> header;
|
|
|
|
std::vector<Symbol> output_symbols(
|
2017-09-13 16:27:12 +08:00
|
|
|
logical_plan->OutputSymbols(ctx.symbol_table_));
|
2017-06-08 00:28:31 +08:00
|
|
|
if (!output_symbols.empty()) {
|
|
|
|
// Since we have output symbols, this means that the query contains RETURN
|
|
|
|
// clause, so stream out the results.
|
|
|
|
|
|
|
|
// generate header
|
2017-09-13 22:48:46 +08:00
|
|
|
for (const auto &symbol : output_symbols) {
|
|
|
|
// When the symbol is aliased or expanded from '*' (inside RETURN or
|
|
|
|
// WITH), then there is no token position, so use symbol name.
|
|
|
|
// Otherwise, find the name from stripped query.
|
2017-09-26 18:51:52 +08:00
|
|
|
header.push_back(utils::FindOr(stripped.named_expressions(),
|
|
|
|
symbol.token_position(), symbol.name())
|
2017-09-20 16:45:27 +08:00
|
|
|
.first);
|
2017-09-13 22:48:46 +08:00
|
|
|
}
|
2017-06-08 00:28:31 +08:00
|
|
|
stream.Header(header);
|
|
|
|
|
|
|
|
// stream out results
|
|
|
|
auto cursor = logical_plan->MakeCursor(db_accessor);
|
2017-09-13 16:27:12 +08:00
|
|
|
while (cursor->Pull(frame, ctx)) {
|
2017-06-08 00:28:31 +08:00
|
|
|
std::vector<TypedValue> values;
|
|
|
|
for (const auto &symbol : output_symbols)
|
|
|
|
values.emplace_back(frame[symbol]);
|
|
|
|
stream.Result(values);
|
|
|
|
}
|
2017-09-19 22:58:22 +08:00
|
|
|
} else if (dynamic_cast<const plan::CreateNode *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::CreateExpand *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::SetProperty *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::SetProperties *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::SetLabels *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::RemoveProperty *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::RemoveLabels *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::Delete *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::Merge *>(logical_plan) ||
|
|
|
|
dynamic_cast<const plan::CreateIndex *>(logical_plan)) {
|
2017-06-08 00:28:31 +08:00
|
|
|
stream.Header(header);
|
|
|
|
auto cursor = logical_plan->MakeCursor(db_accessor);
|
2017-09-13 16:27:12 +08:00
|
|
|
while (cursor->Pull(frame, ctx)) continue;
|
2017-06-08 00:28:31 +08:00
|
|
|
} else {
|
|
|
|
throw QueryRuntimeException("Unknown top level LogicalOperator");
|
2017-03-22 23:38:43 +08:00
|
|
|
}
|
2017-07-15 01:33:45 +08:00
|
|
|
auto execution_time = execution_timer.Elapsed();
|
2017-06-08 00:28:31 +08:00
|
|
|
|
2017-09-04 20:16:12 +08:00
|
|
|
summary["parsing_time"] = frontend_time.count();
|
|
|
|
summary["planning_time"] = planning_time.count();
|
|
|
|
summary["plan_execution_time"] = execution_time.count();
|
|
|
|
summary["cost_estimate"] = query_plan_cost_estimation;
|
2017-06-08 00:28:31 +08:00
|
|
|
|
|
|
|
// TODO: set summary['type'] based on transaction metadata
|
|
|
|
// the type can't be determined based only on top level LogicalOp
|
|
|
|
// (for example MATCH DELETE RETURN will have Produce as it's top)
|
|
|
|
// for now always use "rw" because something must be set, but it doesn't
|
|
|
|
// have to be correct (for Bolt clients)
|
|
|
|
summary["type"] = "rw";
|
|
|
|
stream.Summary(summary);
|
2017-09-06 21:54:21 +08:00
|
|
|
DLOG(INFO) << "Executed '" << query << "', params: " << params
|
|
|
|
<< ", summary: " << summary;
|
2017-03-22 23:38:43 +08:00
|
|
|
}
|
|
|
|
|
2017-06-08 00:28:31 +08:00
|
|
|
private:
|
2017-09-19 22:58:22 +08:00
|
|
|
// stripped query -> high level tree
|
|
|
|
AstTreeStorage QueryToAst(const StrippedQuery &stripped, Context &ctx);
|
|
|
|
|
|
|
|
// high level tree -> (logical plan, plan cost)
|
|
|
|
// AstTreeStorage and SymbolTable may be modified during planning.
|
|
|
|
std::pair<std::unique_ptr<plan::LogicalOperator>, double> MakeLogicalPlan(
|
|
|
|
AstTreeStorage &, const GraphDbAccessor &, Context &);
|
|
|
|
|
2017-09-13 22:48:46 +08:00
|
|
|
ConcurrentMap<HashType, AstTreeStorage> ast_cache_;
|
2017-09-19 22:58:22 +08:00
|
|
|
ConcurrentMap<HashType, std::shared_ptr<CachedPlan>> plan_cache_;
|
2017-08-03 19:28:50 +08:00
|
|
|
// Antlr has singleton instance that is shared between threads. It is
|
|
|
|
// protected by locks inside of antlr. Unfortunately, they are not protected
|
|
|
|
// in a very good way. Once we have antlr version without race conditions we
|
|
|
|
// can remove this lock. This will probably never happen since antlr
|
|
|
|
// developers introduce more bugs in each version. Fortunately, we have cache
|
|
|
|
// so this lock probably won't impact performance much...
|
|
|
|
SpinLock antlr_lock_;
|
2017-06-08 00:28:31 +08:00
|
|
|
};
|
2017-04-26 22:12:39 +08:00
|
|
|
|
|
|
|
} // namespace query
|