Refactor query parsing in the Interpreter

Summary: Depends on D2482

Reviewers: teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2499
This commit is contained in:
Lovro Lugovic 2019-10-18 17:12:48 +02:00
parent b0cc564f8d
commit bd998dc618
3 changed files with 144 additions and 152 deletions
src/query
tests/benchmark/query

View File

@ -71,6 +71,99 @@ class DumpClosure final {
} // namespace
#endif
/**
* A container for data related to the parsing of a query.
*/
struct ParsedQuery {
std::string query_string;
std::map<std::string, PropertyValue> user_parameters;
Parameters parameters;
frontend::StrippedQuery stripped_query;
AstStorage ast_storage;
Query *query;
std::vector<AuthQuery::Privilege> required_privileges;
};
ParsedQuery ParseQuery(const std::string &query_string,
const std::map<std::string, PropertyValue> &params,
utils::SkipList<QueryCacheEntry> *cache,
utils::SpinLock *antlr_lock) {
// Strip the query for caching purposes. The process of stripping a query
// "normalizes" it by replacing any literals with new parameters . This
// results in just the *structure* of the query being taken into account for
// caching.
frontend::StrippedQuery stripped_query{query_string};
// Copy over the parameters that were introduced during stripping.
Parameters parameters{stripped_query.literals()};
// Check that all user-specified parameters are provided.
for (const auto &param_pair : stripped_query.parameters()) {
auto it = params.find(param_pair.second);
if (it == params.end()) {
throw query::UnprovidedParameterError("Parameter ${} not provided.",
param_pair.second);
}
parameters.Add(param_pair.first, it->second);
}
// Cache the query's AST if it isn't already.
auto hash = stripped_query.hash();
auto accessor = cache->access();
auto it = accessor.find(hash);
std::unique_ptr<frontend::opencypher::Parser> parser;
if (it == accessor.end()) {
{
std::unique_lock<utils::SpinLock> guard(*antlr_lock);
try {
parser = std::make_unique<frontend::opencypher::Parser>(
stripped_query.query());
} catch (const SyntaxException &e) {
// There is a syntax exception in the stripped query. Re-run the parser
// on the original query to get an appropriate error messsage.
parser = std::make_unique<frontend::opencypher::Parser>(query_string);
// If an exception was not thrown here, the stripper messed something
// up.
LOG(FATAL)
<< "The stripped query can't be parsed, but the original can.";
}
}
// Convert the ANTLR4 parse tree into an AST.
AstStorage ast_storage;
frontend::ParsingContext context{true};
frontend::CypherMainVisitor visitor(context, &ast_storage);
visitor.visit(parser->tree());
CachedQuery cached_query{std::move(ast_storage), visitor.query(),
query::GetRequiredPrivileges(visitor.query())};
it = accessor.insert({hash, std::move(cached_query)}).first;
}
// Return a copy of both the AST storage and the query.
AstStorage ast_storage;
ast_storage.properties_ = it->second.ast_storage.properties_;
ast_storage.labels_ = it->second.ast_storage.labels_;
ast_storage.edge_types_ = it->second.ast_storage.edge_types_;
Query *query = it->second.query->Clone(&ast_storage);
return ParsedQuery{query_string,
params,
std::move(parameters),
std::move(stripped_query),
std::move(ast_storage),
query,
it->second.required_privileges};
}
class SingleNodeLogicalPlan final : public LogicalPlan {
public:
SingleNodeLogicalPlan(std::unique_ptr<plan::LogicalOperator> root,
@ -911,17 +1004,13 @@ Interpreter::Results Interpreter::Prepare(
const std::string &query_string,
const std::map<std::string, PropertyValue> &params,
DbAccessor *db_accessor) {
AstStorage ast_storage;
Parameters parameters;
std::map<std::string, TypedValue> summary;
utils::Timer parsing_timer;
auto queries =
StripAndParseQuery(query_string, &parameters, &ast_storage, params);
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
ParsedQuery parsed_query =
ParseQuery(query_string, params, &interpreter_context_->ast_cache,
&interpreter_context_->antlr_lock);
auto parsing_time = parsing_timer.Elapsed();
summary["parsing_time"] = parsing_time.count();
// TODO: set summary['type'] based on transaction metadata
// the type can't be determined based only on top level LogicalOp
@ -948,8 +1037,9 @@ Interpreter::Results Interpreter::Prepare(
#endif
if (auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query)) {
plan = CypherQueryToPlan(stripped_query.hash(), cypher_query,
std::move(ast_storage), parameters, db_accessor);
plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query,
std::move(parsed_query.ast_storage),
parsed_query.parameters, db_accessor);
auto planning_time = planning_timer.Elapsed();
summary["planning_time"] = planning_time.count();
summary["cost_estimate"] = plan->cost();
@ -957,24 +1047,29 @@ Interpreter::Results Interpreter::Prepare(
auto output_symbols = plan->plan().OutputSymbols(plan->symbol_table());
std::vector<std::string> header;
header.reserve(output_symbols.size());
for (const auto &symbol : output_symbols) {
// When the symbol is aliased or expanded from '*' (inside RETURN or
// WITH), then there is no token position, so use symbol name.
// Otherwise, find the name from stripped query.
header.push_back(utils::FindOr(stripped_query.named_expressions(),
symbol.token_position(), symbol.name())
.first);
header.push_back(
utils::FindOr(parsed_query.stripped_query.named_expressions(),
symbol.token_position(), symbol.name())
.first);
}
return Results(db_accessor, parameters, plan, std::move(output_symbols),
std::move(header), std::move(summary),
parsed_query.required_privileges, &execution_memory_);
return Results(db_accessor, parsed_query.parameters, plan,
std::move(output_symbols), std::move(header),
std::move(summary), parsed_query.required_privileges,
&execution_memory_);
}
if (utils::IsSubtype(*parsed_query.query, ExplainQuery::kType)) {
const std::string kExplainQueryStart = "explain ";
CHECK(utils::StartsWith(utils::ToLowerCase(stripped_query.query()),
kExplainQueryStart))
CHECK(utils::StartsWith(
utils::ToLowerCase(parsed_query.stripped_query.query()),
kExplainQueryStart))
<< "Expected stripped query to start with '" << kExplainQueryStart
<< "'";
@ -999,17 +1094,16 @@ Interpreter::Results Interpreter::Prepare(
// totally different plan if we were to create a new one right now. Doing so
// would result in discrepancies between the explained (or profiled) plan
// and the one that's executed when the query is ran standalone.
auto queries =
StripAndParseQuery(query_string.substr(kExplainQueryStart.size()),
&parameters, &ast_storage, params);
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
ParsedQuery parsed_query = ParseQuery(
query_string.substr(kExplainQueryStart.size()), params,
&interpreter_context_->ast_cache, &interpreter_context_->antlr_lock);
auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
CHECK(cypher_query)
<< "Cypher grammar should not allow other queries in EXPLAIN";
std::shared_ptr<CachedPlan> cypher_query_plan =
CypherQueryToPlan(stripped_query.hash(), cypher_query,
std::move(ast_storage), parameters, db_accessor);
CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query,
std::move(parsed_query.ast_storage),
parsed_query.parameters, db_accessor);
std::stringstream printed_plan;
PrettyPrintPlan(*db_accessor, &cypher_query_plan->plan(), &printed_plan);
@ -1037,15 +1131,17 @@ Interpreter::Results Interpreter::Prepare(
std::vector<std::string> header{query_plan_symbol.name()};
return Results(db_accessor, parameters, plan, std::move(output_symbols),
std::move(header), std::move(summary),
parsed_query.required_privileges, &execution_memory_);
return Results(db_accessor, parsed_query.parameters, plan,
std::move(output_symbols), std::move(header),
std::move(summary), parsed_query.required_privileges,
&execution_memory_);
}
if (utils::IsSubtype(*parsed_query.query, ProfileQuery::kType)) {
const std::string kProfileQueryStart = "profile ";
CHECK(utils::StartsWith(utils::ToLowerCase(stripped_query.query()),
kProfileQueryStart))
CHECK(utils::StartsWith(
utils::ToLowerCase(parsed_query.stripped_query.query()),
kProfileQueryStart))
<< "Expected stripped query to start with '" << kProfileQueryStart
<< "'";
@ -1059,17 +1155,16 @@ Interpreter::Results Interpreter::Prepare(
// See the comment regarding the caching of Cypher queries within
// "metaqueries" for explain queries
auto queries =
StripAndParseQuery(query_string.substr(kProfileQueryStart.size()),
&parameters, &ast_storage, params);
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
ParsedQuery parsed_query = ParseQuery(
query_string.substr(kProfileQueryStart.size()), params,
&interpreter_context_->ast_cache, &interpreter_context_->antlr_lock);
auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
CHECK(cypher_query)
<< "Cypher grammar should not allow other queries in PROFILE";
auto cypher_query_plan =
CypherQueryToPlan(stripped_query.hash(), cypher_query,
std::move(ast_storage), parameters, db_accessor);
CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query,
std::move(parsed_query.ast_storage),
parsed_query.parameters, db_accessor);
// Copy the symbol table and add our own symbols (used by the `OutputTable`
// operator below)
@ -1119,9 +1214,10 @@ Interpreter::Results Interpreter::Prepare(
auto planning_time = planning_timer.Elapsed();
summary["planning_time"] = planning_time.count();
return Results(db_accessor, parameters, plan, std::move(output_symbols),
std::move(header), std::move(summary),
parsed_query.required_privileges, &execution_memory_,
return Results(db_accessor, parsed_query.parameters, plan,
std::move(output_symbols), std::move(header),
std::move(summary), parsed_query.required_privileges,
&execution_memory_,
/* is_profile_query */ true, /* should_abort_query */ true);
}
@ -1140,9 +1236,10 @@ Interpreter::Results Interpreter::Prepare(
summary["planning_time"] = planning_timer.Elapsed().count();
return Results(db_accessor, parameters, plan, std::move(output_symbols),
std::move(header), std::move(summary),
parsed_query.required_privileges, &execution_memory_,
return Results(db_accessor, parsed_query.parameters, plan,
std::move(output_symbols), std::move(header),
std::move(summary), parsed_query.required_privileges,
&execution_memory_,
/* is_profile_query */ false,
/* should_abort_query */ false);
#else
@ -1176,7 +1273,7 @@ Interpreter::Results Interpreter::Prepare(
throw UserModificationInMulticommandTxException();
}
callback = HandleAuthQuery(auth_query, interpreter_context_->auth,
parameters, db_accessor);
parsed_query.parameters, db_accessor);
#endif
} else if (auto *stream_query =
utils::Downcast<StreamQuery>(parsed_query.query)) {
@ -1189,7 +1286,7 @@ Interpreter::Results Interpreter::Prepare(
}
callback =
HandleStreamQuery(stream_query, interpreter_context_->kafka_streams,
parameters, db_accessor);
parsed_query.parameters, db_accessor);
#endif
} else if (auto *info_query =
utils::Downcast<InfoQuery>(parsed_query.query)) {
@ -1217,8 +1314,8 @@ Interpreter::Results Interpreter::Prepare(
summary["planning_time"] = planning_time.count();
summary["cost_estimate"] = 0.0;
return Results(db_accessor, parameters, plan, std::move(output_symbols),
callback.header, std::move(summary),
return Results(db_accessor, parsed_query.parameters, plan,
std::move(output_symbols), callback.header, std::move(summary),
parsed_query.required_privileges, &execution_memory_,
/* is_profile_query */ false, callback.should_abort_query);
}
@ -1295,89 +1392,6 @@ std::shared_ptr<CachedPlan> Interpreter::CypherQueryToPlan(
.first->second;
}
Interpreter::ParsedQuery Interpreter::ParseQuery(
const std::string &stripped_query, const std::string &original_query,
const frontend::ParsingContext &context, AstStorage *ast_storage) {
if (!context.is_query_cached) {
// Parse original query into antlr4 AST.
auto parser = [&] {
// Be careful about unlocking since parser can throw.
std::unique_lock<utils::SpinLock> guard(interpreter_context_->antlr_lock);
return std::make_unique<frontend::opencypher::Parser>(original_query);
}();
// Convert antlr4 AST into Memgraph AST.
frontend::CypherMainVisitor visitor(context, ast_storage);
visitor.visit(parser->tree());
return ParsedQuery{visitor.query(),
query::GetRequiredPrivileges(visitor.query())};
}
auto stripped_query_hash = fnv(stripped_query);
auto ast_cache_accessor = interpreter_context_->ast_cache.access();
auto ast_it = ast_cache_accessor.find(stripped_query_hash);
if (ast_it == ast_cache_accessor.end()) {
// Parse stripped query into antlr4 AST.
auto parser = [&] {
// Be careful about unlocking since parser can throw.
std::unique_lock<utils::SpinLock> guard(interpreter_context_->antlr_lock);
try {
return std::make_unique<frontend::opencypher::Parser>(stripped_query);
} catch (const SyntaxException &e) {
// There is syntax exception in stripped query. Rerun parser on
// the original query to get appropriate error messsage.
auto parser =
std::make_unique<frontend::opencypher::Parser>(original_query);
// If exception was not thrown here, StrippedQuery messed
// something up.
LOG(FATAL) << "Stripped query can't be parsed, but the original can.";
return parser;
}
}();
// Convert antlr4 AST into Memgraph AST.
AstStorage cached_ast_storage;
frontend::CypherMainVisitor visitor(context, &cached_ast_storage);
visitor.visit(parser->tree());
CachedQuery cached_query{std::move(cached_ast_storage), visitor.query(),
query::GetRequiredPrivileges(visitor.query())};
// Cache it.
ast_it = ast_cache_accessor
.insert({stripped_query_hash, std::move(cached_query)})
.first;
}
ast_storage->properties_ = ast_it->second.ast_storage.properties_;
ast_storage->labels_ = ast_it->second.ast_storage.labels_;
ast_storage->edge_types_ = ast_it->second.ast_storage.edge_types_;
return ParsedQuery{ast_it->second.query->Clone(ast_storage),
ast_it->second.required_privileges};
}
std::pair<frontend::StrippedQuery, Interpreter::ParsedQuery>
Interpreter::StripAndParseQuery(
const std::string &query_string, Parameters *parameters,
AstStorage *ast_storage,
const std::map<std::string, PropertyValue> &params) {
frontend::StrippedQuery stripped_query(query_string);
*parameters = stripped_query.literals();
for (const auto &param_pair : stripped_query.parameters()) {
auto param_it = params.find(param_pair.second);
if (param_it == params.end()) {
throw query::UnprovidedParameterError("Parameter ${} not provided.",
param_pair.second);
}
parameters->Add(param_pair.first, param_it->second);
}
frontend::ParsingContext parsing_context;
parsing_context.is_query_cached = true;
auto parsed_query = ParseQuery(stripped_query.query(), query_string,
parsing_context, ast_storage);
return {std::move(stripped_query), std::move(parsed_query)};
}
std::unique_ptr<LogicalPlan> Interpreter::MakeLogicalPlan(
CypherQuery *query, AstStorage ast_storage, const Parameters &parameters,
DbAccessor *db_accessor) {

View File

@ -143,15 +143,6 @@ struct InterpreterContext {
class Interpreter {
public:
/**
* Wraps a `Query` that was created as a result of parsing a query string
* along with its privileges.
*/
struct ParsedQuery {
Query *query;
std::vector<AuthQuery::Privilege> required_privileges;
};
/**
* Encapsulates all what's necessary for the interpretation of a query
* into a single object that can be pulled (into the given Stream).
@ -336,10 +327,6 @@ class Interpreter {
void Abort();
protected:
std::pair<frontend::StrippedQuery, ParsedQuery> StripAndParseQuery(
const std::string &, Parameters *, AstStorage *ast_storage,
const std::map<std::string, PropertyValue> &);
// high level tree -> logical plan
// AstStorage and SymbolTable may be modified during planning. The created
// LogicalPlan must take ownership of AstStorage and SymbolTable.
@ -382,12 +369,6 @@ class Interpreter {
AstStorage ast_storage,
const Parameters &parameters,
DbAccessor *db_accessor);
// stripped query -> high level tree
ParsedQuery ParseQuery(const std::string &stripped_query,
const std::string &original_query,
const frontend::ParsingContext &context,
AstStorage *ast_storage);
};
} // namespace query

View File

@ -90,10 +90,7 @@ static query::CypherQuery *ParseCypherQuery(const std::string &query_string,
// Convert antlr4 AST into Memgraph AST.
query::frontend::CypherMainVisitor cypher_visitor(parsing_context, ast);
cypher_visitor.visit(parser.tree());
query::Interpreter::ParsedQuery parsed_query{
cypher_visitor.query(),
query::GetRequiredPrivileges(cypher_visitor.query())};
return utils::Downcast<query::CypherQuery>(parsed_query.query);
return utils::Downcast<query::CypherQuery>(cypher_visitor.query());
};
template <class TMemory>