From bd998dc61813b435e356a11e211263cf94612949 Mon Sep 17 00:00:00 2001 From: Lovro Lugovic <lovro.lugovic@memgraph.io> Date: Fri, 18 Oct 2019 17:12:48 +0200 Subject: [PATCH] Refactor query parsing in the `Interpreter` Summary: Depends on D2482 Reviewers: teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2499 --- src/query/interpreter.cpp | 272 +++++++++++++++------------- src/query/interpreter.hpp | 19 -- tests/benchmark/query/execution.cpp | 5 +- 3 files changed, 144 insertions(+), 152 deletions(-) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 7085efac8..3315c406c 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -71,6 +71,99 @@ class DumpClosure final { } // namespace #endif +/** + * A container for data related to the parsing of a query. + */ +struct ParsedQuery { + std::string query_string; + std::map<std::string, PropertyValue> user_parameters; + Parameters parameters; + frontend::StrippedQuery stripped_query; + AstStorage ast_storage; + Query *query; + std::vector<AuthQuery::Privilege> required_privileges; +}; + +ParsedQuery ParseQuery(const std::string &query_string, + const std::map<std::string, PropertyValue> ¶ms, + utils::SkipList<QueryCacheEntry> *cache, + utils::SpinLock *antlr_lock) { + // Strip the query for caching purposes. The process of stripping a query + // "normalizes" it by replacing any literals with new parameters . This + // results in just the *structure* of the query being taken into account for + // caching. + frontend::StrippedQuery stripped_query{query_string}; + + // Copy over the parameters that were introduced during stripping. + Parameters parameters{stripped_query.literals()}; + + // Check that all user-specified parameters are provided. + for (const auto ¶m_pair : stripped_query.parameters()) { + auto it = params.find(param_pair.second); + + if (it == params.end()) { + throw query::UnprovidedParameterError("Parameter ${} not provided.", + param_pair.second); + } + + parameters.Add(param_pair.first, it->second); + } + + // Cache the query's AST if it isn't already. + auto hash = stripped_query.hash(); + auto accessor = cache->access(); + auto it = accessor.find(hash); + std::unique_ptr<frontend::opencypher::Parser> parser; + + if (it == accessor.end()) { + { + std::unique_lock<utils::SpinLock> guard(*antlr_lock); + + try { + parser = std::make_unique<frontend::opencypher::Parser>( + stripped_query.query()); + } catch (const SyntaxException &e) { + // There is a syntax exception in the stripped query. Re-run the parser + // on the original query to get an appropriate error messsage. + parser = std::make_unique<frontend::opencypher::Parser>(query_string); + + // If an exception was not thrown here, the stripper messed something + // up. + LOG(FATAL) + << "The stripped query can't be parsed, but the original can."; + } + } + + // Convert the ANTLR4 parse tree into an AST. + AstStorage ast_storage; + frontend::ParsingContext context{true}; + frontend::CypherMainVisitor visitor(context, &ast_storage); + + visitor.visit(parser->tree()); + + CachedQuery cached_query{std::move(ast_storage), visitor.query(), + query::GetRequiredPrivileges(visitor.query())}; + + it = accessor.insert({hash, std::move(cached_query)}).first; + } + + // Return a copy of both the AST storage and the query. + AstStorage ast_storage; + ast_storage.properties_ = it->second.ast_storage.properties_; + ast_storage.labels_ = it->second.ast_storage.labels_; + ast_storage.edge_types_ = it->second.ast_storage.edge_types_; + + Query *query = it->second.query->Clone(&ast_storage); + + return ParsedQuery{query_string, + params, + std::move(parameters), + std::move(stripped_query), + std::move(ast_storage), + query, + it->second.required_privileges}; +} + class SingleNodeLogicalPlan final : public LogicalPlan { public: SingleNodeLogicalPlan(std::unique_ptr<plan::LogicalOperator> root, @@ -911,17 +1004,13 @@ Interpreter::Results Interpreter::Prepare( const std::string &query_string, const std::map<std::string, PropertyValue> ¶ms, DbAccessor *db_accessor) { - AstStorage ast_storage; - Parameters parameters; std::map<std::string, TypedValue> summary; utils::Timer parsing_timer; - auto queries = - StripAndParseQuery(query_string, ¶meters, &ast_storage, params); - frontend::StrippedQuery &stripped_query = queries.first; - ParsedQuery &parsed_query = queries.second; + ParsedQuery parsed_query = + ParseQuery(query_string, params, &interpreter_context_->ast_cache, + &interpreter_context_->antlr_lock); auto parsing_time = parsing_timer.Elapsed(); - summary["parsing_time"] = parsing_time.count(); // TODO: set summary['type'] based on transaction metadata // the type can't be determined based only on top level LogicalOp @@ -948,8 +1037,9 @@ Interpreter::Results Interpreter::Prepare( #endif if (auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query)) { - plan = CypherQueryToPlan(stripped_query.hash(), cypher_query, - std::move(ast_storage), parameters, db_accessor); + plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query, + std::move(parsed_query.ast_storage), + parsed_query.parameters, db_accessor); auto planning_time = planning_timer.Elapsed(); summary["planning_time"] = planning_time.count(); summary["cost_estimate"] = plan->cost(); @@ -957,24 +1047,29 @@ Interpreter::Results Interpreter::Prepare( auto output_symbols = plan->plan().OutputSymbols(plan->symbol_table()); std::vector<std::string> header; + header.reserve(output_symbols.size()); + for (const auto &symbol : output_symbols) { // When the symbol is aliased or expanded from '*' (inside RETURN or // WITH), then there is no token position, so use symbol name. // Otherwise, find the name from stripped query. - header.push_back(utils::FindOr(stripped_query.named_expressions(), - symbol.token_position(), symbol.name()) - .first); + header.push_back( + utils::FindOr(parsed_query.stripped_query.named_expressions(), + symbol.token_position(), symbol.name()) + .first); } - return Results(db_accessor, parameters, plan, std::move(output_symbols), - std::move(header), std::move(summary), - parsed_query.required_privileges, &execution_memory_); + return Results(db_accessor, parsed_query.parameters, plan, + std::move(output_symbols), std::move(header), + std::move(summary), parsed_query.required_privileges, + &execution_memory_); } if (utils::IsSubtype(*parsed_query.query, ExplainQuery::kType)) { const std::string kExplainQueryStart = "explain "; - CHECK(utils::StartsWith(utils::ToLowerCase(stripped_query.query()), - kExplainQueryStart)) + CHECK(utils::StartsWith( + utils::ToLowerCase(parsed_query.stripped_query.query()), + kExplainQueryStart)) << "Expected stripped query to start with '" << kExplainQueryStart << "'"; @@ -999,17 +1094,16 @@ Interpreter::Results Interpreter::Prepare( // totally different plan if we were to create a new one right now. Doing so // would result in discrepancies between the explained (or profiled) plan // and the one that's executed when the query is ran standalone. - auto queries = - StripAndParseQuery(query_string.substr(kExplainQueryStart.size()), - ¶meters, &ast_storage, params); - frontend::StrippedQuery &stripped_query = queries.first; - ParsedQuery &parsed_query = queries.second; + ParsedQuery parsed_query = ParseQuery( + query_string.substr(kExplainQueryStart.size()), params, + &interpreter_context_->ast_cache, &interpreter_context_->antlr_lock); auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query); CHECK(cypher_query) << "Cypher grammar should not allow other queries in EXPLAIN"; std::shared_ptr<CachedPlan> cypher_query_plan = - CypherQueryToPlan(stripped_query.hash(), cypher_query, - std::move(ast_storage), parameters, db_accessor); + CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query, + std::move(parsed_query.ast_storage), + parsed_query.parameters, db_accessor); std::stringstream printed_plan; PrettyPrintPlan(*db_accessor, &cypher_query_plan->plan(), &printed_plan); @@ -1037,15 +1131,17 @@ Interpreter::Results Interpreter::Prepare( std::vector<std::string> header{query_plan_symbol.name()}; - return Results(db_accessor, parameters, plan, std::move(output_symbols), - std::move(header), std::move(summary), - parsed_query.required_privileges, &execution_memory_); + return Results(db_accessor, parsed_query.parameters, plan, + std::move(output_symbols), std::move(header), + std::move(summary), parsed_query.required_privileges, + &execution_memory_); } if (utils::IsSubtype(*parsed_query.query, ProfileQuery::kType)) { const std::string kProfileQueryStart = "profile "; - CHECK(utils::StartsWith(utils::ToLowerCase(stripped_query.query()), - kProfileQueryStart)) + CHECK(utils::StartsWith( + utils::ToLowerCase(parsed_query.stripped_query.query()), + kProfileQueryStart)) << "Expected stripped query to start with '" << kProfileQueryStart << "'"; @@ -1059,17 +1155,16 @@ Interpreter::Results Interpreter::Prepare( // See the comment regarding the caching of Cypher queries within // "metaqueries" for explain queries - auto queries = - StripAndParseQuery(query_string.substr(kProfileQueryStart.size()), - ¶meters, &ast_storage, params); - frontend::StrippedQuery &stripped_query = queries.first; - ParsedQuery &parsed_query = queries.second; + ParsedQuery parsed_query = ParseQuery( + query_string.substr(kProfileQueryStart.size()), params, + &interpreter_context_->ast_cache, &interpreter_context_->antlr_lock); auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query); CHECK(cypher_query) << "Cypher grammar should not allow other queries in PROFILE"; auto cypher_query_plan = - CypherQueryToPlan(stripped_query.hash(), cypher_query, - std::move(ast_storage), parameters, db_accessor); + CypherQueryToPlan(parsed_query.stripped_query.hash(), cypher_query, + std::move(parsed_query.ast_storage), + parsed_query.parameters, db_accessor); // Copy the symbol table and add our own symbols (used by the `OutputTable` // operator below) @@ -1119,9 +1214,10 @@ Interpreter::Results Interpreter::Prepare( auto planning_time = planning_timer.Elapsed(); summary["planning_time"] = planning_time.count(); - return Results(db_accessor, parameters, plan, std::move(output_symbols), - std::move(header), std::move(summary), - parsed_query.required_privileges, &execution_memory_, + return Results(db_accessor, parsed_query.parameters, plan, + std::move(output_symbols), std::move(header), + std::move(summary), parsed_query.required_privileges, + &execution_memory_, /* is_profile_query */ true, /* should_abort_query */ true); } @@ -1140,9 +1236,10 @@ Interpreter::Results Interpreter::Prepare( summary["planning_time"] = planning_timer.Elapsed().count(); - return Results(db_accessor, parameters, plan, std::move(output_symbols), - std::move(header), std::move(summary), - parsed_query.required_privileges, &execution_memory_, + return Results(db_accessor, parsed_query.parameters, plan, + std::move(output_symbols), std::move(header), + std::move(summary), parsed_query.required_privileges, + &execution_memory_, /* is_profile_query */ false, /* should_abort_query */ false); #else @@ -1176,7 +1273,7 @@ Interpreter::Results Interpreter::Prepare( throw UserModificationInMulticommandTxException(); } callback = HandleAuthQuery(auth_query, interpreter_context_->auth, - parameters, db_accessor); + parsed_query.parameters, db_accessor); #endif } else if (auto *stream_query = utils::Downcast<StreamQuery>(parsed_query.query)) { @@ -1189,7 +1286,7 @@ Interpreter::Results Interpreter::Prepare( } callback = HandleStreamQuery(stream_query, interpreter_context_->kafka_streams, - parameters, db_accessor); + parsed_query.parameters, db_accessor); #endif } else if (auto *info_query = utils::Downcast<InfoQuery>(parsed_query.query)) { @@ -1217,8 +1314,8 @@ Interpreter::Results Interpreter::Prepare( summary["planning_time"] = planning_time.count(); summary["cost_estimate"] = 0.0; - return Results(db_accessor, parameters, plan, std::move(output_symbols), - callback.header, std::move(summary), + return Results(db_accessor, parsed_query.parameters, plan, + std::move(output_symbols), callback.header, std::move(summary), parsed_query.required_privileges, &execution_memory_, /* is_profile_query */ false, callback.should_abort_query); } @@ -1295,89 +1392,6 @@ std::shared_ptr<CachedPlan> Interpreter::CypherQueryToPlan( .first->second; } -Interpreter::ParsedQuery Interpreter::ParseQuery( - const std::string &stripped_query, const std::string &original_query, - const frontend::ParsingContext &context, AstStorage *ast_storage) { - if (!context.is_query_cached) { - // Parse original query into antlr4 AST. - auto parser = [&] { - // Be careful about unlocking since parser can throw. - std::unique_lock<utils::SpinLock> guard(interpreter_context_->antlr_lock); - return std::make_unique<frontend::opencypher::Parser>(original_query); - }(); - // Convert antlr4 AST into Memgraph AST. - frontend::CypherMainVisitor visitor(context, ast_storage); - visitor.visit(parser->tree()); - return ParsedQuery{visitor.query(), - query::GetRequiredPrivileges(visitor.query())}; - } - - auto stripped_query_hash = fnv(stripped_query); - - auto ast_cache_accessor = interpreter_context_->ast_cache.access(); - auto ast_it = ast_cache_accessor.find(stripped_query_hash); - if (ast_it == ast_cache_accessor.end()) { - // Parse stripped query into antlr4 AST. - auto parser = [&] { - // Be careful about unlocking since parser can throw. - std::unique_lock<utils::SpinLock> guard(interpreter_context_->antlr_lock); - try { - return std::make_unique<frontend::opencypher::Parser>(stripped_query); - } catch (const SyntaxException &e) { - // There is syntax exception in stripped query. Rerun parser on - // the original query to get appropriate error messsage. - auto parser = - std::make_unique<frontend::opencypher::Parser>(original_query); - // If exception was not thrown here, StrippedQuery messed - // something up. - LOG(FATAL) << "Stripped query can't be parsed, but the original can."; - return parser; - } - }(); - // Convert antlr4 AST into Memgraph AST. - AstStorage cached_ast_storage; - frontend::CypherMainVisitor visitor(context, &cached_ast_storage); - visitor.visit(parser->tree()); - CachedQuery cached_query{std::move(cached_ast_storage), visitor.query(), - query::GetRequiredPrivileges(visitor.query())}; - // Cache it. - ast_it = ast_cache_accessor - .insert({stripped_query_hash, std::move(cached_query)}) - .first; - } - ast_storage->properties_ = ast_it->second.ast_storage.properties_; - ast_storage->labels_ = ast_it->second.ast_storage.labels_; - ast_storage->edge_types_ = ast_it->second.ast_storage.edge_types_; - return ParsedQuery{ast_it->second.query->Clone(ast_storage), - ast_it->second.required_privileges}; -} - -std::pair<frontend::StrippedQuery, Interpreter::ParsedQuery> -Interpreter::StripAndParseQuery( - const std::string &query_string, Parameters *parameters, - AstStorage *ast_storage, - const std::map<std::string, PropertyValue> ¶ms) { - frontend::StrippedQuery stripped_query(query_string); - - *parameters = stripped_query.literals(); - for (const auto ¶m_pair : stripped_query.parameters()) { - auto param_it = params.find(param_pair.second); - if (param_it == params.end()) { - throw query::UnprovidedParameterError("Parameter ${} not provided.", - param_pair.second); - } - parameters->Add(param_pair.first, param_it->second); - } - - frontend::ParsingContext parsing_context; - parsing_context.is_query_cached = true; - - auto parsed_query = ParseQuery(stripped_query.query(), query_string, - parsing_context, ast_storage); - - return {std::move(stripped_query), std::move(parsed_query)}; -} - std::unique_ptr<LogicalPlan> Interpreter::MakeLogicalPlan( CypherQuery *query, AstStorage ast_storage, const Parameters ¶meters, DbAccessor *db_accessor) { diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 0e316f65b..136bf2bba 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -143,15 +143,6 @@ struct InterpreterContext { class Interpreter { public: - /** - * Wraps a `Query` that was created as a result of parsing a query string - * along with its privileges. - */ - struct ParsedQuery { - Query *query; - std::vector<AuthQuery::Privilege> required_privileges; - }; - /** * Encapsulates all what's necessary for the interpretation of a query * into a single object that can be pulled (into the given Stream). @@ -336,10 +327,6 @@ class Interpreter { void Abort(); protected: - std::pair<frontend::StrippedQuery, ParsedQuery> StripAndParseQuery( - const std::string &, Parameters *, AstStorage *ast_storage, - const std::map<std::string, PropertyValue> &); - // high level tree -> logical plan // AstStorage and SymbolTable may be modified during planning. The created // LogicalPlan must take ownership of AstStorage and SymbolTable. @@ -382,12 +369,6 @@ class Interpreter { AstStorage ast_storage, const Parameters ¶meters, DbAccessor *db_accessor); - - // stripped query -> high level tree - ParsedQuery ParseQuery(const std::string &stripped_query, - const std::string &original_query, - const frontend::ParsingContext &context, - AstStorage *ast_storage); }; } // namespace query diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index a4e63a25e..27a349ec1 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -90,10 +90,7 @@ static query::CypherQuery *ParseCypherQuery(const std::string &query_string, // Convert antlr4 AST into Memgraph AST. query::frontend::CypherMainVisitor cypher_visitor(parsing_context, ast); cypher_visitor.visit(parser.tree()); - query::Interpreter::ParsedQuery parsed_query{ - cypher_visitor.query(), - query::GetRequiredPrivileges(cypher_visitor.query())}; - return utils::Downcast<query::CypherQuery>(parsed_query.query); + return utils::Downcast<query::CypherQuery>(cypher_visitor.query()); }; template <class TMemory>