Add query profiling: Tree

Reviewers: mtomic, teon.banek, mferencevic

Reviewed By: mtomic, teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1772
This commit is contained in:
Lovro Lugovic 2019-01-15 11:11:06 +01:00
parent 3421eb6ae7
commit 2730f2d35f
30 changed files with 1324 additions and 98 deletions

View File

@ -38,6 +38,7 @@ set(mg_single_node_sources
query/plan/operator.cpp
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp
@ -144,6 +145,7 @@ set(mg_distributed_sources
query/plan/operator.cpp
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp
@ -272,6 +274,7 @@ set(mg_single_node_ha_sources
query/plan/operator.cpp
query/plan/preprocess.cpp
query/plan/pretty_print.cpp
query/plan/profile.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/repl.cpp

View File

@ -4,12 +4,13 @@
#include "database/graph_db_accessor.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/parameters.hpp"
#include "query/plan/profile.hpp"
namespace query {
struct EvaluationContext {
int64_t timestamp{-1};
query::Parameters parameters;
Parameters parameters;
};
class Context {
@ -21,9 +22,13 @@ class Context {
explicit Context(database::GraphDbAccessor &db_accessor)
: db_accessor_(db_accessor) {}
database::GraphDbAccessor &db_accessor_;
SymbolTable symbol_table_;
EvaluationContext evaluation_context_;
bool is_profile_query_{false};
plan::ProfilingStats stats_;
plan::ProfilingStats *stats_root_{nullptr};
};
struct ParsingContext {

View File

@ -80,6 +80,25 @@ std::unique_ptr<LogicalPlan> DistributedInterpreter::MakeLogicalPlan(
plan_dispatcher_);
}
Interpreter::Results DistributedInterpreter::operator()(
const std::string &query_string, database::GraphDbAccessor &db_accessor,
const std::map<std::string, PropertyValue> &params,
bool in_explicit_transaction) {
AstStorage ast_storage;
Context execution_context(db_accessor);
auto queries = StripAndParseQuery(query_string, &execution_context,
&ast_storage, &db_accessor, params);
ParsedQuery &parsed_query = queries.second;
if (auto *profile_query = dynamic_cast<ProfileQuery *>(parsed_query.query)) {
throw utils::NotYetImplemented("PROFILE in a distributed query");
}
return Interpreter::operator()(query_string, db_accessor, params,
in_explicit_transaction);
}
void DistributedInterpreter::PrettyPrintPlan(
const database::GraphDbAccessor &dba,
const plan::LogicalOperator *plan_root, std::ostream *out) {

View File

@ -16,6 +16,10 @@ class DistributedInterpreter final : public Interpreter {
public:
DistributedInterpreter(database::Master *db);
Results operator()(const std::string &, database::GraphDbAccessor &,
const std::map<std::string, PropertyValue> &,
bool in_explicit_transaction) override;
private:
std::unique_ptr<LogicalPlan> MakeLogicalPlan(
CypherQuery *, AstStorage, const Parameters &,

View File

@ -69,6 +69,13 @@ class UnprovidedParameterError : public QueryException {
using QueryException::QueryException;
};
class ProfileInMulticommandTxException : public QueryException {
public:
using QueryException::QueryException;
ProfileInMulticommandTxException()
: QueryException("PROFILE not allowed in multicommand transactions.") {}
};
class IndexInMulticommandTxException : public QueryException {
public:
using QueryException::QueryException;

View File

@ -1792,7 +1792,7 @@ cpp<#
:capnp-type "Tree" :capnp-init nil
:capnp-save #'save-ast-pointer
:capnp-load (load-ast-pointer "CypherQuery *")
:documentation "CypherQuery to be explained."))
:documentation "The CypherQuery to explain."))
(:public
#>cpp
ExplainQuery() = default;
@ -1815,6 +1815,36 @@ cpp<#
cpp<#)
(:serialize (:slk) (:capnp)))
(lcp:define-class profile-query (query)
((cypher-query "CypherQuery *"
:initval "nullptr"
:scope :public
:slk-save #'slk-save-ast-pointer
:slk-load (slk-load-ast-pointer "CypherQuery")
:capnp-type "Tree"
:capnp-init nil
:capnp-save #'save-ast-pointer
:capnp-load (load-ast-pointer "CypherQuery *")
:documentation "The CypherQuery to profile."))
(:public
#>cpp
ProfileQuery() = default;
DEFVISITABLE(QueryVisitor<void>);
ProfileQuery *Clone(AstStorage &storage) const override {
auto *profile_query = storage.Create<ProfileQuery>();
profile_query->cypher_query_ = cypher_query_->Clone(storage);
return profile_query;
}
cpp<#)
(:private
#>cpp
explicit ProfileQuery(int uid) : Query(uid) {}
friend class AstStorage;
cpp<#)
(:serialize (:slk) (:capnp)))
(lcp:define-class index-query (query)
((action "Action" :scope :public)
(label "storage::Label" :scope :public)

View File

@ -63,6 +63,7 @@ class Merge;
class Unwind;
class AuthQuery;
class ExplainQuery;
class ProfileQuery;
class IndexQuery;
class StreamQuery;
@ -107,7 +108,7 @@ class ExpressionVisitor
template <class TResult>
class QueryVisitor
: public ::utils::Visitor<TResult, CypherQuery, ExplainQuery, IndexQuery,
AuthQuery, StreamQuery> {};
: public ::utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery,
IndexQuery, AuthQuery, StreamQuery> {};
} // namespace query

View File

@ -36,6 +36,17 @@ antlrcpp::Any CypherMainVisitor::visitExplainQuery(
return explain_query;
}
antlrcpp::Any CypherMainVisitor::visitProfileQuery(
MemgraphCypher::ProfileQueryContext *ctx) {
CHECK(ctx->children.size() == 2)
<< "ProfileQuery should have exactly two children!";
auto *cypher_query = ctx->children[1]->accept(this).as<CypherQuery *>();
auto *profile_query = storage_->Create<ProfileQuery>();
profile_query->cypher_query_ = cypher_query;
query_ = profile_query;
return profile_query;
}
antlrcpp::Any CypherMainVisitor::visitCypherQuery(
MemgraphCypher::CypherQueryContext *ctx) {
auto *cypher_query = storage_->Create<CypherQuery>();

View File

@ -152,6 +152,12 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
antlrcpp::Any visitExplainQuery(
MemgraphCypher::ExplainQueryContext *ctx) override;
/**
* @return ProfileQuery*
*/
antlrcpp::Any visitProfileQuery(
MemgraphCypher::ProfileQueryContext *ctx) override;
/**
* @return AuthQuery*
*/

View File

@ -26,10 +26,13 @@ statement : query ;
query : cypherQuery
| indexQuery
| explainQuery
| profileQuery
;
explainQuery : EXPLAIN cypherQuery ;
profileQuery : PROFILE cypherQuery ;
cypherQuery : singleQuery ( cypherUnion )* ;
indexQuery : createIndex | createUniqueIndex | dropIndex;
@ -319,6 +322,7 @@ cypherKeyword : ALL
| OPTIONAL
| OR
| ORDER
| PROFILE
| REDUCE
| REMOVE
| RETURN

View File

@ -108,6 +108,7 @@ ON : O N ;
OPTIONAL : O P T I O N A L ;
OR : O R ;
ORDER : O R D E R ;
PROFILE : P R O F I L E ;
REDUCE : R E D U C E ;
REMOVE : R E M O V E ;
RETURN : R E T U R N ;

View File

@ -48,6 +48,7 @@ symbolicName : UnescapedSymbolicName
query : cypherQuery
| indexQuery
| explainQuery
| profileQuery
| authQuery
| streamQuery
;

View File

@ -16,9 +16,7 @@ class PrivilegeExtractor : public QueryVisitor<void>,
AddPrivilege(AuthQuery::Privilege::INDEX);
}
void Visit(AuthQuery &) override {
AddPrivilege(AuthQuery::Privilege::AUTH);
}
void Visit(AuthQuery &) override { AddPrivilege(AuthQuery::Privilege::AUTH); }
void Visit(StreamQuery &) override {
AddPrivilege(AuthQuery::Privilege::STREAM);
@ -28,6 +26,10 @@ class PrivilegeExtractor : public QueryVisitor<void>,
query.cypher_query_->Accept(*this);
}
void Visit(ProfileQuery &query) override {
query.cypher_query_->Accept(*this);
}
void Visit(CypherQuery &query) override {
query.single_query_->Accept(*this);
for (auto *cypher_union : query.cypher_unions_) {

View File

@ -73,16 +73,16 @@ class StrippedQuery {
std::string query_;
// Token positions of stripped out literals mapped to their values.
// TODO: Parameters class really doesn't provided anything interesting. This
// TODO: Parameters class really doesn't provide anything interesting. This
// could be changed to std::unordered_map, but first we need to rewrite (or
// get rid of) hardcoded queries which expect Parameters.
Parameters literals_;
// Token positions of query parameters mapped to theirs names.
// Token positions of query parameters mapped to their names.
std::unordered_map<int, std::string> parameters_;
// Token positions of nonaliased named expressions in return statement mapped
// to theirs original/unstripped string.
// to their original (unstripped) string.
std::unordered_map<int, std::string> named_exprs_;
// Hash based on the stripped query.

View File

@ -90,7 +90,7 @@ const trie::Trie kKeywords = {
"reduce", "coalesce", "user", "password", "alter", "drop",
"stream", "streams", "load", "data", "kafka", "transform",
"batch", "interval", "show", "start", "stop", "size",
"topic", "test", "unique", "explain"};
"topic", "test", "unique", "explain", "profile"};
// Unicode codepoints that are allowed at the start of the unescaped name.
const std::bitset<kBitsetSize> kUnescapedNameAllowedStarts(std::string(

View File

@ -15,9 +15,11 @@
#include "query/frontend/semantic/symbol_generator.hpp"
#include "query/interpret/eval.hpp"
#include "query/plan/planner.hpp"
#include "query/plan/profile.hpp"
#include "query/plan/vertex_count_cache.hpp"
#include "utils/flag_validation.hpp"
#include "utils/string.hpp"
#include "utils/tsc.hpp"
DEFINE_HIDDEN_bool(query_cost_planner, true,
"Use the cost-estimating query planner.");
@ -557,6 +559,8 @@ Callback HandleIndexQuery(IndexQuery *index_query,
}
}
Interpreter::Interpreter() : is_tsc_available_(utils::CheckAvailableTSC()) {}
Interpreter::Results Interpreter::operator()(
const std::string &query_string, database::GraphDbAccessor &db_accessor,
const std::map<std::string, PropertyValue> &params,
@ -568,40 +572,19 @@ Interpreter::Results Interpreter::operator()(
}
#endif
utils::Timer frontend_timer;
// Strip the input query.
StrippedQuery stripped_query(query_string);
Context execution_context(db_accessor);
auto &evaluation_context = execution_context.evaluation_context_;
evaluation_context.timestamp =
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
evaluation_context.parameters = stripped_query.literals();
for (const auto &param_pair : stripped_query.parameters()) {
auto param_it = params.find(param_pair.second);
if (param_it == params.end()) {
throw query::UnprovidedParameterError("Parameter ${} not provided.",
param_pair.second);
}
evaluation_context.parameters.Add(param_pair.first, param_it->second);
}
ParsingContext parsing_context;
parsing_context.is_query_cached = true;
AstStorage ast_storage;
auto parsed_query = ParseQuery(stripped_query.query(), query_string,
parsing_context, &ast_storage, &db_accessor);
auto frontend_time = frontend_timer.Elapsed();
// Build summary.
Context execution_context(db_accessor);
auto &evaluation_context = execution_context.evaluation_context_;
std::map<std::string, TypedValue> summary;
summary["parsing_time"] = frontend_time.count();
utils::Timer parsing_timer;
auto queries = StripAndParseQuery(query_string, &execution_context,
&ast_storage, &db_accessor, params);
StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
auto parsing_time = parsing_timer.Elapsed();
summary["parsing_time"] = parsing_time.count();
// TODO: set summary['type'] based on transaction metadata
// the type can't be determined based only on top level LogicalOp
// (for example MATCH DELETE RETURN will have Produce as it's top).
@ -668,9 +651,11 @@ Interpreter::Results Interpreter::operator()(
auto query_plan_symbol =
execution_context.symbol_table_.CreateSymbol("QUERY PLAN", false);
std::unique_ptr<plan::OutputTable> output_plan =
std::make_unique<plan::OutputTable>(
std::vector<Symbol>{query_plan_symbol}, printed_plan_rows);
std::vector<Symbol> output_symbols{query_plan_symbol};
std::vector<std::string> header{query_plan_symbol.name()};
auto output_plan =
std::make_unique<plan::OutputTable>(output_symbols, printed_plan_rows);
plan = std::make_shared<CachedPlan>(std::make_unique<SingleNodeLogicalPlan>(
std::move(output_plan), 0.0, AstStorage{},
@ -680,9 +665,74 @@ Interpreter::Results Interpreter::operator()(
summary["planning_time"] = planning_time.count();
execution_context.symbol_table_ = plan->symbol_table();
std::vector<Symbol> output_symbols{query_plan_symbol};
std::vector<std::string> header{query_plan_symbol.name()};
auto cursor = plan->plan().MakeCursor(db_accessor);
return Results(std::move(execution_context), plan, std::move(cursor),
output_symbols, header, summary,
parsed_query.required_privileges);
}
if (auto *profile_query = dynamic_cast<ProfileQuery *>(parsed_query.query)) {
const std::string kProfileQueryStart = "profile ";
CHECK(utils::StartsWith(utils::ToLowerCase(stripped_query.query()),
kProfileQueryStart))
<< "Expected stripped query to start with '" << kProfileQueryStart
<< "'";
if (in_explicit_transaction) {
throw ProfileInMulticommandTxException();
}
if (!is_tsc_available_) {
throw QueryException("TSC support is missing for PROFILE");
}
auto cypher_query_hash =
fnv(stripped_query.query().substr(kProfileQueryStart.size()));
auto cypher_query_plan = CypherQueryToPlan(
cypher_query_hash, profile_query->cypher_query_, std::move(ast_storage),
evaluation_context.parameters, &db_accessor);
// Copy the symbol table and add our own symbols (used by the `OutputTable`
// operator below)
execution_context.symbol_table_ = cypher_query_plan->symbol_table();
auto operator_symbol =
execution_context.symbol_table_.CreateSymbol("OPERATOR", false);
auto actual_hits_symbol =
execution_context.symbol_table_.CreateSymbol("ACTUAL HITS", false);
auto relative_time_symbol =
execution_context.symbol_table_.CreateSymbol("RELATIVE TIME", false);
auto absolute_time_symbol =
execution_context.symbol_table_.CreateSymbol("ABSOLUTE TIME", false);
std::vector<Symbol> output_symbols = {operator_symbol, actual_hits_symbol,
relative_time_symbol,
absolute_time_symbol};
std::vector<std::string> header{
operator_symbol.name(), actual_hits_symbol.name(),
relative_time_symbol.name(), absolute_time_symbol.name()};
auto output_plan = std::make_unique<plan::OutputTable>(
output_symbols,
[cypher_query_plan, &db_accessor](Frame *frame, Context *context) {
auto cursor = cypher_query_plan->plan().MakeCursor(db_accessor);
// Pull everything to profile the execution
utils::Timer timer;
while (cursor->Pull(*frame, *context)) continue;
return FormatProfilingStats(context->stats_, timer.Elapsed());
});
plan = std::make_shared<CachedPlan>(std::make_unique<SingleNodeLogicalPlan>(
std::move(output_plan), 0.0, AstStorage{},
execution_context.symbol_table_));
auto planning_time = planning_timer.Elapsed();
summary["planning_time"] = planning_time.count();
execution_context.is_profile_query_ = true;
auto cursor = plan->plan().MakeCursor(db_accessor);
return Results(std::move(execution_context), plan, std::move(cursor),
@ -728,8 +778,10 @@ Interpreter::Results Interpreter::operator()(
}
plan = std::make_shared<CachedPlan>(std::make_unique<SingleNodeLogicalPlan>(
std::make_unique<plan::OutputTable>(output_symbols, callback.fn), 0.0,
AstStorage{}, execution_context.symbol_table_));
std::make_unique<plan::OutputTable>(
output_symbols,
[fn = callback.fn](Frame *, Context *) { return fn(); }),
0.0, AstStorage{}, execution_context.symbol_table_));
auto planning_time = planning_timer.Elapsed();
summary["planning_time"] = planning_time.count();
@ -817,6 +869,38 @@ Interpreter::ParsedQuery Interpreter::ParseQuery(
ast_it->second.required_privileges};
}
std::pair<StrippedQuery, Interpreter::ParsedQuery>
Interpreter::StripAndParseQuery(
const std::string &query_string, Context *execution_context,
AstStorage *ast_storage, database::GraphDbAccessor *db_accessor,
const std::map<std::string, PropertyValue> &params) {
StrippedQuery stripped_query(query_string);
auto &evaluation_context = execution_context->evaluation_context_;
evaluation_context.timestamp =
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count();
evaluation_context.parameters = stripped_query.literals();
for (const auto &param_pair : stripped_query.parameters()) {
auto param_it = params.find(param_pair.second);
if (param_it == params.end()) {
throw query::UnprovidedParameterError("Parameter ${} not provided.",
param_pair.second);
}
evaluation_context.parameters.Add(param_pair.first, param_it->second);
}
ParsingContext parsing_context;
parsing_context.is_query_cached = true;
auto parsed_query = ParseQuery(stripped_query.query(), query_string,
parsing_context, ast_storage, db_accessor);
return {std::move(stripped_query), std::move(parsed_query)};
}
std::unique_ptr<LogicalPlan> Interpreter::MakeLogicalPlan(
CypherQuery *query, AstStorage ast_storage, const Parameters &parameters,
database::GraphDbAccessor *db_accessor) {

View File

@ -63,14 +63,18 @@ class Interpreter {
std::vector<AuthQuery::Privilege> required_privileges;
};
using PlanCacheT = ConcurrentMap<HashType, std::shared_ptr<CachedPlan>>;
public:
/**
* Wraps a `Query` that was created as a result of parsing a query string
* along with its privileges.
*/
struct ParsedQuery {
Query *query;
std::vector<AuthQuery::Privilege> required_privileges;
};
using PlanCacheT = ConcurrentMap<HashType, std::shared_ptr<CachedPlan>>;
public:
/**
* Encapsulates all what's necessary for the interpretation of a query
* into a single object that can be pulled (into the given Stream).
@ -142,6 +146,8 @@ class Interpreter {
return privileges_;
}
bool IsProfileQuery() const { return ctx_.is_profile_query_; }
private:
Context ctx_;
std::shared_ptr<CachedPlan> plan_;
@ -157,7 +163,7 @@ class Interpreter {
std::vector<AuthQuery::Privilege> privileges_;
};
Interpreter() = default;
Interpreter();
Interpreter(const Interpreter &) = delete;
Interpreter &operator=(const Interpreter &) = delete;
Interpreter(Interpreter &&) = delete;
@ -169,15 +175,20 @@ class Interpreter {
* Generates an Results object for the parameters. The resulting object
* can be Pulled with its results written to an arbitrary stream.
*/
Results operator()(const std::string &query,
database::GraphDbAccessor &db_accessor,
const std::map<std::string, PropertyValue> &params,
bool in_explicit_transaction);
virtual Results operator()(const std::string &query,
database::GraphDbAccessor &db_accessor,
const std::map<std::string, PropertyValue> &params,
bool in_explicit_transaction);
auth::Auth *auth_ = nullptr;
integrations::kafka::Streams *kafka_streams_ = nullptr;
protected:
std::pair<StrippedQuery, ParsedQuery> StripAndParseQuery(
const std::string &, Context *, AstStorage *ast_storage,
database::GraphDbAccessor *,
const std::map<std::string, PropertyValue> &);
// high level tree -> logical plan
// AstStorage and SymbolTable may be modified during planning. The created
// LogicalPlan must take ownership of AstStorage and SymbolTable.
@ -198,6 +209,7 @@ class Interpreter {
// developers introduce more bugs in each version. Fortunately, we have cache
// so this lock probably won't impact performance much...
utils::SpinLock antlr_lock_;
bool is_tsc_available_;
// high level tree -> CachedPlan
std::shared_ptr<CachedPlan> CypherQueryToPlan(

View File

@ -26,6 +26,7 @@
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/eval.hpp"
#include "query/path.hpp"
#include "query/plan/scoped_profile.hpp"
#include "utils/algorithm.hpp"
#include "utils/exceptions.hpp"
#include "utils/hashing/fnv.hpp"
@ -67,9 +68,20 @@ bool EvaluateFilter(ExpressionEvaluator &evaluator, Expression *filter) {
return result.Value<bool>();
}
template <typename T>
uint64_t ComputeProfilingKey(const T *obj) {
static_assert(sizeof(T *) == sizeof(uint64_t));
return reinterpret_cast<uint64_t>(obj);
}
} // namespace
bool Once::OnceCursor::Pull(Frame &, Context &) {
#define SCOPED_PROFILE_OP(name) \
ScopedProfile profile{ComputeProfilingKey(this), name, &context};
bool Once::OnceCursor::Pull(Frame &, Context &context) {
SCOPED_PROFILE_OP("Once");
if (!did_pull_) {
did_pull_ = true;
return true;
@ -128,10 +140,13 @@ CreateNode::CreateNodeCursor::CreateNodeCursor(const CreateNode &self,
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool CreateNode::CreateNodeCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("CreateNode");
if (input_cursor_->Pull(frame, context)) {
CreateLocalVertex(self_.node_info_, &frame, context);
return true;
}
return false;
}
@ -169,6 +184,8 @@ CreateExpand::CreateExpandCursor::CreateExpandCursor(
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("CreateExpand");
if (!input_cursor_->Pull(frame, context)) return false;
// get the origin vertex
@ -245,6 +262,8 @@ class ScanAllCursor : public Cursor {
db_(db) {}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("ScanAll");
if (db_.should_abort()) throw HintedAbortError();
while (!vertices_ || vertices_it_.value() == vertices_.value().end()) {
@ -450,6 +469,8 @@ Expand::ExpandCursor::ExpandCursor(const Expand &self,
: self_(self), input_cursor_(self.input_->MakeCursor(db)), db_(db) {}
bool Expand::ExpandCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Expand");
// A helper function for expanding a node from an edge.
auto pull_node = [this, &frame](const EdgeAccessor &new_edge,
EdgeAtom::Direction direction) {
@ -657,6 +678,8 @@ class ExpandVariableCursor : public Cursor {
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("ExpandVariable");
ExpressionEvaluator evaluator(
&frame, context.symbol_table_, context.evaluation_context_,
&context.db_accessor_, self_.common_.graph_view);
@ -901,6 +924,8 @@ class STShortestPathCursor : public query::plan::Cursor {
}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("STShortestPath");
ExpressionEvaluator evaluator(&frame, context.symbol_table_,
context.evaluation_context_,
&context.db_accessor_, GraphView::OLD);
@ -1135,6 +1160,8 @@ class SingleSourceShortestPathCursor : public query::plan::Cursor {
}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("SingleSourceShortestPath");
ExpressionEvaluator evaluator(&frame, context.symbol_table_,
context.evaluation_context_,
&context.db_accessor_, GraphView::OLD);
@ -1287,6 +1314,8 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor {
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("ExpandWeightedShortestPath");
ExpressionEvaluator evaluator(
&frame, context.symbol_table_, context.evaluation_context_,
&context.db_accessor_, self_.common_.graph_view);
@ -1547,6 +1576,8 @@ class ConstructNamedPathCursor : public Cursor {
: self_(self), input_cursor_(self_.input()->MakeCursor(db)) {}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("ConstructNamedPath");
if (!input_cursor_->Pull(frame, context)) return false;
auto symbol_it = self_.path_elements_.begin();
@ -1656,6 +1687,8 @@ Filter::FilterCursor::FilterCursor(const Filter &self,
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Filter::FilterCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Filter");
// Like all filters, newly set values should not affect filtering of old
// nodes and edges.
ExpressionEvaluator evaluator(&frame, context.symbol_table_,
@ -1701,6 +1734,8 @@ Produce::ProduceCursor::ProduceCursor(const Produce &self,
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Produce::ProduceCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Produce");
if (input_cursor_->Pull(frame, context)) {
// Produce should always yield the latest results.
ExpressionEvaluator evaluator(&frame, context.symbol_table_,
@ -1708,6 +1743,7 @@ bool Produce::ProduceCursor::Pull(Frame &frame, Context &context) {
&context.db_accessor_, GraphView::NEW);
for (auto named_expr : self_.named_expressions_)
named_expr->Accept(evaluator);
return true;
}
return false;
@ -1737,6 +1773,8 @@ Delete::DeleteCursor::DeleteCursor(const Delete &self,
: self_(self), db_(db), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Delete::DeleteCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Delete");
if (!input_cursor_->Pull(frame, context)) return false;
// Delete should get the latest information, this way it is also possible
@ -1815,6 +1853,8 @@ SetProperty::SetPropertyCursor::SetPropertyCursor(const SetProperty &self,
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool SetProperty::SetPropertyCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("SetProperty");
if (!input_cursor_->Pull(frame, context)) return false;
// Set, just like Create needs to see the latest changes.
@ -1873,6 +1913,8 @@ SetProperties::SetPropertiesCursor::SetPropertiesCursor(
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("SetProperties");
if (!input_cursor_->Pull(frame, context)) return false;
TypedValue &lhs = frame[self_.input_symbol_];
@ -1975,6 +2017,8 @@ SetLabels::SetLabelsCursor::SetLabelsCursor(const SetLabels &self,
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool SetLabels::SetLabelsCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("SetLabels");
if (!input_cursor_->Pull(frame, context)) return false;
TypedValue &vertex_value = frame[self_.input_symbol_];
@ -2018,6 +2062,8 @@ RemoveProperty::RemovePropertyCursor::RemovePropertyCursor(
bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame,
Context &context) {
SCOPED_PROFILE_OP("RemoveProperty");
if (!input_cursor_->Pull(frame, context)) return false;
// Remove, just like Delete needs to see the latest changes.
@ -2081,6 +2127,8 @@ RemoveLabels::RemoveLabelsCursor::RemoveLabelsCursor(
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("RemoveLabels");
if (!input_cursor_->Pull(frame, context)) return false;
TypedValue &vertex_value = frame[self_.input_symbol_];
@ -2148,6 +2196,8 @@ bool ContainsSameEdge(const TypedValue &a, const TypedValue &b) {
bool EdgeUniquenessFilter::EdgeUniquenessFilterCursor::Pull(
Frame &frame, Context &context) {
SCOPED_PROFILE_OP("EdgeUniquenessFilter");
auto expansion_ok = [&]() {
TypedValue &expand_value = frame[self_.expand_symbol_];
for (const auto &previous_symbol : self_.previous_symbols_) {
@ -2193,6 +2243,8 @@ Accumulate::AccumulateCursor::AccumulateCursor(const Accumulate &self,
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
bool Accumulate::AccumulateCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Accumulate");
// cache all the input
if (!pulled_all_input_) {
while (input_cursor_->Pull(frame, context)) {
@ -2277,6 +2329,8 @@ TypedValue DefaultAggregationOpValue(const Aggregate::Element &element) {
} // namespace
bool Aggregate::AggregateCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Aggregate");
if (!pulled_all_input_) {
ProcessAll(frame, context);
pulled_all_input_ = true;
@ -2539,6 +2593,8 @@ Skip::SkipCursor::SkipCursor(const Skip &self, database::GraphDbAccessor &db)
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Skip::SkipCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Skip");
while (input_cursor_->Pull(frame, context)) {
if (to_skip_ == -1) {
// First successful pull from the input, evaluate the skip expression.
@ -2597,6 +2653,8 @@ Limit::LimitCursor::LimitCursor(const Limit &self,
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool Limit::LimitCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Limit");
// We need to evaluate the limit expression before the first input Pull
// because it might be 0 and thereby we shouldn't Pull from input at all.
// We can do this before Pulling from the input because the limit expression
@ -2669,6 +2727,8 @@ OrderBy::OrderByCursor::OrderByCursor(const OrderBy &self,
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
bool OrderBy::OrderByCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("OrderBy");
if (!did_pull_all_) {
ExpressionEvaluator evaluator(&frame, context.symbol_table_,
context.evaluation_context_,
@ -2759,6 +2819,8 @@ Merge::MergeCursor::MergeCursor(const Merge &self,
merge_create_cursor_(self.merge_create_->MakeCursor(db)) {}
bool Merge::MergeCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Merge");
while (true) {
if (pull_input_) {
if (input_cursor_->Pull(frame, context)) {
@ -2841,6 +2903,8 @@ Optional::OptionalCursor::OptionalCursor(const Optional &self,
optional_cursor_(self.optional_->MakeCursor(db)) {}
bool Optional::OptionalCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Optional");
while (true) {
if (pull_input_) {
if (input_cursor_->Pull(frame, context)) {
@ -2912,6 +2976,8 @@ Unwind::UnwindCursor::UnwindCursor(const Unwind &self,
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
bool Unwind::UnwindCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Unwind");
while (true) {
if (db_.should_abort()) throw HintedAbortError();
// if we reached the end of our list of values
@ -2975,6 +3041,8 @@ Distinct::DistinctCursor::DistinctCursor(const Distinct &self,
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool Distinct::DistinctCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Distinct");
while (true) {
if (!input_cursor_->Pull(frame, context)) return false;
@ -3034,6 +3102,8 @@ Union::UnionCursor::UnionCursor(const Union &self,
right_cursor_(self.right_op_->MakeCursor(db)) {}
bool Union::UnionCursor::Pull(Frame &frame, Context &context) {
SCOPED_PROFILE_OP("Union");
std::unordered_map<std::string, TypedValue> results;
if (left_cursor_->Pull(frame, context)) {
// collect values from the left child
@ -3097,6 +3167,8 @@ class CartesianCursor : public Cursor {
}
bool Pull(Frame &frame, Context &context) override {
SCOPED_PROFILE_OP("Cartesian");
if (!cartesian_pull_initialized_) {
// Pull all left_op frames.
while (left_op_cursor_->Pull(frame, context)) {
@ -3173,11 +3245,12 @@ std::unique_ptr<Cursor> Cartesian::MakeCursor(
OutputTable::OutputTable(std::vector<Symbol> output_symbols,
std::vector<std::vector<TypedValue>> rows)
: output_symbols_(std::move(output_symbols)),
callback_([rows]() { return rows; }) {}
callback_([rows](Frame *, Context *) { return rows; }) {}
OutputTable::OutputTable(
std::vector<Symbol> output_symbols,
std::function<std::vector<std::vector<TypedValue>>()> callback)
std::function<std::vector<std::vector<TypedValue>>(Frame *, Context *)>
callback)
: output_symbols_(std::move(output_symbols)),
callback_(std::move(callback)) {}
@ -3189,7 +3262,7 @@ class OutputTableCursor : public Cursor {
bool Pull(Frame &frame, Context &context) override {
if (!pulled_) {
rows_ = self_.callback_();
rows_ = self_.callback_(&frame, &context);
for (const auto &row : rows_) {
CHECK(row.size() == self_.output_symbols_.size())
<< "Wrong number of columns in row!";

View File

@ -158,7 +158,7 @@ can serve as inputs to others and thus a sequence of operations is formed.")
/** Return @c Symbol vector where the query results will be stored.
*
* Currently, outputs symbols are generated in @c Produce and @c Union
* Currently, output symbols are generated in @c Produce and @c Union
* operators. @c Skip, @c Limit, @c OrderBy and @c Distinct propagate the
* symbols from @c Produce (if it exists as input operator).
*
@ -1198,7 +1198,7 @@ pulled.")
:capnp-save (lcp:capnp-save-vector "::query::capnp::Symbol" "Symbol")
:capnp-load (lcp:capnp-load-vector "::query::capnp::Symbol" "Symbol")))
(:documentation
"Constructs a named path from it's elements and places it on the frame.")
"Constructs a named path from its elements and places it on the frame.")
(:public
#>cpp
ConstructNamedPath() {}
@ -1285,7 +1285,7 @@ a boolean value.")
:capnp-load (load-ast-vector "NamedExpression *")))
(:documentation
"A logical operator that places an arbitrary number
if named expressions on the frame (the logical operator
of named expressions on the frame (the logical operator
for the RETURN clause).
Supports optional input. When the input is provided,
@ -1340,8 +1340,8 @@ RETURN clause) the Produce's pull succeeds exactly once.")
:capnp-save (save-ast-vector "Expression *")
:capnp-load (load-ast-vector "Expression *"))
(detach :bool :scope :public :documentation
"if the vertex should be detached before deletion if not detached,
and has connections, an error is raised ignored when deleting edges"))
"Whether the vertex should be detached before deletion. If not detached,
and has connections, an error is raised when deleting edges."))
(:documentation
"Operator for deleting vertices and edges.
@ -1397,7 +1397,7 @@ Has a flag for using DETACH DELETE when deleting vertices.")
:capnp-type "Ast.Tree" :capnp-init nil
:capnp-save #'save-ast-pointer :capnp-load (load-ast-pointer "Expression *")))
(:documentation
"Logical Op for setting a single property on a single vertex or edge.
"Logical operator for setting a single property on a single vertex or edge.
The property value is an expression that must evaluate to some type that
can be stored (a TypedValue that can be converted to PropertyValue).")
@ -1448,10 +1448,10 @@ can be stored (a TypedValue that can be converted to PropertyValue).")
:capnp-save #'save-ast-pointer :capnp-load (load-ast-pointer "Expression *"))
(op "Op" :scope :public))
(:documentation
"Logical op for setting the whole properties set on a vertex or an edge.
"Logical operator for setting the whole property set on a vertex or an edge.
The value being set is an expression that must evaluate to a vertex, edge
or map (literal or parameter).
The value being set is an expression that must evaluate to a vertex, edge or
map (literal or parameter).
Supports setting (replacing the whole properties set with another) and
updating.")
@ -1462,7 +1462,7 @@ updating.")
@c UPDATE means that the current property set is augmented with additional
ones (existing props of the same name are replaced), while @c REPLACE means
that the old props are discarded and replaced with new ones.")
that the old properties are discarded and replaced with new ones.")
(:serialize))
#>cpp
@ -1566,7 +1566,7 @@ It does NOT remove labels that are already set on that Vertex.")
:capnp-type "Ast.Tree" :capnp-init nil
:capnp-save #'save-ast-pointer :capnp-load (load-ast-pointer "PropertyLookup *")))
(:documentation
"Logical op for removing a property from an edge or a vertex.")
"Logical operator for removing a property from an edge or a vertex.")
(:public
#>cpp
RemoveProperty() {}
@ -1721,23 +1721,23 @@ edge lists).")
"Pulls everything from the input before passing it through.
Optionally advances the command after accumulation and before emitting.
On the first Pull from this Op's Cursor the input Cursor will be
Pulled until it is empty. The results will be accumulated in the
temporary cache. Once the input Cursor is empty, this Op's Cursor
will start returning cached stuff from it's Pull.
On the first Pull from this operator's Cursor the input Cursor will be Pulled
until it is empty. The results will be accumulated in the temporary cache. Once
the input Cursor is empty, this operator's Cursor will start returning cached
stuff from its Pull.
This technique is used for ensuring all the operations from the
previous LogicalOp have been performed before exposing data
to the next. A typical use-case is the `MATCH - SET - RETURN`
previous logical operator have been performed before exposing data
to the next. A typical use case is a `MATCH--SET--RETURN`
query in which every SET iteration must be performed before
RETURN starts iterating (see Memgraph Wiki for detailed reasoning).
IMPORTANT: This Op does not cache all the results but only those
IMPORTANT: This operator does not cache all the results but only those
elements from the frame whose symbols (frame positions) it was given.
All other frame positions will contain undefined junk after this
op has executed, and should not be used.
operator has executed, and should not be used.
This op can also advance the command after the accumulation and
This operator can also advance the command after the accumulation and
before emitting. If the command gets advanced, every value that
has been cached will be reconstructed before Pull returns.
@ -1843,7 +1843,7 @@ Input data is grouped based on the given set of named
expressions. Grouping is done on unique values.
IMPORTANT:
Ops taking their input from an aggregation are only
Operators taking their input from an aggregation are only
allowed to use frame values that are either aggregation
outputs or group-by named-expressions. All other frame
elements are in an undefined state after aggregation.")
@ -1903,12 +1903,12 @@ elements are in an undefined state after aggregation.")
private:
// Data structure for a single aggregation cache.
// does NOT include the group-by values since those
// Does NOT include the group-by values since those
// are a key in the aggregation map.
// The vectors in an AggregationValue contain one element
// for each aggregation in this LogicalOp.
struct AggregationValue {
// how many input rows has been aggregated in respective
// how many input rows have been aggregated in respective
// values_ element so far
std::vector<int> counts_;
// aggregated values. Initially Null (until at least one
@ -2446,8 +2446,8 @@ This implementation maintains input ordering.")
"A logical operator that applies UNION operator on inputs and places the
result on the frame.
This operator takes two inputs, a vector of symbols for the result, and
vectors of symbols used by each of the inputs.")
This operator takes two inputs, a vector of symbols for the result, and vectors
of symbols used by each of the inputs.")
(:public
#>cpp
Union() {}
@ -2530,14 +2530,16 @@ vectors of symbols used by each of the inputs.")
(lcp:define-class output-table (logical-operator)
((output-symbols "std::vector<Symbol>" :scope :public :dont-save t)
(callback "std::function<std::vector<std::vector<TypedValue>>()>"
(callback "std::function<std::vector<std::vector<TypedValue>>(Frame *, Context *)>"
:scope :public :dont-save t))
(:documentation "An operator that outputs a table, producing a single row on each pull")
(:public
#>cpp
OutputTable() {}
OutputTable(std::vector<Symbol> output_symbols,
std::function<std::vector<std::vector<TypedValue>>()> callback);
OutputTable(
std::vector<Symbol> output_symbols,
std::function<std::vector<std::vector<TypedValue>>(Frame *, Context *)>
callback);
OutputTable(std::vector<Symbol> output_symbols,
std::vector<std::vector<TypedValue>> rows);

View File

@ -171,8 +171,23 @@ bool PlanPrinter::PreVisit(query::plan::Optional &op) {
PRE_VISIT(Unwind);
PRE_VISIT(Distinct);
bool PlanPrinter::PreVisit(query::plan::Union &op) {
WithPrintLn([&op](auto &out) {
out << "* Union {";
utils::PrintIterable(out, op.left_symbols_, ", ",
[](auto &out, const auto &sym) { out << sym.name(); });
out << " : ";
utils::PrintIterable(out, op.right_symbols_, ", ",
[](auto &out, const auto &sym) { out << sym.name(); });
out << "}";
});
Branch(*op.right_op_);
op.left_op_->Accept(*this);
return false;
}
bool PlanPrinter::Visit(query::plan::Once &op) {
// Ignore checking Once, it is implicitly at the end.
WithPrintLn([&op](auto &out) { out << "* Once"; });
return true;
}

View File

@ -71,6 +71,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor {
bool PreVisit(Limit &) override;
bool PreVisit(OrderBy &) override;
bool PreVisit(Distinct &) override;
bool PreVisit(Union &) override;
bool PreVisit(Unwind &) override;
@ -82,7 +83,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor {
void WithPrintLn(TFun fun) {
*out_ << " ";
for (int i = 0; i < depth_; ++i) {
*out_ << "| ";
*out_ << "| ";
}
fun(*out_);
*out_ << std::endl;

104
src/query/plan/profile.cpp Normal file
View File

@ -0,0 +1,104 @@
#include "query/plan/profile.hpp"
#include <algorithm>
#include <chrono>
#include <fmt/format.h>
#include "query/context.hpp"
#include "utils/likely.hpp"
namespace query::plan {
//////////////////////////////////////////////////////////////////////////////
//
// FormatProfilingStats
namespace {
class FormatProfilingStatsHelper {
public:
FormatProfilingStatsHelper(unsigned long long total_cycles,
std::chrono::duration<double> total_time)
: total_cycles_(total_cycles), total_time_(total_time) {}
void Output(const ProfilingStats &cumulative_stats) {
auto cycles = IndividualCycles(cumulative_stats);
rows_.emplace_back(std::vector<TypedValue>{
FormatOperator(cumulative_stats.name), cumulative_stats.actual_hits,
FormatRelativeTime(cycles), FormatAbsoluteTime(cycles)});
for (size_t i = 1; i < cumulative_stats.children.size(); ++i) {
Branch(cumulative_stats.children[i]);
}
if (cumulative_stats.children.size() >= 1) {
Output(cumulative_stats.children[0]);
}
}
void Branch(const ProfilingStats &cumulative_stats) {
rows_.emplace_back(std::vector<TypedValue>{"|\\", "", "", ""});
++depth_;
Output(cumulative_stats);
--depth_;
}
unsigned long long IndividualCycles(const ProfilingStats &cumulative_stats) {
return cumulative_stats.num_cycles -
std::accumulate(
cumulative_stats.children.begin(),
cumulative_stats.children.end(), 0ULL,
[](auto acc, auto &stats) { return acc + stats.num_cycles; });
}
std::string Format(const char *str) {
std::ostringstream ss;
for (int i = 0; i < depth_; ++i) {
ss << "| ";
}
ss << str;
return ss.str();
}
std::string Format(const std::string &str) { return Format(str.c_str()); }
std::string FormatOperator(const char *str) {
return Format(std::string("* ") + str);
}
std::string FormatRelativeTime(unsigned long long num_cycles) {
return fmt::format("{: 10.6f} %",
static_cast<double>(num_cycles) / total_cycles_ * 100);
}
std::string FormatAbsoluteTime(unsigned long long num_cycles) {
return fmt::format(
"{: 10.6f} ms",
(static_cast<double>(num_cycles) / total_cycles_ *
static_cast<std::chrono::duration<double, std::milli>>(total_time_))
.count());
}
std::vector<std::vector<TypedValue>> rows() { return rows_; }
private:
int64_t depth_{0};
std::vector<std::vector<TypedValue>> rows_;
unsigned long long total_cycles_;
std::chrono::duration<double> total_time_;
};
} // namespace
std::vector<std::vector<TypedValue>> FormatProfilingStats(
const ProfilingStats &cumulative_stats,
std::chrono::duration<double> total_time) {
FormatProfilingStatsHelper helper{cumulative_stats.num_cycles, total_time};
helper.Output(cumulative_stats);
return helper.rows();
}
} // namespace query::plan

View File

@ -0,0 +1,27 @@
#pragma once
#include <cstdint>
#include <vector>
#include "query/typed_value.hpp"
namespace query {
namespace plan {
/**
* Stores profiling statistics for a single logical operator.
*/
struct ProfilingStats {
int64_t actual_hits{0};
unsigned long long num_cycles{0};
uint64_t key{0};
const char *name{nullptr};
std::vector<ProfilingStats> children;
};
std::vector<std::vector<TypedValue>> FormatProfilingStats(
const ProfilingStats &cumulative_stats, std::chrono::duration<double>);
} // namespace plan
} // namespace query

View File

@ -0,0 +1,71 @@
#pragma once
#include <cstdint>
#include "query/plan/profile.hpp"
#include "utils/tsc.hpp"
namespace query {
namespace plan {
/**
* A RAII class used for profiling logical operators. Instances of this class
* update the profiling data stored within the `Context` object and build up a
* tree of `ProfilingStats` instances. The structure of the `ProfilingStats`
* tree depends on the `LogicalOperator`s that were executed.
*/
class ScopedProfile {
public:
ScopedProfile(uint64_t key, const char *name,
query::Context *context) noexcept
: context_(context) {
if (UNLIKELY(context_->is_profile_query_)) {
root_ = context_->stats_root_;
// Are we the root logical operator?
if (!root_) {
stats_ = &context_->stats_;
stats_->key = key;
stats_->name = name;
} else {
stats_ = nullptr;
// Was this logical operator already hit on one of the previous pulls?
auto it = std::find_if(root_->children.begin(), root_->children.end(),
[key](auto &stats) { return stats.key == key; });
if (it == root_->children.end()) {
root_->children.emplace_back();
stats_ = &root_->children.back();
stats_->key = key;
stats_->name = name;
} else {
stats_ = &(*it);
}
}
context_->stats_root_ = stats_;
stats_->actual_hits++;
start_time_ = utils::ReadTSC();
}
}
~ScopedProfile() noexcept {
if (UNLIKELY(context_->is_profile_query_)) {
stats_->num_cycles += utils::ReadTSC() - start_time_;
// Restore the old root ("pop")
context_->stats_root_ = root_;
}
}
private:
query::Context *context_;
ProfilingStats *root_;
ProfilingStats *stats_;
unsigned long long start_time_;
};
} // namespace plan
} // namespace query

View File

@ -83,7 +83,14 @@ class TransactionEngine final {
// Make a copy of the summary because the `Commit` call will destroy the
// `results_` object.
auto summary = results_->summary();
if (!in_explicit_transaction_) Commit();
if (!in_explicit_transaction_) {
if (results_->IsProfileQuery()) {
Abort();
} else {
Commit();
}
}
return summary;
} catch (const utils::BasicException &) {
AbortCommand();

View File

@ -7,14 +7,16 @@ namespace utils {
// This class is threadsafe.
class Timer {
public:
/** Time elapsed since creation. */
std::chrono::duration<double> Elapsed() const {
return std::chrono::steady_clock::now() - start_time_;
Timer() : start_time_(std::chrono::steady_clock::now()) {}
template <typename TDuration = std::chrono::duration<double>>
TDuration Elapsed() const {
return std::chrono::duration_cast<TDuration>(
std::chrono::steady_clock::now() - start_time_);
}
private:
std::chrono::time_point<std::chrono::steady_clock> start_time_ =
std::chrono::steady_clock::now();
std::chrono::steady_clock::time_point start_time_;
};
} // namespace utils

View File

@ -39,6 +39,9 @@ target_link_libraries(${test_prefix}ring_buffer mg-single-node kvstore_dummy_lib
add_benchmark(query/planner.cpp)
target_link_libraries(${test_prefix}planner mg-single-node kvstore_dummy_lib)
add_benchmark(query/profile.cpp)
target_link_libraries(${test_prefix}profile mg-single-node kvstore_dummy_lib)
add_benchmark(query/stripped.cpp)
target_link_libraries(${test_prefix}stripped mg-single-node kvstore_dummy_lib)

View File

@ -0,0 +1,669 @@
#include <memory>
#include <vector>
#include <benchmark/benchmark_api.h>
#include "query/context.hpp"
#include "utils/tsc.hpp"
/*
* We're benchmarking three different implementations of the profiling
* machinery:
*
* 1. Using `std::unordered_map` to associate every logical operator with its
* profiling stats.
*
* 2. Using a tree of profiling stats whose structure corresponds to the logical
* operators that were executed (no explicit association exists between a
* logical operator and its profiling stats).
*
* 3. The same as 2. but storing the nodes of the tree within a contiguous block
* of storage (pool).
*/
namespace {
//////////////////////////////////////////////////////////////////////////////
//
// NAryTree
struct NAryTree {
NAryTree(int64_t id) : id(id) {}
int64_t id;
std::vector<std::unique_ptr<NAryTree>> children;
};
std::unique_ptr<NAryTree> LiftTreeHelper(const std::vector<int64_t> &flattened,
size_t *index) {
int64_t id = flattened[(*index)++];
size_t num_children = flattened[(*index)++];
auto node = std::make_unique<NAryTree>(id);
if (num_children == 0) {
return node;
}
for (int64_t i = 0; i < num_children; ++i) {
node->children.emplace_back(LiftTreeHelper(flattened, index));
}
return node;
}
std::unique_ptr<NAryTree> LiftTree(const std::vector<int64_t> &flattened) {
size_t index = 0;
return LiftTreeHelper(flattened, &index);
}
//////////////////////////////////////////////////////////////////////////////
///
/// Map variant
namespace map {
//////////////////////////////////////////////////////////////////////////////
//
// ProfilingStats
struct ProfilingStats {
// We're starting from -1 to facilitate unconditional incrementing within
// `Pull` (which makes the code of the `Pull` easier to follow). The last
// increment (when `Pull` returns `false`) will be superfluous so compensate
// for it here.
int64_t actual_hits{-1};
unsigned long long elapsed_time{0};
};
//////////////////////////////////////////////////////////////////////////////
//
// Context
struct EvaluationContext {
bool is_profile_query{true};
std::unordered_map<std::uintptr_t, ProfilingStats> operator_stats;
};
class Context {
public:
Context() = default;
Context(const Context &) = delete;
Context &operator=(const Context &) = delete;
Context(Context &&) = default;
Context &operator=(Context &&) = default;
EvaluationContext evaluation_context_;
};
//////////////////////////////////////////////////////////////////////////////
//
// ScopedProfile
struct ScopedProfile {
ScopedProfile(std::uintptr_t key, Context *context) : context(context) {
if (context->evaluation_context_.is_profile_query) {
stats = &context->evaluation_context_.operator_stats[key];
start_time = utils::ReadTSC();
stats->actual_hits++;
}
}
~ScopedProfile() {
if (context->evaluation_context_.is_profile_query) {
stats->elapsed_time += utils::ReadTSC() - start_time;
}
}
Context *context;
ProfilingStats *stats;
unsigned long long start_time;
};
//////////////////////////////////////////////////////////////////////////////
//
// Profile
void ProfileHelper(const NAryTree *tree, Context *context) {
ScopedProfile profile(reinterpret_cast<std::uintptr_t>(tree), context);
for (auto &child : tree->children) {
ProfileHelper(child.get(), context);
}
}
void Profile(const std::unique_ptr<NAryTree> &tree) {
Context context;
context.evaluation_context_.is_profile_query = true;
ProfileHelper(tree.get(), &context);
}
} // namespace map
//////////////////////////////////////////////////////////////////////////////
///
/// Tree variant
namespace tree {
//////////////////////////////////////////////////////////////////////////////
///
/// ProfilingStats
struct ProfilingStats {
ProfilingStats() : ProfilingStats(-1, 0) {}
ProfilingStats(int64_t actual_hits, unsigned long long elapsed_time)
: actual_hits(actual_hits),
elapsed_time(elapsed_time),
key(reinterpret_cast<std::uintptr_t>(nullptr)) {
// The logical operator with the most children has at most 3 children (but
// most of the logical operators have just 1 child)
children.reserve(3);
}
// We're starting from -1 to facilitate unconditional incrementing within
// `Pull` (which makes the code of the `Pull` easier to follow). The last
// increment (when `Pull` returns `false`) will be superfluous so compensate
// for it here.
int64_t actual_hits;
unsigned long long elapsed_time;
std::uintptr_t key;
std::vector<ProfilingStats> children;
};
//////////////////////////////////////////////////////////////////////////////
///
/// Context
struct EvaluationContext {
bool is_profile_query{true};
ProfilingStats stats;
ProfilingStats *stats_root{nullptr};
};
class Context {
public:
Context() = default;
Context(const Context &) = delete;
Context &operator=(const Context &) = delete;
Context(Context &&) = default;
Context &operator=(Context &&) = default;
EvaluationContext evaluation_context_;
};
//////////////////////////////////////////////////////////////////////////////
///
/// ScopedProfile
struct ScopedProfile {
ScopedProfile(std::uintptr_t key, Context *context) : context(context) {
if (context->evaluation_context_.is_profile_query) {
root = context->evaluation_context_.stats_root;
// Are we the root logical operator?
if (!root) {
stats = &context->evaluation_context_.stats;
} else {
stats = nullptr;
// Was this logical operator already hit on one of the previous pulls?
for (size_t i = 0; i < root->children.size(); ++i) {
if (root->children[i].key == key) {
stats = &root->children[i];
break;
}
}
if (!stats) {
root->children.emplace_back();
stats = &root->children.back();
}
}
context->evaluation_context_.stats_root = stats;
stats->actual_hits++;
stats->key = key;
start_time = utils::ReadTSC();
}
}
~ScopedProfile() {
if (context->evaluation_context_.is_profile_query) {
stats->elapsed_time += utils::ReadTSC() - start_time;
// Restore the old root ("pop")
context->evaluation_context_.stats_root = root;
}
}
Context *context;
ProfilingStats *root, *stats;
unsigned long long start_time;
};
//////////////////////////////////////////////////////////////////////////////
//
// Profile
void ProfileHelper(const NAryTree *tree, Context *context) {
ScopedProfile profile(reinterpret_cast<std::uintptr_t>(tree), context);
for (auto &child : tree->children) {
ProfileHelper(child.get(), context);
}
}
void Profile(const std::unique_ptr<NAryTree> &tree) {
Context context;
context.evaluation_context_.is_profile_query = true;
ProfileHelper(tree.get(), &context);
}
} // namespace tree
//////////////////////////////////////////////////////////////////////////////
///
/// Tree + No reserve variant
namespace tree_no_reserve {
//////////////////////////////////////////////////////////////////////////////
///
/// ProfilingStats
struct ProfilingStats {
// We're starting from -1 to facilitate unconditional incrementing within
// `Pull` (which makes the code of the `Pull` easier to follow). The last
// increment (when `Pull` returns `false`) will be superfluous so compensate
// for it here.
int64_t actual_hits{-1};
unsigned long long elapsed_time{0};
std::uintptr_t key{reinterpret_cast<std::uintptr_t>(nullptr)};
std::vector<ProfilingStats> children;
};
//////////////////////////////////////////////////////////////////////////////
///
/// Context
struct EvaluationContext {
bool is_profile_query{true};
ProfilingStats stats;
ProfilingStats *stats_root{nullptr};
};
class Context {
public:
Context() = default;
Context(const Context &) = delete;
Context &operator=(const Context &) = delete;
Context(Context &&) = default;
Context &operator=(Context &&) = default;
EvaluationContext evaluation_context_;
};
//////////////////////////////////////////////////////////////////////////////
///
/// ScopedProfile
struct ScopedProfile {
ScopedProfile(std::uintptr_t key, Context *context) : context(context) {
if (context->evaluation_context_.is_profile_query) {
root = context->evaluation_context_.stats_root;
// Are we the root logical operator?
if (!root) {
stats = &context->evaluation_context_.stats;
} else {
stats = nullptr;
// Was this logical operator already hit on one of the previous pulls?
for (size_t i = 0; i < root->children.size(); ++i) {
if (root->children[i].key == key) {
stats = &root->children[i];
break;
}
}
if (!stats) {
root->children.emplace_back();
stats = &root->children.back();
}
}
context->evaluation_context_.stats_root = stats;
stats->actual_hits++;
stats->key = key;
start_time = utils::ReadTSC();
}
}
~ScopedProfile() {
if (context->evaluation_context_.is_profile_query) {
stats->elapsed_time += utils::ReadTSC() - start_time;
// Restore the old root ("pop")
context->evaluation_context_.stats_root = root;
}
}
Context *context;
ProfilingStats *root, *stats;
unsigned long long start_time;
};
//////////////////////////////////////////////////////////////////////////////
//
// Profile
void ProfileHelper(const NAryTree *tree, Context *context) {
ScopedProfile profile(reinterpret_cast<std::uintptr_t>(tree), context);
for (auto &child : tree->children) {
ProfileHelper(child.get(), context);
}
}
void Profile(const std::unique_ptr<NAryTree> &tree) {
Context context;
context.evaluation_context_.is_profile_query = true;
ProfileHelper(tree.get(), &context);
}
} // namespace tree
//////////////////////////////////////////////////////////////////////////////
///
/// Tree + Storage variant
namespace tree_storage {
//////////////////////////////////////////////////////////////////////////////
//
// ProfilingStats
constexpr size_t kMaxProfilingStatsChildren = 3;
struct ProfilingStatsStorage;
struct ProfilingStats {
ProfilingStats();
ProfilingStats(int64_t actual_hits, unsigned long long elapsed_time);
size_t EnsureChild(const ProfilingStatsStorage &storage, std::uintptr_t key);
// We're starting from -1 to facilitate unconditional incrementing within
// `Pull` (which makes the code of the `Pull` easier to follow). The last
// increment (when `Pull` returns `false`) will be superfluous so compensate
// for it here.
int64_t actual_hits;
unsigned long long elapsed_time;
std::uintptr_t key;
std::array<int64_t, kMaxProfilingStatsChildren> children;
};
//////////////////////////////////////////////////////////////////////////////
///
/// ProfilingStatsStorage
struct ProfilingStatsStorage {
int64_t Create();
bool Empty();
ProfilingStats &Root();
const ProfilingStats &Root() const;
ProfilingStats *at(int64_t index);
const ProfilingStats *at(int64_t index) const;
ProfilingStats *operator[](int64_t index);
const ProfilingStats *operator[](int64_t index) const;
std::vector<ProfilingStats> storage;
};
int64_t ProfilingStatsStorage::Create() {
storage.emplace_back();
return storage.size() - 1;
}
bool ProfilingStatsStorage::Empty() { return storage.empty(); }
ProfilingStats &ProfilingStatsStorage::Root() {
CHECK(!storage.empty());
return storage[0];
}
const ProfilingStats &ProfilingStatsStorage::Root() const {
CHECK(!storage.empty());
return storage[0];
}
ProfilingStats *ProfilingStatsStorage::at(int64_t index) {
CHECK(0 <= index);
CHECK(index < storage.size());
return &storage[index];
}
const ProfilingStats *ProfilingStatsStorage::at(int64_t index) const {
CHECK(0 <= index);
CHECK(index < storage.size());
return &storage[index];
}
ProfilingStats *ProfilingStatsStorage::operator[](int64_t index) {
return at(index);
}
const ProfilingStats *ProfilingStatsStorage::operator[](int64_t index) const {
return at(index);
}
//////////////////////////////////////////////////////////////////////////////
//
// ProfilingStats
ProfilingStats::ProfilingStats() : ProfilingStats(-1, 0) {}
ProfilingStats::ProfilingStats(int64_t actual_hits,
unsigned long long elapsed_time)
: actual_hits(actual_hits),
elapsed_time(elapsed_time),
key(reinterpret_cast<std::uintptr_t>(nullptr)),
children{} {
for (auto &child : children) {
child = -1;
}
}
size_t ProfilingStats::EnsureChild(const ProfilingStatsStorage &storage,
std::uintptr_t key) {
size_t size = children.size();
size_t slot = size;
for (size_t i = 0; i < size; ++i) {
if (children[i] == -1 || storage[children[i]]->key == key) {
slot = i;
break;
}
}
// Either we found an instance of `ProfilingStats` that was creater earlier
// and corresponds to the logical operator `op` or we're seeing this
// operator for the first time and have found a free slot for its
// `ProfilingStats` instance.
CHECK(slot < size);
return slot;
}
//////////////////////////////////////////////////////////////////////////////
///
/// Context
struct EvaluationContext {
bool is_profile_query{true};
ProfilingStatsStorage stats_storage;
int64_t stats_root{-1};
};
class Context {
public:
Context() = default;
Context(const Context &) = delete;
Context &operator=(const Context &) = delete;
Context(Context &&) = default;
Context &operator=(Context &&) = default;
EvaluationContext evaluation_context_;
};
//////////////////////////////////////////////////////////////////////////////
//
// ScopedProfile
struct ScopedProfile {
ScopedProfile(std::uintptr_t key, Context *context)
: context(context), old_root_id(-1), stats_id(-1) {
if (context->evaluation_context_.is_profile_query) {
stats_id = EnsureStats(context->evaluation_context_.stats_root, key);
auto &storage = context->evaluation_context_.stats_storage;
ProfilingStats *stats = storage[stats_id];
CHECK(stats);
stats->actual_hits++;
start_time = utils::ReadTSC();
stats->key = key;
// Set the new root ("push")
old_root_id = context->evaluation_context_.stats_root;
context->evaluation_context_.stats_root = stats_id;
}
}
int64_t EnsureStats(int64_t root_id, std::uintptr_t key) {
auto &storage = context->evaluation_context_.stats_storage;
if (root_id == -1) {
if (storage.Empty()) {
return storage.Create();
}
return 0;
}
ProfilingStats *root = storage[root_id];
size_t slot = root->EnsureChild(storage, key);
int64_t stats_id = root->children[slot];
if (stats_id == -1) {
stats_id = storage.Create();
// `Create` might have invalidated the `root` pointer so index `storage`
// explicitly again
storage[root_id]->children[slot] = stats_id;
}
CHECK(stats_id >= 0);
return stats_id;
}
~ScopedProfile() {
auto &storage = context->evaluation_context_.stats_storage;
ProfilingStats *stats = storage[stats_id];
CHECK(stats);
if (context->evaluation_context_.is_profile_query) {
stats->elapsed_time += utils::ReadTSC() - start_time;
// Restore the old root ("pop")
context->evaluation_context_.stats_root = old_root_id;
}
}
Context *context;
int64_t old_root_id, stats_id;
unsigned long long start_time;
};
//////////////////////////////////////////////////////////////////////////////
//
// Profile
void ProfileHelper(const NAryTree *tree, Context *context) {
ScopedProfile profile(reinterpret_cast<std::uintptr_t>(tree), context);
for (auto &child : tree->children) {
ProfileHelper(child.get(), context);
}
}
void Profile(const std::unique_ptr<NAryTree> &tree) {
Context context;
context.evaluation_context_.is_profile_query = true;
ProfileHelper(tree.get(), &context);
}
} // tree_storage
} // namespace
std::vector<std::unique_ptr<NAryTree>> GetTrees() {
std::vector<std::vector<int64_t>> flat_trees = {
// {-1, 1, -2, 1, -3, 1, -4, 0},
// {-1, 1, -2, 1, -3, 3, -4, 1, -5, 1, -6, 0, -7, 1, -8, 1, -6, 0, -6, 0},
{-1, 1, -2, 2, -3, 2, -4, 2, -5, 1, -6, 1, -7, 0, -7,
0, -7, 0, -3, 1, -4, 2, -5, 1, -6, 1, -7, 0, -7, 0}};
std::vector<std::unique_ptr<NAryTree>> trees;
for (auto &flattened : flat_trees) {
trees.emplace_back(LiftTree(flattened));
}
return trees;
}
void BM_PlanProfileMap(benchmark::State &state) {
auto trees = GetTrees();
while (state.KeepRunning()) {
for (auto &tree : trees) {
map::Profile(tree);
}
}
}
void BM_PlanProfileTree(benchmark::State &state) {
auto trees = GetTrees();
while (state.KeepRunning()) {
for (auto &tree : trees) {
tree::Profile(tree);
}
}
}
void BM_PlanProfileTreeNoReserve(benchmark::State &state) {
auto trees = GetTrees();
while (state.KeepRunning()) {
for (auto &tree : trees) {
tree_no_reserve::Profile(tree);
}
}
}
void BM_PlanProfileTreeAndStorage(benchmark::State &state) {
auto trees = GetTrees();
while (state.KeepRunning()) {
for (auto &tree : trees) {
tree_storage::Profile(tree);
}
}
}
BENCHMARK(BM_PlanProfileMap);
BENCHMARK(BM_PlanProfileTree);
BENCHMARK(BM_PlanProfileTreeNoReserve);
BENCHMARK(BM_PlanProfileTreeAndStorage);
BENCHMARK_MAIN();

View File

@ -2421,4 +2421,35 @@ TYPED_TEST(CypherMainVisitorTest, TestExplainStreamQuery) {
SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileRegularQuery) {
{
TypeParam ast_generator("PROFILE RETURN n");
EXPECT_TRUE(dynamic_cast<ProfileQuery *>(ast_generator.query_));
}
}
TYPED_TEST(CypherMainVisitorTest, TestProfileComplicatedQuery) {
{
TypeParam ast_generator(
"profile optional match (n) where n.hello = 5 "
"return n union optional match (n) where n.there = 10 "
"return n");
EXPECT_TRUE(dynamic_cast<ProfileQuery *>(ast_generator.query_));
}
}
TYPED_TEST(CypherMainVisitorTest, TestProfileProfileQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE PROFILE RETURN n"),
SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileAuthQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE SHOW ROLES"), SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE SHOW STREAMS"),
SyntaxException);
}
} // namespace

View File

@ -2428,4 +2428,35 @@ TYPED_TEST(CypherMainVisitorTest, TestExplainStreamQuery) {
SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileRegularQuery) {
{
TypeParam ast_generator("PROFILE RETURN n");
EXPECT_TRUE(dynamic_cast<ProfileQuery *>(ast_generator.query_));
}
}
TYPED_TEST(CypherMainVisitorTest, TestProfileComplicatedQuery) {
{
TypeParam ast_generator(
"profile optional match (n) where n.hello = 5 "
"return n union optional match (n) where n.there = 10 "
"return n");
EXPECT_TRUE(dynamic_cast<ProfileQuery *>(ast_generator.query_));
}
}
TYPED_TEST(CypherMainVisitorTest, TestProfileProfileQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE PROFILE RETURN n"),
SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileAuthQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE SHOW ROLES"), SyntaxException);
}
TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
EXPECT_THROW(TypeParam ast_generator("PROFILE SHOW STREAMS"),
SyntaxException);
}
} // namespace