Use the label index with fewest vertices for ScanAll

Reviewers: florijan, mislav.bradac

Reviewed By: florijan, mislav.bradac

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D415
This commit is contained in:
Teon Banek 2017-05-30 11:53:48 +02:00
parent d07eff9f43
commit 1356fd5391
3 changed files with 58 additions and 24 deletions

View File

@ -40,7 +40,7 @@ void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
// high level tree -> logical plan
auto logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
visitor.storage(), symbol_table);
visitor.storage(), symbol_table, &db_accessor);
// generate frame based on symbol table max_position
Frame frame(symbol_table.max_position());

View File

@ -645,10 +645,28 @@ void AddMatching(const Match &match, const SymbolTable &symbol_table,
matching);
}
/// @brief Picks the label whose index contains the fewest vertices.
///
/// @param labels Candidate labels; must not be empty (checked with
///     debug_assert).
/// @param db Optional database accessor used to look up the vertex count of
///     each label. When null, no index information is available and the first
///     label is returned.
/// @return Reference to an element of @p labels, so it is valid only while
///     @p labels is alive. If several labels share the lowest count, the
///     earliest one in @p labels is returned.
const GraphDbTypes::Label &FindBestLabelIndex(
    const std::vector<GraphDbTypes::Label> &labels, const GraphDbAccessor *db) {
  debug_assert(!labels.empty(),
               "Trying to find the best label without any labels.");
  if (!db) {
    // We don't have a database to get index information, so just take the
    // first label.
    return labels.front();
  }
  // Look up every label's vertex count exactly once and remember the lowest
  // one seen so far, instead of re-querying both operands on each comparison.
  auto best = labels.begin();
  auto best_count = db->vertices_count(*best);
  for (auto it = labels.begin() + 1; it != labels.end(); ++it) {
    auto count = db->vertices_count(*it);
    // Strict comparison keeps the earliest label on ties.
    if (count < best_count) {
      best = it;
      best_count = count;
    }
  }
  return *best;
}
LogicalOperator *PlanMatching(const Matching &matching,
LogicalOperator *input_op,
AstTreeStorage &storage, MatchContext &context) {
PlanningContext &planning_ctx,
MatchContext &context) {
auto &bound_symbols = context.bound_symbols;
auto &storage = planning_ctx.ast_storage;
const auto &symbol_table = context.symbol_table;
// Copy filters, because we will modify the list as we generate Filters.
auto filters = matching.filters;
@ -665,10 +683,9 @@ LogicalOperator *PlanMatching(const Matching &matching,
last_op = new ScanAll(std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, context.graph_view);
} else {
// Don't act smart by selecting the best label index, so take the first.
auto label = FindBestLabelIndex(labels, planning_ctx.db);
last_op = new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, labels.front(),
context.graph_view);
node1_symbol, label, context.graph_view);
}
context.new_symbols.emplace_back(node1_symbol);
last_op = GenFilters(last_op, bound_symbols, filters, storage);
@ -732,8 +749,7 @@ auto GenMerge(query::Merge &merge, LogicalOperator *input_op,
std::unordered_set<Symbol> bound_symbols_copy(context.bound_symbols);
MatchContext match_ctx{context.symbol_table, bound_symbols_copy,
GraphView::NEW};
auto on_match =
PlanMatching(matching, nullptr, context.ast_storage, match_ctx);
auto on_match = PlanMatching(matching, nullptr, context, match_ctx);
// Use the original bound_symbols, so we fill it with new symbols.
auto on_create = GenCreateForPattern(
*merge.pattern_, nullptr, context.symbol_table, context.bound_symbols);
@ -798,12 +814,10 @@ std::unique_ptr<LogicalOperator> RuleBasedPlanner::Plan(
bool is_write = false;
for (const auto &query_part : query_parts) {
MatchContext match_ctx{context.symbol_table, context.bound_symbols};
input_op = PlanMatching(query_part.matching, input_op, context.ast_storage,
match_ctx);
input_op = PlanMatching(query_part.matching, input_op, context, match_ctx);
for (const auto &matching : query_part.optional_matching) {
MatchContext opt_ctx{context.symbol_table, context.bound_symbols};
auto *match_op =
PlanMatching(matching, nullptr, context.ast_storage, opt_ctx);
auto *match_op = PlanMatching(matching, nullptr, context, opt_ctx);
if (match_op) {
input_op = new Optional(std::shared_ptr<LogicalOperator>(input_op),
std::shared_ptr<LogicalOperator>(match_op),

View File

@ -1,3 +1,4 @@
/// @file
#pragma once
#include <memory>
@ -68,14 +69,27 @@ struct QueryPart {
std::vector<Clause *> remaining_clauses;
};
/// @brief Context which contains variables commonly used during planning.
///
/// The symbol table and AST storage are held by reference and the database
/// accessor by pointer, so all of them must outlive the context.
struct PlanningContext {
/// @brief SymbolTable is used to determine inputs and outputs of planned
/// operators.
///
/// Newly created AST nodes may be added to reference existing symbols.
SymbolTable &symbol_table;
/// @brief The storage is used to traverse the AST as well as create new nodes
/// for use in operators.
AstTreeStorage &ast_storage;
/// @brief Optional GraphDbAccessor, which may be used to get some information
/// from the database to generate better plans. The accessor is required only
/// to live long enough for the plan generation to finish.
///
/// Defaults to nullptr, meaning no database information is available to the
/// planner.
const GraphDbAccessor *db = nullptr;
/// @brief Symbol set is used to differentiate cycles in pattern matching.
///
/// During planning, symbols will be added as each operator produces values
/// for them. This way, the operator can be correctly initialized whether to
/// read a symbol or write it. E.g. `MATCH (n) -[r]- (n)` would bind (and
/// write) the first `n`, but the latter `n` would only read the already
/// written information.
std::unordered_set<Symbol> bound_symbols;
};
@ -92,17 +106,23 @@ class RuleBasedPlanner {
std::vector<QueryPart> CollectQueryParts(const SymbolTable &, AstTreeStorage &);
/// @brief Generates the LogicalOperator tree and returns the root operation.
/// @brief Generates the LogicalOperator tree and returns the resulting plan.
///
/// The tree is constructed by traversing the @c Query node from given
/// @c AstTreeStorage. The storage may also be used to create new AST nodes for
/// use in operators. @c SymbolTable is used to determine inputs and outputs of
/// certain operators.
/// @tparam TPlanner Type of the planner used for generation.
/// @param storage AstTreeStorage used to construct the operator tree by
/// traversing the @c Query node. The storage may also be used to create new
/// AST nodes for use in operators.
/// @param symbol_table SymbolTable used to determine inputs and outputs of
/// certain operators. Newly created AST nodes may be added to this symbol
/// table.
/// @param db Optional @c GraphDbAccessor, which is used to query database
/// information in order to improve generated plans.
template <class TPlanner>
typename TPlanner::PlanResult MakeLogicalPlan(AstTreeStorage &storage,
SymbolTable &symbol_table) {
typename TPlanner::PlanResult MakeLogicalPlan(
AstTreeStorage &storage, SymbolTable &symbol_table,
const GraphDbAccessor *db = nullptr) {
auto query_parts = CollectQueryParts(symbol_table, storage);
PlanningContext context{symbol_table, storage};
PlanningContext context{symbol_table, storage, db};
return TPlanner(context).Plan(query_parts);
}