Generate indexed ScanAll to make Expand existing

Summary: Test generating ScanAll before expand to use existing

Reviewers: florijan, mislav.bradac

Reviewed By: florijan

Subscribers: buda, lion, pullbot

Differential Revision: https://phabricator.memgraph.io/D746
This commit is contained in:
Teon Banek 2017-09-06 14:21:30 +02:00
parent 63966c15a6
commit c8d34c58ef
3 changed files with 140 additions and 44 deletions

View File

@ -7,6 +7,14 @@
#include "utils/algorithm.hpp"
#include "utils/exceptions.hpp"
#include "utils/flag_validation.hpp"
// Command-line flag consulted by the query planner: an upper bound on the
// number of indexed vertices for which an indexed lookup followed by an expand
// into an existing node is preferred over a regular expand (see the help text
// below). The default is 10 and accepted values are validated to lie in
// [-1, max int64]; since vertex counts are non-negative, -1 can never be
// satisfied, which is how the behaviour is switched off.
DEFINE_VALIDATED_int64(query_vertex_count_to_expand_existing, 10,
"Maximum count of indexed vertices which provoke "
"indexed lookup and then expand to existing, instead of "
"a regular expand. Default is 10, to turn off use -1.",
FLAG_IN_RANGE(-1,
std::numeric_limits<std::int64_t>::max()));
namespace query::plan {

View File

@ -1,9 +1,13 @@
/// @file
#pragma once
#include "gflags/gflags.h"
#include "query/frontend/ast/ast.hpp"
#include "query/plan/operator.hpp"
DECLARE_int64(query_vertex_count_to_expand_existing);
namespace query::plan {
/// Normalized representation of a pattern that needs to be matched.
@ -176,6 +180,7 @@ struct PlanningContext {
// Contextual information used for generating match operators.
struct MatchContext {
const Matching &matching;
const SymbolTable &symbol_table;
// Already bound symbols, which are used to determine whether the operator
// should reference them or establish new. This is both read from and written
@ -258,11 +263,13 @@ class RuleBasedPlanner {
// Set to true if a query command writes to the database.
bool is_write = false;
for (const auto &query_part : query_parts) {
MatchContext match_ctx{context.symbol_table, context.bound_symbols};
input_op = PlanMatching(query_part.matching, input_op, match_ctx);
MatchContext match_ctx{query_part.matching, context.symbol_table,
context.bound_symbols};
input_op = PlanMatching(match_ctx, input_op);
for (const auto &matching : query_part.optional_matching) {
MatchContext opt_ctx{context.symbol_table, context.bound_symbols};
auto *match_op = PlanMatching(matching, nullptr, opt_ctx);
MatchContext opt_ctx{matching, context.symbol_table,
context.bound_symbols};
auto *match_op = PlanMatching(opt_ctx, nullptr);
if (match_op) {
input_op = new Optional(std::shared_ptr<LogicalOperator>(input_op),
std::shared_ptr<LogicalOperator>(match_op),
@ -319,10 +326,10 @@ class RuleBasedPlanner {
// Finds the label-property combination which has indexed the lowest amount of
// vertices. `best_label` and `best_property` will be set to that combination
// and the function will return `true`. If the index cannot be found, the
// function will return `false` while leaving `best_label` and `best_property`
// unchanged.
bool FindBestLabelPropertyIndex(
// and the function will return (`true`, vertex count in index). If the index
// cannot be found, the function will return (`false`, maximum int64_t), while
// leaving `best_label` and `best_property` unchanged.
std::pair<bool, int64_t> FindBestLabelPropertyIndex(
const std::set<GraphDbTypes::Label> &labels,
const std::map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>> &property_filters,
@ -339,13 +346,12 @@ class RuleBasedPlanner {
return true;
};
bool found = false;
auto min_count = std::numeric_limits<decltype(context_.db.VerticesCount(
GraphDbTypes::Label{}, GraphDbTypes::Property{}))>::max();
int64_t min_count = std::numeric_limits<int64_t>::max();
for (const auto &label : labels) {
for (const auto &prop_pair : property_filters) {
const auto &property = prop_pair.first;
if (context_.db.LabelPropertyIndexExists(label, property)) {
auto vertices_count = context_.db.VerticesCount(label, property);
int64_t vertices_count = context_.db.VerticesCount(label, property);
if (vertices_count < min_count) {
for (const auto &prop_filter : prop_pair.second) {
if (prop_filter.used_symbols.find(symbol) !=
@ -369,7 +375,7 @@ class RuleBasedPlanner {
}
}
}
return found;
return {found, min_count};
}
const GraphDbTypes::Label &FindBestLabelIndex(
@ -383,46 +389,71 @@ class RuleBasedPlanner {
});
}
ScanAll *GenScanByIndex(
LogicalOperator *last_op, const Symbol &node_symbol,
const MatchContext &context, const std::set<GraphDbTypes::Label> &labels,
const std::map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>> &properties) {
debug_assert(!labels.empty(),
"Without labels, indexed data cannot be scanned.");
// Creates a ScanAll by the best possible index for the `node_symbol`. Best
// index is defined as the index with least number of vertices. If the node
// does not have at least a label, no indexed lookup can be created and
// `nullptr` is returned. The operator is chained after `last_op`. The optional
// `max_vertex_count` sets an upper limit on the vertex count of the chosen
// index: if that count exceeds the limit, no operator is created. In such a
// case, `nullptr` is returned and `last_op` is not chained.
ScanAll *GenScanByIndex(LogicalOperator *last_op, const Symbol &node_symbol,
const MatchContext &match_ctx,
const std::experimental::optional<int64_t>
&max_vertex_count = std::experimental::nullopt) {
const auto labels = FindOr(match_ctx.matching.filters.label_filters(),
node_symbol, std::set<GraphDbTypes::Label>())
.first;
if (labels.empty()) {
// Without labels, we cannot generate any indexed ScanAll.
return nullptr;
}
const auto properties =
FindOr(match_ctx.matching.filters.property_filters(), node_symbol,
std::map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>>())
.first;
// First, try to see if we can use label+property index. If not, use just
// the label index (which ought to exist).
GraphDbTypes::Label best_label;
std::pair<GraphDbTypes::Property, Filters::PropertyFilter> best_property;
if (FindBestLabelPropertyIndex(labels, properties, node_symbol,
context.bound_symbols, best_label,
best_property)) {
auto found_index = FindBestLabelPropertyIndex(
labels, properties, node_symbol, match_ctx.bound_symbols, best_label,
best_property);
if (found_index.first &&
// Use label+property index if we satisfy max_vertex_count.
(!max_vertex_count || *max_vertex_count >= found_index.second)) {
const auto &prop_filter = best_property.second;
if (prop_filter.lower_bound || prop_filter.upper_bound) {
return new ScanAllByLabelPropertyRange(
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
best_property.first, prop_filter.lower_bound,
prop_filter.upper_bound, context.graph_view);
prop_filter.upper_bound, match_ctx.graph_view);
} else {
debug_assert(
prop_filter.expression,
"Property filter should either have bounds or an expression.");
return new ScanAllByLabelPropertyValue(
std::shared_ptr<LogicalOperator>(last_op), node_symbol, best_label,
best_property.first, prop_filter.expression, context.graph_view);
best_property.first, prop_filter.expression, match_ctx.graph_view);
}
}
auto label = FindBestLabelIndex(labels);
if (max_vertex_count &&
context_.db.VerticesCount(label) > *max_vertex_count) {
// Don't create an indexed lookup, since we have more labeled vertices
// than the allowed count.
return nullptr;
}
return new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
node_symbol, label, context.graph_view);
node_symbol, label, match_ctx.graph_view);
}
LogicalOperator *PlanMatching(const Matching &matching,
LogicalOperator *input_op,
MatchContext &match_context) {
LogicalOperator *PlanMatching(MatchContext &match_context,
LogicalOperator *input_op) {
auto &bound_symbols = match_context.bound_symbols;
auto &storage = context_.ast_storage;
const auto &symbol_table = match_context.symbol_table;
const auto &matching = match_context.matching;
// Copy all_filters, because we will modify the list as we generate Filters.
auto all_filters = matching.filters.all_filters();
// Try to generate any filters even before the 1st match operator. This
@ -434,22 +465,15 @@ class RuleBasedPlanner {
const auto &node1_symbol = symbol_table.at(*expansion.node1->identifier_);
if (impl::BindSymbol(bound_symbols, node1_symbol)) {
// We have just bound this symbol, so generate ScanAll which fills it.
auto labels = FindOr(matching.filters.label_filters(), node1_symbol,
std::set<GraphDbTypes::Label>())
.first;
if (labels.empty()) {
// Without labels, we can only generate ScanAll of everything.
if (auto *indexed_scan =
GenScanByIndex(last_op, node1_symbol, match_context)) {
// First, try to get an indexed scan.
last_op = indexed_scan;
} else {
// If indexed scan is not possible, we can only generate ScanAll of
// everything.
last_op = new ScanAll(std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, match_context.graph_view);
} else {
// With labels, we can scan indexed data.
auto properties =
FindOr(matching.filters.property_filters(), node1_symbol,
std::map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>>())
.first;
last_op = GenScanByIndex(last_op, node1_symbol, match_context, labels,
properties);
}
match_context.new_symbols.emplace_back(node1_symbol);
last_op =
@ -497,6 +521,22 @@ class RuleBasedPlanner {
existing_node, existing_edge, match_context.graph_view,
filter_expr);
} else {
if (!existing_node) {
// Try to get better behaviour by creating an indexed scan and then
// expanding into existing, instead of letting the Expand iterate
// over all the edges.
// Currently, just use the maximum vertex count flag, below which we
// want to replace Expand with index ScanAll + Expand into existing.
// It would be better to somehow test whether the input vertex
// degree is larger than the destination vertex index count.
auto *indexed_scan =
GenScanByIndex(last_op, node_symbol, match_context,
FLAGS_query_vertex_count_to_expand_existing);
if (indexed_scan) {
last_op = indexed_scan;
existing_node = true;
}
}
last_op = new Expand(node_symbol, edge_symbol, expansion.direction,
std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, existing_node, existing_edge,
@ -537,9 +577,9 @@ class RuleBasedPlanner {
// Copy the bound symbol set, because we don't want to use the updated
// version when generating the create part.
std::unordered_set<Symbol> bound_symbols_copy(context_.bound_symbols);
MatchContext match_ctx{context_.symbol_table, bound_symbols_copy,
MatchContext match_ctx{matching, context_.symbol_table, bound_symbols_copy,
GraphView::NEW};
auto on_match = PlanMatching(matching, nullptr, match_ctx);
auto on_match = PlanMatching(match_ctx, nullptr);
// Use the original bound_symbols, so we fill it with new symbols.
auto on_create = impl::GenCreateForPattern(*merge.pattern_, nullptr,
context_.symbol_table,

View File

@ -1307,4 +1307,52 @@ TEST(TestLogicalPlanner, MatchBreadthFirst) {
ExpectProduce());
}
// Checks that when the destination node of an expansion carries a label, the
// planner emits an indexed scan for that node before the Expand instead of
// letting the Expand alone produce it.
TEST(TestLogicalPlanner, MatchDoubleScanToExpandExisting) {
// Test MATCH (n) -[r]- (m :label) RETURN r
Dbms dbms;
auto dba = dbms.active();
auto label = dba->Label("label");
// No vertices are inserted in this test; the accessor is simply re-acquired
// before planning.
dba = dbms.active();
AstTreeStorage storage;
QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m", label))), RETURN("r"));
auto symbol_table = MakeSymbolTable(*storage.query());
auto plan = MakeLogicalPlan<RuleBasedPlanner>(storage, symbol_table, *dba);
// We expect two scans (a plain ScanAll for `n` and a ScanAllByLabel for `m`)
// and then Expand, since we are guessing that is faster (due to low label
// index vertex count).
CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectScanAllByLabel(),
ExpectExpand(), ExpectFilter(), ExpectProduce());
}
// Checks that when the indexed vertex count for the destination node exceeds
// FLAGS_query_vertex_count_to_expand_existing, the planner does not add an
// indexed scan for it and keeps the regular Expand.
TEST(TestLogicalPlanner, MatchScanToExpand) {
// Test MATCH (n) -[r]- (m :label {property: 1}) RETURN r
Dbms dbms;
auto dba = dbms.active();
auto label = dba->Label("label");
auto property = dba->Property("property");
// Build the label+property index that the planner would consider for `m`.
dba->BuildIndex(label, property);
dba = dbms.active();
// Fill vertices to the max.
for (int64_t i = 0; i < FLAGS_query_vertex_count_to_expand_existing; ++i) {
auto vertex = dba->InsertVertex();
vertex.PropsSet(property, 1);
vertex.add_label(label);
}
// Add one more above the max.
auto vertex = dba->InsertVertex();
vertex.add_label(label);
vertex.PropsSet(property, 1);
// Commit the inserted vertices and re-acquire the accessor before planning.
dba->Commit();
dba = dbms.active();
AstTreeStorage storage;
auto node_m = NODE("m", label);
node_m->properties_[std::make_pair("property", property)] = LITERAL(1);
QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), node_m)), RETURN("r"));
auto symbol_table = MakeSymbolTable(*storage.query());
auto plan = MakeLogicalPlan<RuleBasedPlanner>(storage, symbol_table, *dba);
// We expect a single plain ScanAll (for `n`) and then a regular Expand, with
// no indexed scan for `m`, since we are guessing that is faster (the indexed
// vertex count is one above the allowed maximum).
CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectExpand(),
ExpectFilter(), ExpectProduce());
}
} // namespace