diff --git a/src/query/plan/rule_based_planner.cpp b/src/query/plan/rule_based_planner.cpp index 7945913ca..67413f746 100644 --- a/src/query/plan/rule_based_planner.cpp +++ b/src/query/plan/rule_based_planner.cpp @@ -7,6 +7,14 @@ #include "utils/algorithm.hpp" #include "utils/exceptions.hpp" +#include "utils/flag_validation.hpp" + +DEFINE_VALIDATED_int64(query_vertex_count_to_expand_existing, 10, + "Maximum count of indexed vertices which provoke " + "indexed lookup and then expand to existing, instead of " + "a regular expand. Default is 10, to turn off use -1.", + FLAG_IN_RANGE(-1, + std::numeric_limits::max())); namespace query::plan { diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 82fb3a5b7..ae78f449c 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -1,9 +1,13 @@ /// @file #pragma once +#include "gflags/gflags.h" + #include "query/frontend/ast/ast.hpp" #include "query/plan/operator.hpp" +DECLARE_int64(query_vertex_count_to_expand_existing); + namespace query::plan { /// Normalized representation of a pattern that needs to be matched. @@ -176,6 +180,7 @@ struct PlanningContext { // Contextual information used for generating match operators. struct MatchContext { + const Matching &matching; const SymbolTable &symbol_table; // Already bound symbols, which are used to determine whether the operator // should reference them or establish new. This is both read from and written @@ -258,11 +263,13 @@ class RuleBasedPlanner { // Set to true if a query command writes to the database. 
bool is_write = false; for (const auto &query_part : query_parts) { - MatchContext match_ctx{context.symbol_table, context.bound_symbols}; - input_op = PlanMatching(query_part.matching, input_op, match_ctx); + MatchContext match_ctx{query_part.matching, context.symbol_table, + context.bound_symbols}; + input_op = PlanMatching(match_ctx, input_op); for (const auto &matching : query_part.optional_matching) { - MatchContext opt_ctx{context.symbol_table, context.bound_symbols}; - auto *match_op = PlanMatching(matching, nullptr, opt_ctx); + MatchContext opt_ctx{matching, context.symbol_table, + context.bound_symbols}; + auto *match_op = PlanMatching(opt_ctx, nullptr); if (match_op) { input_op = new Optional(std::shared_ptr(input_op), std::shared_ptr(match_op), @@ -319,10 +326,10 @@ class RuleBasedPlanner { // Finds the label-property combination which has indexed the lowest amount of // vertices. `best_label` and `best_property` will be set to that combination - // and the function will return `true`. If the index cannot be found, the - // function will return `false` while leaving `best_label` and `best_property` - // unchanged. - bool FindBestLabelPropertyIndex( + // and the function will return (`true`, vertex count in index). If the index + // cannot be found, the function will return (`false`, maximum int64_t), while + // leaving `best_label` and `best_property` unchanged. 
+ std::pair FindBestLabelPropertyIndex( const std::set &labels, const std::map> &property_filters, @@ -339,13 +346,12 @@ class RuleBasedPlanner { return true; }; bool found = false; - auto min_count = std::numeric_limits::max(); + int64_t min_count = std::numeric_limits::max(); for (const auto &label : labels) { for (const auto &prop_pair : property_filters) { const auto &property = prop_pair.first; if (context_.db.LabelPropertyIndexExists(label, property)) { - auto vertices_count = context_.db.VerticesCount(label, property); + int64_t vertices_count = context_.db.VerticesCount(label, property); if (vertices_count < min_count) { for (const auto &prop_filter : prop_pair.second) { if (prop_filter.used_symbols.find(symbol) != @@ -369,7 +375,7 @@ class RuleBasedPlanner { } } } - return found; + return {found, min_count}; } const GraphDbTypes::Label &FindBestLabelIndex( @@ -383,46 +389,71 @@ class RuleBasedPlanner { }); } - ScanAll *GenScanByIndex( - LogicalOperator *last_op, const Symbol &node_symbol, - const MatchContext &context, const std::set &labels, - const std::map> &properties) { - debug_assert(!labels.empty(), - "Without labels, indexed data cannot be scanned."); + // Creates a ScanAll by the best possible index for the `node_symbol`. Best + // index is defined as the index with the least number of vertices. If the node + // does not have at least one label, no indexed lookup can be created and + // `nullptr` is returned. The operator is chained after `last_op`. Optional + // `max_vertex_count` controls whether no operator should be created if the + // vertex count in the best index exceeds this number. In such a case, + // `nullptr` is returned and `last_op` is not chained. 
+ ScanAll *GenScanByIndex(LogicalOperator *last_op, const Symbol &node_symbol, + const MatchContext &match_ctx, + const std::experimental::optional + &max_vertex_count = std::experimental::nullopt) { + const auto labels = FindOr(match_ctx.matching.filters.label_filters(), + node_symbol, std::set()) + .first; + if (labels.empty()) { + // Without labels, we cannot generate any indexed ScanAll. + return nullptr; + } + const auto properties = + FindOr(match_ctx.matching.filters.property_filters(), node_symbol, + std::map>()) + .first; // First, try to see if we can use label+property index. If not, use just // the label index (which ought to exist). GraphDbTypes::Label best_label; std::pair best_property; - if (FindBestLabelPropertyIndex(labels, properties, node_symbol, - context.bound_symbols, best_label, - best_property)) { + auto found_index = FindBestLabelPropertyIndex( + labels, properties, node_symbol, match_ctx.bound_symbols, best_label, + best_property); + if (found_index.first && + // Use label+property index if we satisfy max_vertex_count. 
+ (!max_vertex_count || *max_vertex_count >= found_index.second)) { const auto &prop_filter = best_property.second; if (prop_filter.lower_bound || prop_filter.upper_bound) { return new ScanAllByLabelPropertyRange( std::shared_ptr(last_op), node_symbol, best_label, best_property.first, prop_filter.lower_bound, - prop_filter.upper_bound, context.graph_view); + prop_filter.upper_bound, match_ctx.graph_view); } else { debug_assert( prop_filter.expression, "Property filter should either have bounds or an expression."); return new ScanAllByLabelPropertyValue( std::shared_ptr(last_op), node_symbol, best_label, - best_property.first, prop_filter.expression, context.graph_view); + best_property.first, prop_filter.expression, match_ctx.graph_view); } } auto label = FindBestLabelIndex(labels); + if (max_vertex_count && + context_.db.VerticesCount(label) > *max_vertex_count) { + // Don't create an indexed lookup, since we have more labeled vertices + // than the allowed count. + return nullptr; + } return new ScanAllByLabel(std::shared_ptr(last_op), - node_symbol, label, context.graph_view); + node_symbol, label, match_ctx.graph_view); } - LogicalOperator *PlanMatching(const Matching &matching, - LogicalOperator *input_op, - MatchContext &match_context) { + LogicalOperator *PlanMatching(MatchContext &match_context, + LogicalOperator *input_op) { auto &bound_symbols = match_context.bound_symbols; auto &storage = context_.ast_storage; const auto &symbol_table = match_context.symbol_table; + const auto &matching = match_context.matching; // Copy all_filters, because we will modify the list as we generate Filters. auto all_filters = matching.filters.all_filters(); // Try to generate any filters even before the 1st match operator. This @@ -434,22 +465,15 @@ class RuleBasedPlanner { const auto &node1_symbol = symbol_table.at(*expansion.node1->identifier_); if (impl::BindSymbol(bound_symbols, node1_symbol)) { // We have just bound this symbol, so generate ScanAll which fills it. 
- auto labels = FindOr(matching.filters.label_filters(), node1_symbol, - std::set()) - .first; - if (labels.empty()) { - // Without labels, we can only generate ScanAll of everything. + if (auto *indexed_scan = + GenScanByIndex(last_op, node1_symbol, match_context)) { + // First, try to get an indexed scan. + last_op = indexed_scan; + } else { + // If indexed scan is not possible, we can only generate ScanAll of + // everything. last_op = new ScanAll(std::shared_ptr(last_op), node1_symbol, match_context.graph_view); - } else { - // With labels, we can scan indexed data. - auto properties = - FindOr(matching.filters.property_filters(), node1_symbol, - std::map>()) - .first; - last_op = GenScanByIndex(last_op, node1_symbol, match_context, labels, - properties); } match_context.new_symbols.emplace_back(node1_symbol); last_op = @@ -497,6 +521,22 @@ class RuleBasedPlanner { existing_node, existing_edge, match_context.graph_view, filter_expr); } else { + if (!existing_node) { + // Try to get better behaviour by creating an indexed scan and then + // expanding into existing, instead of letting the Expand iterate + // over all the edges. + // Currently, just use the maximum vertex count flag, below which we + // want to replace Expand with index ScanAll + Expand into existing. + // It would be better to somehow test whether the input vertex + // degree is larger than the destination vertex index count. + auto *indexed_scan = + GenScanByIndex(last_op, node_symbol, match_context, + FLAGS_query_vertex_count_to_expand_existing); + if (indexed_scan) { + last_op = indexed_scan; + existing_node = true; + } + } last_op = new Expand(node_symbol, edge_symbol, expansion.direction, std::shared_ptr(last_op), node1_symbol, existing_node, existing_edge, @@ -537,9 +577,9 @@ class RuleBasedPlanner { // Copy the bound symbol set, because we don't want to use the updated // version when generating the create part. 
std::unordered_set bound_symbols_copy(context_.bound_symbols); - MatchContext match_ctx{context_.symbol_table, bound_symbols_copy, + MatchContext match_ctx{matching, context_.symbol_table, bound_symbols_copy, GraphView::NEW}; - auto on_match = PlanMatching(matching, nullptr, match_ctx); + auto on_match = PlanMatching(match_ctx, nullptr); // Use the original bound_symbols, so we fill it with new symbols. auto on_create = impl::GenCreateForPattern(*merge.pattern_, nullptr, context_.symbol_table, diff --git a/tests/unit/query_planner.cpp b/tests/unit/query_planner.cpp index 598ffa516..3a689fc6f 100644 --- a/tests/unit/query_planner.cpp +++ b/tests/unit/query_planner.cpp @@ -1307,4 +1307,52 @@ TEST(TestLogicalPlanner, MatchBreadthFirst) { ExpectProduce()); } +TEST(TestLogicalPlanner, MatchDoubleScanToExpandExisting) { + // Test MATCH (n) -[r]- (m :label) RETURN r + Dbms dbms; + auto dba = dbms.active(); + auto label = dba->Label("label"); + dba = dbms.active(); + AstTreeStorage storage; + QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m", label))), RETURN("r")); + auto symbol_table = MakeSymbolTable(*storage.query()); + auto plan = MakeLogicalPlan(storage, symbol_table, *dba); + // We expect 2x ScanAll and then Expand, since we are guessing that is + // faster (due to low label index vertex count). + CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectScanAllByLabel(), + ExpectExpand(), ExpectFilter(), ExpectProduce()); +} + +TEST(TestLogicalPlanner, MatchScanToExpand) { + // Test MATCH (n) -[r]- (m :label {property: 1}) RETURN r + Dbms dbms; + auto dba = dbms.active(); + auto label = dba->Label("label"); + auto property = dba->Property("property"); + dba->BuildIndex(label, property); + dba = dbms.active(); + // Fill vertices to the max. + for (int64_t i = 0; i < FLAGS_query_vertex_count_to_expand_existing; ++i) { + auto vertex = dba->InsertVertex(); + vertex.PropsSet(property, 1); + vertex.add_label(label); + } + // Add one more above the max. 
+ auto vertex = dba->InsertVertex(); + vertex.add_label(label); + vertex.PropsSet(property, 1); + dba->Commit(); + dba = dbms.active(); + AstTreeStorage storage; + auto node_m = NODE("m", label); + node_m->properties_[std::make_pair("property", property)] = LITERAL(1); + QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), node_m)), RETURN("r")); + auto symbol_table = MakeSymbolTable(*storage.query()); + auto plan = MakeLogicalPlan(storage, symbol_table, *dba); + // We expect 1x ScanAll and then Expand, since we are guessing that is + // faster (due to high label index vertex count). + CheckPlan(*plan, symbol_table, ExpectScanAll(), ExpectExpand(), + ExpectFilter(), ExpectProduce()); +} + } // namespace