diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index 8d934cb02..91bf34354 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -35,7 +35,7 @@ class PostProcessor final { std::unique_ptr Rewrite( std::unique_ptr plan, TPlanningContext *context) { return RewriteWithIndexLookup(std::move(plan), *context->symbol_table, - context->db); + context->ast_storage, context->db); } template diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index e661a5925..b7b4c58d4 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -153,8 +153,9 @@ auto SplitExpressionOnAnd(Expression *expression) { PropertyFilter::PropertyFilter(const SymbolTable &symbol_table, const Symbol &symbol, PropertyIx property, - Expression *value) - : symbol_(symbol), property_(property), value_(value) { + Expression *value, Type type) + : symbol_(symbol), property_(property), type_(type), value_(value) { + CHECK(type != Type::RANGE); UsedSymbolsCollector collector(symbol_table); value->Accept(collector); is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol); @@ -166,6 +167,7 @@ PropertyFilter::PropertyFilter( const std::experimental::optional &upper_bound) : symbol_(symbol), property_(property), + type_(Type::RANGE), lower_bound_(lower_bound), upper_bound_(upper_bound) { UsedSymbolsCollector collector(symbol_table); @@ -288,7 +290,8 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table, collector.symbols_}; // Store a PropertyFilter on the value of the property. filter_info.property_filter.emplace(symbol_table, symbol, prop_pair.first, - prop_pair.second); + prop_pair.second, + PropertyFilter::Type::EQUAL); all_filters_.emplace_back(filter_info); } }; @@ -354,9 +357,23 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr, Identifier *ident = nullptr; if (get_property_lookup(maybe_lookup, prop_lookup, ident)) { auto filter = make_filter(FilterInfo::Type::Property); - filter.property_filter = - PropertyFilter(symbol_table, symbol_table.at(*ident), - prop_lookup->property_, val_expr); + filter.property_filter = PropertyFilter( + symbol_table, symbol_table.at(*ident), prop_lookup->property_, + val_expr, PropertyFilter::Type::EQUAL); + all_filters_.emplace_back(filter); + return true; + } + return false; + }; + // Like add_prop_equal, but for adding regex match property filter. + auto add_prop_regex_match = [&](auto *maybe_lookup, auto *val_expr) -> bool { + PropertyLookup *prop_lookup = nullptr; + Identifier *ident = nullptr; + if (get_property_lookup(maybe_lookup, prop_lookup, ident)) { + auto filter = make_filter(FilterInfo::Type::Property); + filter.property_filter = PropertyFilter( + symbol_table, symbol_table.at(*ident), prop_lookup->property_, + val_expr, PropertyFilter::Type::REGEX_MATCH); all_filters_.emplace_back(filter); return true; } @@ -422,6 +439,10 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr, // No PropertyFilter was added, so just store a generic filter. all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic)); } + } else if (auto *regex_match = utils::Downcast(expr)) { + if (!add_prop_regex_match(regex_match->string_expr_, regex_match->regex_)) { + all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic)); + } } else if (auto *gt = utils::Downcast(expr)) { if (!add_prop_greater(gt->expression1_, gt->expression2_, Bound::Type::EXCLUSIVE)) { diff --git a/src/query/plan/preprocess.hpp b/src/query/plan/preprocess.hpp index 5b852f77d..e0e0d2a46 100644 --- a/src/query/plan/preprocess.hpp +++ b/src/query/plan/preprocess.hpp @@ -80,7 +80,14 @@ class PropertyFilter { public: using Bound = ScanAllByLabelPropertyRange::Bound; - PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *); + /// Depending on type, this PropertyFilter may be a value equality, regex + /// matched value or a range with lower and (or) upper bounds. + enum class Type { EQUAL, REGEX_MATCH, RANGE }; + + /// Construct with Expression being the equality or regex match check. + PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *, + Type); + /// Construct the range based filter. PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, const std::experimental::optional &, const std::experimental::optional &); @@ -88,10 +95,11 @@ class PropertyFilter { /// Symbol whose property is looked up. Symbol symbol_; PropertyIx property_; + Type type_; /// True if the same symbol is used in expressions for value or bounds. bool is_symbol_in_value_ = false; /// Expression which when evaluated produces the value a property must - /// equal. + /// equal or regex match depending on type_. Expression *value_ = nullptr; /// Expressions which produce lower and upper bounds for a property. std::experimental::optional lower_bound_{}; diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 78b4c96e4..e55cc6e49 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -31,8 +31,9 @@ Expression *RemoveAndExpressions( template class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { public: - IndexLookupRewriter(const SymbolTable *symbol_table, TDbAccessor *db) - : symbol_table_(symbol_table), db_(db) {} + IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage, + TDbAccessor *db) + : symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {} using HierarchicalLogicalOperatorVisitor::PostVisit; using HierarchicalLogicalOperatorVisitor::PreVisit; @@ -383,6 +384,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { private: const SymbolTable *symbol_table_; + AstStorage *ast_storage_; TDbAccessor *db_; Filters filters_; std::unordered_set filter_exprs_for_removal_; @@ -410,7 +412,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { } void RewriteBranch(std::shared_ptr *branch) { - IndexLookupRewriter rewriter(symbol_table_, db_); + IndexLookupRewriter rewriter(symbol_table_, ast_storage_, db_); (*branch)->Accept(rewriter); if (rewriter.new_root_) { *branch = rewriter.new_root_; @@ -448,24 +450,37 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { std::experimental::optional found; for (const auto &label : filters_.FilteredLabels(symbol)) { for (const auto &filter : filters_.PropertyFilters(symbol)) { + if (filter.property_filter->is_symbol_in_value_ || + !are_bound(filter.used_symbols)) { + // Skip filter expressions which use the symbol whose property we are + // looking up or aren't bound. We cannot scan by such expressions. For + // example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we + // cannot scan `n` by property index. + continue; + } const auto &property = filter.property_filter->property_; - if (db_->LabelPropertyIndexExists(GetLabel(label), - GetProperty(property))) { - int64_t vertex_count = - db_->VerticesCount(GetLabel(label), GetProperty(property)); - if (!found || vertex_count < found->vertex_count) { - if (filter.property_filter->is_symbol_in_value_) { - // Skip filter expressions which use the symbol whose property - // we are looking up. We cannot scan by such expressions. For - // example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, - // so we cannot scan `n` by property index. - continue; - } - if (are_bound(filter.used_symbols)) { - // Take the property filter which uses bound symbols. - found = LabelPropertyIndex{label, filter, vertex_count}; - } - } + if (!db_->LabelPropertyIndexExists(GetLabel(label), + GetProperty(property))) { + continue; + } + int64_t vertex_count = + db_->VerticesCount(GetLabel(label), GetProperty(property)); + auto is_better_type = [&found](PropertyFilter::Type type) { + // Order the types by the most preferred index lookup type. + static const PropertyFilter::Type kFilterTypeOrder[] = { + PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, + PropertyFilter::Type::REGEX_MATCH}; + auto *found_sort_ix = + std::find(kFilterTypeOrder, kFilterTypeOrder + 3, + found->filter.property_filter->type_); + auto *type_sort_ix = + std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type); + return type_sort_ix < found_sort_ix; + }; + if (!found || vertex_count < found->vertex_count || + (vertex_count == found->vertex_count && + is_better_type(filter.property_filter->type_))) { + found = LabelPropertyIndex{label, filter, vertex_count}; } } } @@ -502,7 +517,12 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { (!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) { // Copy the property filter and then erase it from filters. const auto prop_filter = *found_index->filter.property_filter; - filter_exprs_for_removal_.insert(found_index->filter.expression); + if (prop_filter.type_ != PropertyFilter::Type::REGEX_MATCH) { + // Remove the original expression from Filter operation only if it's not + // a regex match. In such a case we need to perform the matching even + // after we've scanned the index. + filter_exprs_for_removal_.insert(found_index->filter.expression); + } filters_.EraseFilter(found_index->filter); std::vector removed_expressions; filters_.EraseLabelFilter(node_symbol, found_index->label, @@ -514,6 +534,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { input, node_symbol, GetLabel(found_index->label), GetProperty(prop_filter.property_), prop_filter.property_.name, prop_filter.lower_bound_, prop_filter.upper_bound_, graph_view); + } else if (prop_filter.type_ == PropertyFilter::Type::REGEX_MATCH) { + // Generate index scan using the empty string as a lower bound. + Expression *empty_string = ast_storage_->Create(""); + auto lower_bound = utils::MakeBoundInclusive(empty_string); + return std::make_unique( + input, node_symbol, GetLabel(found_index->label), + GetProperty(prop_filter.property_), prop_filter.property_.name, + std::experimental::make_optional(lower_bound), + std::experimental::nullopt, graph_view); } else { CHECK(prop_filter.value_) << "Property filter should either have " "bounds or a value expression."; @@ -544,8 +573,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { template std::unique_ptr RewriteWithIndexLookup( std::unique_ptr root_op, const SymbolTable &symbol_table, - TDbAccessor *db) { - impl::IndexLookupRewriter rewriter(&symbol_table, db); + AstStorage *ast_storage, TDbAccessor *db) { + impl::IndexLookupRewriter rewriter(&symbol_table, ast_storage, + db); root_op->Accept(rewriter); if (rewriter.new_root_) { // This shouldn't happen in real use case, because IndexLookupRewriter diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp index 8963c2cc9..5405a4316 100644 --- a/tests/unit/query_plan.cpp +++ b/tests/unit/query_plan.cpp @@ -1105,6 +1105,26 @@ TYPED_TEST(TestPlanner, WhereIndexedLabelPropertyRange) { } } +TYPED_TEST(TestPlanner, WherePreferEqualityIndexOverRange) { + // Test MATCH (n :label) WHERE n.property = 42 AND n.property > 0 RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto label = dba.Label("label"); + auto property = PROPERTY_PAIR("property"); + dba.SetIndexCount(label, property.second, 0); + auto lit_42 = LITERAL(42); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(AND(EQ(PROPERTY_LOOKUP("n", property), lit_42), + GREATER(PROPERTY_LOOKUP("n", property), LITERAL(0)))), + RETURN("n"))); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, + ExpectScanAllByLabelPropertyValue(label, property, lit_42), + ExpectFilter(), ExpectProduce()); +} + TYPED_TEST(TestPlanner, UnableToUsePropertyIndex) { // Test MATCH (n: label) WHERE n.property = n.property RETURN n FakeDbAccessor dba; @@ -1350,4 +1370,104 @@ TYPED_TEST(TestPlanner, ReturnAsteriskOmitsLambdaSymbols) { } } +TYPED_TEST(TestPlanner, FilterRegexMatchIndex) { + // Test MATCH (n :label) WHERE n.prop =~ "regex" RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto prop = dba.Property("prop"); + auto label = dba.Label("label"); + dba.SetIndexCount(label, 0); + dba.SetIndexCount(label, prop, 0); + auto *regex_match = storage.Create( + PROPERTY_LOOKUP("n", prop), LITERAL("regex")); + auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))), + WHERE(regex_match), RETURN("n"))); + // We expect that we use index by property range where lower bound is an empty + // string. Filter must still remain in place, because we don't have regex + // based index. + Bound lower_bound(LITERAL(""), Bound::Type::INCLUSIVE); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, + ExpectScanAllByLabelPropertyRange(label, prop, lower_bound, + std::experimental::nullopt), + ExpectFilter(), ExpectProduce()); +} + +TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex) { + // Test MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop = 42 RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto prop = PROPERTY_PAIR("prop"); + auto label = dba.Label("label"); + dba.SetIndexCount(label, 0); + dba.SetIndexCount(label, prop.second, 0); + auto *regex_match = storage.Create( + PROPERTY_LOOKUP("n", prop), LITERAL("regex")); + auto *lit_42 = LITERAL(42); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(AND(regex_match, EQ(PROPERTY_LOOKUP("n", prop), lit_42))), + RETURN("n"))); + // We expect that we use index by property value equal to 42, because that's + // much better than property range for regex matching. + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, + ExpectScanAllByLabelPropertyValue(label, prop, lit_42), + ExpectFilter(), ExpectProduce()); +} + +TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex2) { + // Test MATCH (n :label) + // WHERE n.prop =~ "regex" AND n.prop = 42 AND n.prop > 0 RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto prop = PROPERTY_PAIR("prop"); + auto label = dba.Label("label"); + dba.SetIndexCount(label, 0); + dba.SetIndexCount(label, prop.second, 0); + auto *regex_match = storage.Create( + PROPERTY_LOOKUP("n", prop), LITERAL("regex")); + auto *lit_42 = LITERAL(42); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(AND(AND(regex_match, EQ(PROPERTY_LOOKUP("n", prop), lit_42)), + GREATER(PROPERTY_LOOKUP("n", prop), LITERAL(0)))), + RETURN("n"))); + // We expect that we use index by property value equal to 42, because that's + // much better than property range. + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, + ExpectScanAllByLabelPropertyValue(label, prop, lit_42), + ExpectFilter(), ExpectProduce()); +} + +TYPED_TEST(TestPlanner, FilterRegexMatchPreferRangeIndex) { + // Test MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop > 42 RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto prop = dba.Property("prop"); + auto label = dba.Label("label"); + dba.SetIndexCount(label, 0); + dba.SetIndexCount(label, prop, 0); + auto *regex_match = storage.Create( + PROPERTY_LOOKUP("n", prop), LITERAL("regex")); + auto *lit_42 = LITERAL(42); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(AND(regex_match, GREATER(PROPERTY_LOOKUP("n", prop), lit_42))), + RETURN("n"))); + // We expect that we use index by property range on a concrete value (42), as + // it is much better than using a range from empty string for regex matching. + Bound lower_bound(lit_42, Bound::Type::EXCLUSIVE); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, + ExpectScanAllByLabelPropertyRange(label, prop, lower_bound, + std::experimental::nullopt), + ExpectFilter(), ExpectProduce()); +} + } // namespace diff --git a/tests/unit/query_plan_checker.hpp b/tests/unit/query_plan_checker.hpp index ecb228efd..df3a450ce 100644 --- a/tests/unit/query_plan_checker.hpp +++ b/tests/unit/query_plan_checker.hpp @@ -108,7 +108,8 @@ class OpChecker : public BaseOpChecker { public: void CheckOp(LogicalOperator &op, const SymbolTable &symbol_table) override { auto *expected_op = dynamic_cast(&op); - ASSERT_TRUE(expected_op); + ASSERT_TRUE(expected_op) << "op is '" << op.GetTypeInfo().name + << "' expected '" << TOp::kType.name << "'!"; ExpectOp(*expected_op, symbol_table); }