diff --git a/src/query/frontend/semantic/symbol_table.hpp b/src/query/frontend/semantic/symbol_table.hpp index 263f0d1b5..5b70c1ccf 100644 --- a/src/query/frontend/semantic/symbol_table.hpp +++ b/src/query/frontend/semantic/symbol_table.hpp @@ -23,6 +23,22 @@ class SymbolTable final { return got.first->second; } + // TODO(buda): This is the same logic as in the cypher_main_visitor. During + // parsing phase symbol table doesn't exist. Figure out a better solution. + const Symbol &CreateAnonymousSymbol(Symbol::Type type = Symbol::Type::ANY) { + int id = 1; + while (true) { + static const std::string &kAnonPrefix = "anon"; + std::string name_candidate = kAnonPrefix + std::to_string(id++); + if (std::find_if(std::begin(table_), std::end(table_), + [&name_candidate](const auto &item) -> bool { + return item.second.name_ == name_candidate; + }) == std::end(table_)) { + return CreateSymbol(name_candidate, false, type); + } + } + } + const Symbol &at(const Identifier &ident) const { return table_.at(ident.symbol_pos_); } diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index eaa13554a..74d2d8845 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -391,6 +391,9 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const { mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices)); } +// TODO(buda): Implement ScanAllByLabelProperty operator to iterate over +// vertices that have the label and some value for the given property. + ScanAllByLabelPropertyRange::ScanAllByLabelPropertyRange( const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol, storage::LabelId label, storage::PropertyId property, diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index 561c3d74a..f02161b08 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -34,7 +34,7 @@ class PostProcessor final { template <class TPlanningContext> std::unique_ptr<LogicalOperator> Rewrite( std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) { - return RewriteWithIndexLookup(std::move(plan), *context->symbol_table, + return RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db); } diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index 6c1520ab3..cc6cbf662 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -429,6 +429,22 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr, all_filters_.emplace_back(filter); return true; }; + // Checks if maybe_lookup is a property lookup, stores it as a + // PropertyFilter and returns true. If it isn't, returns false. + auto add_prop_in_list = [&](auto *maybe_lookup, auto *val_expr) -> bool { + if (!utils::Downcast<ListLiteral>(val_expr)) return false; + PropertyLookup *prop_lookup = nullptr; + Identifier *ident = nullptr; + if (get_property_lookup(maybe_lookup, prop_lookup, ident)) { + auto filter = make_filter(FilterInfo::Type::Property); + filter.property_filter = PropertyFilter( + symbol_table, symbol_table.at(*ident), prop_lookup->property_, + val_expr, PropertyFilter::Type::IN); + all_filters_.emplace_back(filter); + return true; + } + return false; + }; // We are only interested to see the insides of And, because Or prevents // indexing since any labels and properties found there may be optional. DCHECK(!utils::IsSubtype(*expr, AndOperator::kType)) @@ -491,6 +507,14 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr, Bound::Type::INCLUSIVE)) { all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic)); } + } else if (auto *in = utils::Downcast<InListOperator>(expr)) { + // IN isn't equivalent to Equal because IN isn't a symmetric operator. The + // IN filter is captured here only if the property lookup occurs on the + // left side of the operator. In that case, it's valid to do the IN list + // optimization during the index lookup rewrite phase. + if (!add_prop_in_list(in->expression1_, in->expression2_)) { + all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic)); + } } else { all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic)); } diff --git a/src/query/plan/preprocess.hpp b/src/query/plan/preprocess.hpp index 4c298693b..51a4d9eab 100644 --- a/src/query/plan/preprocess.hpp +++ b/src/query/plan/preprocess.hpp @@ -81,8 +81,8 @@ class PropertyFilter { using Bound = ScanAllByLabelPropertyRange::Bound; /// Depending on type, this PropertyFilter may be a value equality, regex - /// matched value or a range with lower and (or) upper bounds. - enum class Type { EQUAL, REGEX_MATCH, RANGE }; + /// matched value or a range with lower and (or) upper bounds, IN list filter. + enum class Type { EQUAL, REGEX_MATCH, RANGE, IN }; /// Construct with Expression being the equality or regex match check. PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *, diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 2debbd0ce..61b68379c 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -31,7 +31,7 @@ Expression *RemoveAndExpressions( template <class TDbAccessor> class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { public: - IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage, + IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db) : symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {} @@ -423,7 +423,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { std::shared_ptr<LogicalOperator> new_root_; private: - const SymbolTable *symbol_table_; + SymbolTable *symbol_table_; AstStorage *ast_storage_; TDbAccessor *db_; // Collected filters, pending for examination if they can be used for advanced @@ -614,6 +614,18 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { input, node_symbol, GetLabel(found_index->label), GetProperty(prop_filter.property_), prop_filter.property_.name, std::make_optional(lower_bound), std::nullopt, view); + } else if (prop_filter.type_ == PropertyFilter::Type::IN) { + // TODO(buda): ScanAllByLabelProperty + Filter should be considered + // here once the operator and the right cardinality estimation exist. + auto const &symbol = symbol_table_->CreateAnonymousSymbol(); + auto *expression = ast_storage_->Create<Identifier>(symbol.name_); + expression->MapTo(symbol); + auto unwind_operator = + std::make_unique<Unwind>(input, prop_filter.value_, symbol); + return std::make_unique<ScanAllByLabelPropertyValue>( + std::move(unwind_operator), node_symbol, + GetLabel(found_index->label), GetProperty(prop_filter.property_), + prop_filter.property_.name, expression, view); } else { CHECK(prop_filter.value_) << "Property filter should either have " "bounds or a value expression."; @@ -645,9 +657,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { template <class TDbAccessor> std::unique_ptr<LogicalOperator> RewriteWithIndexLookup( - std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table, + std::unique_ptr<LogicalOperator> root_op, SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db) { - impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, ast_storage, + impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db); root_op->Accept(rewriter); if (rewriter.new_root_) { diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp index c7d964c00..0e2e42139 100644 --- a/tests/unit/query_plan.cpp +++ b/tests/unit/query_plan.cpp @@ -1589,4 +1589,89 @@ TYPED_TEST(TestPlanner, BfsToExisting) { ExpectExpandBfs(), ExpectProduce()); } +TYPED_TEST(TestPlanner, LabelPropertyInListValidOptimization) { + // Test MATCH (n:label) WHERE n.property IN ['a'] RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto label = dba.Label("label"); + auto property = PROPERTY_PAIR("property"); + auto *lit_list_a = LIST(LITERAL('a')); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_list_a)), RETURN("n"))); + { + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(), + ExpectProduce()); + } + { + dba.SetIndexCount(label, 1); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(), + ExpectFilter(), ExpectProduce()); + } + { + dba.SetIndexCount(label, property.second, 1); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectUnwind(), + ExpectScanAllByLabelPropertyValue(label, property, lit_list_a), + ExpectProduce()); + } +} + +TYPED_TEST(TestPlanner, + LabelPropertyInListWhereLabelPropertyOnLeftNotListOnRight) { + // Test MATCH (n:label) WHERE n.property IN 'a' RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto label = dba.Label("label"); + auto property = PROPERTY_PAIR("property"); + auto *lit_a = LITERAL('a'); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_a)), RETURN("n"))); + { + dba.SetIndexCount(label, property.second, 1); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(), + ExpectProduce()); + } +} + +TYPED_TEST(TestPlanner, LabelPropertyInListWhereLabelPropertyOnRight) { + // Test MATCH (n:label) WHERE ['a'] IN n.property RETURN n + AstStorage storage; + FakeDbAccessor dba; + auto label = dba.Label("label"); + auto property = PROPERTY_PAIR("property"); + auto *lit_list_a = LIST(LITERAL('a')); + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n", "label"))), + WHERE(IN_LIST(lit_list_a, PROPERTY_LOOKUP("n", property))), RETURN("n"))); + { + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(), + ExpectProduce()); + } + { + dba.SetIndexCount(label, 1); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(), + ExpectFilter(), ExpectProduce()); + } + { + dba.SetIndexCount(label, property.second, 1); + auto symbol_table = query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(), + ExpectFilter(), ExpectProduce()); + } +} + } // namespace diff --git a/tests/unit/query_semantic.cpp b/tests/unit/query_semantic.cpp index 91518d8f6..5b1f11982 100644 --- a/tests/unit/query_semantic.cpp +++ b/tests/unit/query_semantic.cpp @@ -1197,3 +1197,18 @@ TEST_F(TestSymbolGenerator, CallWithoutFieldsReturnAsterisk) { auto query = QUERY(SINGLE_QUERY(call, ret)); EXPECT_THROW(query::MakeSymbolTable(query), SemanticException); } + +TEST(TestSymbolTable, CreateAnonymousSymbols) { + SymbolTable symbol_table; + auto anon1 = symbol_table.CreateAnonymousSymbol(); + ASSERT_EQ(anon1.name_, "anon1"); + auto anon2 = symbol_table.CreateAnonymousSymbol(); + ASSERT_EQ(anon2.name_, "anon2"); +} + +TEST(TestSymbolTable, CreateAnonymousSymbolWithExistingUserSymbolCalledAnon) { + SymbolTable symbol_table; + symbol_table.CreateSymbol("anon1", false); + auto anon2 = symbol_table.CreateAnonymousSymbol(); + ASSERT_EQ(anon2.name_, "anon2"); +}