Optimize IN list execution
Summary: Use Unwind + ScanAllByLabelPropertyValue logical operator to accelerate the execution of queries like the following one: `MATCH (n:Label) WHERE n.property IN [] ...` Reviewers: llugovic, mferencevic Reviewed By: llugovic, mferencevic Subscribers: llugovic, pullbot Differential Revision: https://phabricator.memgraph.io/D2758
This commit is contained in:
parent
e04b96a0ac
commit
f35225f26b
@ -23,6 +23,22 @@ class SymbolTable final {
|
||||
return got.first->second;
|
||||
}
|
||||
|
||||
// TODO(buda): This is the same logic as in the cypher_main_visitor. During
|
||||
// parsing phase symbol table doesn't exist. Figure out a better solution.
|
||||
const Symbol &CreateAnonymousSymbol(Symbol::Type type = Symbol::Type::ANY) {
|
||||
int id = 1;
|
||||
while (true) {
|
||||
static const std::string &kAnonPrefix = "anon";
|
||||
std::string name_candidate = kAnonPrefix + std::to_string(id++);
|
||||
if (std::find_if(std::begin(table_), std::end(table_),
|
||||
[&name_candidate](const auto &item) -> bool {
|
||||
return item.second.name_ == name_candidate;
|
||||
}) == std::end(table_)) {
|
||||
return CreateSymbol(name_candidate, false, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Symbol &at(const Identifier &ident) const {
|
||||
return table_.at(ident.symbol_pos_);
|
||||
}
|
||||
|
@ -391,6 +391,9 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const {
|
||||
mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
|
||||
}
|
||||
|
||||
// TODO(buda): Implement ScanAllByLabelProperty operator to iterate over
|
||||
// vertices that have the label and some value for the given property.
|
||||
|
||||
ScanAllByLabelPropertyRange::ScanAllByLabelPropertyRange(
|
||||
const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol,
|
||||
storage::LabelId label, storage::PropertyId property,
|
||||
|
@ -34,7 +34,7 @@ class PostProcessor final {
|
||||
template <class TPlanningContext>
|
||||
std::unique_ptr<LogicalOperator> Rewrite(
|
||||
std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
|
||||
return RewriteWithIndexLookup(std::move(plan), *context->symbol_table,
|
||||
return RewriteWithIndexLookup(std::move(plan), context->symbol_table,
|
||||
context->ast_storage, context->db);
|
||||
}
|
||||
|
||||
|
@ -429,6 +429,22 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
all_filters_.emplace_back(filter);
|
||||
return true;
|
||||
};
|
||||
// Checks if maybe_lookup is a property lookup, stores it as a
|
||||
// PropertyFilter and returns true. If it isn't, returns false.
|
||||
auto add_prop_in_list = [&](auto *maybe_lookup, auto *val_expr) -> bool {
|
||||
if (!utils::Downcast<ListLiteral>(val_expr)) return false;
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
|
||||
auto filter = make_filter(FilterInfo::Type::Property);
|
||||
filter.property_filter = PropertyFilter(
|
||||
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
|
||||
val_expr, PropertyFilter::Type::IN);
|
||||
all_filters_.emplace_back(filter);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
// We are only interested to see the insides of And, because Or prevents
|
||||
// indexing since any labels and properties found there may be optional.
|
||||
DCHECK(!utils::IsSubtype(*expr, AndOperator::kType))
|
||||
@ -491,6 +507,14 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
Bound::Type::INCLUSIVE)) {
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
} else if (auto *in = utils::Downcast<InListOperator>(expr)) {
|
||||
// IN isn't equivalent to Equal because IN isn't a symmetric operator. The
|
||||
// IN filter is captured here only if the property lookup occurs on the
|
||||
// left side of the operator. In that case, it's valid to do the IN list
|
||||
// optimization during the index lookup rewrite phase.
|
||||
if (!add_prop_in_list(in->expression1_, in->expression2_)) {
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
} else {
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
|
@ -81,8 +81,8 @@ class PropertyFilter {
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
/// Depending on type, this PropertyFilter may be a value equality, regex
|
||||
/// matched value or a range with lower and (or) upper bounds.
|
||||
enum class Type { EQUAL, REGEX_MATCH, RANGE };
|
||||
/// matched value or a range with lower and (or) upper bounds, IN list filter.
|
||||
enum class Type { EQUAL, REGEX_MATCH, RANGE, IN };
|
||||
|
||||
/// Construct with Expression being the equality or regex match check.
|
||||
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *,
|
||||
|
@ -31,7 +31,7 @@ Expression *RemoveAndExpressions(
|
||||
template <class TDbAccessor>
|
||||
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
public:
|
||||
IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage,
|
||||
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage,
|
||||
TDbAccessor *db)
|
||||
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
|
||||
|
||||
@ -423,7 +423,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
std::shared_ptr<LogicalOperator> new_root_;
|
||||
|
||||
private:
|
||||
const SymbolTable *symbol_table_;
|
||||
SymbolTable *symbol_table_;
|
||||
AstStorage *ast_storage_;
|
||||
TDbAccessor *db_;
|
||||
// Collected filters, pending for examination if they can be used for advanced
|
||||
@ -614,6 +614,18 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
input, node_symbol, GetLabel(found_index->label),
|
||||
GetProperty(prop_filter.property_), prop_filter.property_.name,
|
||||
std::make_optional(lower_bound), std::nullopt, view);
|
||||
} else if (prop_filter.type_ == PropertyFilter::Type::IN) {
|
||||
// TODO(buda): ScanAllByLabelProperty + Filter should be considered
|
||||
// here once the operator and the right cardinality estimation exist.
|
||||
auto const &symbol = symbol_table_->CreateAnonymousSymbol();
|
||||
auto *expression = ast_storage_->Create<Identifier>(symbol.name_);
|
||||
expression->MapTo(symbol);
|
||||
auto unwind_operator =
|
||||
std::make_unique<Unwind>(input, prop_filter.value_, symbol);
|
||||
return std::make_unique<ScanAllByLabelPropertyValue>(
|
||||
std::move(unwind_operator), node_symbol,
|
||||
GetLabel(found_index->label), GetProperty(prop_filter.property_),
|
||||
prop_filter.property_.name, expression, view);
|
||||
} else {
|
||||
CHECK(prop_filter.value_) << "Property filter should either have "
|
||||
"bounds or a value expression.";
|
||||
@ -645,9 +657,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
template <class TDbAccessor>
|
||||
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(
|
||||
std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table,
|
||||
std::unique_ptr<LogicalOperator> root_op, SymbolTable *symbol_table,
|
||||
AstStorage *ast_storage, TDbAccessor *db) {
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, ast_storage,
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage,
|
||||
db);
|
||||
root_op->Accept(rewriter);
|
||||
if (rewriter.new_root_) {
|
||||
|
@ -1589,4 +1589,89 @@ TYPED_TEST(TestPlanner, BfsToExisting) {
|
||||
ExpectExpandBfs(), ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, LabelPropertyInListValidOptimization) {
|
||||
// Test MATCH (n:label) WHERE n.property IN ['a'] RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto label = dba.Label("label");
|
||||
auto property = PROPERTY_PAIR("property");
|
||||
auto *lit_list_a = LIST(LITERAL('a'));
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_list_a)), RETURN("n")));
|
||||
{
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
|
||||
ExpectProduce());
|
||||
}
|
||||
{
|
||||
dba.SetIndexCount(label, 1);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
{
|
||||
dba.SetIndexCount(label, property.second, 1);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectUnwind(),
|
||||
ExpectScanAllByLabelPropertyValue(label, property, lit_list_a),
|
||||
ExpectProduce());
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner,
|
||||
LabelPropertyInListWhereLabelPropertyOnLeftNotListOnRight) {
|
||||
// Test MATCH (n:label) WHERE n.property IN 'a' RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto label = dba.Label("label");
|
||||
auto property = PROPERTY_PAIR("property");
|
||||
auto *lit_a = LITERAL('a');
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_a)), RETURN("n")));
|
||||
{
|
||||
dba.SetIndexCount(label, property.second, 1);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
|
||||
ExpectProduce());
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, LabelPropertyInListWhereLabelPropertyOnRight) {
|
||||
// Test MATCH (n:label) WHERE ['a'] IN n.property RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto label = dba.Label("label");
|
||||
auto property = PROPERTY_PAIR("property");
|
||||
auto *lit_list_a = LIST(LITERAL('a'));
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(IN_LIST(lit_list_a, PROPERTY_LOOKUP("n", property))), RETURN("n")));
|
||||
{
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
|
||||
ExpectProduce());
|
||||
}
|
||||
{
|
||||
dba.SetIndexCount(label, 1);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
{
|
||||
dba.SetIndexCount(label, property.second, 1);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -1197,3 +1197,18 @@ TEST_F(TestSymbolGenerator, CallWithoutFieldsReturnAsterisk) {
|
||||
auto query = QUERY(SINGLE_QUERY(call, ret));
|
||||
EXPECT_THROW(query::MakeSymbolTable(query), SemanticException);
|
||||
}
|
||||
|
||||
TEST(TestSymbolTable, CreateAnonymousSymbols) {
|
||||
SymbolTable symbol_table;
|
||||
auto anon1 = symbol_table.CreateAnonymousSymbol();
|
||||
ASSERT_EQ(anon1.name_, "anon1");
|
||||
auto anon2 = symbol_table.CreateAnonymousSymbol();
|
||||
ASSERT_EQ(anon2.name_, "anon2");
|
||||
}
|
||||
|
||||
TEST(TestSymbolTable, CreateAnonymousSymbolWithExistingUserSymbolCalledAnon) {
|
||||
SymbolTable symbol_table;
|
||||
symbol_table.CreateSymbol("anon1", false);
|
||||
auto anon2 = symbol_table.CreateAnonymousSymbol();
|
||||
ASSERT_EQ(anon2.name_, "anon2");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user