Optimize IN list execution

Summary:
Use Unwind + ScanAllByLabelPropertyValue logical operator to
accelerate the execution of queries like the following one:
`MATCH (n:Label) WHERE n.property IN [] ...`

Reviewers: llugovic, mferencevic

Reviewed By: llugovic, mferencevic

Subscribers: llugovic, pullbot

Differential Revision: https://phabricator.memgraph.io/D2758
This commit is contained in:
Marko Budiselic 2020-04-20 19:58:11 +02:00
parent e04b96a0ac
commit f35225f26b
8 changed files with 162 additions and 7 deletions

View File

@ -23,6 +23,22 @@ class SymbolTable final {
return got.first->second;
}
// TODO(buda): This is the same logic as in the cypher_main_visitor. During
// parsing phase symbol table doesn't exist. Figure out a better solution.
const Symbol &CreateAnonymousSymbol(Symbol::Type type = Symbol::Type::ANY) {
int id = 1;
while (true) {
static const std::string &kAnonPrefix = "anon";
std::string name_candidate = kAnonPrefix + std::to_string(id++);
if (std::find_if(std::begin(table_), std::end(table_),
[&name_candidate](const auto &item) -> bool {
return item.second.name_ == name_candidate;
}) == std::end(table_)) {
return CreateSymbol(name_candidate, false, type);
}
}
}
const Symbol &at(const Identifier &ident) const {
return table_.at(ident.symbol_pos_);
}

View File

@ -391,6 +391,9 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const {
mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
}
// TODO(buda): Implement ScanAllByLabelProperty operator to iterate over
// vertices that have the label and some value for the given property.
ScanAllByLabelPropertyRange::ScanAllByLabelPropertyRange(
const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol,
storage::LabelId label, storage::PropertyId property,

View File

@ -34,7 +34,7 @@ class PostProcessor final {
template <class TPlanningContext>
std::unique_ptr<LogicalOperator> Rewrite(
std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
return RewriteWithIndexLookup(std::move(plan), *context->symbol_table,
return RewriteWithIndexLookup(std::move(plan), context->symbol_table,
context->ast_storage, context->db);
}

View File

@ -429,6 +429,22 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
all_filters_.emplace_back(filter);
return true;
};
// Checks if maybe_lookup is a property lookup, stores it as a
// PropertyFilter and returns true. If it isn't, returns false.
auto add_prop_in_list = [&](auto *maybe_lookup, auto *val_expr) -> bool {
if (!utils::Downcast<ListLiteral>(val_expr)) return false;
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter = PropertyFilter(
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
val_expr, PropertyFilter::Type::IN);
all_filters_.emplace_back(filter);
return true;
}
return false;
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
DCHECK(!utils::IsSubtype(*expr, AndOperator::kType))
@ -491,6 +507,14 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
Bound::Type::INCLUSIVE)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *in = utils::Downcast<InListOperator>(expr)) {
// IN isn't equivalent to Equal because IN isn't a symmetric operator. The
// IN filter is captured here only if the property lookup occurs on the
// left side of the operator. In that case, it's valid to do the IN list
// optimization during the index lookup rewrite phase.
if (!add_prop_in_list(in->expression1_, in->expression2_)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}

View File

@ -81,8 +81,8 @@ class PropertyFilter {
using Bound = ScanAllByLabelPropertyRange::Bound;
/// Depending on type, this PropertyFilter may be a value equality, regex
/// matched value or a range with lower and (or) upper bounds.
enum class Type { EQUAL, REGEX_MATCH, RANGE };
/// matched value or a range with lower and (or) upper bounds, IN list filter.
enum class Type { EQUAL, REGEX_MATCH, RANGE, IN };
/// Construct with Expression being the equality or regex match check.
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *,

View File

@ -31,7 +31,7 @@ Expression *RemoveAndExpressions(
template <class TDbAccessor>
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
public:
IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage,
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage,
TDbAccessor *db)
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
@ -423,7 +423,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
std::shared_ptr<LogicalOperator> new_root_;
private:
const SymbolTable *symbol_table_;
SymbolTable *symbol_table_;
AstStorage *ast_storage_;
TDbAccessor *db_;
// Collected filters, pending for examination if they can be used for advanced
@ -614,6 +614,18 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
std::make_optional(lower_bound), std::nullopt, view);
} else if (prop_filter.type_ == PropertyFilter::Type::IN) {
// TODO(buda): ScanAllByLabelProperty + Filter should be considered
// here once the operator and the right cardinality estimation exist.
auto const &symbol = symbol_table_->CreateAnonymousSymbol();
auto *expression = ast_storage_->Create<Identifier>(symbol.name_);
expression->MapTo(symbol);
auto unwind_operator =
std::make_unique<Unwind>(input, prop_filter.value_, symbol);
return std::make_unique<ScanAllByLabelPropertyValue>(
std::move(unwind_operator), node_symbol,
GetLabel(found_index->label), GetProperty(prop_filter.property_),
prop_filter.property_.name, expression, view);
} else {
CHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";
@ -645,9 +657,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
template <class TDbAccessor>
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(
std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table,
std::unique_ptr<LogicalOperator> root_op, SymbolTable *symbol_table,
AstStorage *ast_storage, TDbAccessor *db) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, ast_storage,
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage,
db);
root_op->Accept(rewriter);
if (rewriter.new_root_) {

View File

@ -1589,4 +1589,89 @@ TYPED_TEST(TestPlanner, BfsToExisting) {
ExpectExpandBfs(), ExpectProduce());
}
TYPED_TEST(TestPlanner, LabelPropertyInListValidOptimization) {
// Test MATCH (n:label) WHERE n.property IN ['a'] RETURN n
AstStorage storage;
FakeDbAccessor dba;
auto label = dba.Label("label");
auto property = PROPERTY_PAIR("property");
auto *lit_list_a = LIST(LITERAL('a'));
auto *query = QUERY(SINGLE_QUERY(
MATCH(PATTERN(NODE("n", "label"))),
WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_list_a)), RETURN("n")));
{
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
ExpectProduce());
}
{
dba.SetIndexCount(label, 1);
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
ExpectFilter(), ExpectProduce());
}
{
dba.SetIndexCount(label, property.second, 1);
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectUnwind(),
ExpectScanAllByLabelPropertyValue(label, property, lit_list_a),
ExpectProduce());
}
}
TYPED_TEST(TestPlanner,
LabelPropertyInListWhereLabelPropertyOnLeftNotListOnRight) {
// Test MATCH (n:label) WHERE n.property IN 'a' RETURN n
AstStorage storage;
FakeDbAccessor dba;
auto label = dba.Label("label");
auto property = PROPERTY_PAIR("property");
auto *lit_a = LITERAL('a');
auto *query = QUERY(SINGLE_QUERY(
MATCH(PATTERN(NODE("n", "label"))),
WHERE(IN_LIST(PROPERTY_LOOKUP("n", property), lit_a)), RETURN("n")));
{
dba.SetIndexCount(label, property.second, 1);
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
ExpectProduce());
}
}
TYPED_TEST(TestPlanner, LabelPropertyInListWhereLabelPropertyOnRight) {
// Test MATCH (n:label) WHERE ['a'] IN n.property RETURN n
AstStorage storage;
FakeDbAccessor dba;
auto label = dba.Label("label");
auto property = PROPERTY_PAIR("property");
auto *lit_list_a = LIST(LITERAL('a'));
auto *query = QUERY(SINGLE_QUERY(
MATCH(PATTERN(NODE("n", "label"))),
WHERE(IN_LIST(lit_list_a, PROPERTY_LOOKUP("n", property))), RETURN("n")));
{
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
ExpectProduce());
}
{
dba.SetIndexCount(label, 1);
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
ExpectFilter(), ExpectProduce());
}
{
dba.SetIndexCount(label, property.second, 1);
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
CheckPlan(planner.plan(), symbol_table, ExpectScanAllByLabel(),
ExpectFilter(), ExpectProduce());
}
}
} // namespace

View File

@ -1197,3 +1197,18 @@ TEST_F(TestSymbolGenerator, CallWithoutFieldsReturnAsterisk) {
auto query = QUERY(SINGLE_QUERY(call, ret));
EXPECT_THROW(query::MakeSymbolTable(query), SemanticException);
}
TEST(TestSymbolTable, CreateAnonymousSymbols) {
SymbolTable symbol_table;
auto anon1 = symbol_table.CreateAnonymousSymbol();
ASSERT_EQ(anon1.name_, "anon1");
auto anon2 = symbol_table.CreateAnonymousSymbol();
ASSERT_EQ(anon2.name_, "anon2");
}
TEST(TestSymbolTable, CreateAnonymousSymbolWithExistingUserSymbolCalledAnon) {
SymbolTable symbol_table;
symbol_table.CreateSymbol("anon1", false);
auto anon2 = symbol_table.CreateAnonymousSymbol();
ASSERT_EQ(anon2.name_, "anon2");
}