Use label+property index with regex matching
Reviewers: mtomic, llugovic, msantl Reviewed By: mtomic Subscribers: buda, pullbot Differential Revision: https://phabricator.memgraph.io/D1890
This commit is contained in:
parent
a14c24e71f
commit
49b552408b
@ -35,7 +35,7 @@ class PostProcessor final {
|
||||
std::unique_ptr<LogicalOperator> Rewrite(
|
||||
std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
|
||||
return RewriteWithIndexLookup(std::move(plan), *context->symbol_table,
|
||||
context->db);
|
||||
context->ast_storage, context->db);
|
||||
}
|
||||
|
||||
template <class TVertexCounts>
|
||||
|
@ -153,8 +153,9 @@ auto SplitExpressionOnAnd(Expression *expression) {
|
||||
|
||||
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table,
|
||||
const Symbol &symbol, PropertyIx property,
|
||||
Expression *value)
|
||||
: symbol_(symbol), property_(property), value_(value) {
|
||||
Expression *value, Type type)
|
||||
: symbol_(symbol), property_(property), type_(type), value_(value) {
|
||||
CHECK(type != Type::RANGE);
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
value->Accept(collector);
|
||||
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
|
||||
@ -166,6 +167,7 @@ PropertyFilter::PropertyFilter(
|
||||
const std::experimental::optional<PropertyFilter::Bound> &upper_bound)
|
||||
: symbol_(symbol),
|
||||
property_(property),
|
||||
type_(Type::RANGE),
|
||||
lower_bound_(lower_bound),
|
||||
upper_bound_(upper_bound) {
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
@ -288,7 +290,8 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
|
||||
collector.symbols_};
|
||||
// Store a PropertyFilter on the value of the property.
|
||||
filter_info.property_filter.emplace(symbol_table, symbol, prop_pair.first,
|
||||
prop_pair.second);
|
||||
prop_pair.second,
|
||||
PropertyFilter::Type::EQUAL);
|
||||
all_filters_.emplace_back(filter_info);
|
||||
}
|
||||
};
|
||||
@ -354,9 +357,23 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
|
||||
auto filter = make_filter(FilterInfo::Type::Property);
|
||||
filter.property_filter =
|
||||
PropertyFilter(symbol_table, symbol_table.at(*ident),
|
||||
prop_lookup->property_, val_expr);
|
||||
filter.property_filter = PropertyFilter(
|
||||
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
|
||||
val_expr, PropertyFilter::Type::EQUAL);
|
||||
all_filters_.emplace_back(filter);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
// Like add_prop_equal, but for adding regex match property filter.
|
||||
auto add_prop_regex_match = [&](auto *maybe_lookup, auto *val_expr) -> bool {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
|
||||
auto filter = make_filter(FilterInfo::Type::Property);
|
||||
filter.property_filter = PropertyFilter(
|
||||
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
|
||||
val_expr, PropertyFilter::Type::REGEX_MATCH);
|
||||
all_filters_.emplace_back(filter);
|
||||
return true;
|
||||
}
|
||||
@ -422,6 +439,10 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
// No PropertyFilter was added, so just store a generic filter.
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
} else if (auto *regex_match = utils::Downcast<RegexMatch>(expr)) {
|
||||
if (!add_prop_regex_match(regex_match->string_expr_, regex_match->regex_)) {
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
} else if (auto *gt = utils::Downcast<GreaterOperator>(expr)) {
|
||||
if (!add_prop_greater(gt->expression1_, gt->expression2_,
|
||||
Bound::Type::EXCLUSIVE)) {
|
||||
|
@ -80,7 +80,14 @@ class PropertyFilter {
|
||||
public:
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *);
|
||||
/// Depending on type, this PropertyFilter may be a value equality, regex
|
||||
/// matched value or a range with lower and (or) upper bounds.
|
||||
enum class Type { EQUAL, REGEX_MATCH, RANGE };
|
||||
|
||||
/// Construct with Expression being the equality or regex match check.
|
||||
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *,
|
||||
Type);
|
||||
/// Construct the range based filter.
|
||||
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx,
|
||||
const std::experimental::optional<Bound> &,
|
||||
const std::experimental::optional<Bound> &);
|
||||
@ -88,10 +95,11 @@ class PropertyFilter {
|
||||
/// Symbol whose property is looked up.
|
||||
Symbol symbol_;
|
||||
PropertyIx property_;
|
||||
Type type_;
|
||||
/// True if the same symbol is used in expressions for value or bounds.
|
||||
bool is_symbol_in_value_ = false;
|
||||
/// Expression which when evaluated produces the value a property must
|
||||
/// equal.
|
||||
/// equal or regex match depending on type_.
|
||||
Expression *value_ = nullptr;
|
||||
/// Expressions which produce lower and upper bounds for a property.
|
||||
std::experimental::optional<Bound> lower_bound_{};
|
||||
|
@ -31,8 +31,9 @@ Expression *RemoveAndExpressions(
|
||||
template <class TDbAccessor>
|
||||
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
public:
|
||||
IndexLookupRewriter(const SymbolTable *symbol_table, TDbAccessor *db)
|
||||
: symbol_table_(symbol_table), db_(db) {}
|
||||
IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage,
|
||||
TDbAccessor *db)
|
||||
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
|
||||
|
||||
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
||||
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
||||
@ -383,6 +384,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
private:
|
||||
const SymbolTable *symbol_table_;
|
||||
AstStorage *ast_storage_;
|
||||
TDbAccessor *db_;
|
||||
Filters filters_;
|
||||
std::unordered_set<Expression *> filter_exprs_for_removal_;
|
||||
@ -410,7 +412,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
}
|
||||
|
||||
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
|
||||
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, db_);
|
||||
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_);
|
||||
(*branch)->Accept(rewriter);
|
||||
if (rewriter.new_root_) {
|
||||
*branch = rewriter.new_root_;
|
||||
@ -448,24 +450,37 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
std::experimental::optional<LabelPropertyIndex> found;
|
||||
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
||||
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
||||
if (filter.property_filter->is_symbol_in_value_ ||
|
||||
!are_bound(filter.used_symbols)) {
|
||||
// Skip filter expressions which use the symbol whose property we are
|
||||
// looking up or aren't bound. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
|
||||
// cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
const auto &property = filter.property_filter->property_;
|
||||
if (db_->LabelPropertyIndexExists(GetLabel(label),
|
||||
GetProperty(property))) {
|
||||
int64_t vertex_count =
|
||||
db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||
if (!found || vertex_count < found->vertex_count) {
|
||||
if (filter.property_filter->is_symbol_in_value_) {
|
||||
// Skip filter expressions which use the symbol whose property
|
||||
// we are looking up. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
|
||||
// so we cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
if (are_bound(filter.used_symbols)) {
|
||||
// Take the property filter which uses bound symbols.
|
||||
found = LabelPropertyIndex{label, filter, vertex_count};
|
||||
}
|
||||
}
|
||||
if (!db_->LabelPropertyIndexExists(GetLabel(label),
|
||||
GetProperty(property))) {
|
||||
continue;
|
||||
}
|
||||
int64_t vertex_count =
|
||||
db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||
auto is_better_type = [&found](PropertyFilter::Type type) {
|
||||
// Order the types by the most preferred index lookup type.
|
||||
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
||||
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE,
|
||||
PropertyFilter::Type::REGEX_MATCH};
|
||||
auto *found_sort_ix =
|
||||
std::find(kFilterTypeOrder, kFilterTypeOrder + 3,
|
||||
found->filter.property_filter->type_);
|
||||
auto *type_sort_ix =
|
||||
std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
||||
return type_sort_ix < found_sort_ix;
|
||||
};
|
||||
if (!found || vertex_count < found->vertex_count ||
|
||||
(vertex_count == found->vertex_count &&
|
||||
is_better_type(filter.property_filter->type_))) {
|
||||
found = LabelPropertyIndex{label, filter, vertex_count};
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -502,7 +517,12 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
|
||||
// Copy the property filter and then erase it from filters.
|
||||
const auto prop_filter = *found_index->filter.property_filter;
|
||||
filter_exprs_for_removal_.insert(found_index->filter.expression);
|
||||
if (prop_filter.type_ != PropertyFilter::Type::REGEX_MATCH) {
|
||||
// Remove the original expression from Filter operation only if it's not
|
||||
// a regex match. In such a case we need to perform the matching even
|
||||
// after we've scanned the index.
|
||||
filter_exprs_for_removal_.insert(found_index->filter.expression);
|
||||
}
|
||||
filters_.EraseFilter(found_index->filter);
|
||||
std::vector<Expression *> removed_expressions;
|
||||
filters_.EraseLabelFilter(node_symbol, found_index->label,
|
||||
@ -514,6 +534,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
input, node_symbol, GetLabel(found_index->label),
|
||||
GetProperty(prop_filter.property_), prop_filter.property_.name,
|
||||
prop_filter.lower_bound_, prop_filter.upper_bound_, graph_view);
|
||||
} else if (prop_filter.type_ == PropertyFilter::Type::REGEX_MATCH) {
|
||||
// Generate index scan using the empty string as a lower bound.
|
||||
Expression *empty_string = ast_storage_->Create<PrimitiveLiteral>("");
|
||||
auto lower_bound = utils::MakeBoundInclusive(empty_string);
|
||||
return std::make_unique<ScanAllByLabelPropertyRange>(
|
||||
input, node_symbol, GetLabel(found_index->label),
|
||||
GetProperty(prop_filter.property_), prop_filter.property_.name,
|
||||
std::experimental::make_optional(lower_bound),
|
||||
std::experimental::nullopt, graph_view);
|
||||
} else {
|
||||
CHECK(prop_filter.value_) << "Property filter should either have "
|
||||
"bounds or a value expression.";
|
||||
@ -544,8 +573,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
template <class TDbAccessor>
|
||||
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(
|
||||
std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table,
|
||||
TDbAccessor *db) {
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, db);
|
||||
AstStorage *ast_storage, TDbAccessor *db) {
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, ast_storage,
|
||||
db);
|
||||
root_op->Accept(rewriter);
|
||||
if (rewriter.new_root_) {
|
||||
// This shouldn't happen in real use case, because IndexLookupRewriter
|
||||
|
@ -1105,6 +1105,26 @@ TYPED_TEST(TestPlanner, WhereIndexedLabelPropertyRange) {
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, WherePreferEqualityIndexOverRange) {
|
||||
// Test MATCH (n :label) WHERE n.property = 42 AND n.property > 0 RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto label = dba.Label("label");
|
||||
auto property = PROPERTY_PAIR("property");
|
||||
dba.SetIndexCount(label, property.second, 0);
|
||||
auto lit_42 = LITERAL(42);
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(AND(EQ(PROPERTY_LOOKUP("n", property), lit_42),
|
||||
GREATER(PROPERTY_LOOKUP("n", property), LITERAL(0)))),
|
||||
RETURN("n")));
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table,
|
||||
ExpectScanAllByLabelPropertyValue(label, property, lit_42),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, UnableToUsePropertyIndex) {
|
||||
// Test MATCH (n: label) WHERE n.property = n.property RETURN n
|
||||
FakeDbAccessor dba;
|
||||
@ -1350,4 +1370,104 @@ TYPED_TEST(TestPlanner, ReturnAsteriskOmitsLambdaSymbols) {
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, FilterRegexMatchIndex) {
|
||||
// Test MATCH (n :label) WHERE n.prop =~ "regex" RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto prop = dba.Property("prop");
|
||||
auto label = dba.Label("label");
|
||||
dba.SetIndexCount(label, 0);
|
||||
dba.SetIndexCount(label, prop, 0);
|
||||
auto *regex_match = storage.Create<query::RegexMatch>(
|
||||
PROPERTY_LOOKUP("n", prop), LITERAL("regex"));
|
||||
auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(regex_match), RETURN("n")));
|
||||
// We expect that we use index by property range where lower bound is an empty
|
||||
// string. Filter must still remain in place, because we don't have regex
|
||||
// based index.
|
||||
Bound lower_bound(LITERAL(""), Bound::Type::INCLUSIVE);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table,
|
||||
ExpectScanAllByLabelPropertyRange(label, prop, lower_bound,
|
||||
std::experimental::nullopt),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex) {
|
||||
// Test MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop = 42 RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto prop = PROPERTY_PAIR("prop");
|
||||
auto label = dba.Label("label");
|
||||
dba.SetIndexCount(label, 0);
|
||||
dba.SetIndexCount(label, prop.second, 0);
|
||||
auto *regex_match = storage.Create<query::RegexMatch>(
|
||||
PROPERTY_LOOKUP("n", prop), LITERAL("regex"));
|
||||
auto *lit_42 = LITERAL(42);
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(AND(regex_match, EQ(PROPERTY_LOOKUP("n", prop), lit_42))),
|
||||
RETURN("n")));
|
||||
// We expect that we use index by property value equal to 42, because that's
|
||||
// much better than property range for regex matching.
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table,
|
||||
ExpectScanAllByLabelPropertyValue(label, prop, lit_42),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex2) {
|
||||
// Test MATCH (n :label)
|
||||
// WHERE n.prop =~ "regex" AND n.prop = 42 AND n.prop > 0 RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto prop = PROPERTY_PAIR("prop");
|
||||
auto label = dba.Label("label");
|
||||
dba.SetIndexCount(label, 0);
|
||||
dba.SetIndexCount(label, prop.second, 0);
|
||||
auto *regex_match = storage.Create<query::RegexMatch>(
|
||||
PROPERTY_LOOKUP("n", prop), LITERAL("regex"));
|
||||
auto *lit_42 = LITERAL(42);
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(AND(AND(regex_match, EQ(PROPERTY_LOOKUP("n", prop), lit_42)),
|
||||
GREATER(PROPERTY_LOOKUP("n", prop), LITERAL(0)))),
|
||||
RETURN("n")));
|
||||
// We expect that we use index by property value equal to 42, because that's
|
||||
// much better than property range.
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table,
|
||||
ExpectScanAllByLabelPropertyValue(label, prop, lit_42),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, FilterRegexMatchPreferRangeIndex) {
|
||||
// Test MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop > 42 RETURN n
|
||||
AstStorage storage;
|
||||
FakeDbAccessor dba;
|
||||
auto prop = dba.Property("prop");
|
||||
auto label = dba.Label("label");
|
||||
dba.SetIndexCount(label, 0);
|
||||
dba.SetIndexCount(label, prop, 0);
|
||||
auto *regex_match = storage.Create<query::RegexMatch>(
|
||||
PROPERTY_LOOKUP("n", prop), LITERAL("regex"));
|
||||
auto *lit_42 = LITERAL(42);
|
||||
auto *query = QUERY(SINGLE_QUERY(
|
||||
MATCH(PATTERN(NODE("n", "label"))),
|
||||
WHERE(AND(regex_match, GREATER(PROPERTY_LOOKUP("n", prop), lit_42))),
|
||||
RETURN("n")));
|
||||
// We expect that we use index by property range on a concrete value (42), as
|
||||
// it is much better than using a range from empty string for regex matching.
|
||||
Bound lower_bound(lit_42, Bound::Type::EXCLUSIVE);
|
||||
auto symbol_table = query::MakeSymbolTable(query);
|
||||
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
|
||||
CheckPlan(planner.plan(), symbol_table,
|
||||
ExpectScanAllByLabelPropertyRange(label, prop, lower_bound,
|
||||
std::experimental::nullopt),
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -108,7 +108,8 @@ class OpChecker : public BaseOpChecker {
|
||||
public:
|
||||
void CheckOp(LogicalOperator &op, const SymbolTable &symbol_table) override {
|
||||
auto *expected_op = dynamic_cast<TOp *>(&op);
|
||||
ASSERT_TRUE(expected_op);
|
||||
ASSERT_TRUE(expected_op) << "op is '" << op.GetTypeInfo().name
|
||||
<< "' expected '" << TOp::kType.name << "'!";
|
||||
ExpectOp(*expected_op, symbol_table);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user