Use label+property index with regex matching

Reviewers: mtomic, llugovic, msantl

Reviewed By: mtomic

Subscribers: buda, pullbot

Differential Revision: https://phabricator.memgraph.io/D1890
This commit is contained in:
Teon Banek 2019-03-19 15:29:50 +01:00
parent a14c24e71f
commit 49b552408b
6 changed files with 213 additions and 33 deletions

View File

@ -35,7 +35,7 @@ class PostProcessor final {
std::unique_ptr<LogicalOperator> Rewrite(
std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
return RewriteWithIndexLookup(std::move(plan), *context->symbol_table,
context->db);
context->ast_storage, context->db);
}
template <class TVertexCounts>

View File

@ -153,8 +153,9 @@ auto SplitExpressionOnAnd(Expression *expression) {
PropertyFilter::PropertyFilter(const SymbolTable &symbol_table,
const Symbol &symbol, PropertyIx property,
Expression *value)
: symbol_(symbol), property_(property), value_(value) {
Expression *value, Type type)
: symbol_(symbol), property_(property), type_(type), value_(value) {
CHECK(type != Type::RANGE);
UsedSymbolsCollector collector(symbol_table);
value->Accept(collector);
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
@ -166,6 +167,7 @@ PropertyFilter::PropertyFilter(
const std::experimental::optional<PropertyFilter::Bound> &upper_bound)
: symbol_(symbol),
property_(property),
type_(Type::RANGE),
lower_bound_(lower_bound),
upper_bound_(upper_bound) {
UsedSymbolsCollector collector(symbol_table);
@ -288,7 +290,8 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table,
collector.symbols_};
// Store a PropertyFilter on the value of the property.
filter_info.property_filter.emplace(symbol_table, symbol, prop_pair.first,
prop_pair.second);
prop_pair.second,
PropertyFilter::Type::EQUAL);
all_filters_.emplace_back(filter_info);
}
};
@ -354,9 +357,23 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter =
PropertyFilter(symbol_table, symbol_table.at(*ident),
prop_lookup->property_, val_expr);
filter.property_filter = PropertyFilter(
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
val_expr, PropertyFilter::Type::EQUAL);
all_filters_.emplace_back(filter);
return true;
}
return false;
};
// Like add_prop_equal, but for adding regex match property filter.
auto add_prop_regex_match = [&](auto *maybe_lookup, auto *val_expr) -> bool {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(maybe_lookup, prop_lookup, ident)) {
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter = PropertyFilter(
symbol_table, symbol_table.at(*ident), prop_lookup->property_,
val_expr, PropertyFilter::Type::REGEX_MATCH);
all_filters_.emplace_back(filter);
return true;
}
@ -422,6 +439,10 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
// No PropertyFilter was added, so just store a generic filter.
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *regex_match = utils::Downcast<RegexMatch>(expr)) {
if (!add_prop_regex_match(regex_match->string_expr_, regex_match->regex_)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *gt = utils::Downcast<GreaterOperator>(expr)) {
if (!add_prop_greater(gt->expression1_, gt->expression2_,
Bound::Type::EXCLUSIVE)) {

View File

@ -80,7 +80,14 @@ class PropertyFilter {
public:
using Bound = ScanAllByLabelPropertyRange::Bound;
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *);
/// Depending on type, this PropertyFilter may be a value equality, regex
/// matched value or a range with lower and (or) upper bounds.
enum class Type { EQUAL, REGEX_MATCH, RANGE };
/// Construct with Expression being the equality or regex match check.
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *,
Type);
/// Construct the range based filter.
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx,
const std::experimental::optional<Bound> &,
const std::experimental::optional<Bound> &);
@ -88,10 +95,11 @@ class PropertyFilter {
/// Symbol whose property is looked up.
Symbol symbol_;
PropertyIx property_;
Type type_;
/// True if the same symbol is used in expressions for value or bounds.
bool is_symbol_in_value_ = false;
/// Expression which when evaluated produces the value a property must
/// equal.
/// equal or regex match depending on type_.
Expression *value_ = nullptr;
/// Expressions which produce lower and upper bounds for a property.
std::experimental::optional<Bound> lower_bound_{};

View File

@ -31,8 +31,9 @@ Expression *RemoveAndExpressions(
template <class TDbAccessor>
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
public:
IndexLookupRewriter(const SymbolTable *symbol_table, TDbAccessor *db)
: symbol_table_(symbol_table), db_(db) {}
IndexLookupRewriter(const SymbolTable *symbol_table, AstStorage *ast_storage,
TDbAccessor *db)
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
@ -383,6 +384,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
private:
const SymbolTable *symbol_table_;
AstStorage *ast_storage_;
TDbAccessor *db_;
Filters filters_;
std::unordered_set<Expression *> filter_exprs_for_removal_;
@ -410,7 +412,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
}
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, db_);
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_);
(*branch)->Accept(rewriter);
if (rewriter.new_root_) {
*branch = rewriter.new_root_;
@ -448,24 +450,37 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
std::experimental::optional<LabelPropertyIndex> found;
for (const auto &label : filters_.FilteredLabels(symbol)) {
for (const auto &filter : filters_.PropertyFilters(symbol)) {
if (filter.property_filter->is_symbol_in_value_ ||
!are_bound(filter.used_symbols)) {
// Skip filter expressions which use the symbol whose property we are
// looking up or aren't bound. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
// cannot scan `n` by property index.
continue;
}
const auto &property = filter.property_filter->property_;
if (db_->LabelPropertyIndexExists(GetLabel(label),
GetProperty(property))) {
int64_t vertex_count =
db_->VerticesCount(GetLabel(label), GetProperty(property));
if (!found || vertex_count < found->vertex_count) {
if (filter.property_filter->is_symbol_in_value_) {
// Skip filter expressions which use the symbol whose property
// we are looking up. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`,
// so we cannot scan `n` by property index.
continue;
}
if (are_bound(filter.used_symbols)) {
// Take the property filter which uses bound symbols.
found = LabelPropertyIndex{label, filter, vertex_count};
}
}
if (!db_->LabelPropertyIndexExists(GetLabel(label),
GetProperty(property))) {
continue;
}
int64_t vertex_count =
db_->VerticesCount(GetLabel(label), GetProperty(property));
auto is_better_type = [&found](PropertyFilter::Type type) {
// Order the types by the most preferred index lookup type.
static const PropertyFilter::Type kFilterTypeOrder[] = {
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE,
PropertyFilter::Type::REGEX_MATCH};
auto *found_sort_ix =
std::find(kFilterTypeOrder, kFilterTypeOrder + 3,
found->filter.property_filter->type_);
auto *type_sort_ix =
std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
return type_sort_ix < found_sort_ix;
};
if (!found || vertex_count < found->vertex_count ||
(vertex_count == found->vertex_count &&
is_better_type(filter.property_filter->type_))) {
found = LabelPropertyIndex{label, filter, vertex_count};
}
}
}
@ -502,7 +517,12 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
(!max_vertex_count || *max_vertex_count >= found_index->vertex_count)) {
// Copy the property filter and then erase it from filters.
const auto prop_filter = *found_index->filter.property_filter;
filter_exprs_for_removal_.insert(found_index->filter.expression);
if (prop_filter.type_ != PropertyFilter::Type::REGEX_MATCH) {
// Remove the original expression from Filter operation only if it's not
// a regex match. In such a case we need to perform the matching even
// after we've scanned the index.
filter_exprs_for_removal_.insert(found_index->filter.expression);
}
filters_.EraseFilter(found_index->filter);
std::vector<Expression *> removed_expressions;
filters_.EraseLabelFilter(node_symbol, found_index->label,
@ -514,6 +534,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
prop_filter.lower_bound_, prop_filter.upper_bound_, graph_view);
} else if (prop_filter.type_ == PropertyFilter::Type::REGEX_MATCH) {
// Generate index scan using the empty string as a lower bound.
Expression *empty_string = ast_storage_->Create<PrimitiveLiteral>("");
auto lower_bound = utils::MakeBoundInclusive(empty_string);
return std::make_unique<ScanAllByLabelPropertyRange>(
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
std::experimental::make_optional(lower_bound),
std::experimental::nullopt, graph_view);
} else {
CHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";
@ -544,8 +573,9 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
template <class TDbAccessor>
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(
std::unique_ptr<LogicalOperator> root_op, const SymbolTable &symbol_table,
TDbAccessor *db) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, db);
AstStorage *ast_storage, TDbAccessor *db) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(&symbol_table, ast_storage,
db);
root_op->Accept(rewriter);
if (rewriter.new_root_) {
// This shouldn't happen in real use case, because IndexLookupRewriter

View File

@ -1105,6 +1105,26 @@ TYPED_TEST(TestPlanner, WhereIndexedLabelPropertyRange) {
}
}
TYPED_TEST(TestPlanner, WherePreferEqualityIndexOverRange) {
  // Planned query:
  //   MATCH (n :label) WHERE n.property = 42 AND n.property > 0 RETURN n
  // The expected plan scans the label+property index by the value 42 rather
  // than by a range, and is followed by a Filter and a Produce.
  AstStorage storage;
  FakeDbAccessor dba;
  auto indexed_label = dba.Label("label");
  auto indexed_property = PROPERTY_PAIR("property");
  dba.SetIndexCount(indexed_label, indexed_property.second, 0);
  auto *literal_42 = LITERAL(42);
  // Build the two conjuncts separately so each predicate is easy to spot.
  auto *equals_42 = EQ(PROPERTY_LOOKUP("n", indexed_property), literal_42);
  auto *above_zero =
      GREATER(PROPERTY_LOOKUP("n", indexed_property), LITERAL(0));
  auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
                                   WHERE(AND(equals_42, above_zero)),
                                   RETURN("n")));
  auto symbol_table = query::MakeSymbolTable(query);
  auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table,
            ExpectScanAllByLabelPropertyValue(indexed_label, indexed_property,
                                              literal_42),
            ExpectFilter(), ExpectProduce());
}
TYPED_TEST(TestPlanner, UnableToUsePropertyIndex) {
// Test MATCH (n: label) WHERE n.property = n.property RETURN n
FakeDbAccessor dba;
@ -1350,4 +1370,104 @@ TYPED_TEST(TestPlanner, ReturnAsteriskOmitsLambdaSymbols) {
}
}
TYPED_TEST(TestPlanner, FilterRegexMatchIndex) {
  // Planned query:
  //   MATCH (n :label) WHERE n.prop =~ "regex" RETURN n
  AstStorage storage;
  FakeDbAccessor dba;
  auto indexed_prop = dba.Property("prop");
  auto indexed_label = dba.Label("label");
  dba.SetIndexCount(indexed_label, 0);
  dba.SetIndexCount(indexed_label, indexed_prop, 0);
  auto *matches_regex = storage.Create<query::RegexMatch>(
      PROPERTY_LOOKUP("n", indexed_prop), LITERAL("regex"));
  auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
                                   WHERE(matches_regex), RETURN("n")));
  // The index is expected to be scanned by property range, starting from an
  // inclusive empty string lower bound and without an upper bound. Since there
  // is no regex based index, the Filter has to stay in the plan.
  Bound from_empty_string(LITERAL(""), Bound::Type::INCLUSIVE);
  auto symbol_table = query::MakeSymbolTable(query);
  auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table,
            ExpectScanAllByLabelPropertyRange(indexed_label, indexed_prop,
                                              from_empty_string,
                                              std::experimental::nullopt),
            ExpectFilter(), ExpectProduce());
}
TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex) {
  // Planned query:
  //   MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop = 42 RETURN n
  AstStorage storage;
  FakeDbAccessor dba;
  auto indexed_prop = PROPERTY_PAIR("prop");
  auto indexed_label = dba.Label("label");
  dba.SetIndexCount(indexed_label, 0);
  dba.SetIndexCount(indexed_label, indexed_prop.second, 0);
  auto *matches_regex = storage.Create<query::RegexMatch>(
      PROPERTY_LOOKUP("n", indexed_prop), LITERAL("regex"));
  auto *literal_42 = LITERAL(42);
  auto *equals_42 = EQ(PROPERTY_LOOKUP("n", indexed_prop), literal_42);
  auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
                                   WHERE(AND(matches_regex, equals_42)),
                                   RETURN("n")));
  // Looking up the index by the exact value 42 is expected here, because it is
  // much better than the property range scan used for regex matching.
  auto symbol_table = query::MakeSymbolTable(query);
  auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table,
            ExpectScanAllByLabelPropertyValue(indexed_label, indexed_prop,
                                              literal_42),
            ExpectFilter(), ExpectProduce());
}
TYPED_TEST(TestPlanner, FilterRegexMatchPreferEqualityIndex2) {
  // Planned query:
  //   MATCH (n :label)
  //   WHERE n.prop =~ "regex" AND n.prop = 42 AND n.prop > 0 RETURN n
  AstStorage storage;
  FakeDbAccessor dba;
  auto indexed_prop = PROPERTY_PAIR("prop");
  auto indexed_label = dba.Label("label");
  dba.SetIndexCount(indexed_label, 0);
  dba.SetIndexCount(indexed_label, indexed_prop.second, 0);
  auto *matches_regex = storage.Create<query::RegexMatch>(
      PROPERTY_LOOKUP("n", indexed_prop), LITERAL("regex"));
  auto *literal_42 = LITERAL(42);
  // The condition is ((regex AND equality) AND range), nested in that order.
  auto *regex_and_equality =
      AND(matches_regex, EQ(PROPERTY_LOOKUP("n", indexed_prop), literal_42));
  auto *above_zero = GREATER(PROPERTY_LOOKUP("n", indexed_prop), LITERAL(0));
  auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
                                   WHERE(AND(regex_and_equality, above_zero)),
                                   RETURN("n")));
  // Looking up the index by the exact value 42 is expected here, because it is
  // much better than a property range scan.
  auto symbol_table = query::MakeSymbolTable(query);
  auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table,
            ExpectScanAllByLabelPropertyValue(indexed_label, indexed_prop,
                                              literal_42),
            ExpectFilter(), ExpectProduce());
}
TYPED_TEST(TestPlanner, FilterRegexMatchPreferRangeIndex) {
  // Planned query:
  //   MATCH (n :label) WHERE n.prop =~ "regex" AND n.prop > 42 RETURN n
  AstStorage storage;
  FakeDbAccessor dba;
  auto indexed_prop = dba.Property("prop");
  auto indexed_label = dba.Label("label");
  dba.SetIndexCount(indexed_label, 0);
  dba.SetIndexCount(indexed_label, indexed_prop, 0);
  auto *matches_regex = storage.Create<query::RegexMatch>(
      PROPERTY_LOOKUP("n", indexed_prop), LITERAL("regex"));
  auto *literal_42 = LITERAL(42);
  auto *above_42 = GREATER(PROPERTY_LOOKUP("n", indexed_prop), literal_42);
  auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"))),
                                   WHERE(AND(matches_regex, above_42)),
                                   RETURN("n")));
  // A range scan bounded by the concrete value 42 (exclusive) is expected,
  // since it is much better than the range starting at the empty string which
  // would be used for regex matching.
  Bound above_42_bound(literal_42, Bound::Type::EXCLUSIVE);
  auto symbol_table = query::MakeSymbolTable(query);
  auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table,
            ExpectScanAllByLabelPropertyRange(indexed_label, indexed_prop,
                                              above_42_bound,
                                              std::experimental::nullopt),
            ExpectFilter(), ExpectProduce());
}
} // namespace

View File

@ -108,7 +108,8 @@ class OpChecker : public BaseOpChecker {
public:
/// Checks that `op` is dynamically a `TOp` and, if so, forwards it together
/// with `symbol_table` to `ExpectOp` for the operator specific expectations.
void CheckOp(LogicalOperator &op, const SymbolTable &symbol_table) override {
  auto *expected_op = dynamic_cast<TOp *>(&op);
  // Assert exactly once, with the diagnostic attached. A preceding bare
  // ASSERT_TRUE would abort this function first on a type mismatch, so the
  // message naming the actual and the expected operator would never be shown.
  ASSERT_TRUE(expected_op) << "op is '" << op.GetTypeInfo().name
                           << "' expected '" << TOp::kType.name << "'!";
  ExpectOp(*expected_op, symbol_table);
}