Plan ScanAll by using a range index
Summary: Add optional bounds to PropertyFilter and collect them. Relational operators (e.g. `<`, `>`, ...) should be used to plan scanning the index by a range of values. For that reason, PropertyFilter is extended to store either the equality expression or range bounds. The `AnalyzeFilter` function is extended to look for those operators and see if their top level expression contains a property lookup. If it does, a filter with a bound is generated. Test for property comparison preventing index use. Reviewers: florijan, mislav.bradac, buda. Reviewed By: florijan. Subscribers: pullbot. Differential Revision: https://phabricator.memgraph.io/D529
This commit is contained in:
parent
3d8d0efea5
commit
104fe22f53
@ -31,11 +31,15 @@ class Filters {
|
||||
public:
|
||||
/// Stores the symbols and expression used to filter a property.
|
||||
struct PropertyFilter {
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
/// Set of used symbols in the @c expression.
|
||||
std::unordered_set<Symbol> used_symbols;
|
||||
/// Expression which when evaluated produces the value a property must
|
||||
/// equal.
|
||||
Expression *expression;
|
||||
Expression *expression = nullptr;
|
||||
std::experimental::optional<Bound> lower_bound;
|
||||
std::experimental::optional<Bound> upper_bound;
|
||||
};
|
||||
|
||||
/// All filter expressions that should be generated.
|
||||
|
@ -718,6 +718,41 @@ bool FindBestLabelPropertyIndex(
|
||||
return found;
|
||||
}
|
||||
|
||||
/// Creates a scan operator over indexed data for the node bound to
/// @c node_symbol.
///
/// A label+property index is preferred: if FindBestLabelPropertyIndex finds
/// one, the result is a ScanAllByLabelPropertyRange when the chosen filter has
/// a lower or upper bound, otherwise a ScanAllByLabelPropertyValue using the
/// filter's equality expression. If no label+property index is found, the
/// result is a ScanAllByLabel over the label picked by FindBestLabelIndex.
///
/// @param last_op input operator; it is wrapped in a std::shared_ptr which is
///     handed to the created scan.
/// @param labels labels of the node, must not be empty.
/// @param properties property filters of the node, keyed by property.
/// @return newly allocated scan operator.
ScanAll *GenScanByIndex(
    LogicalOperator *last_op, const GraphDbAccessor &db,
    const Symbol &node_symbol, const MatchContext &context,
    const std::set<GraphDbTypes::Label> &labels,
    const std::map<GraphDbTypes::Property, std::vector<Filters::PropertyFilter>>
        &properties) {
  debug_assert(!labels.empty(),
               "Without labels, indexed data cannot be scanned.");
  GraphDbTypes::Label indexed_label;
  std::pair<GraphDbTypes::Property, Filters::PropertyFilter> indexed_property;
  const bool found_property_index = FindBestLabelPropertyIndex(
      db, labels, properties, node_symbol, context.bound_symbols,
      indexed_label, indexed_property);
  if (!found_property_index) {
    // No usable label+property index, so fall back to the label index (which
    // ought to exist).
    auto label = FindBestLabelIndex(db, labels);
    return new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
                              node_symbol, label, context.graph_view);
  }
  const auto &filter = indexed_property.second;
  const bool is_range_filter = filter.lower_bound || filter.upper_bound;
  if (!is_range_filter) {
    // Without bounds, the filter has to be an equality filter.
    debug_assert(
        filter.expression,
        "Property filter should either have bounds or an expression.");
    return new ScanAllByLabelPropertyValue(
        std::shared_ptr<LogicalOperator>(last_op), node_symbol, indexed_label,
        indexed_property.first, filter.expression, context.graph_view);
  }
  return new ScanAllByLabelPropertyRange(
      std::shared_ptr<LogicalOperator>(last_op), node_symbol, indexed_label,
      indexed_property.first, filter.lower_bound, filter.upper_bound,
      context.graph_view);
}
|
||||
|
||||
LogicalOperator *PlanMatching(const Matching &matching,
|
||||
LogicalOperator *input_op,
|
||||
PlanningContext &planning_ctx,
|
||||
@ -743,30 +778,14 @@ LogicalOperator *PlanMatching(const Matching &matching,
|
||||
last_op = new ScanAll(std::shared_ptr<LogicalOperator>(last_op),
|
||||
node1_symbol, context.graph_view);
|
||||
} else {
|
||||
// With labels, we can scan indexed data. First, try to see if we can
|
||||
// use label+property index. If not, use just the label index (which
|
||||
// ought to exist).
|
||||
GraphDbTypes::Label best_label;
|
||||
std::pair<GraphDbTypes::Property, Filters::PropertyFilter>
|
||||
best_property;
|
||||
// With labels, we can scan indexed data.
|
||||
auto properties =
|
||||
FindOr(matching.filters.property_filters(), node1_symbol,
|
||||
std::map<GraphDbTypes::Property,
|
||||
std::vector<Filters::PropertyFilter>>())
|
||||
.first;
|
||||
if (FindBestLabelPropertyIndex(planning_ctx.db, labels, properties,
|
||||
node1_symbol, bound_symbols, best_label,
|
||||
best_property)) {
|
||||
last_op = new ScanAllByLabelPropertyValue(
|
||||
std::shared_ptr<LogicalOperator>(last_op), node1_symbol,
|
||||
best_label, best_property.first, best_property.second.expression,
|
||||
context.graph_view);
|
||||
} else {
|
||||
auto label = FindBestLabelIndex(planning_ctx.db, labels);
|
||||
last_op =
|
||||
new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
|
||||
node1_symbol, label, context.graph_view);
|
||||
}
|
||||
last_op = GenScanByIndex(last_op, planning_ctx.db, node1_symbol,
|
||||
context, labels, properties);
|
||||
}
|
||||
context.new_symbols.emplace_back(node1_symbol);
|
||||
last_op = GenFilters(last_op, bound_symbols, all_filters, storage);
|
||||
@ -855,6 +874,7 @@ auto GenMerge(query::Merge &merge, LogicalOperator *input_op,
|
||||
// and properties to be used with indexing. Note that all filters are never
|
||||
// updated here, but only labels and properties are.
|
||||
void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
|
||||
auto *&ident) {
|
||||
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
|
||||
@ -870,6 +890,28 @@ void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
|
||||
.emplace_back(PropertyFilter{collector.symbols_, val_expr});
|
||||
}
|
||||
};
|
||||
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
|
||||
PropertyLookup *prop_lookup = nullptr;
|
||||
Identifier *ident = nullptr;
|
||||
if (get_property_lookup(expr1, prop_lookup, ident)) {
|
||||
// n.prop > value
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
expr2->Accept(collector);
|
||||
auto prop_filter = PropertyFilter{collector.symbols_};
|
||||
prop_filter.lower_bound = Bound{expr2, bound_type};
|
||||
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
|
||||
.emplace_back(std::move(prop_filter));
|
||||
}
|
||||
if (get_property_lookup(expr2, prop_lookup, ident)) {
|
||||
// value > n.prop
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
expr1->Accept(collector);
|
||||
auto prop_filter = PropertyFilter{collector.symbols_};
|
||||
prop_filter.upper_bound = Bound{expr1, bound_type};
|
||||
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
|
||||
.emplace_back(std::move(prop_filter));
|
||||
}
|
||||
};
|
||||
// We are only interested to see the insides of And, because Or prevents
|
||||
// indexing since any labels and properties found there may be optional.
|
||||
if (auto *and_op = dynamic_cast<AndOperator *>(expr)) {
|
||||
@ -898,9 +940,25 @@ void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
|
||||
add_prop_equal(eq->expression1_, eq->expression2_);
|
||||
// And reversed.
|
||||
add_prop_equal(eq->expression2_, eq->expression1_);
|
||||
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
|
||||
add_prop_greater(gt->expression1_, gt->expression2_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
|
||||
add_prop_greater(ge->expression1_, ge->expression2_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
|
||||
// Like greater, but in reverse.
|
||||
add_prop_greater(lt->expression2_, lt->expression1_,
|
||||
Bound::Type::EXCLUSIVE);
|
||||
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
|
||||
// Like greater equal, but in reverse.
|
||||
add_prop_greater(le->expression2_, le->expression1_,
|
||||
Bound::Type::INCLUSIVE);
|
||||
}
|
||||
// TODO: Collect potential property indexing by range.
|
||||
return;
|
||||
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
|
||||
// indexing by range. Note, that the generated Ast uses AND for chained
|
||||
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
|
||||
// as `expr1 < n.prop AND n.prop < expr2`.
|
||||
}
|
||||
|
||||
void Filters::CollectPatternFilters(Pattern &pattern,
|
||||
|
@ -462,6 +462,12 @@ auto GetMerge(AstTreeStorage &storage, Pattern *pattern, OnMatch on_match,
|
||||
#define ADD(expr1, expr2) \
|
||||
storage.Create<query::AdditionOperator>((expr1), (expr2))
|
||||
/// Creates a query::LessOperator from the two expressions, using the `storage`
/// which is in scope where the macro is expanded.
#define LESS(expr1, expr2) \
  storage.Create<query::LessOperator>((expr1), (expr2))
/// Creates a query::LessEqualOperator from the two expressions, using the
/// `storage` which is in scope where the macro is expanded.
#define LESS_EQ(expr1, expr2) \
  storage.Create<query::LessEqualOperator>((expr1), (expr2))
/// Creates a query::GreaterOperator from the two expressions, using the
/// `storage` which is in scope where the macro is expanded.
#define GREATER(expr1, expr2) \
  storage.Create<query::GreaterOperator>((expr1), (expr2))
/// Creates a query::GreaterEqualOperator from the two expressions, using the
/// `storage` which is in scope where the macro is expanded.
#define GREATER_EQ(expr1, expr2) \
  storage.Create<query::GreaterEqualOperator>((expr1), (expr2))
|
||||
#define SUM(expr) \
|
||||
storage.Create<query::Aggregation>((expr), query::Aggregation::Op::SUM)
|
||||
#define COUNT(expr) \
|
||||
|
@ -19,6 +19,7 @@ using query::Symbol;
|
||||
using query::SymbolTable;
|
||||
using query::SymbolGenerator;
|
||||
using Direction = query::EdgeAtom::Direction;
|
||||
using Bound = ScanAllByLabelPropertyRange::Bound;
|
||||
|
||||
namespace {
|
||||
|
||||
@ -120,8 +121,6 @@ using ExpectCreateExpand = OpChecker<CreateExpand>;
|
||||
using ExpectDelete = OpChecker<Delete>;
|
||||
using ExpectScanAll = OpChecker<ScanAll>;
|
||||
using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>;
|
||||
using ExpectScanAllByLabelPropertyRange =
|
||||
OpChecker<ScanAllByLabelPropertyRange>;
|
||||
using ExpectExpand = OpChecker<Expand>;
|
||||
using ExpectFilter = OpChecker<Filter>;
|
||||
using ExpectProduce = OpChecker<Produce>;
|
||||
@ -233,6 +232,41 @@ class ExpectScanAllByLabelPropertyValue
|
||||
query::Expression *expression_;
|
||||
};
|
||||
|
||||
class ExpectScanAllByLabelPropertyRange
|
||||
: public OpChecker<ScanAllByLabelPropertyRange> {
|
||||
public:
|
||||
ExpectScanAllByLabelPropertyRange(
|
||||
GraphDbTypes::Label label, GraphDbTypes::Property property,
|
||||
std::experimental::optional<Bound> lower_bound,
|
||||
std::experimental::optional<Bound> upper_bound)
|
||||
: label_(label),
|
||||
property_(property),
|
||||
lower_bound_(lower_bound),
|
||||
upper_bound_(upper_bound) {}
|
||||
|
||||
void ExpectOp(ScanAllByLabelPropertyRange &scan_all,
|
||||
const SymbolTable &) override {
|
||||
EXPECT_EQ(scan_all.label(), label_);
|
||||
EXPECT_EQ(scan_all.property(), property_);
|
||||
if (lower_bound_) {
|
||||
ASSERT_TRUE(scan_all.lower_bound());
|
||||
EXPECT_EQ(scan_all.lower_bound()->value(), lower_bound_->value());
|
||||
EXPECT_EQ(scan_all.lower_bound()->type(), lower_bound_->type());
|
||||
}
|
||||
if (upper_bound_) {
|
||||
ASSERT_TRUE(scan_all.upper_bound());
|
||||
EXPECT_EQ(scan_all.upper_bound()->value(), upper_bound_->value());
|
||||
EXPECT_EQ(scan_all.upper_bound()->type(), upper_bound_->type());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
GraphDbTypes::Label label_;
|
||||
GraphDbTypes::Property property_;
|
||||
std::experimental::optional<Bound> lower_bound_;
|
||||
std::experimental::optional<Bound> upper_bound_;
|
||||
};
|
||||
|
||||
class ExpectCreateIndex : public OpChecker<CreateIndex> {
|
||||
public:
|
||||
ExpectCreateIndex(GraphDbTypes::Label label, GraphDbTypes::Property property)
|
||||
@ -1100,4 +1134,75 @@ TEST(TestLogicalPlanner, MultiPropertyIndexScan) {
|
||||
ExpectFilter(), ExpectProduce());
|
||||
}
|
||||
|
||||
TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
  // Plans `MATCH (n :label) WHERE n.property REL_OP 42 RETURN n` for each
  // REL_OP among `<`, `<=`, `>` and `>=`, with the property lookup on either
  // side of the operator, and expects a range scan of the label+property
  // index every time.
  Dbms dbms;
  auto dba = dbms.active();
  auto label = dba->label("label");
  auto property = dba->property("property");
  dba->BuildIndex(label, property);
  // Take a fresh accessor after the index is built (presumably so that the
  // new index is visible to the planner -- confirm).
  dba = dbms.active();
  AstTreeStorage storage;
  auto lit_42 = LITERAL(42);
  auto n_prop = PROPERTY_LOOKUP("n", property);
  using RelOpCase = std::pair<query::Expression *, Bound::Type>;
  // Plans the query filtered by `where_expr` and checks the bounds of the
  // resulting range scan.
  auto expect_range_scan = [&label, &property, &dba](
      const auto &where_expr, auto expected_lower, auto expected_upper) {
    // This storage shadows the outer one, so that each query is created in
    // its own storage.
    AstTreeStorage storage;
    QUERY(MATCH(PATTERN(NODE("n", label))), WHERE(where_expr), RETURN("n"));
    auto symbol_table = MakeSymbolTable(*storage.query());
    auto plan = MakeLogicalPlan<RuleBasedPlanner>(storage, symbol_table, *dba);
    CheckPlan(*plan, symbol_table,
              ExpectScanAllByLabelPropertyRange(label, property, expected_lower,
                                                expected_upper),
              ExpectFilter(), ExpectProduce());
  };
  // Comparisons which limit the property from above: only the upper bound is
  // expected.
  const std::vector<RelOpCase> upper_bound_cases{
      RelOpCase(LESS(n_prop, lit_42), Bound::Type::EXCLUSIVE),
      RelOpCase(LESS_EQ(n_prop, lit_42), Bound::Type::INCLUSIVE),
      RelOpCase(GREATER(lit_42, n_prop), Bound::Type::EXCLUSIVE),
      RelOpCase(GREATER_EQ(lit_42, n_prop), Bound::Type::INCLUSIVE)};
  for (const auto &test_case : upper_bound_cases) {
    expect_range_scan(test_case.first, std::experimental::nullopt,
                      Bound(lit_42, test_case.second));
  }
  // Comparisons which limit the property from below: only the lower bound is
  // expected.
  const std::vector<RelOpCase> lower_bound_cases{
      RelOpCase(LESS(lit_42, n_prop), Bound::Type::EXCLUSIVE),
      RelOpCase(LESS_EQ(lit_42, n_prop), Bound::Type::INCLUSIVE),
      RelOpCase(GREATER(n_prop, lit_42), Bound::Type::EXCLUSIVE),
      RelOpCase(GREATER_EQ(n_prop, lit_42), Bound::Type::INCLUSIVE)};
  for (const auto &test_case : lower_bound_cases) {
    expect_range_scan(test_case.first, Bound(lit_42, test_case.second),
                      std::experimental::nullopt);
  }
}
|
||||
|
||||
TEST(TestLogicalPlanner, UnableToUsePropertyIndex) {
  // Plans `MATCH (n :label) WHERE n.property = n.property RETURN n` with an
  // existing index on the label and property.
  Dbms dbms;
  auto dba = dbms.active();
  const auto indexed_label = dba->label("label");
  const auto indexed_property = dba->property("property");
  dba->BuildIndex(indexed_label, indexed_property);
  dba = dbms.active();
  AstTreeStorage storage;
  QUERY(MATCH(PATTERN(NODE("n", indexed_label))),
        WHERE(EQ(PROPERTY_LOOKUP("n", indexed_property),
                 PROPERTY_LOOKUP("n", indexed_property))),
        RETURN("n"));
  auto symbol_table = MakeSymbolTable(*storage.query());
  auto plan = MakeLogicalPlan<RuleBasedPlanner>(storage, symbol_table, *dba);
  // The property is compared with a property of the very same node, so the
  // label+property index cannot be used; only a scan by label is expected.
  CheckPlan(*plan, symbol_table, ExpectScanAllByLabel(), ExpectFilter(),
            ExpectProduce());
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user