Plan ScanAll by using a range index

Summary:
Add optional bounds to PropertyFilter and collect them

Relational operators (e.g. `<`, `>`, ...) should cause the index to be
scanned by a range of values. For that reason, PropertyFilter is extended
to store either the equality expression or the range bounds.
The `AnalyzeFilter` function is extended to look for those operators and
check whether one of their top-level operands is a property lookup. If it
is, a filter with a bound is generated.

Test for property comparison preventing index use

Reviewers: florijan, mislav.bradac, buda

Reviewed By: florijan

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D529
This commit is contained in:
Teon Banek 2017-07-04 10:37:39 +02:00
parent 3d8d0efea5
commit 104fe22f53
4 changed files with 197 additions and 24 deletions

View File

@ -31,11 +31,15 @@ class Filters {
public:
/// Stores the symbols and either the equality expression or the range bounds
/// used to filter a property. An equality filter sets @c expression; a range
/// filter leaves @c expression null and sets at least one of the bounds.
struct PropertyFilter {
using Bound = ScanAllByLabelPropertyRange::Bound;
/// Set of symbols used in the @c expression or in the expression of a bound.
std::unordered_set<Symbol> used_symbols;
/// Expression which when evaluated produces the value a property must
/// equal. Null when the filter describes a range instead of an equality.
Expression *expression;
Expression *expression = nullptr;
/// Optional lower bound of the property value, collected from comparisons
/// of the form `n.prop > value` (or their equivalents).
std::experimental::optional<Bound> lower_bound;
/// Optional upper bound of the property value, collected from comparisons
/// of the form `value > n.prop` (or their equivalents).
std::experimental::optional<Bound> upper_bound;
};
/// All filter expressions that should be generated.

View File

@ -718,6 +718,41 @@ bool FindBestLabelPropertyIndex(
return found;
}
/// Creates an indexed scan of `node_symbol`, picking the most specific index.
///
/// A label+property index is preferred: when the chosen property filter has a
/// lower or an upper bound the scan is done by range, otherwise it is done by
/// the value of the filter's equality expression. If no label+property index
/// is usable, the scan falls back to the best label index.
///
/// @param last_op Input operator; it is wrapped in a std::shared_ptr owned by
///     the returned scan.
/// @param db Accessor passed on to the index lookups.
/// @param node_symbol Symbol of the node being scanned.
/// @param context Supplies the already bound symbols and the graph view.
/// @param labels Labels of the node; must not be empty.
/// @param properties Property filters collected for the node.
/// @return Newly allocated scan operator.
ScanAll *GenScanByIndex(
    LogicalOperator *last_op, const GraphDbAccessor &db,
    const Symbol &node_symbol, const MatchContext &context,
    const std::set<GraphDbTypes::Label> &labels,
    const std::map<GraphDbTypes::Property, std::vector<Filters::PropertyFilter>>
        &properties) {
  debug_assert(!labels.empty(),
               "Without labels, indexed data cannot be scanned.");
  GraphDbTypes::Label chosen_label;
  std::pair<GraphDbTypes::Property, Filters::PropertyFilter> chosen_property;
  const bool has_label_property_index = FindBestLabelPropertyIndex(
      db, labels, properties, node_symbol, context.bound_symbols, chosen_label,
      chosen_property);
  if (!has_label_property_index) {
    // No usable label+property index, so rely on the label index alone (which
    // ought to exist).
    auto fallback_label = FindBestLabelIndex(db, labels);
    return new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
                              node_symbol, fallback_label, context.graph_view);
  }
  const auto &filter = chosen_property.second;
  const bool is_range_filter = filter.lower_bound || filter.upper_bound;
  if (is_range_filter) {
    return new ScanAllByLabelPropertyRange(
        std::shared_ptr<LogicalOperator>(last_op), node_symbol, chosen_label,
        chosen_property.first, filter.lower_bound, filter.upper_bound,
        context.graph_view);
  }
  debug_assert(filter.expression,
               "Property filter should either have bounds or an expression.");
  return new ScanAllByLabelPropertyValue(
      std::shared_ptr<LogicalOperator>(last_op), node_symbol, chosen_label,
      chosen_property.first, filter.expression, context.graph_view);
}
LogicalOperator *PlanMatching(const Matching &matching,
LogicalOperator *input_op,
PlanningContext &planning_ctx,
@ -743,30 +778,14 @@ LogicalOperator *PlanMatching(const Matching &matching,
last_op = new ScanAll(std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, context.graph_view);
} else {
// With labels, we can scan indexed data. First, try to see if we can
// use label+property index. If not, use just the label index (which
// ought to exist).
GraphDbTypes::Label best_label;
std::pair<GraphDbTypes::Property, Filters::PropertyFilter>
best_property;
// With labels, we can scan indexed data.
auto properties =
FindOr(matching.filters.property_filters(), node1_symbol,
std::map<GraphDbTypes::Property,
std::vector<Filters::PropertyFilter>>())
.first;
if (FindBestLabelPropertyIndex(planning_ctx.db, labels, properties,
node1_symbol, bound_symbols, best_label,
best_property)) {
last_op = new ScanAllByLabelPropertyValue(
std::shared_ptr<LogicalOperator>(last_op), node1_symbol,
best_label, best_property.first, best_property.second.expression,
context.graph_view);
} else {
auto label = FindBestLabelIndex(planning_ctx.db, labels);
last_op =
new ScanAllByLabel(std::shared_ptr<LogicalOperator>(last_op),
node1_symbol, label, context.graph_view);
}
last_op = GenScanByIndex(last_op, planning_ctx.db, node1_symbol,
context, labels, properties);
}
context.new_symbols.emplace_back(node1_symbol);
last_op = GenFilters(last_op, bound_symbols, all_filters, storage);
@ -855,6 +874,7 @@ auto GenMerge(query::Merge &merge, LogicalOperator *input_op,
// and properties to be used with indexing. Note that all filters are never
// updated here, but only labels and properties are.
void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
using Bound = ScanAllByLabelPropertyRange::Bound;
auto get_property_lookup = [](auto *maybe_lookup, auto *&prop_lookup,
auto *&ident) {
return (prop_lookup = dynamic_cast<PropertyLookup *>(maybe_lookup)) &&
@ -870,6 +890,28 @@ void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
.emplace_back(PropertyFilter{collector.symbols_, val_expr});
}
};
auto add_prop_greater = [&](auto *expr1, auto *expr2, auto bound_type) {
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (get_property_lookup(expr1, prop_lookup, ident)) {
// n.prop > value
UsedSymbolsCollector collector(symbol_table);
expr2->Accept(collector);
auto prop_filter = PropertyFilter{collector.symbols_};
prop_filter.lower_bound = Bound{expr2, bound_type};
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
.emplace_back(std::move(prop_filter));
}
if (get_property_lookup(expr2, prop_lookup, ident)) {
// value > n.prop
UsedSymbolsCollector collector(symbol_table);
expr1->Accept(collector);
auto prop_filter = PropertyFilter{collector.symbols_};
prop_filter.upper_bound = Bound{expr1, bound_type};
property_filters_[symbol_table.at(*ident)][prop_lookup->property_]
.emplace_back(std::move(prop_filter));
}
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
if (auto *and_op = dynamic_cast<AndOperator *>(expr)) {
@ -898,9 +940,25 @@ void Filters::AnalyzeFilter(Expression *expr, const SymbolTable &symbol_table) {
add_prop_equal(eq->expression1_, eq->expression2_);
// And reversed.
add_prop_equal(eq->expression2_, eq->expression1_);
} else if (auto *gt = dynamic_cast<GreaterOperator *>(expr)) {
add_prop_greater(gt->expression1_, gt->expression2_,
Bound::Type::EXCLUSIVE);
} else if (auto *ge = dynamic_cast<GreaterEqualOperator *>(expr)) {
add_prop_greater(ge->expression1_, ge->expression2_,
Bound::Type::INCLUSIVE);
} else if (auto *lt = dynamic_cast<LessOperator *>(expr)) {
// Like greater, but in reverse.
add_prop_greater(lt->expression2_, lt->expression1_,
Bound::Type::EXCLUSIVE);
} else if (auto *le = dynamic_cast<LessEqualOperator *>(expr)) {
// Like greater equal, but in reverse.
add_prop_greater(le->expression2_, le->expression1_,
Bound::Type::INCLUSIVE);
}
// TODO: Collect potential property indexing by range.
return;
// TODO: Collect comparisons like `expr1 < n.prop < expr2` for potential
// indexing by range. Note, that the generated Ast uses AND for chained
// relation operators. Therefore, `expr1 < n.prop < expr2` will be represented
// as `expr1 < n.prop AND n.prop < expr2`.
}
void Filters::CollectPatternFilters(Pattern &pattern,

View File

@ -462,6 +462,12 @@ auto GetMerge(AstTreeStorage &storage, Pattern *pattern, OnMatch on_match,
// The macros below build AST nodes by calling `Create` on a variable named
// `storage`, which therefore has to be in scope wherever they are expanded.

// Creates a query::AdditionOperator over the two operands.
#define ADD(expr1, expr2) \
storage.Create<query::AdditionOperator>((expr1), (expr2))
// Creates a query::LessOperator, i.e. `expr1 < expr2`.
#define LESS(expr1, expr2) storage.Create<query::LessOperator>((expr1), (expr2))
// Creates a query::LessEqualOperator, i.e. `expr1 <= expr2`.
#define LESS_EQ(expr1, expr2) \
storage.Create<query::LessEqualOperator>((expr1), (expr2))
// Creates a query::GreaterOperator, i.e. `expr1 > expr2`.
#define GREATER(expr1, expr2) \
storage.Create<query::GreaterOperator>((expr1), (expr2))
// Creates a query::GreaterEqualOperator, i.e. `expr1 >= expr2`.
#define GREATER_EQ(expr1, expr2) \
storage.Create<query::GreaterEqualOperator>((expr1), (expr2))
// Creates a query::Aggregation over `expr` using the SUM operation.
#define SUM(expr) \
storage.Create<query::Aggregation>((expr), query::Aggregation::Op::SUM)
#define COUNT(expr) \

View File

@ -19,6 +19,7 @@ using query::Symbol;
using query::SymbolTable;
using query::SymbolGenerator;
using Direction = query::EdgeAtom::Direction;
using Bound = ScanAllByLabelPropertyRange::Bound;
namespace {
@ -120,8 +121,6 @@ using ExpectCreateExpand = OpChecker<CreateExpand>;
using ExpectDelete = OpChecker<Delete>;
using ExpectScanAll = OpChecker<ScanAll>;
using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>;
using ExpectScanAllByLabelPropertyRange =
OpChecker<ScanAllByLabelPropertyRange>;
using ExpectExpand = OpChecker<Expand>;
using ExpectFilter = OpChecker<Filter>;
using ExpectProduce = OpChecker<Produce>;
@ -233,6 +232,41 @@ class ExpectScanAllByLabelPropertyValue
query::Expression *expression_;
};
/// Checks that a planned ScanAllByLabelPropertyRange scans the expected label
/// and property, and that its bounds match the expected ones exactly.
///
/// An expected bound which is not set means the planned operator must not have
/// that bound either; otherwise a plan with an unexpected extra bound would
/// silently pass the check.
class ExpectScanAllByLabelPropertyRange
    : public OpChecker<ScanAllByLabelPropertyRange> {
 public:
  /// @param label Label the scan is expected to use.
  /// @param property Property the scan is expected to use.
  /// @param lower_bound Expected lower bound, or nullopt if none is expected.
  /// @param upper_bound Expected upper bound, or nullopt if none is expected.
  ExpectScanAllByLabelPropertyRange(
      GraphDbTypes::Label label, GraphDbTypes::Property property,
      std::experimental::optional<Bound> lower_bound,
      std::experimental::optional<Bound> upper_bound)
      : label_(label),
        property_(property),
        lower_bound_(lower_bound),
        upper_bound_(upper_bound) {}

  void ExpectOp(ScanAllByLabelPropertyRange &scan_all,
                const SymbolTable &) override {
    EXPECT_EQ(scan_all.label(), label_);
    EXPECT_EQ(scan_all.property(), property_);
    if (lower_bound_) {
      // ASSERT aborts the check here, since dereferencing a missing bound
      // below would be invalid.
      ASSERT_TRUE(scan_all.lower_bound());
      EXPECT_EQ(scan_all.lower_bound()->value(), lower_bound_->value());
      EXPECT_EQ(scan_all.lower_bound()->type(), lower_bound_->type());
    } else {
      // No lower bound expected, so the plan must not introduce one.
      EXPECT_FALSE(scan_all.lower_bound());
    }
    if (upper_bound_) {
      ASSERT_TRUE(scan_all.upper_bound());
      EXPECT_EQ(scan_all.upper_bound()->value(), upper_bound_->value());
      EXPECT_EQ(scan_all.upper_bound()->type(), upper_bound_->type());
    } else {
      // No upper bound expected, so the plan must not introduce one.
      EXPECT_FALSE(scan_all.upper_bound());
    }
  }

 private:
  GraphDbTypes::Label label_;
  GraphDbTypes::Property property_;
  std::experimental::optional<Bound> lower_bound_;
  std::experimental::optional<Bound> upper_bound_;
};
class ExpectCreateIndex : public OpChecker<CreateIndex> {
public:
ExpectCreateIndex(GraphDbTypes::Label label, GraphDbTypes::Property property)
@ -1100,4 +1134,75 @@ TEST(TestLogicalPlanner, MultiPropertyIndexScan) {
ExpectFilter(), ExpectProduce());
}
// Verifies that a single relational comparison between an indexed property
// and a literal is planned as a scan by property range, with the literal as
// the lower or the upper bound depending on the side and the operator.
TEST(TestLogicalPlanner, WhereIndexedLabelPropertyRange) {
// Test MATCH (n :label) WHERE n.property REL_OP 42 RETURN n
// REL_OP is one of: `<`, `<=`, `>`, `>=`
Dbms dbms;
auto dba = dbms.active();
auto label = dba->label("label");
auto property = dba->property("property");
// Build the label+property index which the planned range scan should use.
dba->BuildIndex(label, property);
dba = dbms.active();
// The literal and the property lookup are created once in this outer storage
// and reused as operands of every relational expression below.
AstTreeStorage storage;
auto lit_42 = LITERAL(42);
auto n_prop = PROPERTY_LOOKUP("n", property);
// Plans `MATCH (n :label) WHERE rel_expr RETURN n` and checks that the plan is
// a range scan with the given bounds, followed by a filter and a produce.
auto check_planned_range = [&label, &property, &dba](
const auto &rel_expr, auto lower_bound, auto upper_bound) {
// Shadow the first storage, so that the query is created in this one.
AstTreeStorage storage;
QUERY(MATCH(PATTERN(NODE("n", label))), WHERE(rel_expr), RETURN("n"));
auto symbol_table = MakeSymbolTable(*storage.query());
auto plan = MakeLogicalPlan<RuleBasedPlanner>(storage, symbol_table, *dba);
CheckPlan(*plan, symbol_table,
ExpectScanAllByLabelPropertyRange(label, property, lower_bound,
upper_bound),
ExpectFilter(), ExpectProduce());
};
{
// Test relation operators which form an upper bound for range.
// Each pair holds the comparison and the bound type it should produce:
// strict comparisons are EXCLUSIVE, the `=` variants are INCLUSIVE.
std::vector<std::pair<query::Expression *, Bound::Type>> upper_bound_rel_op{
std::make_pair(LESS(n_prop, lit_42), Bound::Type::EXCLUSIVE),
std::make_pair(LESS_EQ(n_prop, lit_42), Bound::Type::INCLUSIVE),
std::make_pair(GREATER(lit_42, n_prop), Bound::Type::EXCLUSIVE),
std::make_pair(GREATER_EQ(lit_42, n_prop), Bound::Type::INCLUSIVE)};
for (const auto &rel_op : upper_bound_rel_op) {
check_planned_range(rel_op.first, std::experimental::nullopt,
Bound(lit_42, rel_op.second));
}
}
{
// Test relation operators which form a lower bound for range.
// These are the same comparisons as above with the operands swapped.
std::vector<std::pair<query::Expression *, Bound::Type>> lower_bound_rel_op{
std::make_pair(LESS(lit_42, n_prop), Bound::Type::EXCLUSIVE),
std::make_pair(LESS_EQ(lit_42, n_prop), Bound::Type::INCLUSIVE),
std::make_pair(GREATER(n_prop, lit_42), Bound::Type::EXCLUSIVE),
std::make_pair(GREATER_EQ(n_prop, lit_42), Bound::Type::INCLUSIVE)};
for (const auto &rel_op : lower_bound_rel_op) {
check_planned_range(rel_op.first, Bound(lit_42, rel_op.second),
std::experimental::nullopt);
}
}
}
// Verifies that comparing a property with a property of the same node does not
// make the planner pick the label+property index.
TEST(TestLogicalPlanner, UnableToUsePropertyIndex) {
  // Query under test:
  //   MATCH (n :label) WHERE n.property = n.property RETURN n
  Dbms dbms;
  auto dba = dbms.active();
  auto indexed_label = dba->label("label");
  auto indexed_property = dba->property("property");
  dba->BuildIndex(indexed_label, indexed_property);
  // NOTE(review): the accessor is re-acquired after building the index,
  // presumably so that the new index is visible to the planner -- confirm.
  dba = dbms.active();
  AstTreeStorage storage;
  QUERY(MATCH(PATTERN(NODE("n", indexed_label))),
        WHERE(EQ(PROPERTY_LOOKUP("n", indexed_property),
                 PROPERTY_LOOKUP("n", indexed_property))),
        RETURN("n"));
  auto table = MakeSymbolTable(*storage.query());
  auto logical_plan = MakeLogicalPlan<RuleBasedPlanner>(storage, table, *dba);
  // Both sides of the comparison depend on the very node being scanned, so
  // the expected plan is a plain scan by label followed by the filter.
  CheckPlan(*logical_plan, table, ExpectScanAllByLabel(), ExpectFilter(),
            ExpectProduce());
}
} // namespace