Add plan for label+property indexed node where property is not null (#2)

Replace ScanAll + Filter with a ScanAll variant performing label and property
lookup.

Co-authored-by: jseljan <josip.seljan@memgraph.io>
This commit is contained in:
Josip Seljan 2020-10-01 13:22:21 +02:00 committed by GitHub
parent c12e4a49b1
commit f7f861ca71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 238 additions and 2 deletions

View File

@ -234,6 +234,11 @@ class DbAccessor final {
return VerticesIterable(accessor_->Vertices(label, view));
}
/// Returns the vertices for `view` by forwarding `label` and `property` to the
/// storage accessor's `Vertices(label, property, view)` overload and wrapping
/// the result in a VerticesIterable. Unlike the overload that follows, no
/// property value is passed along.
/// NOTE(review): presumably this is served by the label+property index and
/// yields only vertices that have `property` set -- confirm against storage.
VerticesIterable Vertices(storage::View view, storage::LabelId label,
storage::PropertyId property) {
return VerticesIterable(accessor_->Vertices(label, property, view));
}
VerticesIterable Vertices(storage::View view, storage::LabelId label,
storage::PropertyId property,
const storage::PropertyValue &value) {

View File

@ -44,6 +44,7 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
static constexpr double kScanAllByLabel{1.1};
static constexpr double MakeScanAllByLabelPropertyValue{1.1};
static constexpr double MakeScanAllByLabelPropertyRange{1.1};
// Cost parameter passed to IncrementCost() when a ScanAllByLabelProperty
// operator is post-visited; it has the same value as the other ScanAllByLabel*
// parameters declared just above.
static constexpr double MakeScanAllByLabelProperty{1.1};
static constexpr double kExpand{2.0};
static constexpr double kExpandVariable{3.0};
static constexpr double kFilter{1.5};
@ -134,6 +135,14 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
return true;
}
/// Accounts for a ScanAllByLabelProperty operator: the running cardinality is
/// multiplied by the vertex count the database accessor reports for the
/// operator's (label, property) pair, after which the operator's cost
/// parameter is charged.
///
/// @return always true.
bool PostVisit(ScanAllByLabelProperty &logical_op) override {
  // Scale the cardinality before charging the cost, in that order.
  cardinality_ *= db_accessor_->VerticesCount(logical_op.label_,
                                              logical_op.property_);
  IncrementCost(CostParam::MakeScanAllByLabelProperty);
  return true;
}
// TODO: Cost estimate ScanAllById?
// For the given op first increments the cardinality and then cost.

View File

@ -499,6 +499,28 @@ UniqueCursorPtr ScanAllByLabelPropertyValue::MakeCursor(
mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
}
/// Constructs a scan that, per the operator's documentation, produces only
/// vertices with the given label and property.
///
/// @param input          operator whose output feeds this scan; forwarded to
///                       the ScanAll base.
/// @param output_symbol  symbol the scanned vertex is bound to; forwarded to
///                       the ScanAll base.
/// @param label          label the vertices are looked up by.
/// @param property       property the vertices are looked up by.
/// @param property_name  name of `property`, stored alongside its id.
/// @param view           storage view forwarded to the ScanAll base.
ScanAllByLabelProperty::ScanAllByLabelProperty(
    const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol,
    storage::LabelId label, storage::PropertyId property,
    const std::string &property_name, storage::View view)
    : ScanAll(input, output_symbol, view),
      label_(label),
      property_(property),
      property_name_(property_name),
      // The class definition also declares an `Expression *` member for which
      // this constructor receives no argument. Null it explicitly so that any
      // code reading the member (it takes part in the class's SLK save/load
      // and clone support) never observes an indeterminate pointer.
      expression_(nullptr) {}
ACCEPT_WITH_INPUT(ScanAllByLabelProperty)
/// Creates the cursor that executes this operator.
///
/// The cursor is a ScanAllCursor built from `mem`, the output symbol, the
/// input operator's cursor and a callable that asks the execution context's
/// database accessor for the vertices matching this operator's view, label
/// and property.
///
/// @param mem memory resource handed to the input cursor and to the cursor
///            factory.
UniqueCursorPtr ScanAllByLabelProperty::MakeCursor(
    utils::MemoryResource *mem) const {
  // The frame is not needed: the lookup depends only on the operator's own
  // members and on the accessor found in the execution context.
  auto fetch_vertices = [this](Frame & /*frame*/, ExecutionContext &ctx) {
    return std::make_optional(
        ctx.db_accessor->Vertices(view_, label_, property_));
  };
  using VerticesFun = decltype(fetch_vertices);
  return MakeUniqueCursorPtr<ScanAllCursor<VerticesFun>>(
      mem, output_symbol_, input_->MakeCursor(mem), std::move(fetch_vertices));
}
ScanAllById::ScanAllById(const std::shared_ptr<LogicalOperator> &input,
Symbol output_symbol, Expression *expression,
storage::View view)

View File

@ -90,6 +90,7 @@ class ScanAll;
class ScanAllByLabel;
class ScanAllByLabelPropertyRange;
class ScanAllByLabelPropertyValue;
class ScanAllByLabelProperty;
class ScanAllById;
class Expand;
class ExpandVariable;
@ -118,7 +119,8 @@ class CallProcedure;
using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor<
Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel,
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue, ScanAllById,
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue,
ScanAllByLabelProperty, ScanAllById,
Expand, ExpandVariable, ConstructNamedPath, Filter, Produce, Delete,
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels,
EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, OrderBy, Merge,
@ -732,6 +734,37 @@ property value.
(:serialize (:slk))
(:clone))
;; Logical operator derived from scan-all that additionally stores a label, a
;; property id and the property's name. It is serialized with SLK and is
;; clonable (see the :serialize and :clone options at the end).
(lcp:define-class scan-all-by-label-property (scan-all)
((label "::storage::LabelId" :scope :public)
(property "::storage::PropertyId" :scope :public)
(property-name "std::string" :scope :public)
;; NOTE(review): neither constructor declared in the :public section below
;; takes an Expression, so nothing visible here supplies a value for this
;; slot, yet it takes part in SLK save/load. Presumably carried over from a
;; sibling scan operator -- TODO confirm whether it is needed at all.
(expression "Expression *" :scope :public
:slk-save #'slk-save-ast-pointer
:slk-load (slk-load-ast-pointer "Expression")))
(:documentation
"Behaves like @c ScanAll, but this operator produces only vertices with
given label and property.
@sa ScanAll
@sa ScanAllByLabelPropertyRange
@sa ScanAllByLabelPropertyValue")
(:public
#>cpp
ScanAllByLabelProperty() {}
ScanAllByLabelProperty(const std::shared_ptr<LogicalOperator> &input,
Symbol output_symbol, storage::LabelId label,
storage::PropertyId property,
const std::string &property_name,
storage::View view = storage::View::OLD);
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
cpp<#)
(:serialize (:slk))
(:clone))
(lcp:define-class scan-all-by-id (scan-all)
((expression "Expression *" :scope :public
:slk-save #'slk-save-ast-pointer

View File

@ -180,6 +180,17 @@ PropertyFilter::PropertyFilter(
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
}
/// Constructs a filter that carries no value expression and no bounds: only
/// the symbol, the looked-up property and the filter type are stored.
///
/// @param symbol    symbol whose property is looked up.
/// @param property  the property being looked up.
/// @param type      kind of the filter, e.g. Type::IS_NOT_NULL.
PropertyFilter::PropertyFilter(const Symbol &symbol, PropertyIx property,
Type type)
: symbol_(symbol), property_(property), type_(type) {
// This constructor is used for property filters whose filter expression
// never has to be evaluated, so is_symbol_in_value_ is meant to stay
// false, even though the filter expression may actually contain the
// symbol whose property is being looked up.
// NOTE(review): nothing is assigned here; this relies on
// is_symbol_in_value_ defaulting to false in the class definition --
// TODO confirm.
}
IdFilter::IdFilter(const SymbolTable &symbol_table, const Symbol &symbol,
Expression *value)
: symbol_(symbol), value_(value) {
@ -445,6 +456,37 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
}
return false;
};
// Checks whether maybe_prop_not_null_check is the null check on a property,
// ("prop IS NOT NULL"), stores it as a PropertyFilter if it is, and returns
// true. If it isn't returns false.
auto add_prop_is_not_null_check = [&](auto *maybe_is_not_null_check) -> bool {
// Strip away the outer NOT operator, and figure out
// whether the inner expression is of the form "prop IS NULL"
if (!maybe_is_not_null_check) {
return false;
}
auto *maybe_is_null_check =
utils::Downcast<IsNullOperator>(maybe_is_not_null_check->expression_);
if (!maybe_is_null_check) {
return false;
}
PropertyLookup *prop_lookup = nullptr;
Identifier *ident = nullptr;
if (!get_property_lookup(maybe_is_null_check->expression_, prop_lookup,
ident)) {
return false;
}
auto filter = make_filter(FilterInfo::Type::Property);
filter.property_filter =
PropertyFilter(symbol_table.at(*ident), prop_lookup->property_,
PropertyFilter::Type::IS_NOT_NULL);
all_filters_.emplace_back(filter);
return true;
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
DCHECK(!utils::IsSubtype(*expr, AndOperator::kType))
@ -515,6 +557,10 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
if (!add_prop_in_list(in->expression1_, in->expression2_)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *is_not_null = utils::Downcast<NotOperator>(expr)) {
if (!add_prop_is_not_null_check(is_not_null)) {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else {
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}

View File

@ -96,7 +96,7 @@ class PropertyFilter {
/// Depending on type, this PropertyFilter may be a value equality, regex
/// matched value, a range with lower and (or) upper bounds, an IN list filter
/// or an IS NOT NULL check.
enum class Type { EQUAL, REGEX_MATCH, RANGE, IN };
enum class Type { EQUAL, REGEX_MATCH, RANGE, IN, IS_NOT_NULL };
/// Construct with Expression being the equality or regex match check.
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx, Expression *,
@ -104,6 +104,11 @@ class PropertyFilter {
/// Construct the range based filter.
PropertyFilter(const SymbolTable &, const Symbol &, PropertyIx,
const std::optional<Bound> &, const std::optional<Bound> &);
/// Construct a filter without an expression that produces a value.
/// Used for the "PROP IS NOT NULL" filter, and can be used for any
/// property filter that doesn't need to use an expression to produce
/// values that should be filtered further.
PropertyFilter(const Symbol &, PropertyIx, Type);
/// Symbol whose property is looked up.
Symbol symbol_;

View File

@ -70,6 +70,16 @@ bool PlanPrinter::PreVisit(query::plan::ScanAllByLabelPropertyRange &op) {
return true;
}
/// Prints a single line describing the operator: its name followed by the
/// output symbol's name, the label's name and the property's name.
///
/// @return always true.
bool PlanPrinter::PreVisit(query::plan::ScanAllByLabelProperty &op) {
  WithPrintLn([&](auto &out) {
    // Emitted piecewise; the pieces appear in the same order as the fields of
    // the pattern "(symbol :Label {property})".
    out << "* ScanAllByLabelProperty";
    out << " (" << op.output_symbol_.name();
    out << " :" << dba_->LabelToName(op.label_);
    out << " {" << dba_->PropertyToName(op.property_) << "})";
  });
  return true;
}
bool PlanPrinter::PreVisit(ScanAllById &op) {
WithPrintLn([&](auto &out) {
out << "* ScanAllById"
@ -466,6 +476,20 @@ bool PlanToJsonVisitor::PreVisit(ScanAllByLabelPropertyValue &op) {
return false;
}
/// Builds the JSON description of a ScanAllByLabelProperty operator (name,
/// label, property, output symbol and the description of its input) and
/// stores it in output_.
///
/// @return always false; the input operator is visited explicitly here.
bool PlanToJsonVisitor::PreVisit(ScanAllByLabelProperty &op) {
  json description;
  description["name"] = "ScanAllByLabelProperty";
  description["label"] = ToJson(op.label_, *dba_);
  description["property"] = ToJson(op.property_, *dba_);
  description["output_symbol"] = ToJson(op.output_symbol_);

  // Visit the input first, then collect what that visit produced.
  op.input_->Accept(*this);
  description["input"] = PopOutput();

  output_ = std::move(description);
  return false;
}
bool PlanToJsonVisitor::PreVisit(ScanAllById &op) {
json self;
self["name"] = "ScanAllById";

View File

@ -58,6 +58,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor {
bool PreVisit(ScanAllByLabel &) override;
bool PreVisit(ScanAllByLabelPropertyValue &) override;
bool PreVisit(ScanAllByLabelPropertyRange &) override;
bool PreVisit(ScanAllByLabelProperty &) override;
bool PreVisit(ScanAllById &) override;
bool PreVisit(Expand &) override;
@ -183,6 +184,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor {
bool PreVisit(ScanAllByLabel &) override;
bool PreVisit(ScanAllByLabelPropertyRange &) override;
bool PreVisit(ScanAllByLabelPropertyValue &) override;
bool PreVisit(ScanAllByLabelProperty &) override;
bool PreVisit(ScanAllById &) override;
bool PreVisit(Produce &) override;

View File

@ -258,6 +258,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
return true;
}
// Appends a pointer to the visited ScanAllByLabelProperty operator to
// prev_ops_ and returns true. The operator itself is left untouched; the
// matching PostVisit removes the entry again.
bool PreVisit(ScanAllByLabelProperty &op) override {
prev_ops_.push_back(&op);
return true;
}
// Removes the last entry of prev_ops_ (the one pushed by the matching
// PreVisit for ScanAllByLabelProperty) and returns true.
bool PostVisit(ScanAllByLabelProperty &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ScanAllById &op) override {
prev_ops_.push_back(&op);
return true;
@ -626,6 +635,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
std::move(unwind_operator), node_symbol,
GetLabel(found_index->label), GetProperty(prop_filter.property_),
prop_filter.property_.name, expression, view);
} else if (prop_filter.type_ == PropertyFilter::Type::IS_NOT_NULL) {
return std::make_unique<ScanAllByLabelProperty>(
input, node_symbol, GetLabel(found_index->label),
GetProperty(prop_filter.property_), prop_filter.property_.name,
view);
} else {
CHECK(prop_filter.value_) << "Property filter should either have "
"bounds or a value expression.";

View File

@ -731,6 +731,61 @@ TYPED_TEST(TestPlanner, MatchWhereBeforeExpand) {
ExpectExpand(), ExpectProduce());
}
// Checks how "n.prop IS NOT NULL" in a WHERE clause is planned when index
// counts are registered for both the label and the (label, prop) pair. Three
// queries are planned: the bare check, the check OR-ed with a literal, and the
// check AND-ed with an equality on another property.
TYPED_TEST(TestPlanner, MatchFilterPropIsNotNull) {
FakeDbAccessor dba;
auto label = dba.Label("label");
auto prop = PROPERTY_PAIR("prop");
dba.SetIndexCount(label, 1);
dba.SetIndexCount(label, prop.second, 1);
AstStorage storage;
{
// Test MATCH (n :label) -[r]- (m) WHERE n.prop IS NOT NULL RETURN n
auto *query = QUERY(SINGLE_QUERY(
MATCH(PATTERN(NODE("n", "label"), EDGE("r"), NODE("m"))),
WHERE(NOT(IS_NULL(PROPERTY_LOOKUP("n", prop)))), RETURN("n")));
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
// We expect ScanAllByLabelProperty to come instead of ScanAll > Filter.
CheckPlan(planner.plan(), symbol_table,
ExpectScanAllByLabelProperty(label, prop), ExpectExpand(),
ExpectProduce());
}
{
// Test MATCH (n :label) -[r]- (m) WHERE n.prop IS NOT NULL OR true RETURN n
auto *query = QUERY(SINGLE_QUERY(
MATCH(PATTERN(NODE("n", "label"), EDGE("r"), NODE("m"))),
WHERE(OR(NOT(IS_NULL(PROPERTY_LOOKUP("n", prop))), LITERAL(true))),
RETURN("n")));
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
// We expect ScanAll > Filter because of the "or true" condition: the
// IS NOT NULL check sits under an OR, so no index scan is expected.
CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(),
ExpectExpand(), ExpectProduce());
}
{
// Test MATCH (n :label) -[r]- (m)
// WHERE n.prop IS NOT NULL AND n.x = 2 RETURN n
auto prop_x = PROPERTY_PAIR("x");
auto *query = QUERY(
SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"), EDGE("r"), NODE("m"))),
WHERE(AND(NOT(IS_NULL(PROPERTY_LOOKUP("n", prop))),
EQ(PROPERTY_LOOKUP("n", prop_x), LITERAL(2)))),
RETURN("n")));
auto symbol_table = query::MakeSymbolTable(query);
auto planner = MakePlanner<TypeParam>(&dba, storage, symbol_table, query);
// We expect ScanAllByLabelProperty > Filter
// to come instead of ScanAll > Filter.
CheckPlan(planner.plan(), symbol_table,
ExpectScanAllByLabelProperty(label, prop), ExpectFilter(),
ExpectExpand(), ExpectProduce());
}
}
TYPED_TEST(TestPlanner, MultiMatchWhere) {
// Test MATCH (n) -[r]- (m) MATCH (l) WHERE n.prop < 42 RETURN n
FakeDbAccessor dba;

View File

@ -51,6 +51,7 @@ class PlanChecker : public virtual HierarchicalLogicalOperatorVisitor {
PRE_VISIT(ScanAllByLabel);
PRE_VISIT(ScanAllByLabelPropertyValue);
PRE_VISIT(ScanAllByLabelPropertyRange);
PRE_VISIT(ScanAllByLabelProperty);
PRE_VISIT(ScanAllById);
PRE_VISIT(Expand);
PRE_VISIT(ExpandVariable);
@ -309,6 +310,26 @@ class ExpectScanAllByLabelPropertyRange
std::optional<ScanAllByLabelPropertyRange::Bound> upper_bound_;
};
/// Plan checker for ScanAllByLabelProperty: verifies that the operator found
/// in the plan carries the expected label and property.
class ExpectScanAllByLabelProperty : public OpChecker<ScanAllByLabelProperty> {
 public:
  /// @param label      label the operator is expected to scan by.
  /// @param prop_pair  (name, id) pair of the expected property; only the id
  ///                   is kept.
  ExpectScanAllByLabelProperty(
      storage::LabelId label,
      const std::pair<std::string, storage::PropertyId> &prop_pair)
      : label_(label), property_(prop_pair.second) {}

  /// Compares the operator's label and property against the expected ones.
  /// The symbol table is not consulted.
  void ExpectOp(ScanAllByLabelProperty &op, const SymbolTable &) override {
    EXPECT_EQ(op.label_, label_);
    EXPECT_EQ(op.property_, property_);
  }

 private:
  storage::LabelId label_;
  storage::PropertyId property_;
};
class ExpectCartesian : public OpChecker<Cartesian> {
public:
ExpectCartesian(const std::list<std::unique_ptr<BaseOpChecker>> &left,