Optimize vertex lookup by ID

Reviewers: mferencevic, llugovic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2609
This commit is contained in:
Teon Banek 2019-12-23 12:31:35 +01:00
parent 591eadad20
commit d910813955
12 changed files with 191 additions and 15 deletions

View File

@ -1109,7 +1109,7 @@ NameToFunction(const std::string &function_name) {
if (function_name == "OUTDEGREE") return OutDegree;
if (function_name == "ENDNODE") return EndNode;
if (function_name == "HEAD") return Head;
if (function_name == "ID") return Id;
if (function_name == kId) return Id;
if (function_name == "LAST") return Last;
if (function_name == "PROPERTIES") return Properties;
if (function_name == "SIZE") return Size;

View File

@ -17,6 +17,7 @@ namespace {
const char kStartsWith[] = "STARTSWITH";
const char kEndsWith[] = "ENDSWITH";
const char kContains[] = "CONTAINS";
const char kId[] = "ID";
} // namespace
struct FunctionContext {

View File

@ -134,6 +134,8 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
return true;
}
// TODO: Cost estimate ScanAllById?
// For the given op first increments the cardinality and then cost.
#define POST_VISIT_CARD_FIRST(NAME) \
bool PostVisit(NAME &) override { \
@ -182,8 +184,6 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
bool Visit(Once &) override { return true; }
// TODO: Cost estimate PullRemote and ProduceRemote?
auto cost() const { return cost_; }
auto cardinality() const { return cardinality_; }

View File

@ -505,6 +505,34 @@ UniqueCursorPtr ScanAllByLabelPropertyValue::MakeCursor(
mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
}
/// Constructs a ScanAll variant that carries the expression used for the
/// vertex ID lookup.
///
/// @param input forwarded to the ScanAll base constructor.
/// @param output_symbol forwarded to the ScanAll base constructor.
/// @param expression stored in `expression_`; must not be null, which is
///     enforced by the CHECK below.
/// @param view forwarded to the ScanAll base constructor.
ScanAllById::ScanAllById(const std::shared_ptr<LogicalOperator> &input,
Symbol output_symbol, Expression *expression,
storage::View view)
: ScanAll(input, output_symbol, view), expression_(expression) {
CHECK(expression);
}
// NOTE(review): presumably expands to the definition of ScanAllById::Accept
// which also visits the input operator -- confirm against the macro.
ACCEPT_WITH_INPUT(ScanAllById)
/// Creates the cursor for looking up a vertex by the ID that `expression_`
/// evaluates to.
///
/// The lookup callback handed to ScanAllCursor returns either a vector holding
/// the single found vertex or std::nullopt. std::nullopt is returned when:
///  * the expression does not evaluate to a numeric value,
///  * it evaluates to a double which is not exactly representable as an
///    int64_t (fractional part, NaN, infinity or out of range), or
///  * `FindVertex` does not find a vertex with that ID in `view_`.
///
/// @param mem memory resource passed to the created cursor and to the cursor
///     of the input operator.
UniqueCursorPtr ScanAllById::MakeCursor(utils::MemoryResource *mem) const {
  auto vertices = [this](Frame &frame, ExecutionContext &context)
      -> std::optional<std::vector<VertexAccessor>> {
    auto *db = context.db_accessor;
    ExpressionEvaluator evaluator(&frame, context.symbol_table,
                                  context.evaluation_context,
                                  context.db_accessor, view_);
    auto value = expression_->Accept(evaluator);
    if (!value.IsNumeric()) return std::nullopt;
    int64_t id = 0;
    if (value.IsInt()) {
      id = value.ValueInt();
    } else {
      const double id_as_double = value.ValueDouble();
      // Converting a double that lies outside of the int64_t range (or is NaN)
      // to an integer is undefined behaviour, so such values are rejected
      // before the conversion. Both bounds (-2^63 and 2^63) are powers of two
      // and therefore exactly representable as doubles. The negated form also
      // rejects NaN, for which every comparison yields false.
      constexpr double kInt64LowerBound = -9223372036854775808.0;
      constexpr double kInt64UpperBound = 9223372036854775808.0;
      if (!(id_as_double >= kInt64LowerBound &&
            id_as_double < kInt64UpperBound)) {
        return std::nullopt;
      }
      id = static_cast<int64_t>(id_as_double);
      // A double with a fractional part cannot be equal to an integral ID.
      if (static_cast<double>(id) != id_as_double) return std::nullopt;
    }
    auto maybe_vertex = db->FindVertex(storage::Gid::FromInt(id), view_);
    if (!maybe_vertex) return std::nullopt;
    return std::vector<VertexAccessor>{*maybe_vertex};
  };
  return MakeUniqueCursorPtr<ScanAllCursor<decltype(vertices)>>(
      mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
}
namespace {
bool CheckExistingNode(const VertexAccessor &new_node,
const Symbol &existing_node_sym, Frame &frame) {

View File

@ -91,6 +91,7 @@ class ScanAll;
class ScanAllByLabel;
class ScanAllByLabelPropertyRange;
class ScanAllByLabelPropertyValue;
class ScanAllById;
class Expand;
class ExpandVariable;
class ConstructNamedPath;
@ -118,9 +119,9 @@ class CallProcedure;
using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor<
Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel,
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue, Expand,
ExpandVariable, ConstructNamedPath, Filter, Produce, Delete, SetProperty,
SetProperties, SetLabels, RemoveProperty, RemoveLabels,
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue, ScanAllById,
Expand, ExpandVariable, ConstructNamedPath, Filter, Produce, Delete,
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels,
EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, OrderBy, Merge,
Optional, Unwind, Distinct, Union, Cartesian, CallProcedure>;
@ -732,6 +733,25 @@ property value.
(:serialize (:slk))
(:clone))
;; LCP definition of the ScanAllById operator. It derives from scan-all and
;; adds a single public member: the expression evaluated to get the vertex ID.
(lcp:define-class scan-all-by-id (scan-all)
;; The expression is an AST pointer, so SLK (de)serialization goes through the
;; AST pointer save/load helpers.
((expression "Expression *" :scope :public
:slk-save #'slk-save-ast-pointer
:slk-load (slk-load-ast-pointer "Expression")))
(:documentation
"ScanAll producing a single node with ID equal to evaluated expression")
(:public
#>cpp
/// Default constructor with an empty body.
ScanAllById() {}
/// Constructs the operator from its input, output symbol and the expression
/// for the ID; `view` defaults to storage::View::OLD.
ScanAllById(const std::shared_ptr<LogicalOperator> &input,
Symbol output_symbol, Expression *expression,
storage::View view = storage::View::OLD);
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
cpp<#)
(:serialize (:slk))
(:clone))
(lcp:define-struct expand-common ()
(
;; info on what's getting expanded

View File

@ -180,6 +180,15 @@ PropertyFilter::PropertyFilter(
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
}
/// Builds an ID filter for `symbol` compared against the expression `value`.
///
/// `value` must not be null (enforced by CHECK). The symbols used inside of
/// `value` are collected in order to record in `is_symbol_in_value_` whether
/// `symbol` itself is one of them.
IdFilter::IdFilter(const SymbolTable &symbol_table, const Symbol &symbol,
                   Expression *value)
    : symbol_(symbol), value_(value) {
  CHECK(value);
  UsedSymbolsCollector value_symbols(symbol_table);
  value->Accept(value_symbols);
  is_symbol_in_value_ = utils::Contains(value_symbols.symbols_, symbol);
}
void Filters::EraseFilter(const FilterInfo &filter) {
// TODO: Ideally, we want to determine the equality of both expression trees,
// instead of a simple pointer compare.
@ -406,6 +415,20 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
}
return is_prop_filter;
};
// Check if maybe_id_fun is ID invocation on an indentifier and add it as
// IdFilter.
auto add_id_equal = [&](auto *maybe_id_fun, auto *val_expr) -> bool {
auto *id_fun = utils::Downcast<Function>(maybe_id_fun);
if (!id_fun) return false;
if (id_fun->function_name_ != kId) return false;
if (id_fun->arguments_.size() != 1U) return false;
auto *ident = utils::Downcast<Identifier>(id_fun->arguments_.front());
if (!ident) return false;
auto filter = make_filter(FilterInfo::Type::Id);
filter.id_filter.emplace(symbol_table, symbol_table.at(*ident), val_expr);
all_filters_.emplace_back(filter);
return true;
};
// We are only interested to see the insides of And, because Or prevents
// indexing since any labels and properties found there may be optional.
DCHECK(!utils::IsSubtype(*expr, AndOperator::kType))
@ -435,8 +458,11 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
bool is_prop_filter = add_prop_equal(eq->expression1_, eq->expression2_);
// And reversed.
is_prop_filter |= add_prop_equal(eq->expression2_, eq->expression1_);
if (!is_prop_filter) {
// No PropertyFilter was added, so just store a generic filter.
// Try to get ID equality filter.
bool is_id_filter = add_id_equal(eq->expression1_, eq->expression2_);
is_id_filter |= add_id_equal(eq->expression2_, eq->expression1_);
if (!is_prop_filter && !is_id_filter) {
// No special filter was added, so just store a generic filter.
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
}
} else if (auto *regex_match = utils::Downcast<RegexMatch>(expr)) {

View File

@ -105,16 +105,30 @@ class PropertyFilter {
std::optional<Bound> upper_bound_{};
};
/// Filtering by ID, for example `MATCH (n) WHERE id(n) = 42 ...`
///
/// Stores the symbol whose ID is compared, the expression it is compared to
/// and whether that expression uses the very same symbol.
class IdFilter {
public:
/// Construct with Expression being the required value for ID.
/// The arguments are the symbol table, the symbol whose ID is looked up and
/// the expression for the required ID value, in that order.
IdFilter(const SymbolTable &, const Symbol &, Expression *);
/// Symbol whose id is looked up.
Symbol symbol_;
/// Expression which when evaluated produces the value an ID must satisfy.
Expression *value_;
/// True if the same symbol is used in expressions for value.
bool is_symbol_in_value_{false};
};
/// Stores additional information for a filter expression.
struct FilterInfo {
/// A FilterInfo can be a generic filter expression or a specific filtering
/// applied for labels or a property. Non generic types contain extra
/// information which can be used to produce indexed scans of graph
/// elements.
enum class Type { Generic, Label, Property };
enum class Type { Generic, Label, Property, Id };
Type type;
/// The filter expression which must be satisfied.
/// The original filter expression which must be satisfied.
Expression *expression;
/// Set of used symbols by the filter @c expression.
std::unordered_set<Symbol> used_symbols;
@ -122,6 +136,8 @@ struct FilterInfo {
std::vector<LabelIx> labels;
/// Property information for Type::Property filtering.
std::optional<PropertyFilter> property_filter;
/// Information for Type::Id filtering.
std::optional<IdFilter> id_filter;
};
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
@ -185,6 +201,18 @@ class Filters final {
return filters;
}
/// Return a vector of FilterInfo for ID equality filtering.
auto IdFilters(const Symbol &symbol) const {
std::vector<FilterInfo> filters;
for (const auto &filter : all_filters_) {
if (filter.type == FilterInfo::Type::Id &&
filter.id_filter->symbol_ == symbol) {
filters.push_back(filter);
}
}
return filters;
}
/// Collects filtering information from a pattern.
///
/// Goes through all the atoms in a pattern and generates filter expressions

View File

@ -70,6 +70,14 @@ bool PlanPrinter::PreVisit(query::plan::ScanAllByLabelPropertyRange &op) {
return true;
}
/// Prints the line for a ScanAllById operator: the operator name followed by
/// the name of its output symbol in parentheses. Always returns true.
bool PlanPrinter::PreVisit(ScanAllById &op) {
  WithPrintLn([&op](auto &out) {
    out << "* ScanAllById";
    out << " (" << op.output_symbol_.name() << ")";
  });
  return true;
}
bool PlanPrinter::PreVisit(query::plan::Expand &op) {
WithPrintLn([&](auto &out) {
*out_ << "* Expand (" << op.input_symbol_.name() << ")"
@ -433,6 +441,16 @@ bool PlanToJsonVisitor::PreVisit(ScanAllByLabelPropertyValue &op) {
return false;
}
/// Converts a ScanAllById operator to JSON with the keys "name",
/// "output_symbol" and "input", and stores the result in `output_`.
/// Always returns false; the input operator is visited explicitly here.
bool PlanToJsonVisitor::PreVisit(ScanAllById &op) {
  json node;
  node["name"] = "ScanAllById";
  node["output_symbol"] = ToJson(op.output_symbol_);

  // Visit the input operator first and then take over what it produced.
  op.input_->Accept(*this);
  node["input"] = PopOutput();

  output_ = std::move(node);
  return false;
}
bool PlanToJsonVisitor::PreVisit(CreateNode &op) {
json self;
self["name"] = "CreateNode";

View File

@ -58,6 +58,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor {
bool PreVisit(ScanAllByLabel &) override;
bool PreVisit(ScanAllByLabelPropertyValue &) override;
bool PreVisit(ScanAllByLabelPropertyRange &) override;
bool PreVisit(ScanAllById &) override;
bool PreVisit(Expand &) override;
bool PreVisit(ExpandVariable &) override;
@ -181,6 +182,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor {
bool PreVisit(ScanAllByLabel &) override;
bool PreVisit(ScanAllByLabelPropertyRange &) override;
bool PreVisit(ScanAllByLabelPropertyValue &) override;
bool PreVisit(ScanAllById &) override;
bool PreVisit(Produce &) override;
bool PreVisit(Accumulate &) override;

View File

@ -226,6 +226,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
return true;
}
// Remember the operator in `prev_ops_` before anything below it is visited;
// the matching PostVisit removes it again.
bool PreVisit(ScanAllById &op) override {
  prev_ops_.emplace_back(&op);
  return true;
}
// Removes the last element of `prev_ops_`, which is the operator pushed by
// the matching PreVisit(ScanAllById &). Always returns true.
bool PostVisit(ScanAllById &) override {
prev_ops_.pop_back();
return true;
}
bool PreVisit(ExpandVariable &op) override {
prev_ops_.push_back(&op);
return true;
@ -394,7 +403,10 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
const SymbolTable *symbol_table_;
AstStorage *ast_storage_;
TDbAccessor *db_;
// Collected filters, pending for examination if they can be used for advanced
// lookup operations (by index, node ID, ...).
Filters filters_;
// Expressions which no longer need a plain Filter operator.
std::unordered_set<Expression *> filter_exprs_for_removal_;
std::vector<LogicalOperator *> prev_ops_;
@ -518,16 +530,36 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
const auto &input = scan.input();
const auto &node_symbol = scan.output_symbol_;
const auto &view = scan.view_;
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(),
modified_symbols.end());
// Returns true only if every symbol from used_symbols is contained in
// bound_symbols; an empty used_symbols therefore yields true.
auto are_bound = [&bound_symbols](const auto &used_symbols) {
  bool all_bound = true;
  for (const auto &candidate : used_symbols) {
    if (utils::Contains(bound_symbols, candidate)) continue;
    // Found a symbol which is not bound, no need to look any further.
    all_bound = false;
    break;
  }
  return all_bound;
};
// First, try to see if we can find a vertex by ID.
if (!max_vertex_count || *max_vertex_count >= 1) {
for (const auto &filter : filters_.IdFilters(node_symbol)) {
if (filter.id_filter->is_symbol_in_value_ ||
!are_bound(filter.used_symbols))
continue;
auto *value = filter.id_filter->value_;
filter_exprs_for_removal_.insert(filter.expression);
filters_.EraseFilter(filter);
return std::make_unique<ScanAllById>(input, node_symbol, value, view);
}
}
// Now try to see if we can use label+property index. If not, try to use
// just the label index.
const auto labels = filters_.FilteredLabels(node_symbol);
if (labels.empty()) {
// Without labels, we cannot generate any indexed ScanAll.
return nullptr;
}
// First, try to see if we can use label+property index. If not, use just
// the label index (which ought to exist).
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(),
modified_symbols.end());
auto found_index = FindBestLabelPropertyIndex(node_symbol, bound_symbols);
if (found_index &&
// Use label+property index if we satisfy max_vertex_count.

View File

@ -1530,4 +1530,23 @@ TYPED_TEST(TestPlanner, CallProcedureAfterScanAll) {
ExpectProduce());
}
// Checks that filtering a single matched node by `id(n) = 42` is planned as
// ScanAllById followed by Produce.
TYPED_TEST(TestPlanner, ScanAllById) {
// Test MATCH (n) WHERE id(n) = 42 RETURN n
AstStorage storage;
auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n"))),
WHERE(EQ(FN("id", IDENT("n")), LITERAL(42))),
RETURN("n")));
CheckPlan<TypeParam>(query, storage, ExpectScanAllById(), ExpectProduce());
}
// Checks the plan when the ID filter is on the second node of an expansion
// pattern. The expected operators are ScanAll, ScanAllById, Expand and
// Produce, in the order they are passed to CheckPlan.
TYPED_TEST(TestPlanner, ScanAllByIdExpandToExisting) {
// Test MATCH (n)-[r]-(m) WHERE id(m) = 42 RETURN r
AstStorage storage;
auto *query = QUERY(
SINGLE_QUERY(MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m"))),
WHERE(EQ(FN("id", IDENT("m")), LITERAL(42))), RETURN("r")));
CheckPlan<TypeParam>(query, storage, ExpectScanAll(), ExpectScanAllById(),
ExpectExpand(), ExpectProduce());
}
} // namespace

View File

@ -51,6 +51,7 @@ class PlanChecker : public virtual HierarchicalLogicalOperatorVisitor {
PRE_VISIT(ScanAllByLabel);
PRE_VISIT(ScanAllByLabelPropertyValue);
PRE_VISIT(ScanAllByLabelPropertyRange);
PRE_VISIT(ScanAllById);
PRE_VISIT(Expand);
PRE_VISIT(ExpandVariable);
PRE_VISIT(Filter);
@ -123,6 +124,7 @@ using ExpectCreateExpand = OpChecker<CreateExpand>;
using ExpectDelete = OpChecker<Delete>;
using ExpectScanAll = OpChecker<ScanAll>;
using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>;
using ExpectScanAllById = OpChecker<ScanAllById>;
using ExpectExpand = OpChecker<Expand>;
using ExpectFilter = OpChecker<Filter>;
using ExpectConstructNamedPath = OpChecker<ConstructNamedPath>;