Optimize vertex lookup by ID
Reviewers: mferencevic, llugovic Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2609
This commit is contained in:
parent
591eadad20
commit
d910813955
@ -1109,7 +1109,7 @@ NameToFunction(const std::string &function_name) {
|
||||
if (function_name == "OUTDEGREE") return OutDegree;
|
||||
if (function_name == "ENDNODE") return EndNode;
|
||||
if (function_name == "HEAD") return Head;
|
||||
if (function_name == "ID") return Id;
|
||||
if (function_name == kId) return Id;
|
||||
if (function_name == "LAST") return Last;
|
||||
if (function_name == "PROPERTIES") return Properties;
|
||||
if (function_name == "SIZE") return Size;
|
||||
|
@ -17,6 +17,7 @@ namespace {
|
||||
const char kStartsWith[] = "STARTSWITH";
|
||||
const char kEndsWith[] = "ENDSWITH";
|
||||
const char kContains[] = "CONTAINS";
|
||||
const char kId[] = "ID";
|
||||
} // namespace
|
||||
|
||||
struct FunctionContext {
|
||||
|
@ -134,6 +134,8 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: Cost estimate ScanAllById?
|
||||
|
||||
// For the given op first increments the cardinality and then cost.
|
||||
#define POST_VISIT_CARD_FIRST(NAME) \
|
||||
bool PostVisit(NAME &) override { \
|
||||
@ -182,8 +184,6 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
bool Visit(Once &) override { return true; }
|
||||
|
||||
// TODO: Cost estimate PullRemote and ProduceRemote?
|
||||
|
||||
auto cost() const { return cost_; }
|
||||
auto cardinality() const { return cardinality_; }
|
||||
|
||||
|
@ -505,6 +505,34 @@ UniqueCursorPtr ScanAllByLabelPropertyValue::MakeCursor(
|
||||
mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
|
||||
}
|
||||
|
||||
ScanAllById::ScanAllById(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol output_symbol, Expression *expression,
|
||||
storage::View view)
|
||||
: ScanAll(input, output_symbol, view), expression_(expression) {
|
||||
CHECK(expression);
|
||||
}
|
||||
|
||||
// NOTE(review): presumably expands to the definition of ScanAllById::Accept
// (declared in the operator's class); the macro body is not visible here --
// confirm against its definition.
ACCEPT_WITH_INPUT(ScanAllById)
|
||||
|
||||
/// Creates a cursor which yields at most one vertex: the one whose ID equals
/// the value of `expression_` evaluated on the current frame.
///
/// The vertex-producing callback returns `std::nullopt` when
///  - the evaluated value is not numeric,
///  - the value is a double which is not exactly representable as an
///    `int64_t` (fractional part, NaN, infinity or out of range), or
///  - no vertex with that ID is found in the requested view.
///
/// @param mem memory resource handed to the created cursor and to the input
///            operator's cursor
UniqueCursorPtr ScanAllById::MakeCursor(utils::MemoryResource *mem) const {
  auto vertices = [this](Frame &frame, ExecutionContext &context)
      -> std::optional<std::vector<VertexAccessor>> {
    auto *db = context.db_accessor;
    ExpressionEvaluator evaluator(&frame, context.symbol_table,
                                  context.evaluation_context,
                                  context.db_accessor, view_);
    auto value = expression_->Accept(evaluator);
    if (!value.IsNumeric()) return std::nullopt;
    int64_t id = 0;
    if (value.IsInt()) {
      // Take the integer directly. Routing it through a `cond ? int : double`
      // expression would first convert it to double (the common type) and
      // silently round IDs whose magnitude exceeds 2^53.
      id = value.ValueInt();
    } else {
      const double id_as_double = value.ValueDouble();
      // 2^63, exactly representable as a double. Converting a double outside
      // of [-2^63, 2^63) to int64_t is undefined behaviour, so reject it up
      // front. The negated comparison also rejects NaN.
      constexpr double kInt64Bound = 9223372036854775808.0;
      if (!(id_as_double >= -kInt64Bound && id_as_double < kInt64Bound))
        return std::nullopt;
      id = static_cast<int64_t>(id_as_double);
      // A double with a fractional part can never equal an ID.
      if (static_cast<double>(id) != id_as_double) return std::nullopt;
    }
    auto maybe_vertex = db->FindVertex(storage::Gid::FromInt(id), view_);
    if (!maybe_vertex) return std::nullopt;
    return std::vector<VertexAccessor>{*maybe_vertex};
  };
  return MakeUniqueCursorPtr<ScanAllCursor<decltype(vertices)>>(
      mem, output_symbol_, input_->MakeCursor(mem), std::move(vertices));
}
|
||||
|
||||
namespace {
|
||||
bool CheckExistingNode(const VertexAccessor &new_node,
|
||||
const Symbol &existing_node_sym, Frame &frame) {
|
||||
|
@ -91,6 +91,7 @@ class ScanAll;
|
||||
class ScanAllByLabel;
|
||||
class ScanAllByLabelPropertyRange;
|
||||
class ScanAllByLabelPropertyValue;
|
||||
class ScanAllById;
|
||||
class Expand;
|
||||
class ExpandVariable;
|
||||
class ConstructNamedPath;
|
||||
@ -118,9 +119,9 @@ class CallProcedure;
|
||||
|
||||
using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor<
|
||||
Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel,
|
||||
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue, Expand,
|
||||
ExpandVariable, ConstructNamedPath, Filter, Produce, Delete, SetProperty,
|
||||
SetProperties, SetLabels, RemoveProperty, RemoveLabels,
|
||||
ScanAllByLabelPropertyRange, ScanAllByLabelPropertyValue, ScanAllById,
|
||||
Expand, ExpandVariable, ConstructNamedPath, Filter, Produce, Delete,
|
||||
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels,
|
||||
EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, OrderBy, Merge,
|
||||
Optional, Unwind, Distinct, Union, Cartesian, CallProcedure>;
|
||||
|
||||
@ -732,6 +733,25 @@ property value.
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
;; LCP definition of the scan-all-by-id operator, derived from scan-all.
;; It declares a single public slot, `expression` (an "Expression *"), which
;; is saved and loaded through the SLK AST-pointer helpers, and it declares the
;; C++ constructors together with the Accept and MakeCursor overrides in the
;; embedded cpp section below.
(lcp:define-class scan-all-by-id (scan-all)
|
||||
((expression "Expression *" :scope :public
|
||||
:slk-save #'slk-save-ast-pointer
|
||||
:slk-load (slk-load-ast-pointer "Expression")))
|
||||
(:documentation
|
||||
"ScanAll producing a single node with ID equal to evaluated expression")
|
||||
(:public
|
||||
#>cpp
|
||||
ScanAllById() {}
|
||||
ScanAllById(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol output_symbol, Expression *expression,
|
||||
storage::View view = storage::View::OLD);
|
||||
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
|
||||
(:clone))
|
||||
|
||||
(lcp:define-struct expand-common ()
|
||||
(
|
||||
;; info on what's getting expanded
|
||||
|
@ -180,6 +180,15 @@ PropertyFilter::PropertyFilter(
|
||||
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
|
||||
}
|
||||
|
||||
IdFilter::IdFilter(const SymbolTable &symbol_table, const Symbol &symbol,
|
||||
Expression *value)
|
||||
: symbol_(symbol), value_(value) {
|
||||
CHECK(value);
|
||||
UsedSymbolsCollector collector(symbol_table);
|
||||
value->Accept(collector);
|
||||
is_symbol_in_value_ = utils::Contains(collector.symbols_, symbol);
|
||||
}
|
||||
|
||||
void Filters::EraseFilter(const FilterInfo &filter) {
|
||||
// TODO: Ideally, we want to determine the equality of both expression trees,
|
||||
// instead of a simple pointer compare.
|
||||
@ -406,6 +415,20 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
}
|
||||
return is_prop_filter;
|
||||
};
|
||||
// Check if maybe_id_fun is ID invocation on an indentifier and add it as
|
||||
// IdFilter.
|
||||
auto add_id_equal = [&](auto *maybe_id_fun, auto *val_expr) -> bool {
|
||||
auto *id_fun = utils::Downcast<Function>(maybe_id_fun);
|
||||
if (!id_fun) return false;
|
||||
if (id_fun->function_name_ != kId) return false;
|
||||
if (id_fun->arguments_.size() != 1U) return false;
|
||||
auto *ident = utils::Downcast<Identifier>(id_fun->arguments_.front());
|
||||
if (!ident) return false;
|
||||
auto filter = make_filter(FilterInfo::Type::Id);
|
||||
filter.id_filter.emplace(symbol_table, symbol_table.at(*ident), val_expr);
|
||||
all_filters_.emplace_back(filter);
|
||||
return true;
|
||||
};
|
||||
// We are only interested to see the insides of And, because Or prevents
|
||||
// indexing since any labels and properties found there may be optional.
|
||||
DCHECK(!utils::IsSubtype(*expr, AndOperator::kType))
|
||||
@ -435,8 +458,11 @@ void Filters::AnalyzeAndStoreFilter(Expression *expr,
|
||||
bool is_prop_filter = add_prop_equal(eq->expression1_, eq->expression2_);
|
||||
// And reversed.
|
||||
is_prop_filter |= add_prop_equal(eq->expression2_, eq->expression1_);
|
||||
if (!is_prop_filter) {
|
||||
// No PropertyFilter was added, so just store a generic filter.
|
||||
// Try to get ID equality filter.
|
||||
bool is_id_filter = add_id_equal(eq->expression1_, eq->expression2_);
|
||||
is_id_filter |= add_id_equal(eq->expression2_, eq->expression1_);
|
||||
if (!is_prop_filter && !is_id_filter) {
|
||||
// No special filter was added, so just store a generic filter.
|
||||
all_filters_.emplace_back(make_filter(FilterInfo::Type::Generic));
|
||||
}
|
||||
} else if (auto *regex_match = utils::Downcast<RegexMatch>(expr)) {
|
||||
|
@ -105,16 +105,30 @@ class PropertyFilter {
|
||||
std::optional<Bound> upper_bound_{};
|
||||
};
|
||||
|
||||
/// Filtering by ID, for example `MATCH (n) WHERE id(n) = 42 ...`
|
||||
class IdFilter {
|
||||
public:
|
||||
/// Construct with Expression being the required value for ID.
|
||||
IdFilter(const SymbolTable &, const Symbol &, Expression *);
|
||||
|
||||
/// Symbol whose id is looked up.
|
||||
Symbol symbol_;
|
||||
/// Expression which when evaluted produces the value an ID must satisfy.
|
||||
Expression *value_;
|
||||
/// True if the same symbol is used in expressions for value.
|
||||
bool is_symbol_in_value_{false};
|
||||
};
|
||||
|
||||
/// Stores additional information for a filter expression.
|
||||
struct FilterInfo {
|
||||
/// A FilterInfo can be a generic filter expression or a specific filtering
|
||||
/// applied for labels or a property. Non generic types contain extra
|
||||
/// information which can be used to produce indexed scans of graph
|
||||
/// elements.
|
||||
enum class Type { Generic, Label, Property };
|
||||
enum class Type { Generic, Label, Property, Id };
|
||||
|
||||
Type type;
|
||||
/// The filter expression which must be satisfied.
|
||||
/// The original filter expression which must be satisfied.
|
||||
Expression *expression;
|
||||
/// Set of used symbols by the filter @c expression.
|
||||
std::unordered_set<Symbol> used_symbols;
|
||||
@ -122,6 +136,8 @@ struct FilterInfo {
|
||||
std::vector<LabelIx> labels;
|
||||
/// Property information for Type::Property filtering.
|
||||
std::optional<PropertyFilter> property_filter;
|
||||
/// Information for Type::Id filtering.
|
||||
std::optional<IdFilter> id_filter;
|
||||
};
|
||||
|
||||
/// Stores information on filters used inside the @c Matching of a @c QueryPart.
|
||||
@ -185,6 +201,18 @@ class Filters final {
|
||||
return filters;
|
||||
}
|
||||
|
||||
/// Return a vector of FilterInfo for ID equality filtering.
|
||||
auto IdFilters(const Symbol &symbol) const {
|
||||
std::vector<FilterInfo> filters;
|
||||
for (const auto &filter : all_filters_) {
|
||||
if (filter.type == FilterInfo::Type::Id &&
|
||||
filter.id_filter->symbol_ == symbol) {
|
||||
filters.push_back(filter);
|
||||
}
|
||||
}
|
||||
return filters;
|
||||
}
|
||||
|
||||
/// Collects filtering information from a pattern.
|
||||
///
|
||||
/// Goes through all the atoms in a pattern and generates filter expressions
|
||||
|
@ -70,6 +70,14 @@ bool PlanPrinter::PreVisit(query::plan::ScanAllByLabelPropertyRange &op) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Prints a line naming the ScanAllById operator and its output symbol, then
/// lets the traversal continue.
bool PlanPrinter::PreVisit(ScanAllById &op) {
  WithPrintLn([&op](auto &out) {
    out << "* ScanAllById";
    out << " (" << op.output_symbol_.name() << ")";
  });
  return true;
}
|
||||
|
||||
bool PlanPrinter::PreVisit(query::plan::Expand &op) {
|
||||
WithPrintLn([&](auto &out) {
|
||||
*out_ << "* Expand (" << op.input_symbol_.name() << ")"
|
||||
@ -433,6 +441,16 @@ bool PlanToJsonVisitor::PreVisit(ScanAllByLabelPropertyValue &op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Produces the JSON description of a ScanAllById operator: its name, output
/// symbol and the JSON of its input operator.
bool PlanToJsonVisitor::PreVisit(ScanAllById &op) {
  json node;
  node["name"] = "ScanAllById";
  node["output_symbol"] = ToJson(op.output_symbol_);

  // Visit the input explicitly and pick up what it produced.
  op.input_->Accept(*this);
  node["input"] = PopOutput();

  output_ = std::move(node);
  // The input was already visited above.
  return false;
}
|
||||
|
||||
bool PlanToJsonVisitor::PreVisit(CreateNode &op) {
|
||||
json self;
|
||||
self["name"] = "CreateNode";
|
||||
|
@ -58,6 +58,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor {
|
||||
bool PreVisit(ScanAllByLabel &) override;
|
||||
bool PreVisit(ScanAllByLabelPropertyValue &) override;
|
||||
bool PreVisit(ScanAllByLabelPropertyRange &) override;
|
||||
bool PreVisit(ScanAllById &) override;
|
||||
|
||||
bool PreVisit(Expand &) override;
|
||||
bool PreVisit(ExpandVariable &) override;
|
||||
@ -181,6 +182,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor {
|
||||
bool PreVisit(ScanAllByLabel &) override;
|
||||
bool PreVisit(ScanAllByLabelPropertyRange &) override;
|
||||
bool PreVisit(ScanAllByLabelPropertyValue &) override;
|
||||
bool PreVisit(ScanAllById &) override;
|
||||
|
||||
bool PreVisit(Produce &) override;
|
||||
bool PreVisit(Accumulate &) override;
|
||||
|
@ -226,6 +226,15 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Record the operator on the stack of visited operators before descending
// into it.
bool PreVisit(ScanAllById &op) override {
  prev_ops_.emplace_back(&op);
  return true;
}
|
||||
// Drop the entry pushed by the matching PreVisit once the operator has been
// visited.
bool PostVisit(ScanAllById & /*op*/) override {
  prev_ops_.pop_back();
  return true;
}
|
||||
|
||||
bool PreVisit(ExpandVariable &op) override {
|
||||
prev_ops_.push_back(&op);
|
||||
return true;
|
||||
@ -394,7 +403,10 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
const SymbolTable *symbol_table_;
|
||||
AstStorage *ast_storage_;
|
||||
TDbAccessor *db_;
|
||||
// Collected filters, pending for examination if they can be used for advanced
|
||||
// lookup operations (by index, node ID, ...).
|
||||
Filters filters_;
|
||||
// Expressions which no longer need a plain Filter operator.
|
||||
std::unordered_set<Expression *> filter_exprs_for_removal_;
|
||||
std::vector<LogicalOperator *> prev_ops_;
|
||||
|
||||
@ -518,16 +530,36 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
const auto &input = scan.input();
|
||||
const auto &node_symbol = scan.output_symbol_;
|
||||
const auto &view = scan.view_;
|
||||
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
|
||||
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(),
|
||||
modified_symbols.end());
|
||||
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
||||
for (const auto &used_symbol : used_symbols) {
|
||||
if (!utils::Contains(bound_symbols, used_symbol)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
// First, try to see if we can find a vertex by ID.
|
||||
if (!max_vertex_count || *max_vertex_count >= 1) {
|
||||
for (const auto &filter : filters_.IdFilters(node_symbol)) {
|
||||
if (filter.id_filter->is_symbol_in_value_ ||
|
||||
!are_bound(filter.used_symbols))
|
||||
continue;
|
||||
auto *value = filter.id_filter->value_;
|
||||
filter_exprs_for_removal_.insert(filter.expression);
|
||||
filters_.EraseFilter(filter);
|
||||
return std::make_unique<ScanAllById>(input, node_symbol, value, view);
|
||||
}
|
||||
}
|
||||
// Now try to see if we can use label+property index. If not, try to use
|
||||
// just the label index.
|
||||
const auto labels = filters_.FilteredLabels(node_symbol);
|
||||
if (labels.empty()) {
|
||||
// Without labels, we cannot generate any indexed ScanAll.
|
||||
return nullptr;
|
||||
}
|
||||
// First, try to see if we can use label+property index. If not, use just
|
||||
// the label index (which ought to exist).
|
||||
const auto &modified_symbols = scan.ModifiedSymbols(*symbol_table_);
|
||||
std::unordered_set<Symbol> bound_symbols(modified_symbols.begin(),
|
||||
modified_symbols.end());
|
||||
auto found_index = FindBestLabelPropertyIndex(node_symbol, bound_symbols);
|
||||
if (found_index &&
|
||||
// Use label+property index if we satisfy max_vertex_count.
|
||||
|
@ -1530,4 +1530,23 @@ TYPED_TEST(TestPlanner, CallProcedureAfterScanAll) {
|
||||
ExpectProduce());
|
||||
}
|
||||
|
||||
TYPED_TEST(TestPlanner, ScanAllById) {
  // Plan `MATCH (n) WHERE id(n) = 42 RETURN n` and check it against the
  // ScanAllById and Produce expectations.
  AstStorage storage;
  auto *ast_query = QUERY(
      SINGLE_QUERY(MATCH(PATTERN(NODE("n"))),
                   WHERE(EQ(FN("id", IDENT("n")), LITERAL(42))), RETURN("n")));
  CheckPlan<TypeParam>(ast_query, storage, ExpectScanAllById(),
                       ExpectProduce());
}
|
||||
|
||||
TYPED_TEST(TestPlanner, ScanAllByIdExpandToExisting) {
  // Plan `MATCH (n)-[r]-(m) WHERE id(m) = 42 RETURN r` and check it against
  // the ScanAll, ScanAllById, Expand and Produce expectations.
  AstStorage storage;
  auto *ast_query = QUERY(SINGLE_QUERY(
      MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m"))),
      WHERE(EQ(FN("id", IDENT("m")), LITERAL(42))), RETURN("r")));
  CheckPlan<TypeParam>(ast_query, storage, ExpectScanAll(),
                       ExpectScanAllById(), ExpectExpand(), ExpectProduce());
}
|
||||
|
||||
} // namespace
|
||||
|
@ -51,6 +51,7 @@ class PlanChecker : public virtual HierarchicalLogicalOperatorVisitor {
|
||||
PRE_VISIT(ScanAllByLabel);
|
||||
PRE_VISIT(ScanAllByLabelPropertyValue);
|
||||
PRE_VISIT(ScanAllByLabelPropertyRange);
|
||||
PRE_VISIT(ScanAllById);
|
||||
PRE_VISIT(Expand);
|
||||
PRE_VISIT(ExpandVariable);
|
||||
PRE_VISIT(Filter);
|
||||
@ -123,6 +124,7 @@ using ExpectCreateExpand = OpChecker<CreateExpand>;
|
||||
using ExpectDelete = OpChecker<Delete>;
|
||||
using ExpectScanAll = OpChecker<ScanAll>;
|
||||
using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>;
|
||||
using ExpectScanAllById = OpChecker<ScanAllById>;
|
||||
using ExpectExpand = OpChecker<Expand>;
|
||||
using ExpectFilter = OpChecker<Filter>;
|
||||
using ExpectConstructNamedPath = OpChecker<ConstructNamedPath>;
|
||||
|
Loading…
Reference in New Issue
Block a user