Extend Cypher queries with the index hinting feature (#1345)
This commit is contained in:
parent
a94588bde3
commit
989bb97514
2
.gitignore
vendored
2
.gitignore
vendored
@ -60,3 +60,5 @@ src/storage/distributed/rpc/concurrent_id_mapper_rpc_messages.hpp
|
||||
src/transactions/distributed/engine_rpc_messages.hpp
|
||||
/tests/manual/js/transaction_timeout/package-lock.json
|
||||
/tests/manual/js/transaction_timeout/node_modules/
|
||||
.vscode/
|
||||
src/query/frontend/opencypher/grammar/.antlr/*
|
||||
|
@ -2069,6 +2069,27 @@ class Query : public memgraph::query::Tree, public utils::Visitable<QueryVisitor
|
||||
friend class AstStorage;
|
||||
};
|
||||
|
||||
struct IndexHint {
|
||||
static const utils::TypeInfo kType;
|
||||
const utils::TypeInfo &GetTypeInfo() const { return kType; }
|
||||
|
||||
enum class IndexType { LABEL, LABEL_PROPERTY };
|
||||
|
||||
memgraph::query::IndexHint::IndexType index_type_;
|
||||
memgraph::query::LabelIx label_;
|
||||
std::optional<memgraph::query::PropertyIx> property_{std::nullopt};
|
||||
|
||||
IndexHint Clone(AstStorage *storage) const {
|
||||
IndexHint object;
|
||||
object.index_type_ = index_type_;
|
||||
object.label_ = storage->GetLabelIx(label_.name);
|
||||
if (property_) {
|
||||
object.property_ = storage->GetPropertyIx(property_->name);
|
||||
}
|
||||
return object;
|
||||
}
|
||||
};
|
||||
|
||||
class CypherQuery : public memgraph::query::Query, public utils::Visitable<HierarchicalTreeVisitor> {
|
||||
public:
|
||||
static const utils::TypeInfo kType;
|
||||
@ -2093,6 +2114,11 @@ class CypherQuery : public memgraph::query::Query, public utils::Visitable<Hiera
|
||||
memgraph::query::SingleQuery *single_query_{nullptr};
|
||||
/// Contains remaining queries that should form a union with `single_query_`.
|
||||
std::vector<memgraph::query::CypherUnion *> cypher_unions_;
|
||||
/// Index hint
|
||||
/// Suggestion: If we’re going to have multiple pre-query directives (not only index_hints_), they need to be
|
||||
/// contained within a dedicated class/struct
|
||||
std::vector<memgraph::query::IndexHint> index_hints_;
|
||||
/// Memory limit
|
||||
memgraph::query::Expression *memory_limit_{nullptr};
|
||||
size_t memory_scale_{1024U};
|
||||
|
||||
@ -2103,6 +2129,10 @@ class CypherQuery : public memgraph::query::Query, public utils::Visitable<Hiera
|
||||
for (auto i5 = 0; i5 < cypher_unions_.size(); ++i5) {
|
||||
object->cypher_unions_[i5] = cypher_unions_[i5] ? cypher_unions_[i5]->Clone(storage) : nullptr;
|
||||
}
|
||||
object->index_hints_.resize(index_hints_.size());
|
||||
for (auto i6 = 0; i6 < index_hints_.size(); ++i6) {
|
||||
object->index_hints_[i6] = index_hints_[i6].Clone(storage);
|
||||
}
|
||||
object->memory_limit_ = memory_limit_ ? memory_limit_->Clone(storage) : nullptr;
|
||||
object->memory_scale_ = memory_scale_;
|
||||
return object;
|
||||
|
@ -202,6 +202,20 @@ antlrcpp::Any CypherMainVisitor::visitCypherQuery(MemgraphCypher::CypherQueryCon
|
||||
cypher_query->cypher_unions_.push_back(std::any_cast<CypherUnion *>(child->accept(this)));
|
||||
}
|
||||
|
||||
if (auto *index_hints_ctx = ctx->indexHints()) {
|
||||
for (auto *index_hint_ctx : index_hints_ctx->indexHint()) {
|
||||
auto label = AddLabel(std::any_cast<std::string>(index_hint_ctx->labelName()->accept(this)));
|
||||
if (!index_hint_ctx->propertyKeyName()) {
|
||||
cypher_query->index_hints_.emplace_back(IndexHint{.index_type_ = IndexHint::IndexType::LABEL, .label_ = label});
|
||||
continue;
|
||||
}
|
||||
cypher_query->index_hints_.emplace_back(
|
||||
IndexHint{.index_type_ = IndexHint::IndexType::LABEL_PROPERTY,
|
||||
.label_ = label,
|
||||
.property_ = std::any_cast<PropertyIx>(index_hint_ctx->propertyKeyName()->accept(this))});
|
||||
}
|
||||
}
|
||||
|
||||
if (auto *memory_limit_ctx = ctx->queryMemoryLimit()) {
|
||||
const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this);
|
||||
if (memory_limit_info) {
|
||||
|
@ -114,6 +114,7 @@ memgraphCypherKeyword : cypherKeyword
|
||||
| USE
|
||||
| USER
|
||||
| USERS
|
||||
| USING
|
||||
| VERSION
|
||||
| TERMINATE
|
||||
| TRANSACTIONS
|
||||
@ -151,6 +152,8 @@ query : cypherQuery
|
||||
| edgeImportModeQuery
|
||||
;
|
||||
|
||||
cypherQuery : ( indexHints )? singleQuery ( cypherUnion )* ( queryMemoryLimit )? ;
|
||||
|
||||
authQuery : createRole
|
||||
| dropRole
|
||||
| showRoles
|
||||
@ -209,6 +212,10 @@ updateClause : set
|
||||
|
||||
foreach : FOREACH '(' variable IN expression '|' updateClause+ ')' ;
|
||||
|
||||
// Query prefix `USING INDEX` followed by one or more comma-separated index hints.
indexHints: USING INDEX indexHint ( ',' indexHint )* ;
|
||||
|
||||
// A single hint: `:Label` for a label index, or `:Label(property)` when a property key is given.
indexHint: ':' labelName ( '(' propertyKeyName ')' )? ;
|
||||
|
||||
callSubquery : CALL '{' cypherQuery '}' ;
|
||||
|
||||
streamQuery : checkStream
|
||||
@ -259,9 +266,9 @@ userOrRoleName : symbolicName ;
|
||||
|
||||
createRole : CREATE ROLE role=userOrRoleName ;
|
||||
|
||||
dropRole : DROP ROLE role=userOrRoleName ;
|
||||
dropRole : DROP ROLE role=userOrRoleName ;
|
||||
|
||||
showRoles : SHOW ROLES ;
|
||||
showRoles : SHOW ROLES ;
|
||||
|
||||
createUser : CREATE USER user=userOrRoleName
|
||||
( IDENTIFIED BY password=literal )? ;
|
||||
@ -347,11 +354,11 @@ showRoleForUser : SHOW ROLE FOR user=userOrRoleName ;
|
||||
|
||||
showUsersForRole : SHOW USERS FOR role=userOrRoleName ;
|
||||
|
||||
dumpQuery: DUMP DATABASE ;
|
||||
dumpQuery : DUMP DATABASE ;
|
||||
|
||||
analyzeGraphQuery: ANALYZE GRAPH ( ON LABELS ( listOfColonSymbolicNames | ASTERISK ) ) ? ( DELETE STATISTICS ) ? ;
|
||||
analyzeGraphQuery : ANALYZE GRAPH ( ON LABELS ( listOfColonSymbolicNames | ASTERISK ) ) ? ( DELETE STATISTICS ) ? ;
|
||||
|
||||
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
|
||||
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
|
||||
( WITH PORT port=literal ) ? ;
|
||||
|
||||
showReplicationRole : SHOW REPLICATION ROLE ;
|
||||
@ -365,7 +372,7 @@ registerReplica : REGISTER REPLICA replicaName ( SYNC | ASYNC )
|
||||
|
||||
dropReplica : DROP REPLICA replicaName ;
|
||||
|
||||
showReplicas : SHOW REPLICAS ;
|
||||
showReplicas : SHOW REPLICAS ;
|
||||
|
||||
lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY | DATA DIRECTORY LOCK STATUS;
|
||||
|
||||
@ -402,7 +409,7 @@ streamName : symbolicName ;
|
||||
|
||||
symbolicNameWithMinus : symbolicName ( MINUS symbolicName )* ;
|
||||
|
||||
symbolicNameWithDotsAndMinus: symbolicNameWithMinus ( DOT symbolicNameWithMinus )* ;
|
||||
symbolicNameWithDotsAndMinus : symbolicNameWithMinus ( DOT symbolicNameWithMinus )* ;
|
||||
|
||||
symbolicTopicNames : symbolicNameWithDotsAndMinus ( COMMA symbolicNameWithDotsAndMinus )* ;
|
||||
|
||||
@ -480,6 +487,6 @@ useDatabase : USE DATABASE databaseName ;
|
||||
|
||||
dropDatabase : DROP DATABASE databaseName ;
|
||||
|
||||
showDatabases: SHOW DATABASES ;
|
||||
showDatabases : SHOW DATABASES ;
|
||||
|
||||
edgeImportModeQuery : EDGE IMPORT MODE ( ACTIVE | INACTIVE ) ;
|
||||
|
@ -137,5 +137,6 @@ UPDATE : U P D A T E ;
|
||||
USE : U S E ;
|
||||
USER : U S E R ;
|
||||
USERS : U S E R S ;
|
||||
USING : U S I N G ;
|
||||
VERSION : V E R S I O N ;
|
||||
WEBSOCKET : W E B S O C K E T ;
|
||||
|
@ -1131,8 +1131,12 @@ class Filter : public memgraph::query::plan::LogicalOperator {
|
||||
static std::string SingleFilterName(const query::plan::FilterInfo &single_filter) {
|
||||
using Type = query::plan::FilterInfo::Type;
|
||||
if (single_filter.type == Type::Generic) {
|
||||
return fmt::format("Generic {{{}}}", utils::IterableToString(single_filter.used_symbols, ", ",
|
||||
[](const auto &symbol) { return symbol.name(); }));
|
||||
std::set<std::string> symbol_names;
|
||||
for (const auto &symbol : single_filter.used_symbols) {
|
||||
symbol_names.insert(symbol.name());
|
||||
}
|
||||
return fmt::format("Generic {{{}}}",
|
||||
utils::IterableToString(symbol_names, ", ", [](const auto &name) { return name; }));
|
||||
} else if (single_filter.type == Type::Id) {
|
||||
return fmt::format("id({})", single_filter.id_filter->symbol_.name());
|
||||
} else if (single_filter.type == Type::Label) {
|
||||
@ -1140,16 +1144,18 @@ class Filter : public memgraph::query::plan::LogicalOperator {
|
||||
LOG_FATAL("Label filters not using LabelsTest are not supported for query inspection!");
|
||||
}
|
||||
auto filter_expression = static_cast<LabelsTest *>(single_filter.expression);
|
||||
std::set<std::string> label_names;
|
||||
for (const auto &label : filter_expression->labels_) {
|
||||
label_names.insert(label.name);
|
||||
}
|
||||
|
||||
if (filter_expression->expression_->GetTypeInfo() != Identifier::kType) {
|
||||
return fmt::format("(:{})", utils::IterableToString(filter_expression->labels_, ":",
|
||||
[](const auto &label) { return label.name; }));
|
||||
return fmt::format("(:{})", utils::IterableToString(label_names, ":", [](const auto &name) { return name; }));
|
||||
}
|
||||
auto identifier_expression = static_cast<Identifier *>(filter_expression->expression_);
|
||||
|
||||
return fmt::format(
|
||||
"({} :{})", identifier_expression->name_,
|
||||
utils::IterableToString(filter_expression->labels_, ":", [](const auto &label) { return label.name; }));
|
||||
return fmt::format("({} :{})", identifier_expression->name_,
|
||||
utils::IterableToString(label_names, ":", [](const auto &name) { return name; }));
|
||||
} else if (single_filter.type == Type::Pattern) {
|
||||
return "Pattern";
|
||||
} else if (single_filter.type == Type::Property) {
|
||||
@ -1161,9 +1167,11 @@ class Filter : public memgraph::query::plan::LogicalOperator {
|
||||
}
|
||||
|
||||
std::string ToString() const override {
|
||||
return fmt::format("Filter {}", utils::IterableToString(all_filters_, ", ", [](const auto &single_filter) {
|
||||
return Filter::SingleFilterName(single_filter);
|
||||
}));
|
||||
std::set<std::string> filter_names;
|
||||
for (const auto &filter : all_filters_) {
|
||||
filter_names.insert(Filter::SingleFilterName(filter));
|
||||
}
|
||||
return fmt::format("Filter {}", utils::IterableToString(filter_names, ", ", [](const auto &name) { return name; }));
|
||||
}
|
||||
|
||||
std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override {
|
||||
|
@ -38,14 +38,20 @@ class PostProcessor final {
|
||||
Parameters parameters_;
|
||||
|
||||
public:
|
||||
IndexHints index_hints_{};
|
||||
|
||||
using ProcessedPlan = std::unique_ptr<LogicalOperator>;
|
||||
|
||||
explicit PostProcessor(const Parameters ¶meters) : parameters_(parameters) {}
|
||||
|
||||
template <class TDbAccessor>
|
||||
PostProcessor(const Parameters ¶meters, std::vector<IndexHint> index_hints, TDbAccessor *db)
|
||||
: parameters_(parameters), index_hints_(IndexHints(index_hints, db)) {}
|
||||
|
||||
template <class TPlanningContext>
|
||||
std::unique_ptr<LogicalOperator> Rewrite(std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
|
||||
auto index_lookup_plan =
|
||||
RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db);
|
||||
RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db, index_hints_);
|
||||
return RewriteWithJoinRewriter(std::move(index_lookup_plan), context->symbol_table, context->ast_storage,
|
||||
context->db);
|
||||
}
|
||||
@ -122,7 +128,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
|
||||
|
||||
template <class TPlanningContext>
|
||||
auto MakeLogicalPlan(TPlanningContext *context, const Parameters ¶meters, bool use_variable_planner) {
|
||||
PostProcessor post_processor(parameters);
|
||||
PostProcessor post_processor(parameters, context->query->index_hints_, context->db);
|
||||
return MakeLogicalPlan(context, &post_processor, use_variable_planner);
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,36 @@ DECLARE_int64(query_vertex_count_to_expand_existing);
|
||||
|
||||
namespace memgraph::query::plan {
|
||||
|
||||
/// Holds a given query's index hints after sorting them by type
|
||||
struct IndexHints {
|
||||
IndexHints() = default;
|
||||
|
||||
template <class TDbAccessor>
|
||||
IndexHints(std::vector<IndexHint> index_hints, TDbAccessor *db) {
|
||||
for (const auto &index_hint : index_hints) {
|
||||
const auto index_type = index_hint.index_type_;
|
||||
const auto label_name = index_hint.label_.name;
|
||||
if (index_type == IndexHint::IndexType::LABEL) {
|
||||
if (!db->LabelIndexExists(db->NameToLabel(label_name))) {
|
||||
spdlog::debug("Index for label {} doesn't exist", label_name);
|
||||
continue;
|
||||
}
|
||||
label_index_hints_.emplace_back(index_hint);
|
||||
} else if (index_type == IndexHint::IndexType::LABEL_PROPERTY) {
|
||||
auto property_name = index_hint.property_->name;
|
||||
if (!db->LabelPropertyIndexExists(db->NameToLabel(label_name), db->NameToProperty(property_name))) {
|
||||
spdlog::debug("Index for label {} and property {} doesn't exist", label_name, property_name);
|
||||
continue;
|
||||
}
|
||||
label_property_index_hints_.emplace_back(index_hint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<IndexHint> label_index_hints_{};
|
||||
std::vector<IndexHint> label_property_index_hints_{};
|
||||
};
|
||||
|
||||
namespace impl {
|
||||
|
||||
struct ExpressionRemovalResult {
|
||||
@ -43,11 +73,18 @@ struct ExpressionRemovalResult {
|
||||
// given expression tree.
|
||||
ExpressionRemovalResult RemoveExpressions(Expression *expr, const std::unordered_set<Expression *> &exprs_to_remove);
|
||||
|
||||
struct HashPair {
|
||||
template <class T1, class T2>
|
||||
std::size_t operator()(const std::pair<T1, T2> &pair) const {
|
||||
return utils::HashCombine<T1, T2>{}(pair.first, pair.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <class TDbAccessor>
|
||||
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
public:
|
||||
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db)
|
||||
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
|
||||
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db, IndexHints index_hints)
|
||||
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db), index_hints_(index_hints) {}
|
||||
|
||||
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
||||
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
||||
@ -535,6 +572,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
// Expressions which no longer need a plain Filter operator.
|
||||
std::unordered_set<Expression *> filter_exprs_for_removal_;
|
||||
std::vector<LogicalOperator *> prev_ops_;
|
||||
IndexHints index_hints_;
|
||||
|
||||
// additional symbols that are present from other non-main branches but have influence on indexing
|
||||
std::unordered_set<Symbol> additional_bound_symbols_;
|
||||
@ -575,7 +613,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
}
|
||||
|
||||
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
|
||||
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_);
|
||||
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_, index_hints_);
|
||||
(*branch)->Accept(rewriter);
|
||||
if (rewriter.new_root_) {
|
||||
*branch = rewriter.new_root_;
|
||||
@ -588,6 +626,13 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
|
||||
std::optional<LabelIx> FindBestLabelIndex(const std::unordered_set<LabelIx> &labels) {
|
||||
MG_ASSERT(!labels.empty(), "Trying to find the best label without any labels.");
|
||||
|
||||
for (const auto &[index_type, label, _] : index_hints_.label_index_hints_) {
|
||||
if (labels.contains(label)) {
|
||||
return label;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<LabelIx> best_label;
|
||||
for (const auto &label : labels) {
|
||||
if (!db_->LabelIndexExists(GetLabel(label))) continue;
|
||||
@ -600,13 +645,12 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
return best_label;
|
||||
}
|
||||
|
||||
// Finds the label-property combination. The first criteria based on number of vertices indexed -> if one index has
|
||||
// 10x less than the other one, always choose the smaller one. Otherwise, choose the index with smallest average
|
||||
// group size based on key distribution. If average group size is equal, choose the index that has distribution
|
||||
// closer to uniform distribution. Conditions based on average group size and key distribution can be only taken
|
||||
// into account if the user has run `ANALYZE GRAPH` query before If the index cannot be found, nullopt is returned.
|
||||
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
|
||||
const std::unordered_set<Symbol> &bound_symbols) {
|
||||
/// Pairs of (index hint, property filter) together with a lookup table over the same entries.
struct CandidateIndices {
|
||||
/// Candidate indices paired with the filter that can be served by each of them.
std::vector<std::pair<IndexHint, FilterInfo>> candidate_indices_{};
|
||||
/// Filter lookup keyed by (label, property); hashed with HashPair.
std::unordered_map<std::pair<LabelIx, PropertyIx>, FilterInfo, HashPair> candidate_index_lookup_{};
|
||||
};
|
||||
|
||||
CandidateIndices GetCandidateIndices(const Symbol &symbol, const std::unordered_set<Symbol> &bound_symbols) {
|
||||
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
||||
for (const auto &used_symbol : used_symbols) {
|
||||
if (!utils::Contains(bound_symbols, used_symbol)) {
|
||||
@ -616,6 +660,39 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
return true;
|
||||
};
|
||||
|
||||
std::vector<std::pair<IndexHint, FilterInfo>> candidate_indices{};
|
||||
std::unordered_map<std::pair<LabelIx, PropertyIx>, FilterInfo, HashPair> candidate_index_lookup{};
|
||||
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
||||
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
||||
if (filter.property_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) {
|
||||
// Skip filter expressions which use the symbol whose property we are
|
||||
// looking up or aren't bound. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
|
||||
// cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto &property = filter.property_filter->property_;
|
||||
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
|
||||
continue;
|
||||
}
|
||||
candidate_indices.emplace_back(std::make_pair(
|
||||
IndexHint{.index_type_ = IndexHint::IndexType::LABEL_PROPERTY, .label_ = label, .property_ = property},
|
||||
filter));
|
||||
candidate_index_lookup.insert({std::make_pair(label, property), filter});
|
||||
}
|
||||
}
|
||||
|
||||
return CandidateIndices{.candidate_indices_ = candidate_indices, .candidate_index_lookup_ = candidate_index_lookup};
|
||||
}
|
||||
|
||||
// Finds the label-property combination. The first criteria based on number of vertices indexed -> if one index has
|
||||
// 10x less than the other one, always choose the smaller one. Otherwise, choose the index with smallest average group
|
||||
// size based on key distribution. If average group size is equal, choose the index that has distribution closer to
|
||||
// uniform distribution. Conditions based on average group size and key distribution can be only taken into account if
|
||||
// the user has run `ANALYZE GRAPH` query before. If the index cannot be found, nullopt is returned.
|
||||
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
|
||||
const std::unordered_set<Symbol> &bound_symbols) {
|
||||
/*
|
||||
* Comparator function between two indices. If new index has >= 10x vertices than the existing, it cannot be
|
||||
* better. If it is <= 10x in number of vertices, check average group size of property values. The index with
|
||||
@ -640,54 +717,59 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
return utils::CompareDecimal(new_stats->statistic, found->index_stats->statistic);
|
||||
};
|
||||
|
||||
auto [candidate_indices, candidate_index_lookup] = GetCandidateIndices(symbol, bound_symbols);
|
||||
|
||||
for (const auto &[index_type, label, maybe_property] : index_hints_.label_property_index_hints_) {
|
||||
auto property = *maybe_property;
|
||||
if (candidate_index_lookup.contains(std::make_pair(label, property))) {
|
||||
return LabelPropertyIndex{.label = label,
|
||||
.filter = candidate_index_lookup.at(std::make_pair(label, property)),
|
||||
.vertex_count = std::numeric_limits<std::int64_t>::max()};
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<LabelPropertyIndex> found;
|
||||
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
||||
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
||||
if (filter.property_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) {
|
||||
// Skip filter expressions which use the symbol whose property we are
|
||||
// looking up or aren't bound. We cannot scan by such expressions. For
|
||||
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
|
||||
// cannot scan `n` by property index.
|
||||
continue;
|
||||
}
|
||||
const auto &property = filter.property_filter->property_;
|
||||
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
|
||||
continue;
|
||||
}
|
||||
auto is_better_type = [&found](PropertyFilter::Type type) {
|
||||
// Order the types by the most preferred index lookup type.
|
||||
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
||||
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, PropertyFilter::Type::REGEX_MATCH};
|
||||
auto *found_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, found->filter.property_filter->type_);
|
||||
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
||||
return type_sort_ix < found_sort_ix;
|
||||
};
|
||||
// for (const auto &[label_and_property, filter] : candidate_indices) {
|
||||
// const auto &[label, property] = label_and_property;
|
||||
for (const auto &[candidate, filter] : candidate_indices) {
|
||||
const auto &[_, label, maybe_property] = candidate;
|
||||
auto property = *maybe_property;
|
||||
|
||||
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||
std::optional<storage::LabelPropertyIndexStats> new_stats =
|
||||
db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
||||
auto is_better_type = [&found](PropertyFilter::Type type) {
|
||||
// Order the types by the most preferred index lookup type.
|
||||
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
||||
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, PropertyFilter::Type::REGEX_MATCH};
|
||||
auto *found_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, found->filter.property_filter->type_);
|
||||
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
||||
return type_sort_ix < found_sort_ix;
|
||||
};
|
||||
|
||||
// Conditions, from more to less important:
|
||||
// the index with 10x less vertices is better.
|
||||
// the index with smaller average group size is better.
|
||||
// the index with equal avg group size and distribution closer to the uniform is better.
|
||||
// the index with less vertices is better.
|
||||
// the index with same number of vertices but more optimized filter is better.
|
||||
if (!found || vertex_count * 10 < found->vertex_count) {
|
||||
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||
continue;
|
||||
}
|
||||
// Conditions, from more to less important:
|
||||
// the index with 10x less vertices is better.
|
||||
// the index with smaller average group size is better.
|
||||
// the index with equal avg group size and distribution closer to the uniform is better.
|
||||
// the index with less vertices is better.
|
||||
// the index with same number of vertices but more optimized filter is better.
|
||||
|
||||
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
|
||||
cmp_res == -1 ||
|
||||
cmp_res == 0 && (found->vertex_count > vertex_count ||
|
||||
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
|
||||
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||
}
|
||||
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||
std::optional<storage::LabelPropertyIndexStats> new_stats =
|
||||
db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
||||
|
||||
if (!found || vertex_count * 10 < found->vertex_count) {
|
||||
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||
continue;
|
||||
}
|
||||
|
||||
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
|
||||
cmp_res == -1 ||
|
||||
cmp_res == 0 && (found->vertex_count > vertex_count ||
|
||||
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
|
||||
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
// Creates a ScanAll by the best possible index for the `node_symbol`. If the node
|
||||
// does not have at least a label, no indexed lookup can be created and
|
||||
// `nullptr` is returned. The operator is chained after `input`. Optional
|
||||
@ -798,8 +880,8 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
||||
template <class TDbAccessor>
|
||||
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(std::unique_ptr<LogicalOperator> root_op,
|
||||
SymbolTable *symbol_table, AstStorage *ast_storage,
|
||||
TDbAccessor *db) {
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db);
|
||||
TDbAccessor *db, IndexHints index_hints) {
|
||||
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db, index_hints);
|
||||
root_op->Accept(rewriter);
|
||||
if (rewriter.new_root_) {
|
||||
// This shouldn't happen in real use case, because IndexLookupRewriter
|
||||
|
@ -66,6 +66,7 @@ add_subdirectory(concurrent_query_modules)
|
||||
add_subdirectory(show_index_info)
|
||||
add_subdirectory(set_properties)
|
||||
add_subdirectory(transaction_rollback)
|
||||
add_subdirectory(index_hints)
|
||||
add_subdirectory(query_modules)
|
||||
add_subdirectory(constraints)
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
disk_storage: &disk_storage
|
||||
cluster:
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level", "TRACE", "--memory-limit", "125"]
|
||||
args: ["--bolt-port", "7687", "--log-level", "TRACE", "--memory-limit", "150"]
|
||||
log_file: "disk_storage.log"
|
||||
setup_queries: []
|
||||
validation_queries: []
|
||||
|
6
tests/e2e/index_hints/CMakeLists.txt
Normal file
6
tests/e2e/index_hints/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
||||
# Forwards FILE_NAME to copy_e2e_python_files with `index_hints` as the first argument.
function(copy_index_hints_e2e_python_files FILE_NAME)
|
||||
copy_e2e_python_files(index_hints ${FILE_NAME})
|
||||
endfunction()
|
||||
|
||||
copy_index_hints_e2e_python_files(common.py)
|
||||
copy_index_hints_e2e_python_files(index_hints.py)
|
45
tests/e2e/index_hints/common.py
Normal file
45
tests/e2e/index_hints/common.py
Normal file
@ -0,0 +1,45 @@
|
||||
# Copyright 2023 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import typing
|
||||
|
||||
import mgclient
|
||||
import pytest
|
||||
from gqlalchemy import Memgraph
|
||||
|
||||
|
||||
def execute_and_fetch_all(
    cursor: mgclient.Cursor, query: str, params: typing.Optional[dict] = None
) -> typing.List[tuple]:
    """Execute `query` on `cursor` and return every resulting row.

    Args:
        cursor: cursor on which the query is executed.
        query: query text.
        params: query parameters; when omitted an empty dict is passed to `cursor.execute`.

    Returns:
        Whatever `cursor.fetchall()` returns after the query has been executed.
    """
    # A fresh dict per call avoids sharing one mutable default object between all invocations.
    cursor.execute(query, {} if params is None else params)
    return cursor.fetchall()
|
||||
|
||||
|
||||
@pytest.fixture
def connect(**kwargs) -> mgclient.Connection:
    """Yield an autocommit connection to localhost:7687 after resetting the database state.

    Before yielding, the fixture switches to the `memgraph` database, makes a best-effort attempt
    to drop the `clean` database and runs `MATCH (n) DETACH DELETE n`.
    """
    connection = mgclient.connect(host="localhost", port=7687, **kwargs)
    connection.autocommit = True
    cursor = connection.cursor()
    execute_and_fetch_all(cursor, "USE DATABASE memgraph")
    try:
        execute_and_fetch_all(cursor, "DROP DATABASE clean")
    except Exception:
        # Best effort: a failed drop (presumably because `clean` does not exist) is not an error.
        # Catching Exception rather than everything lets KeyboardInterrupt/SystemExit propagate.
        pass
    execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n")
    yield connection
|
||||
|
||||
|
||||
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
    """Yield a `Memgraph` client; once the test is done, call `drop_database()` and `drop_indexes()` on it."""
    client = Memgraph()

    yield client

    # Teardown: runs after the test that requested the fixture has finished.
    client.drop_database()
    client.drop_indexes()
|
479
tests/e2e/index_hints/index_hints.py
Normal file
479
tests/e2e/index_hints/index_hints.py
Normal file
@ -0,0 +1,479 @@
|
||||
# Copyright 2023 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import sys
|
||||
|
||||
import mgclient
|
||||
import pytest
|
||||
from common import memgraph
|
||||
|
||||
|
||||
def test_label_index_hint(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_no_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label1:Label2)",
|
||||
" * ScanAllByLabel (n :Label1)",
|
||||
" * Once",
|
||||
]
|
||||
expected_explain_with_hint = [row.replace("(n :Label1)", "(n :Label2)") for row in expected_explain_no_hint]
|
||||
|
||||
explain_no_hint = [
|
||||
row["QUERY PLAN"] for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
assert explain_no_hint == expected_explain_no_hint and explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_label_index_hint_alternative_orderings(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label1:Label2)",
|
||||
" * ScanAllByLabel (n :Label2)",
|
||||
" * Once",
|
||||
]
|
||||
expected_explain_with_hint_ordering_3 = expected_explain_with_hint[:]
|
||||
expected_explain_with_hint_ordering_3[1] = " * Filter (n :Label1:Label2:Label3)" # since it matches 3 labels
|
||||
|
||||
explain_with_hint_ordering_1 = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
explain_with_hint_ordering_2 = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label2:Label1) RETURN n;")
|
||||
]
|
||||
explain_with_hint_ordering_3 = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label3:Label2:Label1) RETURN n;")
|
||||
]
|
||||
|
||||
assert (
|
||||
expected_explain_with_hint == explain_with_hint_ordering_1 == explain_with_hint_ordering_2
|
||||
and expected_explain_with_hint_ordering_3 == explain_with_hint_ordering_3
|
||||
)
|
||||
|
||||
|
||||
def test_multiple_label_index_hints(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label0;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label1:Label2)",
|
||||
" * ScanAllByLabel (n :Label2)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label0, :Label2 MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_multiple_applicable_label_index_hints(memgraph):
|
||||
# Out of all applicable index hints, the first one given in the query should be used
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
memgraph.execute("CREATE INDEX ON :Label3;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label2:Label3)",
|
||||
" * ScanAllByLabel (n :Label3)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label2:Label3) RETURN n;")
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_multiple_applicable_label_index_hints_alternative_orderings(memgraph):
|
||||
# Out of all applicable index hints, the first one given in the query should be used
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
memgraph.execute("CREATE INDEX ON :Label3;")
|
||||
|
||||
expected_explain_with_hint_1 = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label2:Label3)",
|
||||
" * ScanAllByLabel (n :Label3)",
|
||||
" * Once",
|
||||
]
|
||||
expected_explain_with_hint_2 = [row.replace("(n :Label3)", "(n :Label2)") for row in expected_explain_with_hint_1]
|
||||
|
||||
explain_with_hint_ordering_1a = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label2:Label3) RETURN n;")
|
||||
]
|
||||
explain_with_hint_ordering_1b = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label3:Label2) RETURN n;")
|
||||
]
|
||||
explain_with_hint_ordering_2a = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2, :Label3 MATCH (n:Label2:Label3) RETURN n;")
|
||||
]
|
||||
explain_with_hint_ordering_2b = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2, :Label3 MATCH (n:Label3:Label2) RETURN n;")
|
||||
]
|
||||
assert (expected_explain_with_hint_1 == explain_with_hint_ordering_1a == explain_with_hint_ordering_1b) and (
|
||||
expected_explain_with_hint_2 == explain_with_hint_ordering_2a == explain_with_hint_ordering_2b
|
||||
)
|
||||
|
||||
|
||||
def test_label_property_index_hint(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id1);")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id2);")
|
||||
|
||||
expected_explain_no_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label), {n.id1}, {n.id2}",
|
||||
" * ScanAllByLabelPropertyValue (n :Label {id2})",
|
||||
" * Once",
|
||||
]
|
||||
expected_explain_with_hint = [
|
||||
row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint
|
||||
]
|
||||
|
||||
explain_no_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
|
||||
]
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_no_hint == expected_explain_no_hint and explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_label_property_index_hint_alternative_orderings(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id1);")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id2);")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label), {n.id1}, {n.id2}",
|
||||
" * ScanAllByLabelPropertyValue (n :Label {id1})",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint_ordering_1 = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
explain_with_hint_ordering_2 = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert expected_explain_with_hint == explain_with_hint_ordering_1 == explain_with_hint_ordering_2
|
||||
|
||||
|
||||
def test_multiple_label_property_index_hints(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id0);")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id1);")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label), {n.id1}, {n.id2}",
|
||||
" * ScanAllByLabelPropertyValue (n :Label {id1})",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id0), :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
explain_with_hint_alternative_ordering = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id0), :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint == explain_with_hint_alternative_ordering
|
||||
|
||||
|
||||
def test_multiple_applicable_label_property_index_hints(memgraph):
|
||||
# Out of all applicable index hints, the first one given in the query should be used
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id1);")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id2);")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label), {n.id1}, {n.id2}",
|
||||
" * ScanAllByLabelPropertyValue (n :Label {id1})",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_multiple_applicable_label_property_index_hints_alternative_orderings(memgraph):
|
||||
# Out of all applicable index hints, the first one given in the query should be used
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id1);")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id2);")
|
||||
|
||||
expected_explain_with_hint_1 = [
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label), {n.id1}, {n.id2}",
|
||||
" * ScanAllByLabelPropertyValue (n :Label {id1})",
|
||||
" * Once",
|
||||
]
|
||||
expected_explain_with_hint_2 = [
|
||||
row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
|
||||
]
|
||||
|
||||
explain_with_hint_ordering_1a = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
explain_with_hint_ordering_1b = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
explain_with_hint_ordering_2a = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id2), :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
explain_with_hint_ordering_2b = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label(id2), :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert (expected_explain_with_hint_1 == explain_with_hint_ordering_1a == explain_with_hint_ordering_1b) and (
|
||||
expected_explain_with_hint_2 == explain_with_hint_ordering_2a == explain_with_hint_ordering_2b
|
||||
)
|
||||
|
||||
|
||||
def test_union_applicable_in_left_branch(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Distinct",
|
||||
" * Union {n : n}",
|
||||
" |\\ ",
|
||||
" | * Produce {n}",
|
||||
" | * ScanAllByLabel (n :Label2)",
|
||||
" | * Once",
|
||||
" * Produce {n}",
|
||||
" * ScanAllByLabel (n :Label1)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label1 MATCH (n:Label1) RETURN n UNION MATCH (n:Label2) RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_union_applicable_in_right_branch(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Distinct",
|
||||
" * Union {n : n}",
|
||||
" |\\ ",
|
||||
" | * Produce {n}",
|
||||
" | * ScanAllByLabel (n :Label1)",
|
||||
" | * Once",
|
||||
" * Produce {n}",
|
||||
" * ScanAllByLabel (n :Label2)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label1 MATCH (n:Label2) RETURN n UNION MATCH (n:Label1) RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_union_applicable_in_both_branches(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label1:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
memgraph.execute("CREATE INDEX ON :Label3;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Distinct",
|
||||
" * Union {n : n}",
|
||||
" |\\ ",
|
||||
" | * Produce {n}",
|
||||
" | * Filter (n :Label2:Label3)",
|
||||
" | * ScanAllByLabel (n :Label2)",
|
||||
" | * Once",
|
||||
" * Produce {n}",
|
||||
" * Filter (n :Label1:Label2)",
|
||||
" * ScanAllByLabel (n :Label1)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label1, :Label2 MATCH (n:Label1:Label2) RETURN n UNION MATCH (n:Label2:Label3) RETURN n;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_multiple_match_query(memgraph):
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label1:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
memgraph.execute("CREATE INDEX ON :Label2;")
|
||||
memgraph.execute("CREATE INDEX ON :Label3;")
|
||||
|
||||
expected_explain_with_hint = [
|
||||
" * Produce {n, m}",
|
||||
" * Cartesian {m : n}",
|
||||
" |\\ ",
|
||||
" | * Filter (n :Label1:Label2), {n.id}",
|
||||
" | * ScanAllByLabel (n :Label1)",
|
||||
" | * Once",
|
||||
" * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
|
||||
" * ScanAllByLabel (m :Label2)",
|
||||
" * Once",
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch(
|
||||
"EXPLAIN USING INDEX :Label1, :Label2 MATCH (n:Label1:Label2) WHERE n.id = 1 MATCH (m:Label2:Label3) return n, m;"
|
||||
)
|
||||
]
|
||||
|
||||
assert explain_with_hint == expected_explain_with_hint
|
||||
|
||||
|
||||
def test_nonexistent_label_index(memgraph):
|
||||
# In case of hinting at a nonexistent index, the query should execute without exceptions, and its output should be
|
||||
# the same as without that hint
|
||||
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label1;")
|
||||
|
||||
try:
|
||||
explain_no_hint = [
|
||||
row["QUERY PLAN"] for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
assert explain_with_hint == explain_no_hint
|
||||
except mgclient.DatabaseError:
|
||||
assert False
|
||||
|
||||
|
||||
def test_nonexistent_label_property_index(memgraph):
|
||||
# In case of hinting at a nonexistent index, the query should execute without exceptions, and its output should be
|
||||
# the same as without that hint
|
||||
|
||||
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
|
||||
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
|
||||
memgraph.execute("CREATE INDEX ON :Label(id2);")
|
||||
|
||||
try:
|
||||
explain_no_hint = [
|
||||
row["QUERY PLAN"] for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
explain_with_hint = [
|
||||
row["QUERY PLAN"]
|
||||
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label(id1) MATCH (n:Label1:Label2) RETURN n;")
|
||||
]
|
||||
|
||||
assert explain_with_hint == explain_no_hint
|
||||
except mgclient.DatabaseError:
|
||||
assert False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, hand this file to pytest with the "-rA" option and use pytest's result as the
    # process exit status.
    pytest_result = pytest.main([__file__, "-rA"])
    sys.exit(pytest_result)
|
13
tests/e2e/index_hints/workloads.yaml
Normal file
13
tests/e2e/index_hints/workloads.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
index_hints_cluster: &index_hints_cluster
|
||||
cluster:
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE"]
|
||||
log_file: "index_hints.log"
|
||||
setup_queries: []
|
||||
validation_queries: []
|
||||
|
||||
workloads:
|
||||
- name: "Index hints"
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
args: ["index_hints/index_hints.py"]
|
||||
<<: *index_hints_cluster
|
@ -241,7 +241,7 @@ TYPED_TEST(OperatorToStringTest, Filter) {
|
||||
EQ(PROPERTY_LOOKUP(this->dba, "person", property), LITERAL(5)), filters);
|
||||
|
||||
std::string expected_string{
|
||||
"Filter Generic {person}, id(person), (person :Customer:Visitor), (:Customer:Visitor), {person.name}, Pattern"};
|
||||
"Filter (:Customer:Visitor), (person :Customer:Visitor), Generic {person}, Pattern, id(person), {person.name}"};
|
||||
EXPECT_EQ(last_op->ToString(), expected_string);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user