Extend Cypher queries with the index hinting feature (#1345)

This commit is contained in:
Ante Pušić 2023-10-27 14:26:19 +02:00 committed by GitHub
parent a94588bde3
commit 989bb97514
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 768 additions and 74 deletions

2
.gitignore vendored
View File

@ -60,3 +60,5 @@ src/storage/distributed/rpc/concurrent_id_mapper_rpc_messages.hpp
src/transactions/distributed/engine_rpc_messages.hpp
/tests/manual/js/transaction_timeout/package-lock.json
/tests/manual/js/transaction_timeout/node_modules/
.vscode/
src/query/frontend/opencypher/grammar/.antlr/*

View File

@ -2069,6 +2069,27 @@ class Query : public memgraph::query::Tree, public utils::Visitable<QueryVisitor
friend class AstStorage;
};
struct IndexHint {
static const utils::TypeInfo kType;
const utils::TypeInfo &GetTypeInfo() const { return kType; }
enum class IndexType { LABEL, LABEL_PROPERTY };
memgraph::query::IndexHint::IndexType index_type_;
memgraph::query::LabelIx label_;
std::optional<memgraph::query::PropertyIx> property_{std::nullopt};
IndexHint Clone(AstStorage *storage) const {
IndexHint object;
object.index_type_ = index_type_;
object.label_ = storage->GetLabelIx(label_.name);
if (property_) {
object.property_ = storage->GetPropertyIx(property_->name);
}
return object;
}
};
class CypherQuery : public memgraph::query::Query, public utils::Visitable<HierarchicalTreeVisitor> {
public:
static const utils::TypeInfo kType;
@ -2093,6 +2114,11 @@ class CypherQuery : public memgraph::query::Query, public utils::Visitable<Hiera
memgraph::query::SingleQuery *single_query_{nullptr};
/// Contains remaining queries that should form and union with `single_query_`.
std::vector<memgraph::query::CypherUnion *> cypher_unions_;
/// Index hints
/// Suggestion: If we're going to have multiple pre-query directives (not only index_hints_), they need to be
/// contained within a dedicated class/struct
std::vector<memgraph::query::IndexHint> index_hints_;
/// Memory limit
memgraph::query::Expression *memory_limit_{nullptr};
size_t memory_scale_{1024U};
@ -2103,6 +2129,10 @@ class CypherQuery : public memgraph::query::Query, public utils::Visitable<Hiera
for (auto i5 = 0; i5 < cypher_unions_.size(); ++i5) {
object->cypher_unions_[i5] = cypher_unions_[i5] ? cypher_unions_[i5]->Clone(storage) : nullptr;
}
object->index_hints_.resize(index_hints_.size());
for (auto i6 = 0; i6 < index_hints_.size(); ++i6) {
object->index_hints_[i6] = index_hints_[i6].Clone(storage);
}
object->memory_limit_ = memory_limit_ ? memory_limit_->Clone(storage) : nullptr;
object->memory_scale_ = memory_scale_;
return object;

View File

@ -202,6 +202,20 @@ antlrcpp::Any CypherMainVisitor::visitCypherQuery(MemgraphCypher::CypherQueryCon
cypher_query->cypher_unions_.push_back(std::any_cast<CypherUnion *>(child->accept(this)));
}
if (auto *index_hints_ctx = ctx->indexHints()) {
for (auto *index_hint_ctx : index_hints_ctx->indexHint()) {
auto label = AddLabel(std::any_cast<std::string>(index_hint_ctx->labelName()->accept(this)));
if (!index_hint_ctx->propertyKeyName()) {
cypher_query->index_hints_.emplace_back(IndexHint{.index_type_ = IndexHint::IndexType::LABEL, .label_ = label});
continue;
}
cypher_query->index_hints_.emplace_back(
IndexHint{.index_type_ = IndexHint::IndexType::LABEL_PROPERTY,
.label_ = label,
.property_ = std::any_cast<PropertyIx>(index_hint_ctx->propertyKeyName()->accept(this))});
}
}
if (auto *memory_limit_ctx = ctx->queryMemoryLimit()) {
const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this);
if (memory_limit_info) {

View File

@ -114,6 +114,7 @@ memgraphCypherKeyword : cypherKeyword
| USE
| USER
| USERS
| USING
| VERSION
| TERMINATE
| TRANSACTIONS
@ -151,6 +152,8 @@ query : cypherQuery
| edgeImportModeQuery
;
// A Cypher query: an optional leading index-hint list, one single query, zero or more unions, and an optional
// trailing query memory limit.
cypherQuery : ( indexHints )? singleQuery ( cypherUnion )* ( queryMemoryLimit )? ;
authQuery : createRole
| dropRole
| showRoles
@ -209,6 +212,10 @@ updateClause : set
foreach : FOREACH '(' variable IN expression '|' updateClause+ ')' ;
// Index-hint list: `USING INDEX` followed by one or more comma-separated hints.
indexHints: USING INDEX indexHint ( ',' indexHint )* ;
// A single hint: a colon-prefixed label name, optionally followed by a parenthesized property key name.
indexHint: ':' labelName ( '(' propertyKeyName ')' )? ;
callSubquery : CALL '{' cypherQuery '}' ;
streamQuery : checkStream
@ -259,9 +266,9 @@ userOrRoleName : symbolicName ;
createRole : CREATE ROLE role=userOrRoleName ;
dropRole : DROP ROLE role=userOrRoleName ;
dropRole : DROP ROLE role=userOrRoleName ;
showRoles : SHOW ROLES ;
showRoles : SHOW ROLES ;
createUser : CREATE USER user=userOrRoleName
( IDENTIFIED BY password=literal )? ;
@ -347,11 +354,11 @@ showRoleForUser : SHOW ROLE FOR user=userOrRoleName ;
showUsersForRole : SHOW USERS FOR role=userOrRoleName ;
dumpQuery: DUMP DATABASE ;
dumpQuery : DUMP DATABASE ;
analyzeGraphQuery: ANALYZE GRAPH ( ON LABELS ( listOfColonSymbolicNames | ASTERISK ) ) ? ( DELETE STATISTICS ) ? ;
analyzeGraphQuery : ANALYZE GRAPH ( ON LABELS ( listOfColonSymbolicNames | ASTERISK ) ) ? ( DELETE STATISTICS ) ? ;
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
( WITH PORT port=literal ) ? ;
showReplicationRole : SHOW REPLICATION ROLE ;
@ -365,7 +372,7 @@ registerReplica : REGISTER REPLICA replicaName ( SYNC | ASYNC )
dropReplica : DROP REPLICA replicaName ;
showReplicas : SHOW REPLICAS ;
showReplicas : SHOW REPLICAS ;
lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY | DATA DIRECTORY LOCK STATUS;
@ -402,7 +409,7 @@ streamName : symbolicName ;
symbolicNameWithMinus : symbolicName ( MINUS symbolicName )* ;
symbolicNameWithDotsAndMinus: symbolicNameWithMinus ( DOT symbolicNameWithMinus )* ;
symbolicNameWithDotsAndMinus : symbolicNameWithMinus ( DOT symbolicNameWithMinus )* ;
symbolicTopicNames : symbolicNameWithDotsAndMinus ( COMMA symbolicNameWithDotsAndMinus )* ;
@ -480,6 +487,6 @@ useDatabase : USE DATABASE databaseName ;
dropDatabase : DROP DATABASE databaseName ;
showDatabases: SHOW DATABASES ;
showDatabases : SHOW DATABASES ;
edgeImportModeQuery : EDGE IMPORT MODE ( ACTIVE | INACTIVE ) ;

View File

@ -137,5 +137,6 @@ UPDATE : U P D A T E ;
USE : U S E ;
USER : U S E R ;
USERS : U S E R S ;
USING : U S I N G ;
VERSION : V E R S I O N ;
WEBSOCKET : W E B S O C K E T ;

View File

@ -1131,8 +1131,12 @@ class Filter : public memgraph::query::plan::LogicalOperator {
static std::string SingleFilterName(const query::plan::FilterInfo &single_filter) {
using Type = query::plan::FilterInfo::Type;
if (single_filter.type == Type::Generic) {
return fmt::format("Generic {{{}}}", utils::IterableToString(single_filter.used_symbols, ", ",
[](const auto &symbol) { return symbol.name(); }));
std::set<std::string> symbol_names;
for (const auto &symbol : single_filter.used_symbols) {
symbol_names.insert(symbol.name());
}
return fmt::format("Generic {{{}}}",
utils::IterableToString(symbol_names, ", ", [](const auto &name) { return name; }));
} else if (single_filter.type == Type::Id) {
return fmt::format("id({})", single_filter.id_filter->symbol_.name());
} else if (single_filter.type == Type::Label) {
@ -1140,16 +1144,18 @@ class Filter : public memgraph::query::plan::LogicalOperator {
LOG_FATAL("Label filters not using LabelsTest are not supported for query inspection!");
}
auto filter_expression = static_cast<LabelsTest *>(single_filter.expression);
std::set<std::string> label_names;
for (const auto &label : filter_expression->labels_) {
label_names.insert(label.name);
}
if (filter_expression->expression_->GetTypeInfo() != Identifier::kType) {
return fmt::format("(:{})", utils::IterableToString(filter_expression->labels_, ":",
[](const auto &label) { return label.name; }));
return fmt::format("(:{})", utils::IterableToString(label_names, ":", [](const auto &name) { return name; }));
}
auto identifier_expression = static_cast<Identifier *>(filter_expression->expression_);
return fmt::format(
"({} :{})", identifier_expression->name_,
utils::IterableToString(filter_expression->labels_, ":", [](const auto &label) { return label.name; }));
return fmt::format("({} :{})", identifier_expression->name_,
utils::IterableToString(label_names, ":", [](const auto &name) { return name; }));
} else if (single_filter.type == Type::Pattern) {
return "Pattern";
} else if (single_filter.type == Type::Property) {
@ -1161,9 +1167,11 @@ class Filter : public memgraph::query::plan::LogicalOperator {
}
std::string ToString() const override {
return fmt::format("Filter {}", utils::IterableToString(all_filters_, ", ", [](const auto &single_filter) {
return Filter::SingleFilterName(single_filter);
}));
std::set<std::string> filter_names;
for (const auto &filter : all_filters_) {
filter_names.insert(Filter::SingleFilterName(filter));
}
return fmt::format("Filter {}", utils::IterableToString(filter_names, ", ", [](const auto &name) { return name; }));
}
std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override {

View File

@ -38,14 +38,20 @@ class PostProcessor final {
Parameters parameters_;
public:
IndexHints index_hints_{};
using ProcessedPlan = std::unique_ptr<LogicalOperator>;
explicit PostProcessor(const Parameters &parameters) : parameters_(parameters) {}
/// Builds a post-processor that also carries the query's index hints.
///
/// @param parameters query parameters, copied into the post-processor
/// @param index_hints hints as given by the query; handed over to `IndexHints`, which is constructed with `db`
/// @param db database accessor forwarded to the `IndexHints` constructor
template <class TDbAccessor>
PostProcessor(const Parameters &parameters, std::vector<IndexHint> index_hints, TDbAccessor *db)
    // `index_hints` is a by-value sink parameter: move it on instead of copying the whole vector a second time.
    : parameters_(parameters), index_hints_(std::move(index_hints), db) {}
template <class TPlanningContext>
std::unique_ptr<LogicalOperator> Rewrite(std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) {
auto index_lookup_plan =
RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db);
RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db, index_hints_);
return RewriteWithJoinRewriter(std::move(index_lookup_plan), context->symbol_table, context->ast_storage,
context->db);
}
@ -122,7 +128,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
template <class TPlanningContext>
auto MakeLogicalPlan(TPlanningContext *context, const Parameters &parameters, bool use_variable_planner) {
PostProcessor post_processor(parameters);
PostProcessor post_processor(parameters, context->query->index_hints_, context->db);
return MakeLogicalPlan(context, &post_processor, use_variable_planner);
}

View File

@ -32,6 +32,36 @@ DECLARE_int64(query_vertex_count_to_expand_existing);
namespace memgraph::query::plan {
/// Holds a given query's index hints, split by index type.
///
/// Only hints whose index exists in the database are kept; hints that refer to a missing index are reported at
/// debug log level and dropped. Within each list the hints keep the order in which they were passed in.
struct IndexHints {
  IndexHints() = default;

  /// @param index_hints hints to classify; only read, so taken by const reference to avoid copying the vector
  /// @param db accessor used to map names to labels/properties and to check whether an index exists
  template <class TDbAccessor>
  IndexHints(const std::vector<IndexHint> &index_hints, TDbAccessor *db) {
    for (const auto &index_hint : index_hints) {
      const auto index_type = index_hint.index_type_;
      // Bind by reference: the names are only read while processing this hint.
      const auto &label_name = index_hint.label_.name;
      if (index_type == IndexHint::IndexType::LABEL) {
        if (!db->LabelIndexExists(db->NameToLabel(label_name))) {
          spdlog::debug("Index for label {} doesn't exist", label_name);
          continue;
        }
        label_index_hints_.emplace_back(index_hint);
      } else if (index_type == IndexHint::IndexType::LABEL_PROPERTY) {
        // `property_` is optional; dereferencing it while empty would be undefined behavior, so such a hint is
        // ignored instead.
        if (!index_hint.property_) {
          spdlog::debug("Index hint for label {} is missing a property", label_name);
          continue;
        }
        const auto &property_name = index_hint.property_->name;
        if (!db->LabelPropertyIndexExists(db->NameToLabel(label_name), db->NameToProperty(property_name))) {
          spdlog::debug("Index for label {} and property {} doesn't exist", label_name, property_name);
          continue;
        }
        label_property_index_hints_.emplace_back(index_hint);
      }
    }
  }

  /// Hints of type `LABEL` whose label index exists.
  std::vector<IndexHint> label_index_hints_{};
  /// Hints of type `LABEL_PROPERTY` whose label-property index exists.
  std::vector<IndexHint> label_property_index_hints_{};
};
namespace impl {
struct ExpressionRemovalResult {
@ -43,11 +73,18 @@ struct ExpressionRemovalResult {
// given expression tree.
ExpressionRemovalResult RemoveExpressions(Expression *expr, const std::unordered_set<Expression *> &exprs_to_remove);
/// Hash functor for `std::pair`, usable as the hasher of unordered containers keyed by a pair.
/// The hash is produced by `utils::HashCombine` from the pair's two elements.
struct HashPair {
  template <class TFirst, class TSecond>
  std::size_t operator()(const std::pair<TFirst, TSecond> &pair) const {
    const auto &[first, second] = pair;
    return utils::HashCombine<TFirst, TSecond>{}(first, second);
  }
};
template <class TDbAccessor>
class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
public:
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db)
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {}
/// Stores the given symbol table, AST storage and database accessor pointers and keeps its own copy of the
/// index hints.
IndexLookupRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db, IndexHints index_hints)
: symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db), index_hints_(index_hints) {}
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
@ -535,6 +572,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
// Expressions which no longer need a plain Filter operator.
std::unordered_set<Expression *> filter_exprs_for_removal_;
std::vector<LogicalOperator *> prev_ops_;
IndexHints index_hints_;
// additional symbols that are present from other non-main branches but have influence on indexing
std::unordered_set<Symbol> additional_bound_symbols_;
@ -575,7 +613,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
}
void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) {
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_);
IndexLookupRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_, index_hints_);
(*branch)->Accept(rewriter);
if (rewriter.new_root_) {
*branch = rewriter.new_root_;
@ -588,6 +626,13 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
std::optional<LabelIx> FindBestLabelIndex(const std::unordered_set<LabelIx> &labels) {
MG_ASSERT(!labels.empty(), "Trying to find the best label without any labels.");
for (const auto &[index_type, label, _] : index_hints_.label_index_hints_) {
if (labels.contains(label)) {
return label;
}
}
std::optional<LabelIx> best_label;
for (const auto &label : labels) {
if (!db_->LabelIndexExists(GetLabel(label))) continue;
@ -600,13 +645,12 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
return best_label;
}
// Finds the label-property combination. The first criteria based on number of vertices indexed -> if one index has
// 10x less than the other one, always choose the smaller one. Otherwise, choose the index with smallest average
// group size based on key distribution. If average group size is equal, choose the index that has distribution
// closer to uniform distribution. Conditions based on average group size and key distribution can be only taken
// into account if the user has run `ANALYZE GRAPH` query before If the index cannot be found, nullopt is returned.
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
const std::unordered_set<Symbol> &bound_symbols) {
/// Result of collecting the usable label-property indices for a symbol.
/// Both members describe candidate (label, property) indices together with the property filter they serve.
struct CandidateIndices {
  /// Candidates as (index description, filter) pairs.
  std::vector<std::pair<IndexHint, FilterInfo>> candidate_indices_;
  /// Filters keyed by (label, property).
  std::unordered_map<std::pair<LabelIx, PropertyIx>, FilterInfo, HashPair> candidate_index_lookup_;
};
CandidateIndices GetCandidateIndices(const Symbol &symbol, const std::unordered_set<Symbol> &bound_symbols) {
auto are_bound = [&bound_symbols](const auto &used_symbols) {
for (const auto &used_symbol : used_symbols) {
if (!utils::Contains(bound_symbols, used_symbol)) {
@ -616,6 +660,39 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
return true;
};
std::vector<std::pair<IndexHint, FilterInfo>> candidate_indices{};
std::unordered_map<std::pair<LabelIx, PropertyIx>, FilterInfo, HashPair> candidate_index_lookup{};
for (const auto &label : filters_.FilteredLabels(symbol)) {
for (const auto &filter : filters_.PropertyFilters(symbol)) {
if (filter.property_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) {
// Skip filter expressions which use the symbol whose property we are
// looking up or aren't bound. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
// cannot scan `n` by property index.
continue;
}
const auto &property = filter.property_filter->property_;
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
continue;
}
candidate_indices.emplace_back(std::make_pair(
IndexHint{.index_type_ = IndexHint::IndexType::LABEL_PROPERTY, .label_ = label, .property_ = property},
filter));
candidate_index_lookup.insert({std::make_pair(label, property), filter});
}
}
return CandidateIndices{.candidate_indices_ = candidate_indices, .candidate_index_lookup_ = candidate_index_lookup};
}
// Finds the best label-property combination. The first criterion is the number of indexed vertices -> if one index
// has 10x fewer vertices than the other one, always choose the smaller one. Otherwise, choose the index with the
// smallest average group size based on key distribution. If the average group sizes are equal, choose the index
// whose distribution is closer to the uniform distribution. Conditions based on average group size and key
// distribution can only be taken into account if the user has run the `ANALYZE GRAPH` query beforehand. If no index
// can be found, nullopt is returned.
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
const std::unordered_set<Symbol> &bound_symbols) {
/*
* Comparator function between two indices. If new index has >= 10x vertices than the existing, it cannot be
* better. If it is <= 10x in number of vertices, check average group size of property values. The index with
@ -640,54 +717,59 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
return utils::CompareDecimal(new_stats->statistic, found->index_stats->statistic);
};
auto [candidate_indices, candidate_index_lookup] = GetCandidateIndices(symbol, bound_symbols);
for (const auto &[index_type, label, maybe_property] : index_hints_.label_property_index_hints_) {
auto property = *maybe_property;
if (candidate_index_lookup.contains(std::make_pair(label, property))) {
return LabelPropertyIndex{.label = label,
.filter = candidate_index_lookup.at(std::make_pair(label, property)),
.vertex_count = std::numeric_limits<std::int64_t>::max()};
}
}
std::optional<LabelPropertyIndex> found;
for (const auto &label : filters_.FilteredLabels(symbol)) {
for (const auto &filter : filters_.PropertyFilters(symbol)) {
if (filter.property_filter->is_symbol_in_value_ || !are_bound(filter.used_symbols)) {
// Skip filter expressions which use the symbol whose property we are
// looking up or aren't bound. We cannot scan by such expressions. For
// example, in `n.a = 2 + n.b` both sides of `=` refer to `n`, so we
// cannot scan `n` by property index.
continue;
}
const auto &property = filter.property_filter->property_;
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
continue;
}
auto is_better_type = [&found](PropertyFilter::Type type) {
// Order the types by the most preferred index lookup type.
static const PropertyFilter::Type kFilterTypeOrder[] = {
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, PropertyFilter::Type::REGEX_MATCH};
auto *found_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, found->filter.property_filter->type_);
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
return type_sort_ix < found_sort_ix;
};
// for (const auto &[label_and_property, filter] : candidate_indices) {
// const auto &[label, property] = label_and_property;
for (const auto &[candidate, filter] : candidate_indices) {
const auto &[_, label, maybe_property] = candidate;
auto property = *maybe_property;
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
std::optional<storage::LabelPropertyIndexStats> new_stats =
db_->GetIndexStats(GetLabel(label), GetProperty(property));
auto is_better_type = [&found](PropertyFilter::Type type) {
// Order the types by the most preferred index lookup type.
static const PropertyFilter::Type kFilterTypeOrder[] = {
PropertyFilter::Type::EQUAL, PropertyFilter::Type::RANGE, PropertyFilter::Type::REGEX_MATCH};
auto *found_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, found->filter.property_filter->type_);
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
return type_sort_ix < found_sort_ix;
};
// Conditions, from more to less important:
// the index with 10x less vertices is better.
// the index with smaller average group size is better.
// the index with equal avg group size and distribution closer to the uniform is better.
// the index with less vertices is better.
// the index with same number of vertices but more optimized filter is better.
if (!found || vertex_count * 10 < found->vertex_count) {
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
continue;
}
// Conditions, from more to less important:
// the index with 10x less vertices is better.
// the index with smaller average group size is better.
// the index with equal avg group size and distribution closer to the uniform is better.
// the index with less vertices is better.
// the index with same number of vertices but more optimized filter is better.
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
cmp_res == -1 ||
cmp_res == 0 && (found->vertex_count > vertex_count ||
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
}
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
std::optional<storage::LabelPropertyIndexStats> new_stats =
db_->GetIndexStats(GetLabel(label), GetProperty(property));
if (!found || vertex_count * 10 < found->vertex_count) {
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
continue;
}
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
cmp_res == -1 ||
cmp_res == 0 && (found->vertex_count > vertex_count ||
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
}
}
return found;
}
// Creates a ScanAll by the best possible index for the `node_symbol`. If the node
// does not have at least a label, no indexed lookup can be created and
// `nullptr` is returned. The operator is chained after `input`. Optional
@ -798,8 +880,8 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
template <class TDbAccessor>
std::unique_ptr<LogicalOperator> RewriteWithIndexLookup(std::unique_ptr<LogicalOperator> root_op,
SymbolTable *symbol_table, AstStorage *ast_storage,
TDbAccessor *db) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db);
TDbAccessor *db, IndexHints index_hints) {
impl::IndexLookupRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db, index_hints);
root_op->Accept(rewriter);
if (rewriter.new_root_) {
// This shouldn't happen in real use case, because IndexLookupRewriter

View File

@ -66,6 +66,7 @@ add_subdirectory(concurrent_query_modules)
add_subdirectory(show_index_info)
add_subdirectory(set_properties)
add_subdirectory(transaction_rollback)
add_subdirectory(index_hints)
add_subdirectory(query_modules)
add_subdirectory(constraints)

View File

@ -1,7 +1,7 @@
disk_storage: &disk_storage
cluster:
main:
args: ["--bolt-port", "7687", "--log-level", "TRACE", "--memory-limit", "125"]
args: ["--bolt-port", "7687", "--log-level", "TRACE", "--memory-limit", "150"]
log_file: "disk_storage.log"
setup_queries: []
validation_queries: []

View File

@ -0,0 +1,6 @@
# Forwards FILE_NAME to copy_e2e_python_files with `index_hints` as the first argument.
function(copy_index_hints_e2e_python_files FILE_NAME)
copy_e2e_python_files(index_hints ${FILE_NAME})
endfunction()
copy_index_hints_e2e_python_files(common.py)
copy_index_hints_e2e_python_files(index_hints.py)

View File

@ -0,0 +1,45 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import typing
import mgclient
import pytest
from gqlalchemy import Memgraph
def execute_and_fetch_all(
    cursor: mgclient.Cursor, query: str, params: typing.Optional[dict] = None
) -> typing.List[tuple]:
    """Execute `query` on `cursor` and return everything `cursor.fetchall()` yields.

    Args:
        cursor: cursor the query is executed on.
        query: query text passed to `cursor.execute`.
        params: query parameters; an empty dict is used when omitted.
    """
    # A fresh dict per call avoids sharing one mutable default object between calls.
    cursor.execute(query, {} if params is None else params)
    return cursor.fetchall()
@pytest.fixture
def connect(**kwargs) -> mgclient.Connection:
    """Yield an autocommit connection to localhost:7687 after resetting the server state.

    Before yielding, the fixture switches to the `memgraph` database, tries to drop the `clean` database and
    detach-deletes every node.
    """
    connection = mgclient.connect(host="localhost", port=7687, **kwargs)
    connection.autocommit = True
    cursor = connection.cursor()
    execute_and_fetch_all(cursor, "USE DATABASE memgraph")
    try:
        execute_and_fetch_all(cursor, "DROP DATABASE clean")
    except Exception:
        # Best-effort cleanup: a failure here (presumably because the database does not exist) is ignored.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt/SystemExit still propagate.
        pass
    execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n")
    yield connection
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
    """Yield a `Memgraph` client; once the test is done, call `drop_database()` and then `drop_indexes()` on it."""
    client = Memgraph()
    yield client
    # Teardown: runs after the test that used the fixture has finished.
    client.drop_database()
    client.drop_indexes()

View File

@ -0,0 +1,479 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import mgclient
import pytest
from common import memgraph
def test_label_index_hint(memgraph):
    """Without a hint the plan scans by :Label1; with `USING INDEX :Label2` it scans by :Label2."""

    def explain(query):
        return [row["QUERY PLAN"] for row in memgraph.execute_and_fetch(query)]

    for setup_query in (
        "FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));",
        "FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));",
        "CREATE INDEX ON :Label1;",
        "CREATE INDEX ON :Label2;",
    ):
        memgraph.execute(setup_query)

    expected_plan_no_hint = [
        " * Produce {n}",
        " * Filter (n :Label1:Label2)",
        " * ScanAllByLabel (n :Label1)",
        " * Once",
    ]
    # Same plan, except that the scan uses the hinted label.
    expected_plan_with_hint = list(map(lambda row: row.replace("(n :Label1)", "(n :Label2)"), expected_plan_no_hint))

    plan_no_hint = explain("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
    plan_with_hint = explain("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")

    assert plan_no_hint == expected_plan_no_hint
    assert plan_with_hint == expected_plan_with_hint
def test_label_index_hint_alternative_orderings(memgraph):
    """The hinted :Label2 index is used regardless of the order of the labels in the MATCH pattern."""

    def explain(query):
        return [row["QUERY PLAN"] for row in memgraph.execute_and_fetch(query)]

    for setup_query in (
        "FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));",
        "FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));",
        "CREATE INDEX ON :Label1;",
        "CREATE INDEX ON :Label2;",
    ):
        memgraph.execute(setup_query)

    expected_plan = [
        " * Produce {n}",
        " * Filter (n :Label1:Label2)",
        " * ScanAllByLabel (n :Label2)",
        " * Once",
    ]
    # The third query matches 3 labels, so only its Filter row differs.
    expected_plan_three_labels = list(expected_plan)
    expected_plan_three_labels[1] = " * Filter (n :Label1:Label2:Label3)"

    plan_ordering_1 = explain("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")
    plan_ordering_2 = explain("EXPLAIN USING INDEX :Label2 MATCH (n:Label2:Label1) RETURN n;")
    plan_ordering_3 = explain("EXPLAIN USING INDEX :Label2 MATCH (n:Label3:Label2:Label1) RETURN n;")

    assert expected_plan == plan_ordering_1
    assert plan_ordering_1 == plan_ordering_2
    assert expected_plan_three_labels == plan_ordering_3
def test_multiple_label_index_hints(memgraph):
    """With hints `:Label0, :Label2` and a pattern that lacks :Label0, the plan scans by :Label2."""
    for setup_query in (
        "FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));",
        "FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));",
        "CREATE INDEX ON :Label0;",
        "CREATE INDEX ON :Label2;",
    ):
        memgraph.execute(setup_query)

    expected_plan = [
        " * Produce {n}",
        " * Filter (n :Label1:Label2)",
        " * ScanAllByLabel (n :Label2)",
        " * Once",
    ]

    rows = memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label0, :Label2 MATCH (n:Label1:Label2) RETURN n;")
    actual_plan = [row["QUERY PLAN"] for row in rows]

    assert actual_plan == expected_plan
def test_multiple_applicable_label_index_hints(memgraph):
    """Out of all applicable index hints, the first one given in the query should be used."""
    for setup_query in (
        "FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));",
        "CREATE INDEX ON :Label1;",
        "CREATE INDEX ON :Label2;",
        "CREATE INDEX ON :Label3;",
    ):
        memgraph.execute(setup_query)

    expected_plan = [
        " * Produce {n}",
        " * Filter (n :Label2:Label3)",
        " * ScanAllByLabel (n :Label3)",
        " * Once",
    ]

    rows = memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label2:Label3) RETURN n;")
    actual_plan = [row["QUERY PLAN"] for row in rows]

    assert actual_plan == expected_plan
def test_multiple_applicable_label_index_hints_alternative_orderings(memgraph):
    """Out of all applicable index hints, the first one given in the query should be used.

    Checked for both hint orders, each with both orders of the labels in the MATCH pattern.
    """

    def explain(query):
        return [row["QUERY PLAN"] for row in memgraph.execute_and_fetch(query)]

    for setup_query in (
        "FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));",
        "CREATE INDEX ON :Label1;",
        "CREATE INDEX ON :Label2;",
        "CREATE INDEX ON :Label3;",
    ):
        memgraph.execute(setup_query)

    expected_plan_label3_first = [
        " * Produce {n}",
        " * Filter (n :Label2:Label3)",
        " * ScanAllByLabel (n :Label3)",
        " * Once",
    ]
    expected_plan_label2_first = list(
        map(lambda row: row.replace("(n :Label3)", "(n :Label2)"), expected_plan_label3_first)
    )

    plan_1a = explain("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label2:Label3) RETURN n;")
    plan_1b = explain("EXPLAIN USING INDEX :Label3, :Label2 MATCH (n:Label3:Label2) RETURN n;")
    plan_2a = explain("EXPLAIN USING INDEX :Label2, :Label3 MATCH (n:Label2:Label3) RETURN n;")
    plan_2b = explain("EXPLAIN USING INDEX :Label2, :Label3 MATCH (n:Label3:Label2) RETURN n;")

    assert expected_plan_label3_first == plan_1a
    assert plan_1a == plan_1b
    assert expected_plan_label2_first == plan_2a
    assert plan_2a == plan_2b
def test_label_property_index_hint(memgraph):
    """Without a hint the plan scans by :Label(id2); with `USING INDEX :Label(id1)` it scans by :Label(id1)."""

    def explain(query):
        return [row["QUERY PLAN"] for row in memgraph.execute_and_fetch(query)]

    for setup_query in (
        "FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));",
        "FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));",
        "CREATE INDEX ON :Label(id1);",
        "CREATE INDEX ON :Label(id2);",
    ):
        memgraph.execute(setup_query)

    expected_plan_no_hint = [
        " * Produce {n}",
        " * Filter (n :Label), {n.id1}, {n.id2}",
        " * ScanAllByLabelPropertyValue (n :Label {id2})",
        " * Once",
    ]
    # Same plan, except that the scan uses the hinted property.
    expected_plan_with_hint = list(
        map(lambda row: row.replace("(n :Label {id2})", "(n :Label {id1})"), expected_plan_no_hint)
    )

    plan_no_hint = explain("EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
    plan_with_hint = explain(
        "EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
    )

    assert plan_no_hint == expected_plan_no_hint
    assert plan_with_hint == expected_plan_with_hint
def test_label_property_index_hint_alternative_orderings(memgraph):
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
memgraph.execute("CREATE INDEX ON :Label(id1);")
memgraph.execute("CREATE INDEX ON :Label(id2);")
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
explain_with_hint_ordering_1 = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
)
]
explain_with_hint_ordering_2 = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
)
]
assert expected_explain_with_hint == explain_with_hint_ordering_1 == explain_with_hint_ordering_2
def test_multiple_label_property_index_hints(memgraph):
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
memgraph.execute("CREATE INDEX ON :Label(id0);")
memgraph.execute("CREATE INDEX ON :Label(id1);")
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id0), :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
)
]
explain_with_hint_alternative_ordering = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id0), :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
)
]
assert explain_with_hint == expected_explain_with_hint == explain_with_hint_alternative_ordering
def test_multiple_applicable_label_property_index_hints(memgraph):
# Out of all applicable index hints, the first one given in the query should be used
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
memgraph.execute("CREATE INDEX ON :Label(id1);")
memgraph.execute("CREATE INDEX ON :Label(id2);")
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
)
]
assert explain_with_hint == expected_explain_with_hint
def test_multiple_applicable_label_property_index_hints_alternative_orderings(memgraph):
# Out of all applicable index hints, the first one given in the query should be used
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
memgraph.execute("CREATE INDEX ON :Label(id1);")
memgraph.execute("CREATE INDEX ON :Label(id2);")
expected_explain_with_hint_1 = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
expected_explain_with_hint_2 = [
row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
]
explain_with_hint_ordering_1a = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
)
]
explain_with_hint_ordering_1b = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id1), :Label(id2) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
)
]
explain_with_hint_ordering_2a = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id2), :Label(id1) MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
)
]
explain_with_hint_ordering_2b = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label(id2), :Label(id1) MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
)
]
assert (expected_explain_with_hint_1 == explain_with_hint_ordering_1a == explain_with_hint_ordering_1b) and (
expected_explain_with_hint_2 == explain_with_hint_ordering_2a == explain_with_hint_ordering_2b
)
def test_union_applicable_in_left_branch(memgraph):
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
memgraph.execute("CREATE INDEX ON :Label1;")
memgraph.execute("CREATE INDEX ON :Label2;")
expected_explain_with_hint = [
" * Distinct",
" * Union {n : n}",
" |\\ ",
" | * Produce {n}",
" | * ScanAllByLabel (n :Label2)",
" | * Once",
" * Produce {n}",
" * ScanAllByLabel (n :Label1)",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label1 MATCH (n:Label1) RETURN n UNION MATCH (n:Label2) RETURN n;"
)
]
assert explain_with_hint == expected_explain_with_hint
def test_union_applicable_in_right_branch(memgraph):
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
memgraph.execute("CREATE INDEX ON :Label1;")
memgraph.execute("CREATE INDEX ON :Label2;")
expected_explain_with_hint = [
" * Distinct",
" * Union {n : n}",
" |\\ ",
" | * Produce {n}",
" | * ScanAllByLabel (n :Label1)",
" | * Once",
" * Produce {n}",
" * ScanAllByLabel (n :Label2)",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label1 MATCH (n:Label2) RETURN n UNION MATCH (n:Label1) RETURN n;"
)
]
assert explain_with_hint == expected_explain_with_hint
def test_union_applicable_in_both_branches(memgraph):
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label1:Label2 {id: i+50}));")
memgraph.execute("CREATE INDEX ON :Label1;")
memgraph.execute("CREATE INDEX ON :Label2;")
memgraph.execute("CREATE INDEX ON :Label3;")
expected_explain_with_hint = [
" * Distinct",
" * Union {n : n}",
" |\\ ",
" | * Produce {n}",
" | * Filter (n :Label2:Label3)",
" | * ScanAllByLabel (n :Label2)",
" | * Once",
" * Produce {n}",
" * Filter (n :Label1:Label2)",
" * ScanAllByLabel (n :Label1)",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label1, :Label2 MATCH (n:Label1:Label2) RETURN n UNION MATCH (n:Label2:Label3) RETURN n;"
)
]
assert explain_with_hint == expected_explain_with_hint
def test_multiple_match_query(memgraph):
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2:Label3 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label1:Label2 {id: i+50}));")
memgraph.execute("CREATE INDEX ON :Label1;")
memgraph.execute("CREATE INDEX ON :Label2;")
memgraph.execute("CREATE INDEX ON :Label3;")
expected_explain_with_hint = [
" * Produce {n, m}",
" * Cartesian {m : n}",
" |\\ ",
" | * Filter (n :Label1:Label2), {n.id}",
" | * ScanAllByLabel (n :Label1)",
" | * Once",
" * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
" * ScanAllByLabel (m :Label2)",
" * Once",
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch(
"EXPLAIN USING INDEX :Label1, :Label2 MATCH (n:Label1:Label2) WHERE n.id = 1 MATCH (m:Label2:Label3) return n, m;"
)
]
assert explain_with_hint == expected_explain_with_hint
def test_nonexistent_label_index(memgraph):
# In case of hinting at a nonexistent index, the query should execute without exceptions, and its output should be
# the same as without that hint
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label1:Label2 {id: i}));")
memgraph.execute("FOREACH (i IN range(1, 10) | CREATE (n:Label2 {id: i+50}));")
memgraph.execute("CREATE INDEX ON :Label1;")
try:
explain_no_hint = [
row["QUERY PLAN"] for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label2 MATCH (n:Label1:Label2) RETURN n;")
]
assert explain_with_hint == explain_no_hint
except mgclient.DatabaseError:
assert False
def test_nonexistent_label_property_index(memgraph):
# In case of hinting at a nonexistent index, the query should execute without exceptions, and its output should be
# the same as without that hint
memgraph.execute("FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
memgraph.execute("FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
memgraph.execute("CREATE INDEX ON :Label(id2);")
try:
explain_no_hint = [
row["QUERY PLAN"] for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) RETURN n;")
]
explain_with_hint = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch("EXPLAIN USING INDEX :Label(id1) MATCH (n:Label1:Label2) RETURN n;")
]
assert explain_with_hint == explain_no_hint
except mgclient.DatabaseError:
assert False
if __name__ == "__main__":
    # Run this file's tests when executed as a script and exit with pytest's status code.
    # "-rA" asks pytest for a short summary line for every test outcome.
    sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -0,0 +1,13 @@
# Cluster definition with a single `main` instance, merged into the workload below via the anchor.
index_hints_cluster: &index_hints_cluster
  cluster:
    main:
      args: ["--bolt-port", "7687", "--log-level=TRACE"]
      log_file: "index_hints.log"
      setup_queries: []
      validation_queries: []

workloads:
  # NOTE(review): this workload runs index_hints/index_hints.py, but its name does not mention
  # index hints and looks copied from another suite. Confirm nothing selects workloads by name
  # before renaming it.
  - name: "Analyze graph for better indexing"
    binary: "tests/e2e/pytest_runner.sh"
    args: ["index_hints/index_hints.py"]
    <<: *index_hints_cluster

View File

@ -241,7 +241,7 @@ TYPED_TEST(OperatorToStringTest, Filter) {
EQ(PROPERTY_LOOKUP(this->dba, "person", property), LITERAL(5)), filters);
std::string expected_string{
"Filter Generic {person}, id(person), (person :Customer:Visitor), (:Customer:Visitor), {person.name}, Pattern"};
"Filter (:Customer:Visitor), (person :Customer:Visitor), Generic {person}, Pattern, id(person), {person.name}"};
EXPECT_EQ(last_op->ToString(), expected_string);
}