Add index statistics for better query planning (#812)
This commit is contained in:
parent
0819b40202
commit
398503da7a
@ -16,6 +16,7 @@
|
|||||||
#include "auth/crypto.hpp"
|
#include "auth/crypto.hpp"
|
||||||
#include "auth/exceptions.hpp"
|
#include "auth/exceptions.hpp"
|
||||||
#include "license/license.hpp"
|
#include "license/license.hpp"
|
||||||
|
#include "query/constants.hpp"
|
||||||
#include "utils/cast.hpp"
|
#include "utils/cast.hpp"
|
||||||
#include "utils/logging.hpp"
|
#include "utils/logging.hpp"
|
||||||
#include "utils/settings.hpp"
|
#include "utils/settings.hpp"
|
||||||
@ -270,7 +271,7 @@ PermissionLevel FineGrainedAccessPermissions::Has(const std::string &permission,
|
|||||||
|
|
||||||
void FineGrainedAccessPermissions::Grant(const std::string &permission,
|
void FineGrainedAccessPermissions::Grant(const std::string &permission,
|
||||||
const FineGrainedPermission fine_grained_permission) {
|
const FineGrainedPermission fine_grained_permission) {
|
||||||
if (permission == kAsterisk) {
|
if (permission == query::kAsterisk) {
|
||||||
global_permission_ = CalculateGrant(fine_grained_permission);
|
global_permission_ = CalculateGrant(fine_grained_permission);
|
||||||
} else {
|
} else {
|
||||||
permissions_[permission] = CalculateGrant(fine_grained_permission);
|
permissions_[permission] = CalculateGrant(fine_grained_permission);
|
||||||
@ -278,7 +279,7 @@ void FineGrainedAccessPermissions::Grant(const std::string &permission,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void FineGrainedAccessPermissions::Revoke(const std::string &permission) {
|
void FineGrainedAccessPermissions::Revoke(const std::string &permission) {
|
||||||
if (permission == kAsterisk) {
|
if (permission == query::kAsterisk) {
|
||||||
permissions_.clear();
|
permissions_.clear();
|
||||||
global_permission_ = std::nullopt;
|
global_permission_ = std::nullopt;
|
||||||
} else {
|
} else {
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
#include <json/json.hpp>
|
#include <json/json.hpp>
|
||||||
|
|
||||||
namespace memgraph::auth {
|
namespace memgraph::auth {
|
||||||
const std::string kAsterisk = "*";
|
|
||||||
// These permissions must have values that are applicable for usage in a
|
// These permissions must have values that are applicable for usage in a
|
||||||
// bitmask.
|
// bitmask.
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "auth/models.hpp"
|
#include "auth/models.hpp"
|
||||||
#include "glue/auth.hpp"
|
#include "glue/auth.hpp"
|
||||||
#include "license/license.hpp"
|
#include "license/license.hpp"
|
||||||
|
#include "query/constants.hpp"
|
||||||
#include "query/frontend/ast/ast.hpp"
|
#include "query/frontend/ast/ast.hpp"
|
||||||
#include "utils/synchronized.hpp"
|
#include "utils/synchronized.hpp"
|
||||||
|
|
||||||
@ -38,7 +39,7 @@ bool IsUserAuthorizedGloballyLabels(const memgraph::auth::User &user,
|
|||||||
if (!memgraph::license::global_license_checker.IsEnterpriseValidFast()) {
|
if (!memgraph::license::global_license_checker.IsEnterpriseValidFast()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return user.GetFineGrainedAccessLabelPermissions().Has(memgraph::auth::kAsterisk, fine_grained_permission) ==
|
return user.GetFineGrainedAccessLabelPermissions().Has(memgraph::query::kAsterisk, fine_grained_permission) ==
|
||||||
memgraph::auth::PermissionLevel::GRANT;
|
memgraph::auth::PermissionLevel::GRANT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,7 +48,7 @@ bool IsUserAuthorizedGloballyEdges(const memgraph::auth::User &user,
|
|||||||
if (!memgraph::license::global_license_checker.IsEnterpriseValidFast()) {
|
if (!memgraph::license::global_license_checker.IsEnterpriseValidFast()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return user.GetFineGrainedAccessEdgeTypePermissions().Has(memgraph::auth::kAsterisk, fine_grained_permission) ==
|
return user.GetFineGrainedAccessEdgeTypePermissions().Has(memgraph::query::kAsterisk, fine_grained_permission) ==
|
||||||
memgraph::auth::PermissionLevel::GRANT;
|
memgraph::auth::PermissionLevel::GRANT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -18,6 +18,7 @@
|
|||||||
#include "auth/models.hpp"
|
#include "auth/models.hpp"
|
||||||
#include "glue/auth.hpp"
|
#include "glue/auth.hpp"
|
||||||
#include "license/license.hpp"
|
#include "license/license.hpp"
|
||||||
|
#include "query/constants.hpp"
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -253,19 +254,18 @@ bool AuthQueryHandler::CreateUser(const std::string &username, const std::option
|
|||||||
|
|
||||||
if (first_user) {
|
if (first_user) {
|
||||||
spdlog::info("{} is first created user. Granting all privileges.", username);
|
spdlog::info("{} is first created user. Granting all privileges.", username);
|
||||||
GrantPrivilege(username, memgraph::query::kPrivilegesAll
|
GrantPrivilege(
|
||||||
|
username, memgraph::query::kPrivilegesAll
|
||||||
#ifdef MG_ENTERPRISE
|
#ifdef MG_ENTERPRISE
|
||||||
,
|
,
|
||||||
{{{memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE, {memgraph::auth::kAsterisk}}}},
|
{{{memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE, {memgraph::query::kAsterisk}}}},
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE, {
|
memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE, { memgraph::query::kAsterisk }
|
||||||
memgraph::auth::kAsterisk
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -16,4 +16,6 @@
|
|||||||
namespace memgraph::query {
|
namespace memgraph::query {
|
||||||
inline constexpr uint16_t kDefaultReplicationPort = 10000;
|
inline constexpr uint16_t kDefaultReplicationPort = 10000;
|
||||||
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
|
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
|
||||||
|
inline const std::string kAsterisk = "*";
|
||||||
|
inline constexpr uint16_t kDeleteStatisticsNumResults = 6;
|
||||||
} // namespace memgraph::query
|
} // namespace memgraph::query
|
||||||
|
@ -430,6 +430,25 @@ class DbAccessor final {
|
|||||||
return accessor_->LabelPropertyIndexExists(label, prop);
|
return accessor_->LabelPropertyIndexExists(label, prop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up the statistics stored for the (label, property) index.
///
/// Delegates to accessor_->GetIndexStats and returns its result unchanged.
///
/// @param label label part of the index key.
/// @param property property part of the index key.
/// @return whatever the underlying accessor returns (an optional storage::IndexStats).
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
                                                 const storage::PropertyId &property) const {
  return accessor_->GetIndexStats(label, property);
}
|
||||||
|
|
||||||
|
/// Delegates to accessor_->ClearIndexStats.
///
/// @return the (label, property) pairs reported by the underlying accessor
///         (presumably the indices whose statistics were removed -- confirm against the storage layer).
std::vector<std::pair<storage::LabelId, storage::PropertyId>> ClearIndexStats() {
  return accessor_->ClearIndexStats();
}
|
||||||
|
|
||||||
|
/// Delegates to accessor_->DeleteIndexStatsForLabels with the given label names.
///
/// @param labels label names forwarded unchanged to the underlying accessor.
/// @return the (label, property) pairs reported by the underlying accessor
///         (presumably the indices whose statistics were removed -- confirm against the storage layer).
std::vector<std::pair<storage::LabelId, storage::PropertyId>> DeleteIndexStatsForLabels(
    const std::span<std::string> labels) {
  return accessor_->DeleteIndexStatsForLabels(labels);
}
|
||||||
|
|
||||||
|
/// Stores statistics for the (label, property) index by delegating to accessor_->SetIndexStats.
///
/// @param label label part of the index key.
/// @param property property part of the index key.
/// @param stats statistics to store.
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
                   const storage::IndexStats &stats) {
  accessor_->SetIndexStats(label, property, stats);
}
|
||||||
|
|
||||||
int64_t VerticesCount() const { return accessor_->ApproximateVertexCount(); }
|
int64_t VerticesCount() const { return accessor_->ApproximateVertexCount(); }
|
||||||
|
|
||||||
int64_t VerticesCount(storage::LabelId label) const { return accessor_->ApproximateVertexCount(label); }
|
int64_t VerticesCount(storage::LabelId label) const { return accessor_->ApproximateVertexCount(label); }
|
||||||
|
@ -229,6 +229,12 @@ class VersionInfoInMulticommandTxException : public QueryException {
|
|||||||
: QueryException("Version info query not allowed in multicommand transactions.") {}
|
: QueryException("Version info query not allowed in multicommand transactions.") {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class AnalyzeGraphInMulticommandTxException : public QueryException {
|
||||||
|
public:
|
||||||
|
AnalyzeGraphInMulticommandTxException()
|
||||||
|
: QueryException("Analyze graph query not allowed in multicommand transactions.") {}
|
||||||
|
};
|
||||||
|
|
||||||
class ReplicationException : public utils::BasicException {
|
class ReplicationException : public utils::BasicException {
|
||||||
public:
|
public:
|
||||||
using utils::BasicException::BasicException;
|
using utils::BasicException::BasicException;
|
||||||
|
@ -260,6 +260,9 @@ constexpr utils::TypeInfo query::Foreach::kType{utils::TypeId::AST_FOREACH, "For
|
|||||||
constexpr utils::TypeInfo query::ShowConfigQuery::kType{utils::TypeId::AST_SHOW_CONFIG_QUERY, "ShowConfigQuery",
|
constexpr utils::TypeInfo query::ShowConfigQuery::kType{utils::TypeId::AST_SHOW_CONFIG_QUERY, "ShowConfigQuery",
|
||||||
&query::Query::kType};
|
&query::Query::kType};
|
||||||
|
|
||||||
|
// Type descriptor for AnalyzeGraphQuery: type id AST_ANALYZE_GRAPH_QUERY and name "AnalyzeGraphQuery". The third
// field points at Query's descriptor (presumably the parent type -- confirm against utils::TypeInfo).
constexpr utils::TypeInfo query::AnalyzeGraphQuery::kType{
    utils::TypeId::AST_ANALYZE_GRAPH_QUERY, "AnalyzeGraphQuery", &query::Query::kType};
|
||||||
|
|
||||||
constexpr utils::TypeInfo query::TransactionQueueQuery::kType{utils::TypeId::AST_TRANSACTION_QUEUE_QUERY,
|
constexpr utils::TypeInfo query::TransactionQueueQuery::kType{utils::TypeId::AST_TRANSACTION_QUEUE_QUERY,
|
||||||
"TransactionQueueQuery", &query::Query::kType};
|
"TransactionQueueQuery", &query::Query::kType};
|
||||||
|
|
||||||
|
@ -3230,6 +3230,26 @@ class TransactionQueueQuery : public memgraph::query::Query {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class AnalyzeGraphQuery : public memgraph::query::Query {
|
||||||
|
public:
|
||||||
|
static const utils::TypeInfo kType;
|
||||||
|
const utils::TypeInfo &GetTypeInfo() const override { return kType; }
|
||||||
|
|
||||||
|
DEFVISITABLE(QueryVisitor<void>);
|
||||||
|
|
||||||
|
enum class Action { ANALYZE, DELETE };
|
||||||
|
|
||||||
|
memgraph::query::AnalyzeGraphQuery::Action action_;
|
||||||
|
std::vector<std::string> labels_;
|
||||||
|
|
||||||
|
AnalyzeGraphQuery *Clone(AstStorage *storage) const override {
|
||||||
|
auto *object = storage->Create<AnalyzeGraphQuery>();
|
||||||
|
object->action_ = action_;
|
||||||
|
object->labels_ = labels_;
|
||||||
|
return object;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class Exists : public memgraph::query::Expression {
|
class Exists : public memgraph::query::Expression {
|
||||||
public:
|
public:
|
||||||
static const utils::TypeInfo kType;
|
static const utils::TypeInfo kType;
|
||||||
|
@ -2403,6 +2403,26 @@ cpp<#
|
|||||||
(:serialize (:slk))
|
(:serialize (:slk))
|
||||||
(:clone))
|
(:clone))
|
||||||
|
|
||||||
|
;; AST node definition for the analyze graph query: a public `action` member, a public list of label names,
;; and a public `action` enum with the values `analyze` and `delete`.
(lcp:define-class analyze-graph-query (query)
  ((action "Action" :scope :public)
   (labels "std::vector<std::string>" :scope :public))

  (:public
    (lcp:define-enum action
      (analyze delete)
      (:serialize))
    #>cpp
    AnalyzeGraphQuery() = default;

    DEFVISITABLE(QueryVisitor<void>);
    cpp<#)
  (:private
    #>cpp
    friend class AstStorage;
    cpp<#)
  (:serialize (:slk))
  (:clone))
|
||||||
|
|
||||||
(lcp:define-class replication-query (query)
|
(lcp:define-class replication-query (query)
|
||||||
((action "Action" :scope :public)
|
((action "Action" :scope :public)
|
||||||
(role "ReplicationRole" :scope :public)
|
(role "ReplicationRole" :scope :public)
|
||||||
|
@ -95,6 +95,7 @@ class SettingQuery;
|
|||||||
class VersionQuery;
|
class VersionQuery;
|
||||||
class Foreach;
|
class Foreach;
|
||||||
class ShowConfigQuery;
|
class ShowConfigQuery;
|
||||||
|
class AnalyzeGraphQuery;
|
||||||
class TransactionQueueQuery;
|
class TransactionQueueQuery;
|
||||||
class Exists;
|
class Exists;
|
||||||
|
|
||||||
@ -131,7 +132,7 @@ template <class TResult>
|
|||||||
class QueryVisitor
|
class QueryVisitor
|
||||||
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, AuthQuery, InfoQuery,
|
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, AuthQuery, InfoQuery,
|
||||||
ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, FreeMemoryQuery, TriggerQuery,
|
ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, FreeMemoryQuery, TriggerQuery,
|
||||||
IsolationLevelQuery, CreateSnapshotQuery, StreamQuery, SettingQuery, TransactionQueueQuery,
|
IsolationLevelQuery, CreateSnapshotQuery, StreamQuery, SettingQuery, VersionQuery,
|
||||||
VersionQuery, ShowConfigQuery> {};
|
ShowConfigQuery, TransactionQueueQuery, AnalyzeGraphQuery> {};
|
||||||
|
|
||||||
} // namespace memgraph::query
|
} // namespace memgraph::query
|
||||||
|
@ -241,6 +241,23 @@ antlrcpp::Any CypherMainVisitor::visitDumpQuery(MemgraphCypher::DumpQueryContext
|
|||||||
return dump_query;
|
return dump_query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds an AnalyzeGraphQuery AST node from the parse-tree context.
 *
 * Labels come from the colon-prefixed symbolic name list when present; otherwise the single entry "*" is used.
 * The action is DELETE when the DELETE token is present and ANALYZE otherwise. The created node is also stored
 * in query_.
 *
 * @return AnalyzeGraphQuery*
 */
antlrcpp::Any CypherMainVisitor::visitAnalyzeGraphQuery(MemgraphCypher::AnalyzeGraphQueryContext *ctx) {
  auto *analyze_query = storage_->Create<AnalyzeGraphQuery>();

  if (auto *label_list = ctx->listOfColonSymbolicNames(); label_list != nullptr) {
    analyze_query->labels_ = std::any_cast<std::vector<std::string>>(label_list->accept(this));
  } else {
    analyze_query->labels_.emplace_back("*");
  }

  analyze_query->action_ =
      ctx->DELETE() != nullptr ? AnalyzeGraphQuery::Action::DELETE : AnalyzeGraphQuery::Action::ANALYZE;

  query_ = analyze_query;
  return analyze_query;
}
|
||||||
|
|
||||||
antlrcpp::Any CypherMainVisitor::visitReplicationQuery(MemgraphCypher::ReplicationQueryContext *ctx) {
|
antlrcpp::Any CypherMainVisitor::visitReplicationQuery(MemgraphCypher::ReplicationQueryContext *ctx) {
|
||||||
MG_ASSERT(ctx->children.size() == 1, "ReplicationQuery should have exactly one child!");
|
MG_ASSERT(ctx->children.size() == 1, "ReplicationQuery should have exactly one child!");
|
||||||
auto *replication_query = std::any_cast<ReplicationQuery *>(ctx->children[0]->accept(this));
|
auto *replication_query = std::any_cast<ReplicationQuery *>(ctx->children[0]->accept(this));
|
||||||
@ -1441,19 +1458,23 @@ antlrcpp::Any CypherMainVisitor::visitEntityPrivilegeList(MemgraphCypher::Entity
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the symbolic names of a colon-prefixed name list, in the order the parser reports them.
 *
 * @return std::vector<std::string>
 */
antlrcpp::Any CypherMainVisitor::visitListOfColonSymbolicNames(MemgraphCypher::ListOfColonSymbolicNamesContext *ctx) {
  std::vector<std::string> names;
  for (auto *name_ctx : ctx->colonSymbolicName()) {
    // Each element visit yields the name as a std::string wrapped in an Any.
    auto name = std::any_cast<std::string>(name_ctx->symbolicName()->accept(this));
    names.push_back(std::move(name));
  }
  return names;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return std::vector<std::string>
|
* @return std::vector<std::string>
|
||||||
*/
|
*/
|
||||||
antlrcpp::Any CypherMainVisitor::visitEntitiesList(MemgraphCypher::EntitiesListContext *ctx) {
|
antlrcpp::Any CypherMainVisitor::visitEntitiesList(MemgraphCypher::EntitiesListContext *ctx) {
|
||||||
std::vector<std::string> entities;
|
std::vector<std::string> entities;
|
||||||
if (ctx->listOfEntities()) {
|
if (ctx->listOfColonSymbolicNames()) {
|
||||||
for (auto *entity : ctx->listOfEntities()->entity()) {
|
return ctx->listOfColonSymbolicNames()->accept(this);
|
||||||
entities.push_back(std::any_cast<std::string>(entity->symbolicName()->accept(this)));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
entities.emplace_back("*");
|
|
||||||
}
|
}
|
||||||
|
entities.emplace_back("*");
|
||||||
return entities;
|
return entities;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,6 +183,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
|
|||||||
*/
|
*/
|
||||||
antlrcpp::Any visitDumpQuery(MemgraphCypher::DumpQueryContext *ctx) override;
|
antlrcpp::Any visitDumpQuery(MemgraphCypher::DumpQueryContext *ctx) override;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return std::vector<std::string>
|
||||||
|
*/
|
||||||
|
antlrcpp::Any visitListOfColonSymbolicNames(MemgraphCypher::ListOfColonSymbolicNamesContext *ctx) override;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return AnalyzeGraphQuery*
|
||||||
|
*/
|
||||||
|
antlrcpp::Any visitAnalyzeGraphQuery(MemgraphCypher::AnalyzeGraphQueryContext *ctx) override;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return ReplicationQuery*
|
* @return ReplicationQuery*
|
||||||
*/
|
*/
|
||||||
|
@ -22,6 +22,7 @@ import Cypher ;
|
|||||||
memgraphCypherKeyword : cypherKeyword
|
memgraphCypherKeyword : cypherKeyword
|
||||||
| AFTER
|
| AFTER
|
||||||
| ALTER
|
| ALTER
|
||||||
|
| ANALYZE
|
||||||
| ASYNC
|
| ASYNC
|
||||||
| AUTH
|
| AUTH
|
||||||
| BAD
|
| BAD
|
||||||
@ -53,6 +54,7 @@ memgraphCypherKeyword : cypherKeyword
|
|||||||
| FREE
|
| FREE
|
||||||
| FROM
|
| FROM
|
||||||
| GLOBAL
|
| GLOBAL
|
||||||
|
| GRAPH
|
||||||
| GRANT
|
| GRANT
|
||||||
| HEADER
|
| HEADER
|
||||||
| IDENTIFIED
|
| IDENTIFIED
|
||||||
@ -119,6 +121,7 @@ query : cypherQuery
|
|||||||
| constraintQuery
|
| constraintQuery
|
||||||
| authQuery
|
| authQuery
|
||||||
| dumpQuery
|
| dumpQuery
|
||||||
|
| analyzeGraphQuery
|
||||||
| replicationQuery
|
| replicationQuery
|
||||||
| lockPathQuery
|
| lockPathQuery
|
||||||
| freeMemoryQuery
|
| freeMemoryQuery
|
||||||
@ -291,11 +294,11 @@ revokePrivilegesList : privilegeOrEntities ( ',' privilegeOrEntities )* ;
|
|||||||
|
|
||||||
privilegesList : privilege ( ',' privilege )* ;
|
privilegesList : privilege ( ',' privilege )* ;
|
||||||
|
|
||||||
entitiesList : ASTERISK | listOfEntities ;
|
entitiesList : ASTERISK | listOfColonSymbolicNames ;
|
||||||
|
|
||||||
listOfEntities : entity ( ',' entity )* ;
|
listOfColonSymbolicNames : colonSymbolicName ( ',' colonSymbolicName )* ;
|
||||||
|
|
||||||
entity : COLON symbolicName ;
|
colonSymbolicName : COLON symbolicName ;
|
||||||
|
|
||||||
showPrivileges : SHOW PRIVILEGES FOR userOrRole=userOrRoleName ;
|
showPrivileges : SHOW PRIVILEGES FOR userOrRole=userOrRoleName ;
|
||||||
|
|
||||||
@ -305,6 +308,8 @@ showUsersForRole : SHOW USERS FOR role=userOrRoleName ;
|
|||||||
|
|
||||||
dumpQuery: DUMP DATABASE ;
|
dumpQuery: DUMP DATABASE ;
|
||||||
|
|
||||||
|
// ANALYZE GRAPH, then an optional ON LABELS clause (a colon-prefixed name list or ASTERISK),
// then an optional DELETE STATISTICS clause.
analyzeGraphQuery : ANALYZE GRAPH
                    ( ON LABELS ( listOfColonSymbolicNames | ASTERISK ) ) ?
                    ( DELETE STATISTICS ) ? ;
|
||||||
|
|
||||||
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
|
setReplicationRole : SET REPLICATION ROLE TO ( MAIN | REPLICA )
|
||||||
( WITH PORT port=literal ) ? ;
|
( WITH PORT port=literal ) ? ;
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@ UNDERSCORE : '_' ;
|
|||||||
|
|
||||||
AFTER : A F T E R ;
|
AFTER : A F T E R ;
|
||||||
ALTER : A L T E R ;
|
ALTER : A L T E R ;
|
||||||
|
ANALYZE : A N A L Y Z E ;
|
||||||
ASYNC : A S Y N C ;
|
ASYNC : A S Y N C ;
|
||||||
AUTH : A U T H ;
|
AUTH : A U T H ;
|
||||||
BAD : B A D ;
|
BAD : B A D ;
|
||||||
@ -62,6 +63,7 @@ FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ;
|
|||||||
FROM : F R O M ;
|
FROM : F R O M ;
|
||||||
GLOBAL : G L O B A L ;
|
GLOBAL : G L O B A L ;
|
||||||
GRANT : G R A N T ;
|
GRANT : G R A N T ;
|
||||||
|
GRAPH : G R A P H ;
|
||||||
GRANTS : G R A N T S ;
|
GRANTS : G R A N T S ;
|
||||||
HEADER : H E A D E R ;
|
HEADER : H E A D E R ;
|
||||||
IDENTIFIED : I D E N T I F I E D ;
|
IDENTIFIED : I D E N T I F I E D ;
|
||||||
@ -99,6 +101,7 @@ SETTING : S E T T I N G ;
|
|||||||
SETTINGS : S E T T I N G S ;
|
SETTINGS : S E T T I N G S ;
|
||||||
SNAPSHOT : S N A P S H O T ;
|
SNAPSHOT : S N A P S H O T ;
|
||||||
START : S T A R T ;
|
START : S T A R T ;
|
||||||
|
STATISTICS : S T A T I S T I C S ;
|
||||||
STATS : S T A T S ;
|
STATS : S T A T S ;
|
||||||
STOP : S T O P ;
|
STOP : S T O P ;
|
||||||
STREAM : S T R E A M ;
|
STREAM : S T R E A M ;
|
||||||
|
@ -25,9 +25,11 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis
|
|||||||
|
|
||||||
std::vector<AuthQuery::Privilege> privileges() { return privileges_; }
|
std::vector<AuthQuery::Privilege> privileges() { return privileges_; }
|
||||||
|
|
||||||
void Visit(IndexQuery &) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
void Visit(IndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
||||||
|
|
||||||
void Visit(AuthQuery &) override { AddPrivilege(AuthQuery::Privilege::AUTH); }
|
void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
||||||
|
|
||||||
|
void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); }
|
||||||
|
|
||||||
void Visit(ExplainQuery &query) override { query.cypher_query_->Accept(*this); }
|
void Visit(ExplainQuery &query) override { query.cypher_query_->Accept(*this); }
|
||||||
|
|
||||||
|
@ -331,11 +331,11 @@ Callback HandleAuthQuery(AuthQuery *auth_query, AuthQueryHandler *auth, const Pa
|
|||||||
auth->GrantPrivilege(username, kPrivilegesAll
|
auth->GrantPrivilege(username, kPrivilegesAll
|
||||||
#ifdef MG_ENTERPRISE
|
#ifdef MG_ENTERPRISE
|
||||||
,
|
,
|
||||||
{{{AuthQuery::FineGrainedPrivilege::CREATE_DELETE, {auth::kAsterisk}}}},
|
{{{AuthQuery::FineGrainedPrivilege::CREATE_DELETE, {query::kAsterisk}}}},
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
AuthQuery::FineGrainedPrivilege::CREATE_DELETE, { auth::kAsterisk }
|
AuthQuery::FineGrainedPrivilege::CREATE_DELETE, { query::kAsterisk }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1408,6 +1408,139 @@ PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map<std::string, T
|
|||||||
RWType::R};
|
RWType::R};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphCreateStatistics(
|
||||||
|
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
||||||
|
using LPIndex = std::pair<storage::LabelId, storage::PropertyId>;
|
||||||
|
|
||||||
|
std::vector<std::vector<TypedValue>> results;
|
||||||
|
std::map<LPIndex, std::map<storage::PropertyValue, int64_t>> counter;
|
||||||
|
|
||||||
|
// Preprocess labels to avoid later checks
|
||||||
|
std::vector<LPIndex> indices_info = execution_db_accessor->ListAllIndices().label_property;
|
||||||
|
if (labels[0] != kAsterisk) {
|
||||||
|
for (auto it = indices_info.cbegin(); it != indices_info.cend();) {
|
||||||
|
if (std::find(labels.begin(), labels.end(), execution_db_accessor->LabelToName(it->first)) == labels.end()) {
|
||||||
|
it = indices_info.erase(it);
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Iterate over all indexed vertices
|
||||||
|
std::for_each(indices_info.begin(), indices_info.end(), [execution_db_accessor, &counter](const LPIndex &index_info) {
|
||||||
|
auto vertices = execution_db_accessor->Vertices(storage::View::OLD, index_info.first, index_info.second);
|
||||||
|
std::for_each(vertices.begin(), vertices.end(), [&index_info, &counter](const auto &vertex) {
|
||||||
|
counter[index_info][*vertex.GetProperty(storage::View::OLD, index_info.second)]++;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
results.reserve(counter.size());
|
||||||
|
std::for_each(counter.begin(), counter.end(), [&results, execution_db_accessor](const auto &counter_entry) {
|
||||||
|
const auto &[label_property, values_map] = counter_entry;
|
||||||
|
std::vector<TypedValue> result;
|
||||||
|
result.reserve(kDeleteStatisticsNumResults);
|
||||||
|
// Extract info
|
||||||
|
int64_t count_property_value = std::accumulate(
|
||||||
|
values_map.begin(), values_map.end(), 0,
|
||||||
|
[](int64_t prev_value, const auto &prop_value_count) { return prev_value + prop_value_count.second; });
|
||||||
|
// num_distinc_values will never be 0
|
||||||
|
double avg_group_size = static_cast<double>(count_property_value) / static_cast<double>(values_map.size());
|
||||||
|
double chi_squared_stat = std::accumulate(
|
||||||
|
values_map.begin(), values_map.end(), 0.0, [avg_group_size](double prev_result, const auto &value_entry) {
|
||||||
|
return prev_result + utils::ChiSquaredValue(value_entry.second, avg_group_size);
|
||||||
|
});
|
||||||
|
execution_db_accessor->SetIndexStats(
|
||||||
|
label_property.first, label_property.second,
|
||||||
|
storage::IndexStats{.statistic = chi_squared_stat, .avg_group_size = avg_group_size});
|
||||||
|
// Save result
|
||||||
|
result.emplace_back(execution_db_accessor->LabelToName(label_property.first));
|
||||||
|
result.emplace_back(execution_db_accessor->PropertyToName(label_property.second));
|
||||||
|
result.emplace_back(count_property_value);
|
||||||
|
result.emplace_back(static_cast<int64_t>(values_map.size()));
|
||||||
|
result.emplace_back(avg_group_size);
|
||||||
|
result.emplace_back(chi_squared_stat);
|
||||||
|
results.push_back(std::move(result));
|
||||||
|
});
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphDeleteStatistics(
|
||||||
|
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
||||||
|
std::vector<std::pair<storage::LabelId, storage::PropertyId>> loc_results;
|
||||||
|
if (labels[0] == kAsterisk) {
|
||||||
|
loc_results = execution_db_accessor->ClearIndexStats();
|
||||||
|
} else {
|
||||||
|
loc_results = execution_db_accessor->DeleteIndexStatsForLabels(labels);
|
||||||
|
}
|
||||||
|
std::vector<std::vector<TypedValue>> results;
|
||||||
|
std::transform(loc_results.begin(), loc_results.end(), std::back_inserter(results),
|
||||||
|
[execution_db_accessor](const auto &label_property_index) {
|
||||||
|
return std::vector<TypedValue>{
|
||||||
|
TypedValue(execution_db_accessor->LabelToName(label_property_index.first)),
|
||||||
|
TypedValue(execution_db_accessor->PropertyToName(label_property_index.second))};
|
||||||
|
});
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the callback (result header plus function) for an analyze graph query.
 *
 * For ANALYZE the function calls AnalyzeGraphCreateStatistics, for DELETE it calls AnalyzeGraphDeleteStatistics.
 * Each function owns a copy of the query's label list; the lambdas are `mutable` because the handler takes the
 * labels as std::span<std::string>, which needs non-const access to the captured vector.
 *
 * @param analyze_graph_query query node providing action_ and labels_.
 * @param execution_db_accessor accessor captured by pointer and used when the function is invoked.
 * @return the populated Callback; for any other action value it is returned default-constructed.
 */
Callback HandleAnalyzeGraphQuery(AnalyzeGraphQuery *analyze_graph_query, DbAccessor *execution_db_accessor) {
  Callback callback;
  const auto requested_action = analyze_graph_query->action_;

  if (requested_action == AnalyzeGraphQuery::Action::ANALYZE) {
    callback.header = {"label",      "property",       "num estimation nodes",
                       "num groups", "avg group size", "chi-squared value"};
    callback.fn = [handler = AnalyzeGraphQueryHandler(), labels = analyze_graph_query->labels_,
                   execution_db_accessor]() mutable {
      return handler.AnalyzeGraphCreateStatistics(labels, execution_db_accessor);
    };
  } else if (requested_action == AnalyzeGraphQuery::Action::DELETE) {
    callback.header = {"label", "property"};
    callback.fn = [handler = AnalyzeGraphQueryHandler(), labels = analyze_graph_query->labels_,
                   execution_db_accessor]() mutable {
      return handler.AnalyzeGraphDeleteStatistics(labels, execution_db_accessor);
    };
  }

  return callback;
}
|
||||||
|
|
||||||
|
/**
 * Prepares an analyze graph query for execution.
 *
 * @param parsed_query parsed query whose `query` member must be an AnalyzeGraphQuery.
 * @param in_explicit_transaction when true the query is rejected.
 * @param execution_db_accessor accessor handed to the analyze/delete callback.
 * @param interpreter_context provides the plan cache that is emptied when this function returns.
 * @return PreparedQuery whose handler runs the callback on the first pull and then streams the rows.
 * @throws AnalyzeGraphInMulticommandTxException if called inside an explicit transaction.
 */
PreparedQuery PrepareAnalyzeGraphQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
                                       DbAccessor *execution_db_accessor, InterpreterContext *interpreter_context) {
  if (in_explicit_transaction) {
    throw AnalyzeGraphInMulticommandTxException();
  }

  // Index statistics influence computed plan costs, so drop every cached plan.
  // NOTE(review): the invalidation runs when this function exits, i.e. before the statistics are actually
  // computed on the first pull -- confirm that this ordering is intended.
  auto invalidate_plan_cache = [plan_cache = &interpreter_context->plan_cache] {
    auto cache_access = plan_cache->access();
    for (auto &cache_entry : cache_access) {
      cache_access.remove(cache_entry.first);
    }
  };
  utils::OnScopeExit cache_invalidator(invalidate_plan_cache);

  auto *analyze_graph_query = utils::Downcast<AnalyzeGraphQuery>(parsed_query.query);
  MG_ASSERT(analyze_graph_query);
  auto callback = HandleAnalyzeGraphQuery(analyze_graph_query, execution_db_accessor);

  return PreparedQuery{
      std::move(callback.header), std::move(parsed_query.required_privileges),
      [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr<PullPlanVector>{nullptr}](
          AnyStream *stream, std::optional<int> n) mutable -> std::optional<QueryHandlerResult> {
        // The callback is executed lazily, exactly once, on the first pull.
        if (UNLIKELY(!pull_plan)) {
          pull_plan = std::make_shared<PullPlanVector>(callback_fn());
        }

        if (!pull_plan->Pull(stream, n)) {
          return std::nullopt;
        }
        return QueryHandlerResult::COMMIT;
      },
      RWType::NONE};
}
|
||||||
|
|
||||||
PreparedQuery PrepareIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
|
PreparedQuery PrepareIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
|
||||||
std::vector<Notification> *notifications, InterpreterContext *interpreter_context) {
|
std::vector<Notification> *notifications, InterpreterContext *interpreter_context) {
|
||||||
if (in_explicit_transaction) {
|
if (in_explicit_transaction) {
|
||||||
@ -2504,7 +2637,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
|
|||||||
if (!in_explicit_transaction_ &&
|
if (!in_explicit_transaction_ &&
|
||||||
(utils::Downcast<CypherQuery>(parsed_query.query) || utils::Downcast<ExplainQuery>(parsed_query.query) ||
|
(utils::Downcast<CypherQuery>(parsed_query.query) || utils::Downcast<ExplainQuery>(parsed_query.query) ||
|
||||||
utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) ||
|
utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) ||
|
||||||
utils::Downcast<TriggerQuery>(parsed_query.query) ||
|
utils::Downcast<TriggerQuery>(parsed_query.query) || utils::Downcast<AnalyzeGraphQuery>(parsed_query.query) ||
|
||||||
utils::Downcast<TransactionQueueQuery>(parsed_query.query))) {
|
utils::Downcast<TransactionQueueQuery>(parsed_query.query))) {
|
||||||
db_accessor_ =
|
db_accessor_ =
|
||||||
std::make_unique<storage::Storage::Accessor>(interpreter_context_->db->Access(GetIsolationLevelOverride()));
|
std::make_unique<storage::Storage::Accessor>(interpreter_context_->db->Access(GetIsolationLevelOverride()));
|
||||||
@ -2537,6 +2670,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
|
|||||||
} else if (utils::Downcast<IndexQuery>(parsed_query.query)) {
|
} else if (utils::Downcast<IndexQuery>(parsed_query.query)) {
|
||||||
prepared_query = PrepareIndexQuery(std::move(parsed_query), in_explicit_transaction_,
|
prepared_query = PrepareIndexQuery(std::move(parsed_query), in_explicit_transaction_,
|
||||||
&query_execution->notifications, interpreter_context_);
|
&query_execution->notifications, interpreter_context_);
|
||||||
|
} else if (utils::Downcast<AnalyzeGraphQuery>(parsed_query.query)) {
|
||||||
|
prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_,
|
||||||
|
&*execution_db_accessor_, interpreter_context_);
|
||||||
} else if (utils::Downcast<AuthQuery>(parsed_query.query)) {
|
} else if (utils::Downcast<AuthQuery>(parsed_query.query)) {
|
||||||
prepared_query = PrepareAuthQuery(
|
prepared_query = PrepareAuthQuery(
|
||||||
std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, interpreter_context_,
|
std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, interpreter_context_,
|
||||||
|
@ -172,6 +172,24 @@ class ReplicationQueryHandler {
|
|||||||
virtual std::vector<Replica> ShowReplicas() const = 0;
|
virtual std::vector<Replica> ShowReplicas() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class AnalyzeGraphQueryHandler {
|
||||||
|
public:
|
||||||
|
AnalyzeGraphQueryHandler() = default;
|
||||||
|
virtual ~AnalyzeGraphQueryHandler() = default;
|
||||||
|
|
||||||
|
AnalyzeGraphQueryHandler(const AnalyzeGraphQueryHandler &) = default;
|
||||||
|
AnalyzeGraphQueryHandler &operator=(const AnalyzeGraphQueryHandler &) = default;
|
||||||
|
|
||||||
|
AnalyzeGraphQueryHandler(AnalyzeGraphQueryHandler &&) = default;
|
||||||
|
AnalyzeGraphQueryHandler &operator=(AnalyzeGraphQueryHandler &&) = default;
|
||||||
|
|
||||||
|
static std::vector<std::vector<TypedValue>> AnalyzeGraphCreateStatistics(const std::span<std::string> labels,
|
||||||
|
DbAccessor *execution_db_accessor);
|
||||||
|
|
||||||
|
static std::vector<std::vector<TypedValue>> AnalyzeGraphDeleteStatistics(const std::span<std::string> labels,
|
||||||
|
DbAccessor *execution_db_accessor);
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A container for data related to the preparation of a query.
|
* A container for data related to the preparation of a query.
|
||||||
*/
|
*/
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
|
|
||||||
#include "query/plan/operator.hpp"
|
#include "query/plan/operator.hpp"
|
||||||
#include "query/plan/preprocess.hpp"
|
#include "query/plan/preprocess.hpp"
|
||||||
|
#include "storage/v2/indices.hpp"
|
||||||
|
|
||||||
DECLARE_int64(query_vertex_count_to_expand_existing);
|
DECLARE_int64(query_vertex_count_to_expand_existing);
|
||||||
|
|
||||||
@ -482,6 +483,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
// FilterInfo with PropertyFilter.
|
// FilterInfo with PropertyFilter.
|
||||||
FilterInfo filter;
|
FilterInfo filter;
|
||||||
int64_t vertex_count;
|
int64_t vertex_count;
|
||||||
|
std::optional<storage::IndexStats> index_stats;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool DefaultPreVisit() override { throw utils::NotYetImplemented("optimizing index lookup"); }
|
bool DefaultPreVisit() override { throw utils::NotYetImplemented("optimizing index lookup"); }
|
||||||
@ -522,8 +524,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
return best_label;
|
return best_label;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finds the label-property combination which has indexed the lowest amount of
|
// Finds the best label-property index. The first criterion is the number of indexed vertices -> if one index has
|
||||||
// vertices. If the index cannot be found, nullopt is returned.
|
// 10x less than the other one, always choose the smaller one. Otherwise, choose the index with smallest average group
|
||||||
|
// size based on key distribution. If average group size is equal, choose the index that has distribution closer to
|
||||||
|
// uniform distribution. Conditions based on average group size and key distribution can be only taken into account if
|
||||||
|
// the user has run `ANALYZE GRAPH` query before. If the index cannot be found, nullopt is returned.
|
||||||
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
|
std::optional<LabelPropertyIndex> FindBestLabelPropertyIndex(const Symbol &symbol,
|
||||||
const std::unordered_set<Symbol> &bound_symbols) {
|
const std::unordered_set<Symbol> &bound_symbols) {
|
||||||
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
auto are_bound = [&bound_symbols](const auto &used_symbols) {
|
||||||
@ -534,6 +539,27 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Comparator function between two indices. If new index has >= 10x vertices than the existing, it cannot be better.
|
||||||
|
* If it is <= 10x in number of vertices, check average group size of property values. The index with smaller
|
||||||
|
* average group size is better. If the average group size is the same, choose the one closer to the uniform
|
||||||
|
* distribution
|
||||||
|
* @param found: Current best label-property index.
|
||||||
|
* @param new_stats: Label-property index candidate.
|
||||||
|
* @param vertex_count: New index's number of vertices.
|
||||||
|
* @return -1 if the new index is better, 0 if they are equal and 1 if the existing one is better.
|
||||||
|
*/
|
||||||
|
auto compare_indices = [](std::optional<LabelPropertyIndex> &found, std::optional<storage::IndexStats> &new_stats,
|
||||||
|
int vertex_count) {
|
||||||
|
if (!new_stats.has_value() || vertex_count / 10.0 > found->vertex_count) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
int cmp_avg_group = utils::CompareDecimal(new_stats->avg_group_size, found->index_stats->avg_group_size);
|
||||||
|
if (cmp_avg_group != 0) return cmp_avg_group;
|
||||||
|
return utils::CompareDecimal(new_stats->statistic, found->index_stats->statistic);
|
||||||
|
};
|
||||||
|
|
||||||
std::optional<LabelPropertyIndex> found;
|
std::optional<LabelPropertyIndex> found;
|
||||||
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
for (const auto &label : filters_.FilteredLabels(symbol)) {
|
||||||
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
for (const auto &filter : filters_.PropertyFilters(symbol)) {
|
||||||
@ -548,7 +574,6 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
|
if (!db_->LabelPropertyIndexExists(GetLabel(label), GetProperty(property))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
|
||||||
auto is_better_type = [&found](PropertyFilter::Type type) {
|
auto is_better_type = [&found](PropertyFilter::Type type) {
|
||||||
// Order the types by the most preferred index lookup type.
|
// Order the types by the most preferred index lookup type.
|
||||||
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
static const PropertyFilter::Type kFilterTypeOrder[] = {
|
||||||
@ -557,17 +582,32 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
auto *type_sort_ix = std::find(kFilterTypeOrder, kFilterTypeOrder + 3, type);
|
||||||
return type_sort_ix < found_sort_ix;
|
return type_sort_ix < found_sort_ix;
|
||||||
};
|
};
|
||||||
if (!found || vertex_count < found->vertex_count ||
|
|
||||||
(vertex_count == found->vertex_count && is_better_type(filter.property_filter->type_))) {
|
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||||
found = LabelPropertyIndex{label, filter, vertex_count};
|
std::optional<storage::IndexStats> new_stats = db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
||||||
|
|
||||||
|
// Conditions, from more to less important:
|
||||||
|
// the index with 10x less vertices is better.
|
||||||
|
// the index with smaller average group size is better.
|
||||||
|
// the index with equal avg group size and distribution closer to the uniform is better.
|
||||||
|
// the index with less vertices is better.
|
||||||
|
// the index with same number of vertices but more optimized filter is better.
|
||||||
|
if (!found || vertex_count * 10 < found->vertex_count) {
|
||||||
|
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (int cmp_res = compare_indices(found, new_stats, vertex_count);
|
||||||
|
cmp_res == -1 ||
|
||||||
|
cmp_res == 0 && (found->vertex_count > vertex_count ||
|
||||||
|
found->vertex_count == vertex_count && is_better_type(filter.property_filter->type_))) {
|
||||||
|
found = LabelPropertyIndex{label, filter, vertex_count, new_stats};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
// Creates a ScanAll by the best possible index for the `node_symbol`. If the node
|
||||||
// Creates a ScanAll by the best possible index for the `node_symbol`. Best
|
|
||||||
// index is defined as the index with least number of vertices. If the node
|
|
||||||
// does not have at least a label, no indexed lookup can be created and
|
// does not have at least a label, no indexed lookup can be created and
|
||||||
// `nullptr` is returned. The operator is chained after `input`. Optional
|
// `nullptr` is returned. The operator is chained after `input`. Optional
|
||||||
// `max_vertex_count` controls, whether no operator should be created if the
|
// `max_vertex_count` controls, whether no operator should be created if the
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -78,6 +78,11 @@ class VertexCountCache {
|
|||||||
return db_->LabelPropertyIndexExists(label, property);
|
return db_->LabelPropertyIndexExists(label, property);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the statistics of the (label, property) index by forwarding the request to the wrapped `db_`.
// The result is empty when `db_` reports no statistics for that index.
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
                                                 const storage::PropertyId &property) const {
  auto index_stats = db_->GetIndexStats(label, property);
  return index_stats;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
typedef std::pair<storage::LabelId, storage::PropertyId> LabelPropertyKey;
|
typedef std::pair<storage::LabelId, storage::PropertyId> LabelPropertyKey;
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -10,6 +10,8 @@
|
|||||||
// licenses/APL.txt.
|
// licenses/APL.txt.
|
||||||
|
|
||||||
#include "indices.hpp"
|
#include "indices.hpp"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include "storage/v2/mvcc.hpp"
|
#include "storage/v2/mvcc.hpp"
|
||||||
@ -688,6 +690,45 @@ int64_t LabelPropertyIndex::ApproximateVertexCount(LabelId label, PropertyId pro
|
|||||||
return acc.estimate_range_count(lower, upper, utils::SkipListLayerForCountEstimation(acc.size()));
|
return acc.estimate_range_count(lower, upper, utils::SkipListLayerForCountEstimation(acc.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Iterate over all property-label pairs and deletes if label from the index is equal to label parameter.
|
||||||
|
*/
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> LabelPropertyIndex::DeleteIndexStatsForLabel(
|
||||||
|
const storage::LabelId &label) {
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
||||||
|
for (auto it = stats_.cbegin(); it != stats_.cend();) {
|
||||||
|
if (it->first.first == label) {
|
||||||
|
deleted_indexes.push_back(it->first);
|
||||||
|
it = stats_.erase(it);
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return deleted_indexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> LabelPropertyIndex::ClearIndexStats() {
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
||||||
|
deleted_indexes.reserve(stats_.size());
|
||||||
|
std::transform(stats_.begin(), stats_.end(), std::back_inserter(deleted_indexes),
|
||||||
|
[](const auto &elem) { return elem.first; });
|
||||||
|
stats_.clear();
|
||||||
|
return deleted_indexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores `stats` for the (label, property) index, overwriting any statistics stored for it before.
void LabelPropertyIndex::SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
                                       const IndexStats &stats) {
  stats_.insert_or_assign(std::make_pair(label, property), stats);
}
|
||||||
|
|
||||||
|
// Looks up the statistics stored for the (label, property) index.
// Returns std::nullopt when nothing is stored for that pair.
std::optional<IndexStats> LabelPropertyIndex::GetIndexStats(const storage::LabelId &label,
                                                            const storage::PropertyId &property) const {
  const auto stats_it = stats_.find({label, property});
  if (stats_it == stats_.end()) {
    return std::nullopt;
  }
  return stats_it->second;
}
|
||||||
|
|
||||||
void LabelPropertyIndex::RunGC() {
|
void LabelPropertyIndex::RunGC() {
|
||||||
for (auto &index_entry : index_) {
|
for (auto &index_entry : index_) {
|
||||||
index_entry.second.run_gc();
|
index_entry.second.run_gc();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -131,6 +131,10 @@ class LabelIndex {
|
|||||||
Config::Items config_;
|
Config::Items config_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Statistics kept per label-property index and consulted by the query planner when choosing between indices.
// Both members are value-initialized so a default-constructed object never carries indeterminate values.
struct IndexStats {
  // Measure of how far the key distribution is from uniform; the planner prefers the smaller value.
  // NOTE(review): presumably a chi-squared statistic — confirm against the code that computes it.
  double statistic{0.0};
  // Average number of vertices sharing one property value; the planner prefers the smaller value.
  double avg_group_size{0.0};
};
|
||||||
|
|
||||||
class LabelPropertyIndex {
|
class LabelPropertyIndex {
|
||||||
private:
|
private:
|
||||||
struct Entry {
|
struct Entry {
|
||||||
@ -237,12 +241,23 @@ class LabelPropertyIndex {
|
|||||||
const std::optional<utils::Bound<PropertyValue>> &lower,
|
const std::optional<utils::Bound<PropertyValue>> &lower,
|
||||||
const std::optional<utils::Bound<PropertyValue>> &upper) const;
|
const std::optional<utils::Bound<PropertyValue>> &upper) const;
|
||||||
|
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> ClearIndexStats();
|
||||||
|
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> DeleteIndexStatsForLabel(const storage::LabelId &label);
|
||||||
|
|
||||||
|
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
||||||
|
const storage::IndexStats &stats);
|
||||||
|
|
||||||
|
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
|
||||||
|
const storage::PropertyId &property) const;
|
||||||
|
|
||||||
void Clear() { index_.clear(); }
|
void Clear() { index_.clear(); }
|
||||||
|
|
||||||
void RunGC();
|
void RunGC();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;
|
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;
|
||||||
|
std::map<std::pair<LabelId, PropertyId>, storage::IndexStats> stats_;
|
||||||
Indices *indices_;
|
Indices *indices_;
|
||||||
Constraints *constraints_;
|
Constraints *constraints_;
|
||||||
Config::Items config_;
|
Config::Items config_;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <shared_mutex>
|
#include <shared_mutex>
|
||||||
|
#include <span>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
|
|
||||||
#include "io/network/endpoint.hpp"
|
#include "io/network/endpoint.hpp"
|
||||||
@ -265,6 +266,30 @@ class Storage final {
|
|||||||
return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper);
|
return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the statistics stored for the (label, property) index, as reported by the storage's
/// label-property index; empty when none are stored.
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
                                                 const storage::PropertyId &property) const {
  auto &label_property_index = storage_->indices_.label_property_index;
  return label_property_index.GetIndexStats(label, property);
}
|
||||||
|
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> ClearIndexStats() {
|
||||||
|
return storage_->indices_.label_property_index.ClearIndexStats();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> DeleteIndexStatsForLabels(const std::span<std::string> labels) {
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
||||||
|
std::for_each(labels.begin(), labels.end(), [this, &deleted_indexes](const auto &label_str) {
|
||||||
|
std::vector<std::pair<LabelId, PropertyId>> loc_results =
|
||||||
|
storage_->indices_.label_property_index.DeleteIndexStatsForLabel(NameToLabel(label_str));
|
||||||
|
deleted_indexes.insert(deleted_indexes.end(), std::make_move_iterator(loc_results.begin()),
|
||||||
|
std::make_move_iterator(loc_results.end()));
|
||||||
|
});
|
||||||
|
return deleted_indexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores `stats` for the (label, property) index by forwarding to the storage's label-property index.
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property, const IndexStats &stats) {
  auto &label_property_index = storage_->indices_.label_property_index;
  label_property_index.SetIndexStats(label, property, stats);
}
|
||||||
|
|
||||||
/// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise
|
/// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise
|
||||||
/// @throw std::bad_alloc
|
/// @throw std::bad_alloc
|
||||||
Result<std::optional<VertexAccessor>> DeleteVertex(VertexAccessor *vertex);
|
Result<std::optional<VertexAccessor>> DeleteVertex(VertexAccessor *vertex);
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -11,11 +11,15 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <boost/math/special_functions/math_fwd.hpp>
|
||||||
|
#include <cmath>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <boost/math/special_functions/relative_difference.hpp>
|
||||||
|
|
||||||
namespace memgraph::utils {
|
namespace memgraph::utils {
|
||||||
|
|
||||||
static_assert(std::is_same_v<uint64_t, unsigned long>,
|
static_assert(std::is_same_v<uint64_t, unsigned long>,
|
||||||
@ -64,4 +68,36 @@ constexpr std::optional<uint64_t> RoundUint64ToMultiple(uint64_t val, uint64_t m
|
|||||||
return (numerator / multiple) * multiple;
|
return (numerator / multiple) * multiple;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
concept FloatingPoint = std::is_floating_point_v<T>;
|
||||||
|
|
||||||
|
template <FloatingPoint T>
|
||||||
|
bool ApproxEqualDecimal(T a, T b) {
|
||||||
|
return boost::math::relative_difference(a, b) < std::numeric_limits<T>::epsilon();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <FloatingPoint T>
|
||||||
|
bool LessThanDecimal(T a, T b) {
|
||||||
|
return (b - a) > std::numeric_limits<T>::epsilon();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return 0 if a == b
|
||||||
|
* return 1 if a > b
|
||||||
|
* return -1 if a < b
|
||||||
|
*/
|
||||||
|
template <FloatingPoint T>
|
||||||
|
int CompareDecimal(T a, T b) {
|
||||||
|
if (ApproxEqualDecimal(a, b)) return 0;
|
||||||
|
if (LessThanDecimal(a, b)) return -1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr double ChiSquaredValue(double observed, double expected) {
|
||||||
|
if (utils::ApproxEqualDecimal(expected, 0.0)) {
|
||||||
|
return std::numeric_limits<double>::max();
|
||||||
|
}
|
||||||
|
return (observed - expected) * (observed - expected) / expected;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace memgraph::utils
|
} // namespace memgraph::utils
|
||||||
|
@ -176,9 +176,9 @@ enum class TypeId : uint64_t {
|
|||||||
AST_VERSION_QUERY,
|
AST_VERSION_QUERY,
|
||||||
AST_FOREACH,
|
AST_FOREACH,
|
||||||
AST_SHOW_CONFIG_QUERY,
|
AST_SHOW_CONFIG_QUERY,
|
||||||
|
AST_ANALYZE_GRAPH_QUERY,
|
||||||
AST_TRANSACTION_QUEUE_QUERY,
|
AST_TRANSACTION_QUEUE_QUERY,
|
||||||
AST_EXISTS,
|
AST_EXISTS,
|
||||||
// Symbol
|
|
||||||
SYMBOL,
|
SYMBOL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@ add_subdirectory(module_file_manager)
|
|||||||
add_subdirectory(monitoring_server)
|
add_subdirectory(monitoring_server)
|
||||||
add_subdirectory(lba_procedures)
|
add_subdirectory(lba_procedures)
|
||||||
add_subdirectory(python_query_modules_reloading)
|
add_subdirectory(python_query_modules_reloading)
|
||||||
|
add_subdirectory(analyze_graph)
|
||||||
add_subdirectory(transaction_queue)
|
add_subdirectory(transaction_queue)
|
||||||
add_subdirectory(mock_api)
|
add_subdirectory(mock_api)
|
||||||
|
|
||||||
|
6
tests/e2e/analyze_graph/CMakeLists.txt
Normal file
6
tests/e2e/analyze_graph/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
function(copy_analyze_graph_e2e_python_files FILE_NAME)
|
||||||
|
copy_e2e_python_files(analyze_graph ${FILE_NAME})
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
copy_analyze_graph_e2e_python_files(common.py)
|
||||||
|
copy_analyze_graph_e2e_python_files(optimize_indexes.py)
|
29
tests/e2e/analyze_graph/common.py
Normal file
29
tests/e2e/analyze_graph/common.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# Copyright 2023 Memgraph Ltd.
|
||||||
|
#
|
||||||
|
# Use of this software is governed by the Business Source License
|
||||||
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
# License, and you may not use this file except in compliance with the Business Source License.
|
||||||
|
#
|
||||||
|
# As of the Change Date specified in that file, in accordance with
|
||||||
|
# the Business Source License, use of this software will be governed
|
||||||
|
# by the Apache License, Version 2.0, included in the file
|
||||||
|
# licenses/APL.txt.
|
||||||
|
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import mgclient
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def execute_and_fetch_all(
    cursor: mgclient.Cursor, query: str, params: typing.Optional[dict] = None
) -> typing.List[tuple]:
    """Execute `query` on `cursor` and return every result row.

    Args:
        cursor: cursor to run the query on.
        query: query text.
        params: query parameters; an empty dict is sent when omitted.

    Returns:
        Whatever `cursor.fetchall()` yields after the query has executed.
    """
    # A fresh dict per call avoids sharing one mutable default object between calls.
    cursor.execute(query, {} if params is None else params)
    return cursor.fetchall()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def connect(**kwargs) -> typing.Iterator[mgclient.Connection]:
    """Yield an autocommit connection to localhost:7687, then clean up.

    After the test finishes, all vertices (and their edges) are deleted so the
    next test starts from an empty graph, and the connection is closed.
    """
    connection = mgclient.connect(host="localhost", port=7687, **kwargs)
    connection.autocommit = True
    try:
        yield connection
        cursor = connection.cursor()
        execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n")
    finally:
        # Release the connection even if the cleanup query itself fails.
        connection.close()
|
282
tests/e2e/analyze_graph/optimize_indexes.py
Normal file
282
tests/e2e/analyze_graph/optimize_indexes.py
Normal file
@ -0,0 +1,282 @@
|
|||||||
|
# Copyright 2023 Memgraph Ltd.
|
||||||
|
#
|
||||||
|
# Use of this software is governed by the Business Source License
|
||||||
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
# License, and you may not use this file except in compliance with the Business Source License.
|
||||||
|
#
|
||||||
|
# As of the Change Date specified in that file, in accordance with
|
||||||
|
# the Business Source License, use of this software will be governed
|
||||||
|
# by the Apache License, Version 2.0, included in the file
|
||||||
|
# licenses/APL.txt.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from common import connect, execute_and_fetch_all
|
||||||
|
|
||||||
|
# E2E tests for checking query semantic
|
||||||
|
# ------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "delete_query",
    [
        "ANALYZE GRAPH DELETE STATISTICS",
        "ANALYZE GRAPH ON LABELS * DELETE STATISTICS",
        "ANALYZE GRAPH ON LABELS :Label DELETE STATISTICS",
        "ANALYZE GRAPH ON LABELS :Label, :NONEXISTING DELETE STATISTICS",
    ],
)
def test_analyze_graph_delete_statistics(delete_query, connect):
    """Tests that all variants of delete queries work as expected."""
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        assert len(analyze_graph_results) == 2
        delete_stats_results = execute_and_fetch_all(cursor, delete_query)
        assert len(delete_stats_results) == 2
        # Result order is not fixed; locate the row that belongs to id1.
        first_index = 0 if delete_stats_results[0][1] == "id1" else 1
        assert delete_stats_results[first_index] == ("Label", "id1")
        assert delete_stats_results[1 - first_index] == ("Label", "id2")
        # After deleting statistics, id2 should be chosen because it has fewer vertices
        expected_explain_after_delete_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id2})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_delete_analysis
        )
    finally:
        # Drop the indexes even when an assertion fails, so later tests do not inherit them.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "analyze_query",
    [
        "ANALYZE GRAPH",
        "ANALYZE GRAPH ON LABELS *",
        "ANALYZE GRAPH ON LABELS :Label",
        "ANALYZE GRAPH ON LABELS :Label, :NONEXISTING",
    ],
)
def test_analyze_full_graph(analyze_query, connect):
    """Tests analyzing full graph and choosing better index based on the smaller average group size.
    It also tests querying based on labels and that nothing bad will happen by providing non-existing label.
    """
    explain_query = "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;"
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i % 5}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        # Choose id2 before the analysis because it has fewer vertices
        expected_explain_before_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id2})",),
            (" * Once",),
        ]
        assert execute_and_fetch_all(cursor, explain_query) == expected_explain_before_analysis
        # Run analyze query
        analyze_graph_results = execute_and_fetch_all(cursor, analyze_query)
        assert len(analyze_graph_results) == 2
        # Result order is not fixed; locate the row that belongs to id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 5, 10, 0)
        # After analyzing graph, id1 index should be chosen because it has smaller average group size
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id1})",),
            (" * Once",),
        ]
        assert execute_and_fetch_all(cursor, explain_query) == expected_explain_after_analysis
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indexes even when an assertion fails, so later tests do not inherit them.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
# Explicit index choosing tests
|
||||||
|
# -----------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_cardinality_different_avg_group_size_uniform_dist(connect):
    """Checks that the planner prefers the index with the smaller average group size.

    Both properties are uniformly distributed over 100 vertices each: `id1` is unique
    per vertex, while `id2` is `i % 20` and therefore has 20 distinct values. After
    `ANALYZE GRAPH` a filter on both properties is expected to scan the `id1` index.

    Args:
        connect: fixture object exposing `cursor()`.
    """
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 100) | CREATE (n:Label {id2: i % 20}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        # Fail with a clear assertion instead of an IndexError on the lookups below.
        assert len(analyze_graph_results) == 2
        # The row order is not fixed, so find out which row describes id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        # NOTE(review): columns look like (label, property, vertex count, distinct values,
        # avg group size, chi-squared) - confirm against the ANALYZE GRAPH documentation.
        assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0)
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id1})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_analysis
        )
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indices even if an assertion above failed, so they are not left behind.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(connect):
    """Checks index choice when only the number of indexed vertices differs.

    Both properties hold unique values (average group size 1, uniform distribution),
    but `id1` covers 100 vertices and `id2` only 50. The expected plan scans the
    `id2` index, i.e. the one with fewer vertices.

    Args:
        connect: fixture object exposing `cursor()`.
    """
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 100) | CREATE (n:Label {id1: i}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 50) | CREATE (n:Label {id2: i}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        # Fail with a clear assertion instead of an IndexError on the lookups below.
        assert len(analyze_graph_results) == 2
        # The row order is not fixed, so find out which row describes id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0)
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id2})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_analysis
        )
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indices even if an assertion above failed, so they are not left behind.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
def test_large_diff_in_num_vertices_v1(connect):
    """Checks that a much smaller index wins regardless of its average group size.

    `id1` indexes 1000 vertices with unique values, while `id2` indexes only 99
    vertices that all share the value 1 (a single group of 99). The vertex counts
    differ by more than 10x, and the expected plan scans the smaller `id2` index
    even though its average group size is far worse.

    Args:
        connect: fixture object exposing `cursor()`.
    """
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 1000) | CREATE (n:Label {id1: i}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 99) | CREATE (n:Label {id2: 1}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        # Fail with a clear assertion instead of an IndexError on the lookups below.
        assert len(analyze_graph_results) == 2
        # The row order is not fixed, so find out which row describes id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        assert analyze_graph_results[first_index] == ("Label", "id1", 1000, 1000, 1, 0)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0)
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id2})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_analysis
        )
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indices even if an assertion above failed, so they are not left behind.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
def test_large_diff_in_num_vertices_v2(connect):
    """Mirror of the v1 scenario with the roles of `id1` and `id2` swapped.

    `id1` indexes only 99 vertices that all share the value 1 (a single group of 99),
    while `id2` indexes 1000 vertices with unique values. The vertex counts differ by
    more than 10x, and the expected plan scans the smaller `id1` index even though
    its average group size is far worse.

    Args:
        connect: fixture object exposing `cursor()`.
    """
    cursor = connect.cursor()
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 99) | CREATE (n:Label {id1: 1}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 1000) | CREATE (n:Label {id2: i}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        # Fail with a clear assertion instead of an IndexError on the lookups below.
        assert len(analyze_graph_results) == 2
        # The row order is not fixed, so find out which row describes id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        assert analyze_graph_results[first_index] == ("Label", "id1", 99, 1, 99, 0)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0)
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id1})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_analysis
        )
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indices even if an assertion above failed, so they are not left behind.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_avg_group_size_diff_distribution(connect):
    """Checks that, with equal average group sizes, the more uniform index is chosen.

    Both properties index 100 vertices split into 5 groups (average group size 20).
    The `id1` groups are uneven (10/30/20/35/5 vertices) while the `id2` groups are
    all of size 20. The reported statistic is 32.5 for `id1` and 0 for `id2`, and the
    expected plan scans the `id2` index.

    Args:
        connect: fixture object exposing `cursor()`.
    """
    cursor = connect.cursor()
    # Setup first key distribution
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 10) | CREATE (n:Label {id1: 1}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 30) | CREATE (n:Label {id1: 2}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id1: 3}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 35) | CREATE (n:Label {id1: 4}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 5) | CREATE (n:Label {id1: 5}));")
    # Setup second key distribution
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id2: 1}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id2: 2}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id2: 3}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id2: 4}));")
    execute_and_fetch_all(cursor, "FOREACH (i IN range(1, 20) | CREATE (n:Label {id2: 5}));")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id1);")
    execute_and_fetch_all(cursor, "CREATE INDEX ON :Label(id2);")
    try:
        analyze_graph_results = execute_and_fetch_all(cursor, "ANALYZE GRAPH")
        # Fail with a clear assertion instead of an IndexError on the lookups below.
        assert len(analyze_graph_results) == 2
        # The row order is not fixed, so find out which row describes id1.
        first_index = 0 if analyze_graph_results[0][1] == "id1" else 1
        # Check results
        # 32.5 is the sum of (group size - 20)^2 / 20 over the five id1 groups.
        assert analyze_graph_results[first_index] == ("Label", "id1", 100, 5, 20, 32.5)
        assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0)
        expected_explain_after_analysis = [
            (" * Produce {n}",),
            (" * Filter",),
            (" * ScanAllByLabelPropertyValue (n :Label {id2})",),
            (" * Once",),
        ]
        assert (
            execute_and_fetch_all(cursor, "EXPLAIN MATCH (n:Label) WHERE n.id2 = 3 AND n.id1 = 3 RETURN n;")
            == expected_explain_after_analysis
        )
        assert len(execute_and_fetch_all(cursor, "ANALYZE GRAPH DELETE STATISTICS")) == 2
    finally:
        # Drop the indices even if an assertion above failed, so they are not left behind.
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id1);")
        execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run this module's tests through pytest when executed directly; "-rA" asks for
    # a summary line for every test outcome. The pytest result becomes the exit code.
    exit_code = pytest.main([__file__, "-rA"])
    sys.exit(exit_code)
|
14
tests/e2e/analyze_graph/workloads.yaml
Normal file
14
tests/e2e/analyze_graph/workloads.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# Cluster definition with a single `main` entry; anchored so workloads can merge it in.
analyze_graph_cluster: &analyze_graph_cluster
  cluster:
    main:
      args:
        - "--bolt-port"
        - "7687"
        - "--log-level=TRACE"
      log_file: "analyze_graph.log"
      setup_queries: []
      validation_queries: []


workloads:
  - name: "Analyze graph for better indexing"
    binary: "tests/e2e/pytest_runner.sh"
    args:
      - "analyze_graph/optimize_indexes.py"
    # Merge key: pulls the `cluster` mapping from the anchor above into this workload.
    <<: *analyze_graph_cluster
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -213,6 +213,11 @@ class InteractiveDbAccessor {
|
|||||||
return label_property_index_.at(key);
|
return label_property_index_.at(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetches the statistics of the (label, property) index by delegating to `dba_`.
/// NOTE(review): an empty optional presumably means no statistics are stored for the
/// index - confirm against the contract of the underlying accessor.
std::optional<memgraph::storage::IndexStats> GetIndexStats(memgraph::storage::LabelId label,
                                                           memgraph::storage::PropertyId property) const {
  std::optional<memgraph::storage::IndexStats> stats = dba_->GetIndexStats(label, property);
  return stats;
}
|
||||||
|
|
||||||
// Save the cached vertex counts to a stream.
|
// Save the cached vertex counts to a stream.
|
||||||
void Save(std::ostream &out) {
|
void Save(std::ostream &out) {
|
||||||
out << "vertex-count " << vertices_count_ << std::endl;
|
out << "vertex-count " << vertices_count_ << std::endl;
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include <gmock/gmock.h>
|
#include <gmock/gmock.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||||
#include "query/frontend/semantic/symbol_table.hpp"
|
#include "query/frontend/semantic/symbol_table.hpp"
|
||||||
@ -459,6 +460,11 @@ class FakeDbAccessor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the same canned statistics for every (label, property) pair; neither
/// argument is consulted.
memgraph::storage::IndexStats GetIndexStats(memgraph::storage::LabelId label,
                                            memgraph::storage::PropertyId property) const {
  // An average group size of 1 models a unique id.
  // NOTE(review): `statistic` is presumably the chi-squared value (0 == uniform) - confirm.
  memgraph::storage::IndexStats unique_id_stats{.statistic = 0, .avg_group_size = 1};
  return unique_id_stats;
}
|
||||||
|
|
||||||
void SetIndexCount(memgraph::storage::LabelId label, int64_t count) { label_index_[label] = count; }
|
void SetIndexCount(memgraph::storage::LabelId label, int64_t count) { label_index_[label] = count; }
|
||||||
|
|
||||||
void SetIndexCount(memgraph::storage::LabelId label, memgraph::storage::PropertyId property, int64_t count) {
|
void SetIndexCount(memgraph::storage::LabelId label, memgraph::storage::PropertyId property, int64_t count) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -10,6 +10,7 @@
|
|||||||
// licenses/APL.txt.
|
// licenses/APL.txt.
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
#include <gmock/gmock.h>
|
#include <gmock/gmock.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
@ -21,3 +22,29 @@ TEST(UtilsMath, Log2) {
|
|||||||
ASSERT_EQ(memgraph::utils::Log2(i), static_cast<uint64_t>(log2(i)));
|
ASSERT_EQ(memgraph::utils::Log2(i), static_cast<uint64_t>(log2(i)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Single-precision inputs: identical values and literals that differ from 0.2 only
// far below float resolution compare equal; a difference of 5e-5 does not.
TEST(UtilsMath, EqualFloat) {
  using memgraph::utils::ApproxEqualDecimal;

  ASSERT_TRUE(ApproxEqualDecimal(0.2f, 0.2f));
  // Both literals below round to the same float as 0.2f.
  ASSERT_TRUE(ApproxEqualDecimal(0.2f, 0.199999999999f));
  ASSERT_TRUE(ApproxEqualDecimal(0.2f, 0.200000000001f));

  ASSERT_FALSE(ApproxEqualDecimal(0.2f, 0.19995f));
}
|
||||||
|
|
||||||
|
// Double-precision counterpart of the float test: values indistinguishable from 0.2
// compare equal; a difference of 5e-5 does not.
TEST(UtilsMath, EqualDouble) {
  using memgraph::utils::ApproxEqualDecimal;

  ASSERT_TRUE(ApproxEqualDecimal(0.2, 0.2));
  // Both literals below differ from 0.2 by less than double resolution.
  ASSERT_TRUE(ApproxEqualDecimal(0.2, 0.19999999999999999999));
  ASSERT_TRUE(ApproxEqualDecimal(0.2, 0.20000000000000000001));

  ASSERT_FALSE(ApproxEqualDecimal(0.2, 0.19995));
}
|
||||||
|
|
||||||
|
// The first argument must be reported as smaller both for a clear gap (0.1) and for
// a small one (1e-5).
TEST(UtilsMath, LessThan) {
  using memgraph::utils::LessThanDecimal;

  ASSERT_TRUE(LessThanDecimal(0.2, 0.3));
  ASSERT_TRUE(LessThanDecimal(0.2, 0.20001));
}
|
||||||
|
|
||||||
|
// The expected values match (first - second)^2 / second; a zero second argument
// must yield the largest finite double.
TEST(UtilsMath, ChiSquared) {
  namespace mg_utils = memgraph::utils;
  constexpr double kLargestDouble = std::numeric_limits<double>::max();

  // Zero divisor.
  ASSERT_EQ(kLargestDouble, mg_utils::ChiSquaredValue(2.0, 0.0));
  // Equal arguments give no deviation.
  ASSERT_DOUBLE_EQ(0.0, mg_utils::ChiSquaredValue(2.0, 2.0));
  ASSERT_DOUBLE_EQ(1.0, mg_utils::ChiSquaredValue(2.0, 1.0));
  ASSERT_DOUBLE_EQ(1. / 3., mg_utils::ChiSquaredValue(4.0, 3.0));
}
|
||||||
|
Loading…
Reference in New Issue
Block a user