Merge pull request #1466 from memgraph/Implement-constant-time-label-and-edge-type-retrieval

Implement constant time label and edge type retrieval

Memgraph now includes two additional queries designed to retrieve
information about the schema of the stored graphs. The SHOW
NODE_LABELS INFO and SHOW EDGE_TYPES INFO queries return
the list of vertex-labels and edge-types that are currently present or at
some point were present in the database respectively. In order for
these queries to work, the flag --storage-enable-schema-metadata has
to be set to True on startup.
This commit is contained in:
gvolfing 2023-12-04 19:56:48 +01:00 committed by GitHub
commit d836b38a8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 399 additions and 3 deletions

View File

@ -111,6 +111,10 @@ modifications:
value: "false"
override: true
- name: "storage_enable_schema_metadata"
value: "false"
override: true
- name: "query_callable_mappings_path"
value: "/etc/memgraph/apoc_compatibility_mappings.json"
override: true

View File

@ -114,6 +114,10 @@ DEFINE_uint64(storage_recovery_thread_count,
memgraph::storage::Config::Durability().recovery_thread_count),
"The number of threads used to recover persisted data from disk.");
// Gates collection of label / edge-type schema metadata. It defaults to false; the
// SHOW NODE_LABELS INFO and SHOW EDGE_TYPES INFO queries throw while it is disabled.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(storage_enable_schema_metadata, false,
"Controls whether metadata should be collected about the resident labels and edge types.");
#ifdef MG_ENTERPRISE
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(storage_delete_on_drop, true,

View File

@ -77,6 +77,8 @@ DECLARE_uint64(storage_items_per_batch);
DECLARE_bool(storage_parallel_index_recovery);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_uint64(storage_recovery_thread_count);
// Schema-metadata switch; main() copies it into storage::Config::Items::enable_schema_metadata.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_bool(storage_enable_schema_metadata);
#ifdef MG_ENTERPRISE
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_bool(storage_delete_on_drop);

View File

@ -294,7 +294,8 @@ int main(int argc, char **argv) {
memgraph::storage::Config db_config{
.gc = {.type = memgraph::storage::Config::Gc::Type::PERIODIC,
.interval = std::chrono::seconds(FLAGS_storage_gc_cycle_sec)},
.items = {.properties_on_edges = FLAGS_storage_properties_on_edges},
.items = {.properties_on_edges = FLAGS_storage_properties_on_edges,
.enable_schema_metadata = FLAGS_storage_enable_schema_metadata},
.durability = {.storage_directory = FLAGS_data_directory,
.recover_on_startup = FLAGS_storage_recover_on_startup || FLAGS_data_recovery_on_startup,
.snapshot_retention_count = FLAGS_storage_snapshot_retention_count,

View File

@ -597,6 +597,13 @@ class DbAccessor final {
return accessor_->ApproximateVertexCount(label, property, lower, upper);
}
/// Forwards to the storage accessor and returns the labels it reports as
/// possibly present.
std::vector<storage::LabelId> ListAllPossiblyPresentVertexLabels() const {
  auto possibly_present_labels = accessor_->ListAllPossiblyPresentVertexLabels();
  return possibly_present_labels;
}
/// Forwards to the storage accessor and returns the edge types it reports as
/// possibly present.
std::vector<storage::EdgeTypeId> ListAllPossiblyPresentEdgeTypes() const {
  auto possibly_present_edge_types = accessor_->ListAllPossiblyPresentEdgeTypes();
  return possibly_present_edge_types;
}
storage::IndicesInfo ListAllIndices() const { return accessor_->ListAllIndices(); }
storage::ConstraintsInfo ListAllConstraints() const { return accessor_->ListAllConstraints(); }

View File

@ -2934,7 +2934,7 @@ class DatabaseInfoQuery : public memgraph::query::Query {
static const utils::TypeInfo kType;
const utils::TypeInfo &GetTypeInfo() const override { return kType; }
enum class InfoType { INDEX, CONSTRAINT };
enum class InfoType { INDEX, CONSTRAINT, EDGE_TYPES, NODE_LABELS };
DEFVISITABLE(QueryVisitor<void>);

View File

@ -124,6 +124,14 @@ antlrcpp::Any CypherMainVisitor::visitDatabaseInfoQuery(MemgraphCypher::Database
info_query->info_type_ = DatabaseInfoQuery::InfoType::CONSTRAINT;
return info_query;
}
if (ctx->edgetypeInfo()) {
info_query->info_type_ = DatabaseInfoQuery::InfoType::EDGE_TYPES;
return info_query;
}
if (ctx->nodelabelInfo()) {
info_query->info_type_ = DatabaseInfoQuery::InfoType::NODE_LABELS;
return info_query;
}
// Should never get here
throw utils::NotYetImplemented("Database info query: '{}'", ctx->getText());
}

View File

@ -47,9 +47,13 @@ indexInfo : INDEX INFO ;
constraintInfo : CONSTRAINT INFO ;
edgetypeInfo : EDGE_TYPES INFO ;
nodelabelInfo : NODE_LABELS INFO ;
buildInfo : BUILD INFO ;
databaseInfoQuery : SHOW ( indexInfo | constraintInfo ) ;
databaseInfoQuery : SHOW ( indexInfo | constraintInfo | edgetypeInfo | nodelabelInfo ) ;
systemInfoQuery : SHOW ( storageInfo | buildInfo ) ;

View File

@ -61,6 +61,7 @@ memgraphCypherKeyword : cypherKeyword
| GRANT
| HEADER
| IDENTIFIED
| NODE_LABELS
| NULLIF
| IMPORT
| INACTIVE

View File

@ -89,6 +89,7 @@ MULTI_DATABASE_EDIT : M U L T I UNDERSCORE D A T A B A S E UNDERSCORE E D I
MULTI_DATABASE_USE : M U L T I UNDERSCORE D A T A B A S E UNDERSCORE U S E ;
NEXT : N E X T ;
NO : N O ;
NODE_LABELS : N O D E UNDERSCORE L A B E L S ;
NOTHING : N O T H I N G ;
ON_DISK_TRANSACTIONAL : O N UNDERSCORE D I S K UNDERSCORE T R A N S A C T I O N A L ;
NULLIF : N U L L I F ;

View File

@ -38,6 +38,9 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis
void Visit(DatabaseInfoQuery &info_query) override {
switch (info_query.info_type_) {
case DatabaseInfoQuery::InfoType::INDEX:
// TODO: Reconsider privileges; all four info types should require the same one.
case DatabaseInfoQuery::InfoType::EDGE_TYPES:
case DatabaseInfoQuery::InfoType::NODE_LABELS:
// TODO: This should be INDEX | STATS, but we don't have support for
// *or* with privileges.
AddPrivilege(AuthQuery::Privilege::INDEX);

View File

@ -3042,6 +3042,46 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
};
break;
}
case DatabaseInfoQuery::InfoType::EDGE_TYPES: {
  header = {"edge types"};
  // Produces one single-column row per edge type returned by the accessor.
  handler = [storage = current_db.db_acc_->get()->storage(), dba] {
    // The query is refused unless schema metadata collection was enabled in the storage config.
    if (!storage->config_.items.enable_schema_metadata) {
      throw QueryRuntimeException(
          "The metadata collection for edge-types is disabled. To enable it, restart your instance and set the "
          "storage-enable-schema-metadata flag to True.");
    }
    auto edge_types = dba->ListAllPossiblyPresentEdgeTypes();
    std::vector<std::vector<TypedValue>> results;
    results.reserve(edge_types.size());
    for (const auto &edge_type : edge_types) {
      results.push_back({TypedValue(storage->EdgeTypeToName(edge_type))});
    }
    // Move the rows into the pair; naming `results` directly would copy the whole vector.
    return std::pair{std::move(results), QueryHandlerResult::COMMIT};
  };
  break;
}
case DatabaseInfoQuery::InfoType::NODE_LABELS: {
  header = {"node labels"};
  // Produces one single-column row per vertex label returned by the accessor.
  handler = [storage = current_db.db_acc_->get()->storage(), dba] {
    // The query is refused unless schema metadata collection was enabled in the storage config.
    if (!storage->config_.items.enable_schema_metadata) {
      throw QueryRuntimeException(
          "The metadata collection for node-labels is disabled. To enable it, restart your instance and set the "
          "storage-enable-schema-metadata flag to True.");
    }
    auto node_labels = dba->ListAllPossiblyPresentVertexLabels();
    std::vector<std::vector<TypedValue>> results;
    results.reserve(node_labels.size());
    for (const auto &node_label : node_labels) {
      results.push_back({TypedValue(storage->LabelToName(node_label))});
    }
    // Move the rows into the pair; naming `results` directly would copy the whole vector.
    return std::pair{std::move(results), QueryHandlerResult::COMMIT};
  };
  break;
}
}
return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges),

View File

@ -40,6 +40,7 @@ struct Config {
// Per-item storage options.
struct Items {
// NOTE(review): presumably controls whether edges may carry properties — confirm against the storage code.
bool properties_on_edges{true};
// When true, storage records each label and edge type as it is added so the
// SHOW NODE_LABELS INFO / SHOW EDGE_TYPES INFO queries can list them. Off by default.
bool enable_schema_metadata{false};
// Member-wise equality, generated by the compiler.
friend bool operator==(const Items &lrh, const Items &rhs) = default;
} items;

View File

@ -975,6 +975,9 @@ Result<EdgeAccessor> DiskStorage::DiskAccessor::CreateEdge(VertexAccessor *from,
transaction_.manyDeltasCache.Invalidate(from_vertex, edge_type, EdgeDirection::OUT);
transaction_.manyDeltasCache.Invalidate(to_vertex, edge_type, EdgeDirection::IN);
if (storage_->config_.items.enable_schema_metadata) {
storage_->stored_edge_types_.try_insert(edge_type);
}
storage_->edge_count_.fetch_add(1, std::memory_order_acq_rel);
return EdgeAccessor(edge, edge_type, from_vertex, to_vertex, storage_, &transaction_);

View File

@ -321,6 +321,9 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso
if (to_vertex->deleted) return Error::DELETED_OBJECT;
}
if (storage_->config_.items.enable_schema_metadata) {
storage_->stored_edge_types_.try_insert(edge_type);
}
auto *mem_storage = static_cast<InMemoryStorage *>(storage_);
auto gid = storage::Gid::FromUint(mem_storage->edge_id_.fetch_add(1, std::memory_order_acq_rel));
EdgeRef edge(gid);
@ -403,6 +406,10 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAcces
if (to_vertex->deleted) return Error::DELETED_OBJECT;
}
if (storage_->config_.items.enable_schema_metadata) {
storage_->stored_edge_types_.try_insert(edge_type);
}
// NOTE: When we update the next `edge_id_` here we perform a RMW
// (read-modify-write) operation that ISN'T atomic! But, that isn't an issue
// because this function is only called from the replication delta applier

View File

@ -104,6 +104,18 @@ std::optional<uint64_t> Storage::Accessor::GetTransactionId() const {
return {};
}
std::vector<LabelId> Storage::Accessor::ListAllPossiblyPresentVertexLabels() const {
std::vector<LabelId> vertex_labels;
storage_->stored_node_labels_.for_each([&vertex_labels](const auto &label) { vertex_labels.push_back(label); });
return vertex_labels;
}
std::vector<EdgeTypeId> Storage::Accessor::ListAllPossiblyPresentEdgeTypes() const {
std::vector<EdgeTypeId> edge_types;
storage_->stored_edge_types_.for_each([&edge_types](const auto &type) { edge_types.push_back(type); });
return edge_types;
}
void Storage::Accessor::AdvanceCommand() {
transaction_.manyDeltasCache.Clear(); // TODO: Just invalidate the View::OLD cache, NEW should still be fine
++transaction_.command_id;

View File

@ -40,6 +40,7 @@
#include "utils/event_histogram.hpp"
#include "utils/resource_lock.hpp"
#include "utils/scheduler.hpp"
#include "utils/synchronized_metadata_store.hpp"
#include "utils/timer.hpp"
#include "utils/uuid.hpp"
@ -242,6 +243,10 @@ class Storage {
const std::string &id() const { return storage_->id(); }
std::vector<LabelId> ListAllPossiblyPresentVertexLabels() const;
std::vector<EdgeTypeId> ListAllPossiblyPresentEdgeTypes() const;
virtual utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label) = 0;
virtual utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label, PropertyId property) = 0;
@ -393,6 +398,18 @@ class Storage {
Indices indices_;
Constraints constraints_;
// Data structures that provide fast retrieval of node-label and
// edge-type related metadata.
// For now, node labels and edge types must not be removed from these
// stores, even if no vertex or edge of that kind is currently present
// in the database. This metadata is usually used by client-side
// applications that want to be aware of the kind of data that *may*
// be present in the database.
// TODO(gvolfing): check if this would be faster with flat_maps.
utils::SynchronizedMetaDataStore<LabelId> stored_node_labels_;
utils::SynchronizedMetaDataStore<EdgeTypeId> stored_edge_types_;
std::atomic<uint64_t> vertex_id_{0};
std::atomic<uint64_t> edge_id_{0};
const std::string id_; //!< High-level assigned ID

View File

@ -110,6 +110,10 @@ Result<bool> VertexAccessor::AddLabel(LabelId label) {
CreateAndLinkDelta(transaction_, vertex_, Delta::RemoveLabelTag(), label);
vertex_->labels.push_back(label);
if (storage_->config_.items.enable_schema_metadata) {
storage_->stored_node_labels_.try_insert(label);
}
/// TODO: some by pointers, some by reference => not good, make it better
storage_->constraints_.unique_constraints_->UpdateOnAddLabel(label, *vertex_, transaction_->start_timestamp);
transaction_->constraint_verification_info.AddedLabel(vertex_);

View File

@ -0,0 +1,65 @@
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include <mutex>
#include <shared_mutex>
#include <unordered_set>
#include "utils/rw_lock.hpp"
#include "utils/synchronized.hpp"
namespace memgraph::utils {
/// A set of unique elements whose every access is guarded by an RWLock
/// (constructed with RWLock::Priority::READ).
///
/// Lookups and iteration take the lock in shared mode; insertion and erasure
/// take it exclusively. The store is neither copyable nor movable.
///
/// @tparam T element type; it must be usable as a key of std::unordered_set.
template <typename T>
class SynchronizedMetaDataStore {
 public:
  SynchronizedMetaDataStore() = default;
  ~SynchronizedMetaDataStore() = default;

  SynchronizedMetaDataStore(const SynchronizedMetaDataStore &) = delete;
  SynchronizedMetaDataStore(SynchronizedMetaDataStore &&) = delete;
  SynchronizedMetaDataStore &operator=(const SynchronizedMetaDataStore &) = delete;
  SynchronizedMetaDataStore &operator=(SynchronizedMetaDataStore &&) = delete;

  /// Inserts `elem` unless it is already stored.
  void try_insert(const T &elem) {
    {
      // Fast path: an element that is already present only needs the shared lock.
      std::shared_lock read_lock(lock_);
      if (element_store_.contains(elem)) {
        return;
      }
    }
    {
      // Another thread may have inserted `elem` after the shared lock was released;
      // insert() is then a no-op, so no second lookup is needed.
      std::unique_lock write_lock(lock_);
      element_store_.insert(elem);
    }
  }

  /// Removes `elem` if it is stored; does nothing otherwise.
  void erase(const T &elem) {
    std::unique_lock write_lock(lock_);
    element_store_.erase(elem);
  }

  /// Invokes `func(elem)` for every stored element, in unspecified order.
  ///
  /// The lock is held in shared mode while `func` runs, so `func` should not
  /// call back into this store.
  template <typename TFunc>
  void for_each(const TFunc &func) {
    // Iteration only reads the set, so a shared lock is enough and lets
    // concurrent readers proceed in parallel.
    std::shared_lock read_lock(lock_);
    for (const auto &elem : element_store_) {
      func(elem);
    }
  }

 private:
  std::unordered_set<T> element_store_;
  RWLock lock_{RWLock::Priority::READ};
};
} // namespace memgraph::utils

View File

@ -48,6 +48,7 @@ add_subdirectory(temporal_types)
add_subdirectory(write_procedures)
add_subdirectory(configuration)
add_subdirectory(magic_functions)
add_subdirectory(metadata_queries)
add_subdirectory(module_file_manager)
add_subdirectory(monitoring_server)
add_subdirectory(lba_procedures)

View File

@ -115,6 +115,11 @@ startup_config_dict = {
"false",
"Controls whether the index creation can be done in a multithreaded fashion.",
),
"storage_enable_schema_metadata": (
"false",
"false",
"Controls whether metadata should be collected about the resident labels and edge types.",
),
"password_encryption_algorithm": ("bcrypt", "bcrypt", "The password encryption algorithm used for authentication."),
"pulsar_service_url": ("", "", "Default URL used while connecting to Pulsar brokers."),
"query_execution_timeout_sec": (

View File

@ -0,0 +1,7 @@
# Passes one Python file of the metadata_queries e2e suite to
# copy_e2e_python_files (defined outside this file).
function(copy_metadata_queries_e2e_python_files FILE_NAME)
  copy_e2e_python_files(metadata_queries ${FILE_NAME})
endfunction()

# Shared helpers first, then one test module per query.
foreach(metadata_queries_python_file common.py show_node_labels_info.py show_edge_types_info.py)
  copy_metadata_queries_e2e_python_files(${metadata_queries_python_file})
endforeach()

View File

@ -0,0 +1,34 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import typing
import mgclient
import pytest
@pytest.fixture(scope="module")
def cursor(**kwargs) -> mgclient.Cursor:
    """Module-scoped fixture returning an autocommit cursor.

    Connects to localhost:7687, forwarding any extra keyword arguments to
    ``mgclient.connect``, switches the connection to autocommit and returns
    a cursor created from it (not the connection itself).
    """
    connection = mgclient.connect(host="localhost", port=7687, **kwargs)
    connection.autocommit = True
    return connection.cursor()
def execute_and_fetch_all(
    cursor: mgclient.Cursor, query: str, params: typing.Optional[dict] = None
) -> typing.List[tuple]:
    """Execute ``query`` on ``cursor`` and return all result rows.

    Args:
        cursor: cursor to run the query on.
        query: query text.
        params: query parameters; ``None`` (the default) means no parameters.

    Returns:
        Whatever ``cursor.fetchall()`` yields after the query is executed.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    cursor.execute(query, {} if params is None else params)
    return cursor.fetchall()
def are_results_equal(result1, result2):
    """Return True when both results hold the same rows, ignoring order.

    Results of different length are never equal; in that case the rows are
    not sorted at all.
    """
    same_length = len(result1) == len(result2)
    return same_length and sorted(result1) == sorted(result2)

View File

@ -0,0 +1,80 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import pytest
from common import are_results_equal, cursor, execute_and_fetch_all
# Helper functions
def create_nodes(cursor):
    """Create the two vertices used by the tests: an Actor and a Director, both also labelled Person."""
    query = "CREATE (charlie:Person:Actor {name: 'Charlie Sheen'}), (oliver:Person:Director {name: 'Oliver Stone'})"
    execute_and_fetch_all(cursor, query)
def create_edges(cursor):
    """Link the two matched persons to a newly created Movie vertex with one ACTED_IN and one DIRECTED edge."""
    query = "MATCH (charlie:Person {name: 'Charlie Sheen'}), (oliver:Person {name: 'Oliver Stone'}) CREATE (charlie)-[:ACTED_IN {role: 'Bud Fox'}]->(wallStreet:Movie {title: 'Wall Street'})<-[:DIRECTED]-(oliver)"
    execute_and_fetch_all(cursor, query)
def edge_types_info(cursor):
    """Run SHOW EDGE_TYPES INFO and return its rows."""
    rows = execute_and_fetch_all(cursor, "SHOW EDGE_TYPES INFO")
    return rows
def default_expected_result(cursor):
    """Return the rows expected once create_edges has run: one single-column row per edge type.

    ``cursor`` is accepted but not used.
    """
    edge_type_names = ("DIRECTED", "ACTED_IN")
    return [(name,) for name in edge_type_names]
# Tests
def test_return_empty(cursor):
    """After creating only vertices, the query is expected to report no edge types."""
    create_nodes(cursor)

    reported = edge_types_info(cursor)

    assert are_results_equal([], reported)
def test_return_edge_types_simple(cursor):
    """After creating the edges once, both edge types are expected in the result."""
    create_nodes(cursor)
    create_edges(cursor)

    reported = edge_types_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
def test_return_edge_types_repeating_identical_edges(cursor):
    """Creating the same edges many times must not duplicate edge types in the result."""
    repetitions = 100
    create_nodes(cursor)
    for _ in range(repetitions):
        create_edges(cursor)

    reported = edge_types_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
def test_return_edge_types_obtainable_after_edge_deletion(cursor):
    """Edge types are still expected in the result after every vertex and edge is deleted."""
    create_nodes(cursor)
    create_edges(cursor)
    execute_and_fetch_all(cursor, "MATCH(n) DETACH DELETE n")

    reported = edge_types_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
if __name__ == "__main__":
    # Run this file's tests through pytest and use its result as the process exit code.
    exit_code = pytest.main([__file__, "-rA"])
    sys.exit(exit_code)

View File

@ -0,0 +1,67 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import pytest
from common import are_results_equal, cursor, execute_and_fetch_all
# Helper functions
def create_nodes(cursor):
    """Create two vertices that together carry the Person, Actor and Director labels."""
    query = "CREATE (charlie:Person:Actor {name: 'Charlie Sheen'}), (oliver:Person:Director {name: 'Oliver Stone'})"
    execute_and_fetch_all(cursor, query)
def node_labels_info(cursor):
    """Run SHOW NODE_LABELS INFO and return its rows."""
    rows = execute_and_fetch_all(cursor, "SHOW NODE_LABELS INFO")
    return rows
def default_expected_result(cursor):
    """Return the rows expected once create_nodes has run: one single-column row per label.

    ``cursor`` is accepted but not used.
    """
    label_names = ("Person", "Actor", "Director")
    return [(name,) for name in label_names]
# Tests
def test_return_empty(cursor):
    """Before any vertex is created, the query is expected to report no labels."""
    reported = node_labels_info(cursor)

    assert are_results_equal([], reported)
def test_return_node_labels_simple(cursor):
    """After creating the vertices once, all three labels are expected in the result."""
    create_nodes(cursor)

    reported = node_labels_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
def test_return_node_labels_repeating_identical_labels(cursor):
    """Creating the same labelled vertices many times must not duplicate labels in the result."""
    repetitions = 100
    for _ in range(repetitions):
        create_nodes(cursor)

    reported = node_labels_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
def test_return_node_labels_obtainable_after_vertex_deletion(cursor):
    """Labels are still expected in the result after every vertex is deleted."""
    create_nodes(cursor)
    execute_and_fetch_all(cursor, "MATCH(n) DELETE n")

    reported = node_labels_info(cursor)

    assert are_results_equal(default_expected_result(cursor), reported)
if __name__ == "__main__":
    # Run this file's tests through pytest and use its result as the process exit code.
    exit_code = pytest.main([__file__, "-rA"])
    sys.exit(exit_code)

View File

@ -0,0 +1,18 @@
# Shared cluster definition, anchored as &metadata_queries: a single "main"
# instance started with --storage-enable-schema-metadata=TRUE, which the
# SHOW NODE_LABELS INFO / SHOW EDGE_TYPES INFO queries need.
metadata_queries: &metadata_queries
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr", "--storage-enable-schema-metadata=TRUE"]
log_file: "metadata-queries.log"
setup_queries: []
validation_queries: []
# Each workload runs one pytest module through the e2e pytest runner and
# merges in the cluster definition above.
workloads:
- name: "Show edge types info"
binary: "tests/e2e/pytest_runner.sh"
args: ["metadata_queries/show_edge_types_info.py"]
<<: *metadata_queries
- name: "Show node labels info"
binary: "tests/e2e/pytest_runner.sh"
args: ["metadata_queries/show_node_labels_info.py"]
<<: *metadata_queries