From 50c485fe4044bf9ee59424e23dcf1a02f843c77f Mon Sep 17 00:00:00 2001 From: gvolfing Date: Mon, 6 Nov 2023 12:37:48 +0100 Subject: [PATCH] Add storage side capabilities to retrieve metadata In order to get the required metadata in constant time we need to keep track of the node labels and edge types that were ever present in the database. This is done by the two auxiliary data structures that are present in the storage instances. The ability to get this metadata is propagated to the DbAccessor class, which the query modules can interact with. --- src/query/db_accessor.hpp | 7 +++++++ src/storage/v2/disk/storage.cpp | 1 + src/storage/v2/inmemory/storage.cpp | 3 +++ src/storage/v2/storage.cpp | 18 ++++++++++++++++++ src/storage/v2/storage.hpp | 16 ++++++++++++++++ src/storage/v2/vertex_accessor.cpp | 1 + 6 files changed, 46 insertions(+) diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index d6114edaf..866da0af9 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -597,6 +597,13 @@ class DbAccessor final { return accessor_->ApproximateVertexCount(label, property, lower, upper); } + std::vector ListAllPossiblyPresentVertexLabels() const { + return accessor_->ListAllPossiblyPresentVertexLabels(); + } + std::vector ListAllPossiblyPresentEdgeTypes() const { + return accessor_->ListAllPossiblyPresentEdgeTypes(); + } + storage::IndicesInfo ListAllIndices() const { return accessor_->ListAllIndices(); } storage::ConstraintsInfo ListAllConstraints() const { return accessor_->ListAllConstraints(); } diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 809d744ed..f0280fdc0 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -944,6 +944,7 @@ Result DiskStorage::DiskAccessor::CreateEdge(VertexAccessor *from, transaction_.manyDeltasCache.Invalidate(from_vertex, edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(to_vertex, edge_type, EdgeDirection::IN); + 
storage_->stored_edge_types_.insert(edge_type); storage_->edge_count_.fetch_add(1, std::memory_order_acq_rel); return EdgeAccessor(edge, edge_type, from_vertex, to_vertex, storage_, &transaction_); diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index d2ce80417..22d2f3c16 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -331,6 +331,7 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso if (to_vertex->deleted) return Error::DELETED_OBJECT; } + storage_->stored_edge_types_.insert(edge_type); auto *mem_storage = static_cast(storage_); auto gid = storage::Gid::FromUint(mem_storage->edge_id_.fetch_add(1, std::memory_order_acq_rel)); EdgeRef edge(gid); @@ -395,6 +396,8 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAcces if (to_vertex->deleted) return Error::DELETED_OBJECT; } + storage_->stored_edge_types_.insert(edge_type); + // NOTE: When we update the next `edge_id_` here we perform a RMW // (read-modify-write) operation that ISN'T atomic! 
But, that isn't an issue // because this function is only called from the replication delta applier diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index ec988285a..5b935c029 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -122,6 +122,24 @@ std::optional Storage::Accessor::GetTransactionId() const { return {}; } +std::vector Storage::Accessor::ListAllPossiblyPresentVertexLabels() const { + std::vector vertex_labels; + vertex_labels.reserve(storage_->stored_node_labels_.size()); + for (const auto label : storage_->stored_node_labels_) { + vertex_labels.emplace_back(LabelToName(label)); + } + return vertex_labels; +} + +std::vector Storage::Accessor::ListAllPossiblyPresentEdgeTypes() const { + std::vector edge_types; + edge_types.reserve(storage_->stored_edge_types_.size()); + for (const auto edge_type : storage_->stored_edge_types_) { + edge_types.emplace_back(EdgeTypeToName(edge_type)); + } + return edge_types; +} + void Storage::Accessor::AdvanceCommand() { transaction_.manyDeltasCache.Clear(); // TODO: Just invalidate the View::OLD cache, NEW should still be fine ++transaction_.command_id; diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 2d36d202a..f3253267e 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -237,6 +237,10 @@ class Storage { const std::string &id() const { return storage_->id(); } + std::vector ListAllPossiblyPresentVertexLabels() const; + + std::vector ListAllPossiblyPresentEdgeTypes() const; + virtual utils::BasicResult CreateIndex(LabelId label) = 0; virtual utils::BasicResult CreateIndex(LabelId label, PropertyId property) = 0; @@ -384,6 +388,18 @@ class Storage { Indices indices_; Constraints constraints_; + // Datastructures to provide fast retrieval of node-label and + // edge-type related metadata. 
+ // Currently we should not remove any node-labels or edge-types even + // if the set of given types are currently not present in the + // database. This metadata is usually used by client side + // applications that want to be aware of the kind of data that *may* + // be present in the database. + + // TODO(gvolfing): check if this would be faster with flat_maps. + std::unordered_set stored_node_labels_; + std::unordered_set stored_edge_types_; + std::atomic vertex_id_{0}; std::atomic edge_id_{0}; const std::string id_; //!< High-level assigned ID diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 924c305ad..91ffd547e 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -109,6 +109,7 @@ Result VertexAccessor::AddLabel(LabelId label) { CreateAndLinkDelta(transaction_, vertex_, Delta::RemoveLabelTag(), label); vertex_->labels.push_back(label); + storage_->stored_node_labels_.insert(label); /// TODO: some by pointers, some by reference => not good, make it better storage_->constraints_.unique_constraints_->UpdateOnAddLabel(label, *vertex_, transaction_->start_timestamp);