From 59f4b893612f187d9abe7f0bb0cf02b07d0dcb79 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 6 Dec 2022 12:34:36 +0100 Subject: [PATCH 01/79] Remove redundandt shard properties --- src/storage/v3/shard.cpp | 5 +---- src/storage/v3/shard.hpp | 32 -------------------------------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index a77f42a76..39c0683cd 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -332,10 +332,7 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, - config_{config}, - uuid_{utils::GenerateUUID()}, - epoch_id_{utils::GenerateUUID()}, - global_locker_{file_retainer_.AddLocker()} { + config_{config} { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index b998c06cc..31259a0fe 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -393,38 +393,6 @@ class Shard final { // storage. std::list<Gid> deleted_edges_; - // UUID used to distinguish snapshots and to link snapshots to WALs - std::string uuid_; - // Sequence number used to keep track of the chain of WALs. - uint64_t wal_seq_num_{0}; - - // UUID to distinguish different main instance runs for replication process - // on SAME storage. - // Multiple instances can have same storage UUID and be MAIN at the same time. - // We cannot compare commit timestamps of those instances if one of them - // becomes the replica of the other so we use epoch_id_ as additional - // discriminating property. - // Example of this: - // We have 2 instances of the same storage, S1 and S2. - // S1 and S2 are MAIN and accept their own commits and write them to the WAL. 
- // At the moment when S1 commited a transaction with timestamp 20, and S2 - // a different transaction with timestamp 15, we change S2's role to REPLICA - // and register it on S1. - // Without using the epoch_id, we don't know that S1 and S2 have completely - // different transactions, we think that the S2 is behind only by 5 commits. - std::string epoch_id_; - // History of the previous epoch ids. - // Each value consists of the epoch id along the last commit belonging to that - // epoch. - std::deque<std::pair<std::string, uint64_t>> epoch_history_; - - uint64_t wal_unsynced_transactions_{0}; - - utils::FileRetainer file_retainer_; - - // Global locker that is used for clients file locking - utils::FileRetainer::FileLocker global_locker_; - // Holds all of the (in progress, committed and aborted) transactions that are read or write to this shard, but // haven't been cleaned up yet std::map<uint64_t, std::unique_ptr<Transaction>> start_logical_id_to_transaction_{}; From 153e9e2fac0a8b628e32585b1dd524f95811420f Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 7 Dec 2022 14:13:14 +0100 Subject: [PATCH 02/79] Begin split funcionlity from shard side --- src/storage/v3/shard.cpp | 39 ++++++++++++++++++++++++++++++--- src/storage/v3/shard.hpp | 18 +++++++++++++++ src/storage/v3/shard_rsm.hpp | 3 +++ src/storage/v3/shard_worker.hpp | 8 +++++++ 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 39c0683cd..1b3efbff4 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -18,10 +18,7 @@ #include <memory> #include <mutex> #include <optional> -#include <variant> -#include <bits/ranges_algo.h> -#include <gflags/gflags.h> #include <spdlog/spdlog.h> #include "io/network/endpoint.hpp" @@ -1042,6 +1039,42 @@ void Shard::StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name) { name_id_mapper_.StoreMapping(std::move(id_to_name)); } +std::optional<SplitInfo> 
Shard::ShouldSplit() const noexcept { + if (vertices_.size() > 10000000) { + // Why should we care if the selected vertex is deleted + auto mid_elem = vertices_.begin(); + // mid_elem->first + std::ranges::advance(mid_elem, static_cast<VertexContainer::difference_type>(vertices_.size() / 2)); + return SplitInfo{shard_version_, mid_elem->first}; + } + return std::nullopt; +} + +SplitData Shard::PerformSplit(const PrimaryKey &split_key) { + SplitData data; + data.vertices = std::map(vertices_.find(split_key), vertices_.end()); + data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; + + // Get all edges related with those vertices + if (config_.items.properties_on_edges) { + data.edges = std::invoke([&split_vertices = data.vertices]() { + // How to reserve? + EdgeContainer split_edges; + for (const auto &vertex : split_vertices) { + for (const auto &in_edge : vertex.second.in_edges) { + auto edge = std::get<2>(in_edge).ptr; + split_edges.insert(edge->gid, Edge{.gid = edge->gid, .delta = edge->delta, .properties = edge->properties}); + } + } + return split_edges; + }); + } + // TODO We also need to send ongoing transactions to the shard + // since they own deltas + + return data; +} + bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { return vertex_id.primary_label == primary_label_ && vertex_id.primary_key >= min_primary_key_ && (!max_primary_key_.has_value() || vertex_id.primary_key < *max_primary_key_); diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 31259a0fe..301fc8132 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -175,6 +175,19 @@ struct SchemasInfo { Schemas::SchemasList schemas; }; +struct SplitInfo { + uint64_t shard_version; + PrimaryKey split_point; +}; + +// If edge properties-on-edges is false then we don't need to send edges but +// only vertices, since they will contain those edges +struct SplitData { + VertexContainer vertices; + 
std::optional<EdgeContainer> edges; + IndicesInfo indices_info; +}; + /// Structure used to return information about the storage. struct StorageInfo { uint64_t vertex_count; @@ -357,6 +370,10 @@ class Shard final { void StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name); + std::optional<SplitInfo> ShouldSplit() const noexcept; + + SplitData PerformSplit(const PrimaryKey &split_key); + private: Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); @@ -374,6 +391,7 @@ class Shard final { // list is used only when properties are enabled for edges. Because of that we // keep a separate count of edges that is always updated. uint64_t edge_count_{0}; + uint64_t shard_version_{0}; SchemaValidator schema_validator_; VertexValidator vertex_validator_; diff --git a/src/storage/v3/shard_rsm.hpp b/src/storage/v3/shard_rsm.hpp index d301bf40b..ba284a3ca 100644 --- a/src/storage/v3/shard_rsm.hpp +++ b/src/storage/v3/shard_rsm.hpp @@ -12,6 +12,7 @@ #pragma once #include <memory> +#include <optional> #include <variant> #include <openssl/ec.h> @@ -41,6 +42,8 @@ class ShardRsm { public: explicit ShardRsm(std::unique_ptr<Shard> &&shard) : shard_(std::move(shard)){}; + std::optional<SplitInfo> ShouldSplit() const noexcept { return shard_->ShouldSplit(); } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) msgs::ReadResponses Read(msgs::ReadRequests requests) { return std::visit([&](auto &&request) mutable { return HandleRead(std::forward<decltype(request)>(request)); }, diff --git a/src/storage/v3/shard_worker.hpp b/src/storage/v3/shard_worker.hpp index 547aa0a6f..dcdc6ee13 100644 --- a/src/storage/v3/shard_worker.hpp +++ b/src/storage/v3/shard_worker.hpp @@ -173,6 +173,14 @@ class ShardWorker { auto &rsm = rsm_map_.at(uuid); Time next_for_uuid = rsm.Cron(); + // Check if shard should split + if (const auto split_info = rsm.ShouldSplit(); split_info) { + // Request split from coordinator + // split_point => 
middle pk + // shard_id => uuid + // shard_version => + } + cron_schedule_.pop(); cron_schedule_.push(std::make_pair(next_for_uuid, uuid)); } else { From b968748d9f0932d9c4c2846c91b4dbb8afe5f538 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 6 Dec 2022 12:34:36 +0100 Subject: [PATCH 03/79] Remove redundandt shard properties --- src/storage/v3/shard.cpp | 5 +---- src/storage/v3/shard.hpp | 32 -------------------------------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 0d8b21402..de5161572 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -332,10 +332,7 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, - config_{config}, - uuid_{utils::GenerateUUID()}, - epoch_id_{utils::GenerateUUID()}, - global_locker_{file_retainer_.AddLocker()} { + config_{config} { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 8542d47eb..ed170fd55 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -392,38 +392,6 @@ class Shard final { // storage. std::list<Gid> deleted_edges_; - // UUID used to distinguish snapshots and to link snapshots to WALs - std::string uuid_; - // Sequence number used to keep track of the chain of WALs. - uint64_t wal_seq_num_{0}; - - // UUID to distinguish different main instance runs for replication process - // on SAME storage. - // Multiple instances can have same storage UUID and be MAIN at the same time. - // We cannot compare commit timestamps of those instances if one of them - // becomes the replica of the other so we use epoch_id_ as additional - // discriminating property. 
- // Example of this: - // We have 2 instances of the same storage, S1 and S2. - // S1 and S2 are MAIN and accept their own commits and write them to the WAL. - // At the moment when S1 commited a transaction with timestamp 20, and S2 - // a different transaction with timestamp 15, we change S2's role to REPLICA - // and register it on S1. - // Without using the epoch_id, we don't know that S1 and S2 have completely - // different transactions, we think that the S2 is behind only by 5 commits. - std::string epoch_id_; - // History of the previous epoch ids. - // Each value consists of the epoch id along the last commit belonging to that - // epoch. - std::deque<std::pair<std::string, uint64_t>> epoch_history_; - - uint64_t wal_unsynced_transactions_{0}; - - utils::FileRetainer file_retainer_; - - // Global locker that is used for clients file locking - utils::FileRetainer::FileLocker global_locker_; - // Holds all of the (in progress, committed and aborted) transactions that are read or write to this shard, but // haven't been cleaned up yet std::map<uint64_t, std::unique_ptr<Transaction>> start_logical_id_to_transaction_{}; From 486da0bd1cf6711f626145eff9334d56fc405525 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 7 Dec 2022 14:13:14 +0100 Subject: [PATCH 04/79] Begin split funcionlity from shard side --- src/storage/v3/shard.cpp | 39 ++++++++++++++++++++++++++++++--- src/storage/v3/shard.hpp | 18 +++++++++++++++ src/storage/v3/shard_rsm.hpp | 3 +++ src/storage/v3/shard_worker.hpp | 8 +++++++ 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index de5161572..294c99f65 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -18,10 +18,7 @@ #include <memory> #include <mutex> #include <optional> -#include <variant> -#include <bits/ranges_algo.h> -#include <gflags/gflags.h> #include <spdlog/spdlog.h> #include "io/network/endpoint.hpp" @@ -1045,6 +1042,42 @@ 
void Shard::StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name) { name_id_mapper_.StoreMapping(std::move(id_to_name)); } +std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { + if (vertices_.size() > 10000000) { + // Why should we care if the selected vertex is deleted + auto mid_elem = vertices_.begin(); + // mid_elem->first + std::ranges::advance(mid_elem, static_cast<VertexContainer::difference_type>(vertices_.size() / 2)); + return SplitInfo{shard_version_, mid_elem->first}; + } + return std::nullopt; +} + +SplitData Shard::PerformSplit(const PrimaryKey &split_key) { + SplitData data; + data.vertices = std::map(vertices_.find(split_key), vertices_.end()); + data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; + + // Get all edges related with those vertices + if (config_.items.properties_on_edges) { + data.edges = std::invoke([&split_vertices = data.vertices]() { + // How to reserve? + EdgeContainer split_edges; + for (const auto &vertex : split_vertices) { + for (const auto &in_edge : vertex.second.in_edges) { + auto edge = std::get<2>(in_edge).ptr; + split_edges.insert(edge->gid, Edge{.gid = edge->gid, .delta = edge->delta, .properties = edge->properties}); + } + } + return split_edges; + }); + } + // TODO We also need to send ongoing transactions to the shard + // since they own deltas + + return data; +} + bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { return vertex_id.primary_label == primary_label_ && vertex_id.primary_key >= min_primary_key_ && (!max_primary_key_.has_value() || vertex_id.primary_key < *max_primary_key_); diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index ed170fd55..88849184d 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -174,6 +174,19 @@ struct SchemasInfo { Schemas::SchemasList schemas; }; +struct SplitInfo { + uint64_t shard_version; + PrimaryKey split_point; +}; + +// If edge 
properties-on-edges is false then we don't need to send edges but +// only vertices, since they will contain those edges +struct SplitData { + VertexContainer vertices; + std::optional<EdgeContainer> edges; + IndicesInfo indices_info; +}; + /// Structure used to return information about the storage. struct StorageInfo { uint64_t vertex_count; @@ -356,6 +369,10 @@ class Shard final { void StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name); + std::optional<SplitInfo> ShouldSplit() const noexcept; + + SplitData PerformSplit(const PrimaryKey &split_key); + private: Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); @@ -373,6 +390,7 @@ class Shard final { // list is used only when properties are enabled for edges. Because of that we // keep a separate count of edges that is always updated. uint64_t edge_count_{0}; + uint64_t shard_version_{0}; SchemaValidator schema_validator_; VertexValidator vertex_validator_; diff --git a/src/storage/v3/shard_rsm.hpp b/src/storage/v3/shard_rsm.hpp index d301bf40b..ba284a3ca 100644 --- a/src/storage/v3/shard_rsm.hpp +++ b/src/storage/v3/shard_rsm.hpp @@ -12,6 +12,7 @@ #pragma once #include <memory> +#include <optional> #include <variant> #include <openssl/ec.h> @@ -41,6 +42,8 @@ class ShardRsm { public: explicit ShardRsm(std::unique_ptr<Shard> &&shard) : shard_(std::move(shard)){}; + std::optional<SplitInfo> ShouldSplit() const noexcept { return shard_->ShouldSplit(); } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) msgs::ReadResponses Read(msgs::ReadRequests requests) { return std::visit([&](auto &&request) mutable { return HandleRead(std::forward<decltype(request)>(request)); }, diff --git a/src/storage/v3/shard_worker.hpp b/src/storage/v3/shard_worker.hpp index 547aa0a6f..dcdc6ee13 100644 --- a/src/storage/v3/shard_worker.hpp +++ b/src/storage/v3/shard_worker.hpp @@ -173,6 +173,14 @@ class ShardWorker { auto &rsm = rsm_map_.at(uuid); Time next_for_uuid 
= rsm.Cron(); + // Check if shard should split + if (const auto split_info = rsm.ShouldSplit(); split_info) { + // Request split from coordinator + // split_point => middle pk + // shard_id => uuid + // shard_version => + } + cron_schedule_.pop(); cron_schedule_.push(std::make_pair(next_for_uuid, uuid)); } else { From bf21cbc9a9deec0618ce94089a63b49ae2e1c23f Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 16 Dec 2022 09:13:07 +0100 Subject: [PATCH 05/79] Split vetrices, edges and transactions --- src/storage/v3/delta.hpp | 6 +++ src/storage/v3/shard.cpp | 82 +++++++++++++++++++++++++++++++--------- src/storage/v3/shard.hpp | 10 +++++ 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 1c6e57d2b..69da63e77 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -12,6 +12,9 @@ #pragma once #include <memory> + +#include <boost/uuid/uuid.hpp> + #include "storage/v3/edge_ref.hpp" #include "storage/v3/id_types.hpp" #include "storage/v3/property_value.hpp" @@ -129,6 +132,9 @@ inline bool operator==(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer inline bool operator!=(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer &b) { return !(a == b); } struct Delta { + // Needed for splits + boost::uuids::uuid uuid; + enum class Action { // Used for both Vertex and Edge DELETE_OBJECT, diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 294c99f65..305ef8650 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -23,6 +23,8 @@ #include "io/network/endpoint.hpp" #include "io/time.hpp" +#include "storage/v3/delta.hpp" +#include "storage/v3/edge.hpp" #include "storage/v3/edge_accessor.hpp" #include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" @@ -1053,27 +1055,73 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { return std::nullopt; } +void CollectDeltas(std::set<uint64_t> 
&collected_transactions_start_id, Delta *delta) const { + while (delta != nullptr) { + collected_transactions_start_id.insert(delta->command_id); + delta = delta->next; + } +} + +VertexContainer Shard::CollectVertices(std::set<uint64_t> &collected_transactions_start_id, + const PrimaryKey &split_key) { + VertexContainer splitted_data; + auto split_key_it = vertices_.find(split_key); + + for (; split_key_it != vertices_.end(); split_key_it++) { + // Go through deltas and pick up transactions start_id + CollectDeltas(collected_transactions_start_id, split_key_it->second.delta); + splitted_data.insert(vertices_.extract(split_key_it->first)); + } + return splitted_data; +} + +std::optional<EdgeContainer> Shard::CollectEdges(std::set<uint64_t> &collected_transactions_start_id, + const VertexContainer &split_vertices) const { + if (!config_.items.properties_on_edges) { + return std::nullopt; + } + EdgeContainer splitted_edges; + // TODO This copies edges without removing the unecessary ones!! 
+ for (const auto &vertex : split_vertices) { + for (const auto &in_edge : vertex.second.in_edges) { + // This is safe since if properties_on_edges is true, the this must be a + // ptr + auto *edge = std::get<2>(in_edge).ptr; + CollectDeltas(collected_transactions_start_id, edge->delta); + + splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); + } + for (const auto &in_edge : vertex.second.out_edges) { + auto *edge = std::get<2>(in_edge).ptr; + CollectDeltas(collected_transactions_start_id, edge->delta); + + splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); + } + } + return splitted_edges; +} + +std::list<Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) const { + std::list<Transaction> transactions; + for (const auto commit_start : collected_transactions_start_id) { + transactions.push_back(*start_logical_id_to_transaction_[commit_start]); + } + return transactions; +} + SplitData Shard::PerformSplit(const PrimaryKey &split_key) { SplitData data; - data.vertices = std::map(vertices_.find(split_key), vertices_.end()); + std::set<uint64_t> collected_transactions_start_id; + // Split Vertices + data.vertices = CollectVertices(collected_transactions_start_id, split_key); + // Resolve the deltas that were left on the shard, and are not referenced by + // neither of vertices + data.edges = CollectEdges(collected_transactions_start_id, data.vertices); data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; + // TODO Iterate over vertices and edges to replace their deltas with new ones tha are copied over + // use uuid - // Get all edges related with those vertices - if (config_.items.properties_on_edges) { - data.edges = std::invoke([&split_vertices = data.vertices]() { - // How to reserve? 
- EdgeContainer split_edges; - for (const auto &vertex : split_vertices) { - for (const auto &in_edge : vertex.second.in_edges) { - auto edge = std::get<2>(in_edge).ptr; - split_edges.insert(edge->gid, Edge{.gid = edge->gid, .delta = edge->delta, .properties = edge->properties}); - } - } - return split_edges; - }); - } - // TODO We also need to send ongoing transactions to the shard - // since they own deltas + data.transactions = CollectTransactions(collected_transactions_start_id); return data; } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 88849184d..ec9104ed6 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -185,6 +185,7 @@ struct SplitData { VertexContainer vertices; std::optional<EdgeContainer> edges; IndicesInfo indices_info; + std::list<Transaction> transactions; }; /// Structure used to return information about the storage. @@ -374,6 +375,15 @@ class Shard final { SplitData PerformSplit(const PrimaryKey &split_key); private: + void CollectDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; + + std::list<Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) const; + + VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); + + std::optional<EdgeContainer> CollectEdges(std::set<uint64_t> &collected_transactions_start_id, + const VertexContainer &split_vertices) const; + Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); uint64_t CommitTimestamp(std::optional<uint64_t> desired_commit_timestamp = {}); From 7ef4114835e55d911dbc5ee79e5a26e14673ba56 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 9 Jan 2023 16:11:10 +0100 Subject: [PATCH 06/79] Fix splitting of edges --- src/storage/v3/shard.cpp | 78 +++++++++++++++++++-------------- src/storage/v3/shard.hpp | 34 +++++++++++--- src/storage/v3/shard_worker.hpp | 14 +++--- 
src/storage/v3/transaction.hpp | 18 +++++++- 4 files changed, 98 insertions(+), 46 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 305ef8650..86529ad5a 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -19,6 +19,7 @@ #include <mutex> #include <optional> +#include <bits/ranges_algo.h> #include <spdlog/spdlog.h> #include "io/network/endpoint.hpp" @@ -1055,7 +1056,7 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { return std::nullopt; } -void CollectDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const { +void Shard::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const { while (delta != nullptr) { collected_transactions_start_id.insert(delta->command_id); delta = delta->next; @@ -1069,59 +1070,72 @@ VertexContainer Shard::CollectVertices(std::set<uint64_t> &collected_transaction for (; split_key_it != vertices_.end(); split_key_it++) { // Go through deltas and pick up transactions start_id - CollectDeltas(collected_transactions_start_id, split_key_it->second.delta); + ScanDeltas(collected_transactions_start_id, split_key_it->second.delta); splitted_data.insert(vertices_.extract(split_key_it->first)); } return splitted_data; } std::optional<EdgeContainer> Shard::CollectEdges(std::set<uint64_t> &collected_transactions_start_id, - const VertexContainer &split_vertices) const { + const VertexContainer &split_vertices, const PrimaryKey &split_key) { if (!config_.items.properties_on_edges) { return std::nullopt; } EdgeContainer splitted_edges; - // TODO This copies edges without removing the unecessary ones!! 
+ const auto split_edges_from_vertex = [&](const auto &edges_ref) { + // This is safe since if properties_on_edges is true, the this must be a + // ptr + for (const auto &edge_ref : edges_ref) { + auto *edge = std::get<2>(edge_ref).ptr; + const auto &other_vtx = std::get<1>(edge_ref); + ScanDeltas(collected_transactions_start_id, edge->delta); + // Check if src and dest edge are both on splitted shard + // so we know if we should remove orphan edge + if (other_vtx.primary_key >= split_key) { + // Remove edge from shard + splitted_edges.insert(edges_.extract(edge->gid)); + } else { + splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); + } + } + }; + for (const auto &vertex : split_vertices) { - for (const auto &in_edge : vertex.second.in_edges) { - // This is safe since if properties_on_edges is true, the this must be a - // ptr - auto *edge = std::get<2>(in_edge).ptr; - CollectDeltas(collected_transactions_start_id, edge->delta); - - splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); - } - for (const auto &in_edge : vertex.second.out_edges) { - auto *edge = std::get<2>(in_edge).ptr; - CollectDeltas(collected_transactions_start_id, edge->delta); - - splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); - } + split_edges_from_vertex(vertex.second.in_edges); + split_edges_from_vertex(vertex.second.out_edges); } return splitted_edges; } -std::list<Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) const { - std::list<Transaction> transactions; - for (const auto commit_start : collected_transactions_start_id) { - transactions.push_back(*start_logical_id_to_transaction_[commit_start]); - } +std::map<uint64_t, std::unique_ptr<Transaction>> Shard::CollectTransactions( + const std::set<uint64_t> &collected_transactions_start_id) { + std::map<uint64_t, std::unique_ptr<Transaction>> transactions; + // for (const auto commit_start : collected_transactions_start_id) { + // 
transactions.insert( + // {commit_start, std::make_unique<Transaction>(*start_logical_id_to_transaction_.at(commit_start))}); + // } return transactions; } SplitData Shard::PerformSplit(const PrimaryKey &split_key) { SplitData data; std::set<uint64_t> collected_transactions_start_id; - // Split Vertices data.vertices = CollectVertices(collected_transactions_start_id, split_key); - // Resolve the deltas that were left on the shard, and are not referenced by - // neither of vertices - data.edges = CollectEdges(collected_transactions_start_id, data.vertices); - data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; - // TODO Iterate over vertices and edges to replace their deltas with new ones tha are copied over - // use uuid + data.edges = CollectEdges(collected_transactions_start_id, data.vertices, split_key); + // TODO indices wont work since timestamp cannot be replicated + // data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; - data.transactions = CollectTransactions(collected_transactions_start_id); + // data.transactions = CollectTransactions(collected_transactions_start_id); + + // Update delta addresses with the new addresses + // for (auto &vertex : data.vertices) { + // AdjustSplittedDataDeltas(vertex.second, data.transactions); + // } + // if (data.edges) { + // for (auto &edge : data.edges) { + // AdjustSplittedDataDeltas(edge, data.transactions); + // } + // } return data; } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index ec9104ed6..cab92eae9 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -185,7 +185,7 @@ struct SplitData { VertexContainer vertices; std::optional<EdgeContainer> edges; IndicesInfo indices_info; - std::list<Transaction> transactions; + std::map<uint64_t, Transaction> transactions; }; /// Structure used to return information about the storage. @@ -375,14 +375,36 @@ class Shard final { SplitData PerformSplit(const PrimaryKey &split_key); private: - void CollectDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; + template <typename TObj> + requires utils::SameAsAnyOf<TObj, Edge, VertexData> + void AdjustSplittedDataDeltas(TObj &delta_holder, const std::map<int64_t, Transaction> &transactions) { + auto *delta_chain = delta_holder.delta; + Delta *new_delta_chain{nullptr}; + while (delta_chain != nullptr) { + auto &transaction = transactions.at(delta_chain->command_id); + // This is the address of corresponding delta + const auto transaction_delta_it = std::ranges::find_if( + transaction->deltas, [delta_uuid = delta_chain->uuid](const auto &elem) { return elem.uuid == delta_uuid; }); + // Add this delta to the new chain + if (new_delta_chain == nullptr) { + new_delta_chain = &*transaction_delta_it; + } else { + new_delta_chain->next = &*transaction_delta_it; + } + delta_chain = delta_chain->next; + } + delta_holder.delta = new_delta_chain; + } - std::list<Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) const; + void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; + + std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( + const std::set<uint64_t> &collected_transactions_start_id); VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); std::optional<EdgeContainer> 
CollectEdges(std::set<uint64_t> &collected_transactions_start_id, - const VertexContainer &split_vertices) const; + const VertexContainer &split_vertices, const PrimaryKey &split_key); Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); @@ -391,7 +413,7 @@ class Shard final { // Main object storage NameIdMapper name_id_mapper_; LabelId primary_label_; - // The shard's range is [min, max) + // The shard's range is [min, max> PrimaryKey min_primary_key_; std::optional<PrimaryKey> max_primary_key_; VertexContainer vertices_; diff --git a/src/storage/v3/shard_worker.hpp b/src/storage/v3/shard_worker.hpp index dcdc6ee13..e3d57964f 100644 --- a/src/storage/v3/shard_worker.hpp +++ b/src/storage/v3/shard_worker.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -174,12 +174,12 @@ class ShardWorker { Time next_for_uuid = rsm.Cron(); // Check if shard should split - if (const auto split_info = rsm.ShouldSplit(); split_info) { - // Request split from coordinator - // split_point => middle pk - // shard_id => uuid - // shard_version => - } + // if (const auto split_info = rsm.ShouldSplit(); split_info) { + // Request split from coordinator + // split_point => middle pk + // shard_id => uuid + // shard_version => + // } cron_schedule_.pop(); cron_schedule_.push(std::make_pair(next_for_uuid, uuid)); diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 229e071b7..e0ed59290 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -31,6 +31,16 @@ struct CommitInfo { }; struct Transaction { + Transaction(coordinator::Hlc start_timestamp, CommitInfo commit_info, uint64_t command_id, + const std::list<Delta> &deltas, bool must_abort, bool is_aborted, IsolationLevel isolation_level) + : start_timestamp{start_timestamp}, + commit_info{std::make_unique<CommitInfo>(commit_info)}, + command_id(command_id), + deltas(CopyDeltas(deltas)), + must_abort(must_abort), + is_aborted(is_aborted), + isolation_level(isolation_level){}; + Transaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level) : start_timestamp(start_timestamp), commit_info(std::make_unique<CommitInfo>(CommitInfo{false, {start_timestamp}})), @@ -54,6 +64,12 @@ struct Transaction { ~Transaction() {} + std::list<Delta> CopyDeltas(const std::list<Delta> &deltas) const { return std::list<Delta>{}; } + + Transaction Clone() const { + return {start_timestamp, *commit_info, command_id, deltas, must_abort, is_aborted, isolation_level}; + } + coordinator::Hlc start_timestamp; std::unique_ptr<CommitInfo> commit_info; uint64_t command_id; From 9dc16deae2ff9e6953631f0548a4816877fed85e Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 10 Jan 2023 16:42:45 +0100 Subject: [PATCH 07/79] Copy deltas --- src/storage/v3/transaction.hpp | 52 ++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index e0ed59290..6b208877a 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -31,12 +31,12 @@ struct CommitInfo { }; struct Transaction { - Transaction(coordinator::Hlc start_timestamp, CommitInfo commit_info, uint64_t command_id, - const std::list<Delta> &deltas, bool must_abort, bool is_aborted, 
IsolationLevel isolation_level) + Transaction(coordinator::Hlc start_timestamp, CommitInfo new_commit_info, uint64_t command_id, bool must_abort, + bool is_aborted, IsolationLevel isolation_level) : start_timestamp{start_timestamp}, - commit_info{std::make_unique<CommitInfo>(commit_info)}, + commit_info{std::make_unique<CommitInfo>(new_commit_info)}, command_id(command_id), - deltas(CopyDeltas(deltas)), + deltas(CopyDeltas(commit_info.get())), must_abort(must_abort), is_aborted(is_aborted), isolation_level(isolation_level){}; @@ -64,10 +64,50 @@ struct Transaction { ~Transaction() {} - std::list<Delta> CopyDeltas(const std::list<Delta> &deltas) const { return std::list<Delta>{}; } + std::list<Delta> CopyDeltas(CommitInfo *commit_info) const { + std::list<Delta> copied_deltas; + for (const auto &delta : deltas) { + switch (delta.action) { + case Delta::Action::DELETE_OBJECT: + copied_deltas.emplace_back(Delta::DeleteObjectTag{}, commit_info, command_id); + break; + case Delta::Action::RECREATE_OBJECT: + copied_deltas.emplace_back(Delta::RecreateObjectTag{}, commit_info, command_id); + break; + case Delta::Action::ADD_LABEL: + copied_deltas.emplace_back(Delta::AddLabelTag{}, delta.label, commit_info, command_id); + break; + case Delta::Action::REMOVE_LABEL: + copied_deltas.emplace_back(Delta::RemoveLabelTag{}, delta.label, commit_info, command_id); + break; + case Delta::Action::ADD_IN_EDGE: + copied_deltas.emplace_back(Delta::AddInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, + delta.vertex_edge.edge, commit_info, command_id); + break; + case Delta::Action::ADD_OUT_EDGE: + copied_deltas.emplace_back(Delta::AddOutEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, + delta.vertex_edge.edge, commit_info, command_id); + break; + case Delta::Action::REMOVE_IN_EDGE: + copied_deltas.emplace_back(Delta::RemoveInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, + delta.vertex_edge.edge, commit_info, command_id); + 
break; + case Delta::Action::REMOVE_OUT_EDGE: + copied_deltas.emplace_back(Delta::RemoveOutEdgeTag{}, delta.vertex_edge.edge_type, + delta.vertex_edge.vertex_id, delta.vertex_edge.edge, commit_info, command_id); + break; + case Delta::Action::SET_PROPERTY: + copied_deltas.emplace_back(Delta::SetPropertyTag{}, delta.property.key, delta.property.value, commit_info, + command_id); + break; + } + } + return copied_deltas; + } + // This does not solve the whole problem of copying deltas Transaction Clone() const { - return {start_timestamp, *commit_info, command_id, deltas, must_abort, is_aborted, isolation_level}; + return {start_timestamp, *commit_info, command_id, must_abort, is_aborted, isolation_level}; } coordinator::Hlc start_timestamp; From 050d5efae73dbdb1537b7318276e6ff25825bb48 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 11 Jan 2023 14:16:41 +0100 Subject: [PATCH 08/79] Align deltas --- src/storage/v3/shard.cpp | 50 +++++++++++++++++++++++++++++----- src/storage/v3/shard.hpp | 8 ++++-- src/storage/v3/transaction.hpp | 1 + 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 86529ad5a..4ea89e6aa 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1107,13 +1107,49 @@ std::optional<EdgeContainer> Shard::CollectEdges(std::set<uint64_t> &collected_t return splitted_edges; } -std::map<uint64_t, std::unique_ptr<Transaction>> Shard::CollectTransactions( - const std::set<uint64_t> &collected_transactions_start_id) { - std::map<uint64_t, std::unique_ptr<Transaction>> transactions; - // for (const auto commit_start : collected_transactions_start_id) { - // transactions.insert( - // {commit_start, std::make_unique<Transaction>(*start_logical_id_to_transaction_.at(commit_start))}); - // } +void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, Transaction> &cloned_transactions) { + // 
Align next and prev in deltas + // NOTE It is important that the order of delta lists is in same order + auto delta_it = transaction.deltas.begin(); + auto cloned_delta_it = cloned_transaction.deltas.begin(); + while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { + MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); + // // We need to set prev and next on cloned_delta + // auto prev = delta_it->prev; + + // Find appropriate prev and delta->next for cloned deltas + auto *next = delta_it->next; + auto *cloned_next = &*cloned_delta_it; + while (next != nullptr) { + // No need to check we can be sure that it exists + cloned_next->next = &*std::ranges::find_if(cloned_transactions.at(next->command_id).deltas, + [next](const auto &delta) { return delta.uuid == next->uuid; }); + cloned_next = cloned_next->next; + next = next->next; + } + + ++delta_it; + ++cloned_delta_it; + } + MG_ASSERT(delta_it == transaction.deltas.end() && cloned_delta_it == cloned_transaction.deltas.end(), + "Both iterators must be exhausted!"); +} + +void Shard::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions) { + for (auto &[commit_start, cloned_transaction] : cloned_transactions) { + AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions); + } +} + +std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) { + std::map<uint64_t, Transaction> transactions; + for (const auto commit_start : collected_transactions_start_id) { + transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); + } + // It is necessary to clone all the transactions first so we have new addresses + // for deltas, before doing alignment + AlignClonedTransactions(transactions); return transactions; } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp 
index cab92eae9..502278564 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -398,8 +398,12 @@ class Shard final { void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; - std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( - const std::set<uint64_t> &collected_transactions_start_id); + void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, Transaction> &cloned_transactions); + + void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions); + + std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id); VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 6b208877a..dba65067e 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -65,6 +65,7 @@ struct Transaction { ~Transaction() {} std::list<Delta> CopyDeltas(CommitInfo *commit_info) const { + // TODO This does not solve the next and prev deltas that also need to be set std::list<Delta> copied_deltas; for (const auto &delta : deltas) { switch (delta.action) { From 282725fd0f9258e34a07d22f29cc671870aec4ff Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 12 Jan 2023 14:23:19 +0100 Subject: [PATCH 09/79] Adjust pointers --- src/storage/v3/shard.cpp | 77 +++++++++++++++++++++++++--------------- src/storage/v3/shard.hpp | 9 +++-- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 4ea89e6aa..856906385 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1108,25 +1108,50 @@ std::optional<EdgeContainer> Shard::CollectEdges(std::set<uint64_t> &collected_t } void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const 
Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions) { + std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { // Align next and prev in deltas // NOTE It is important that the order of delta lists is in same order auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); - // // We need to set prev and next on cloned_delta - // auto prev = delta_it->prev; - // Find appropriate prev and delta->next for cloned deltas - auto *next = delta_it->next; - auto *cloned_next = &*cloned_delta_it; - while (next != nullptr) { - // No need to check we can be sure that it exists - cloned_next->next = &*std::ranges::find_if(cloned_transactions.at(next->command_id).deltas, - [next](const auto &delta) { return delta.uuid == next->uuid; }); - cloned_next = cloned_next->next; - next = next->next; + // auto *prev = &delta_it->prev; + // auto *cloned_prev = &cloned_delta_it->prev; + + auto *delta = &*delta_it; + auto *cloned_delta = &*cloned_delta_it; + while (delta != nullptr) { + // Align delta + cloned_delta->next = &*std::ranges::find_if(cloned_transactions.at(delta->command_id).deltas, + [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + // Align prev ptr + auto ptr = delta->prev.Get(); + switch (ptr.type) { + case PreviousPtr::Type::NULLPTR: { + // noop + break; + } + case PreviousPtr::Type::DELTA: { + cloned_delta->prev.Set(ptr.delta); + break; + } + case PreviousPtr::Type::VERTEX: { + auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); + cloned_delta->prev.Set(cloned_vertex); + break; + } + case PreviousPtr::Type::EDGE: { + // TODO Case when there are no properties on edge is not handled + auto *cloned_edge = 
&*cloned_edges.find(ptr.edge->gid); + cloned_delta->prev.Set(&cloned_edge->second); + break; + } + }; + + cloned_delta = cloned_delta->next; + delta = delta->next; } ++delta_it; @@ -1136,20 +1161,24 @@ void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transa "Both iterators must be exhausted!"); } -void Shard::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions) { +void Shard::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions); + AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, + cloned_vertices, cloned_edges); } } -std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id) { +std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, + VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges) { std::map<uint64_t, Transaction> transactions; for (const auto commit_start : collected_transactions_start_id) { transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); } // It is necessary to clone all the transactions first so we have new addresses - // for deltas, before doing alignment - AlignClonedTransactions(transactions); + // for deltas, before doing alignment of deltas and prev_ptr + AlignClonedTransactions(transactions, cloned_vertices, cloned_edges); return transactions; } @@ -1158,21 +1187,11 @@ SplitData Shard::PerformSplit(const PrimaryKey &split_key) { std::set<uint64_t> collected_transactions_start_id; data.vertices = CollectVertices(collected_transactions_start_id, split_key); data.edges = 
CollectEdges(collected_transactions_start_id, data.vertices, split_key); + data.transactions = CollectTransactions(collected_transactions_start_id, data.vertices, *data.edges); + // TODO indices wont work since timestamp cannot be replicated // data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; - // data.transactions = CollectTransactions(collected_transactions_start_id); - - // Update delta addresses with the new addresses - // for (auto &vertex : data.vertices) { - // AdjustSplittedDataDeltas(vertex.second, data.transactions); - // } - // if (data.edges) { - // for (auto &edge : data.edges) { - // AdjustSplittedDataDeltas(edge, data.transactions); - // } - // } - return data; } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 502278564..5a92253f4 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -399,11 +399,14 @@ class Shard final { void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions); + std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); - void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions); + void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); - std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id); + std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); From a97d9945153a2af39e076ded76ade23c23ef92eb 
Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 12 Jan 2023 16:18:16 +0100 Subject: [PATCH 10/79] Fix vertice split --- src/storage/v3/delta.hpp | 5 +- src/storage/v3/shard.cpp | 6 +- tests/unit/CMakeLists.txt | 3 + tests/unit/storage_v3_shard_split.cpp | 87 +++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 tests/unit/storage_v3_shard_split.cpp diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 69da63e77..39c9975f6 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -133,7 +133,8 @@ inline bool operator!=(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer struct Delta { // Needed for splits - boost::uuids::uuid uuid; + // TODO Replace this with int identifier + boost::uuids::uuid uuid{boost::uuids::uuid()}; enum class Action { // Used for both Vertex and Edge diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 856906385..bf89ed58c 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1066,12 +1066,14 @@ void Shard::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delt VertexContainer Shard::CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key) { VertexContainer splitted_data; - auto split_key_it = vertices_.find(split_key); - for (; split_key_it != vertices_.end(); split_key_it++) { + auto split_key_it = vertices_.find(split_key); + while (split_key_it != vertices_.end()) { // Go through deltas and pick up transactions start_id ScanDeltas(collected_transactions_start_id, split_key_it->second.delta); + auto next_it = std::next(split_key_it); 
splitted_data.insert(vertices_.extract(split_key_it->first)); + split_key_it = next_it; } return splitted_data; } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 5bfa26afd..cc23a3e5e 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -291,6 +291,9 @@ target_link_libraries(${test_prefix}storage_v3_expr mg-storage-v3 mg-expr) add_unit_test(storage_v3_schema.cpp) target_link_libraries(${test_prefix}storage_v3_schema mg-storage-v3) +add_unit_test(storage_v3_shard_split.cpp) +target_link_libraries(${test_prefix}storage_v3_shard_split mg-storage-v3 mg-query-v2) + # Test mg-query-v2 # These are commented out because of the new TypedValue in the query engine # add_unit_test(query_v2_interpreter.cpp ${CMAKE_SOURCE_DIR}/src/glue/v2/communication.cpp) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp new file mode 100644 index 000000000..d1bafb65c --- /dev/null +++ b/tests/unit/storage_v3_shard_split.cpp @@ -0,0 +1,87 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include <gtest/gtest.h> +#include <cstdint> + +#include "query/v2/requests.hpp" +#include "storage/v3/id_types.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/property_value.hpp" +#include "storage/v3/shard.hpp" +#include "storage/v3/vertex_id.hpp" + +namespace memgraph::storage::v3::tests { + +class ShardSplitTest : public testing::Test { + protected: + void SetUp() override { storage.StoreMapping({{1, "label"}, {2, "property"}, {3, "edge_property"}}); } + + const PropertyId primary_property{PropertyId::FromUint(2)}; + std::vector<storage::v3::SchemaProperty> schema_property_vector = { + storage::v3::SchemaProperty{primary_property, common::SchemaType::INT}}; + const std::vector<PropertyValue> min_pk{PropertyValue{0}}; + const LabelId primary_label{LabelId::FromUint(1)}; + const EdgeTypeId edge_type_id{EdgeTypeId::FromUint(3)}; + Shard storage{primary_label, min_pk, std::nullopt /*max_primary_key*/, schema_property_vector}; + + coordinator::Hlc last_hlc{0, io::Time{}}; + + coordinator::Hlc GetNextHlc() { + ++last_hlc.logical_id; + last_hlc.coordinator_wall_clock += std::chrono::seconds(1); + return last_hlc; + } +}; + +TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + acc.Commit(GetNextHlc()); + storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + EXPECT_EQ(splitted_data.vertices.size(), 3); + 
EXPECT_EQ(splitted_data.edges->size(), 0); + EXPECT_EQ(splitted_data.transactions.size(), 0); +} + +TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + + acc.Commit(GetNextHlc()); + storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); +} + +} // namespace memgraph::storage::v3::tests From 791972a6b841d7f77df6c7e37948037950195692 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 12 Jan 2023 16:21:46 +0100 Subject: [PATCH 11/79] Fix edge split test --- tests/unit/storage_v3_shard_split.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index d1bafb65c..716734433 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ 
-72,16 +72,19 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) .HasError()); EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(0)) + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) .HasError()); EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(0)) + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) .HasError()); acc.Commit(GetNextHlc()); storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 0); } } // namespace memgraph::storage::v3::tests From db13ee96b6ee8b33d9bef0026f6bdf5ee9933c11 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 12 Jan 2023 17:02:27 +0100 Subject: [PATCH 12/79] Fix transaction split --- src/storage/v3/shard.cpp | 11 +++++++---- src/storage/v3/transaction.hpp | 9 +++++---- tests/unit/storage_v3_shard_split.cpp | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index bf89ed58c..803678c8a 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1058,7 +1058,7 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { void Shard::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const { while (delta != nullptr) { - collected_transactions_start_id.insert(delta->command_id); + collected_transactions_start_id.insert(delta->commit_info->start_or_commit_timestamp.logical_id); 
delta = delta->next; } } @@ -1126,8 +1126,9 @@ void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transa auto *cloned_delta = &*cloned_delta_it; while (delta != nullptr) { // Align delta - cloned_delta->next = &*std::ranges::find_if(cloned_transactions.at(delta->command_id).deltas, - [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + cloned_delta->next = &*std::ranges::find_if( + cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, + [delta](const auto &elem) { return elem.uuid == delta->uuid; }); // Align prev ptr auto ptr = delta->prev.Get(); switch (ptr.type) { @@ -1140,6 +1141,8 @@ void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transa break; } case PreviousPtr::Type::VERTEX: { + // What if the vertex is already moved to garbage collection... + // Make test when you have deleted vertex auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); cloned_delta->prev.Set(cloned_vertex); break; @@ -1176,7 +1179,7 @@ std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64 EdgeContainer &cloned_edges) { std::map<uint64_t, Transaction> transactions; for (const auto commit_start : collected_transactions_start_id) { - transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); + transactions.insert({commit_start, start_logical_id_to_transaction_.at(commit_start)->Clone()}); } // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index dba65067e..6c396d969 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -31,12 +31,12 @@ struct CommitInfo { }; struct Transaction { - Transaction(coordinator::Hlc start_timestamp, CommitInfo new_commit_info, uint64_t command_id, bool must_abort, - bool is_aborted, IsolationLevel 
isolation_level) + Transaction(coordinator::Hlc start_timestamp, CommitInfo new_commit_info, std::list<Delta> deltas, + uint64_t command_id, bool must_abort, bool is_aborted, IsolationLevel isolation_level) : start_timestamp{start_timestamp}, commit_info{std::make_unique<CommitInfo>(new_commit_info)}, command_id(command_id), - deltas(CopyDeltas(commit_info.get())), + deltas(std::move(deltas)), must_abort(must_abort), is_aborted(is_aborted), isolation_level(isolation_level){}; @@ -108,7 +108,8 @@ struct Transaction { // This does not solve the whole problem of copying deltas Transaction Clone() const { - return {start_timestamp, *commit_info, command_id, must_abort, is_aborted, isolation_level}; + return {start_timestamp, *commit_info, CopyDeltas(commit_info.get()), command_id, must_abort, + is_aborted, isolation_level}; } coordinator::Hlc start_timestamp; diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 716734433..ebfca2921 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -87,4 +87,29 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.transactions.size(), 0); } +TEST_F(ShardSplitTest, TestBasicSplit) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + 
EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) + .HasError()); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 1); +} + } // namespace memgraph::storage::v3::tests From 6de683d7f9b6367a2fcb6c24a3cf0db6e0a0bde3 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 16 Jan 2023 09:51:06 +0100 Subject: [PATCH 13/79] Ignore commited transactions --- src/storage/v3/shard.cpp | 5 ++++- tests/unit/storage_v3_shard_split.cpp | 29 ++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 803678c8a..d4191b4bb 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1179,7 +1179,10 @@ std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64 EdgeContainer &cloned_edges) { std::map<uint64_t, Transaction> transactions; for (const auto commit_start : collected_transactions_start_id) { - transactions.insert({commit_start, start_logical_id_to_transaction_.at(commit_start)->Clone()}); + // If it does not contain then the transaction has commited, and we ignore it + if (start_logical_id_to_transaction_.contains(commit_start)) { + transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); + } } // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 
ebfca2921..1cca21d62 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -87,7 +87,7 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.transactions.size(), 0); } -TEST_F(ShardSplitTest, TestBasicSplit) { +TEST_F(ShardSplitTest, TestBasicSplitBeforeCommit) { auto acc = storage.Access(GetNextHlc()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); @@ -112,4 +112,31 @@ TEST_F(ShardSplitTest, TestBasicSplit) { EXPECT_EQ(splitted_data.transactions.size(), 1); } +TEST_F(ShardSplitTest, TestBasicSplitAfterCommit) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) + .HasError()); + + acc.Commit(GetNextHlc()); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + EXPECT_EQ(splitted_data.vertices.size(), 3); + 
EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 0); +} + } // namespace memgraph::storage::v3::tests From ff34bcf295d7897cd34170a006560951c29e4670 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 16 Jan 2023 13:44:57 +0100 Subject: [PATCH 14/79] Fix deltas from commited transacations --- src/storage/v3/shard.cpp | 16 ++++++++++----- tests/unit/storage_v3_shard_split.cpp | 29 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index d4191b4bb..ee205ad94 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1122,13 +1122,19 @@ void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transa // auto *prev = &delta_it->prev; // auto *cloned_prev = &cloned_delta_it->prev; - auto *delta = &*delta_it; + const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta != nullptr) { - // Align delta - cloned_delta->next = &*std::ranges::find_if( - cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, - [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + // Align delta, while ignoring deltas whose transactions have commited, + // or aborted + if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { + cloned_delta->next = &*std::ranges::find_if( + cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, + [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + } else { + delta = delta->next; + continue; + } // Align prev ptr auto ptr = delta->prev.Get(); switch (ptr.type) { diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 1cca21d62..54068955d 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -139,4 +139,33 @@ TEST_F(ShardSplitTest, 
TestBasicSplitAfterCommit) { EXPECT_EQ(splitted_data.transactions.size(), 0); } +TEST_F(ShardSplitTest, TestBasicSplitAfterCommit2) { + { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + acc.Commit(GetNextHlc()); + } + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) + .HasError()); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 1); +} + } // namespace memgraph::storage::v3::tests From 348b45360b257cf903bd35bf621a5105ebca9d2b Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 16 Jan 2023 14:54:06 +0100 Subject: [PATCH 15/79] Extract splitter --- src/storage/v3/CMakeLists.txt | 1 + src/storage/v3/shard.cpp | 156 +---------------------------- src/storage/v3/shard.hpp | 49 +-------- src/storage/v3/splitter.cpp | 183 ++++++++++++++++++++++++++++++++++ 
src/storage/v3/splitter.hpp | 70 +++++++++++++ 5 files changed, 259 insertions(+), 200 deletions(-) create mode 100644 src/storage/v3/splitter.cpp create mode 100644 src/storage/v3/splitter.hpp diff --git a/src/storage/v3/CMakeLists.txt b/src/storage/v3/CMakeLists.txt index b3a3d68a9..05f86a5dd 100644 --- a/src/storage/v3/CMakeLists.txt +++ b/src/storage/v3/CMakeLists.txt @@ -18,6 +18,7 @@ set(storage_v3_src_files bindings/typed_value.cpp expr.cpp vertex.cpp + splitter.cpp request_helper.cpp) # ###################### diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 932b0986d..7e99beaea 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -332,7 +332,8 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, - config_{config} { + config_{config}, + shard_splitter_(vertices_, edges_, start_logical_id_to_transaction_, config_) { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } @@ -1056,158 +1057,7 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { return std::nullopt; } -void Shard::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const { - while (delta != nullptr) { - collected_transactions_start_id.insert(delta->commit_info->start_or_commit_timestamp.logical_id); - delta = delta->next; - } -} - -VertexContainer Shard::CollectVertices(std::set<uint64_t> &collected_transactions_start_id, - const PrimaryKey &split_key) { - VertexContainer splitted_data; - - auto split_key_it = vertices_.find(split_key); - while (split_key_it != vertices_.end()) { - // Go through deltas and pick up transactions start_id - ScanDeltas(collected_transactions_start_id, split_key_it->second.delta); - auto next_it = std::next(split_key_it); - splitted_data.insert(vertices_.extract(split_key_it->first)); - 
split_key_it = next_it; - } - return splitted_data; -} - -std::optional<EdgeContainer> Shard::CollectEdges(std::set<uint64_t> &collected_transactions_start_id, - const VertexContainer &split_vertices, const PrimaryKey &split_key) { - if (!config_.items.properties_on_edges) { - return std::nullopt; - } - EdgeContainer splitted_edges; - const auto split_edges_from_vertex = [&](const auto &edges_ref) { - // This is safe since if properties_on_edges is true, the this must be a - // ptr - for (const auto &edge_ref : edges_ref) { - auto *edge = std::get<2>(edge_ref).ptr; - const auto &other_vtx = std::get<1>(edge_ref); - ScanDeltas(collected_transactions_start_id, edge->delta); - // Check if src and dest edge are both on splitted shard - // so we know if we should remove orphan edge - if (other_vtx.primary_key >= split_key) { - // Remove edge from shard - splitted_edges.insert(edges_.extract(edge->gid)); - } else { - splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); - } - } - }; - - for (const auto &vertex : split_vertices) { - split_edges_from_vertex(vertex.second.in_edges); - split_edges_from_vertex(vertex.second.out_edges); - } - return splitted_edges; -} - -void Shard::AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { - // Align next and prev in deltas - // NOTE It is important that the order of delta lists is in same order - auto delta_it = transaction.deltas.begin(); - auto cloned_delta_it = cloned_transaction.deltas.begin(); - while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { - MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); - // Find appropriate prev and delta->next for cloned deltas - // auto *prev = &delta_it->prev; - // auto *cloned_prev = &cloned_delta_it->prev; - - const auto *delta = 
&*delta_it; - auto *cloned_delta = &*cloned_delta_it; - while (delta != nullptr) { - // Align delta, while ignoring deltas whose transactions have commited, - // or aborted - if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { - cloned_delta->next = &*std::ranges::find_if( - cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, - [delta](const auto &elem) { return elem.uuid == delta->uuid; }); - } else { - delta = delta->next; - continue; - } - // Align prev ptr - auto ptr = delta->prev.Get(); - switch (ptr.type) { - case PreviousPtr::Type::NULLPTR: { - // noop - break; - } - case PreviousPtr::Type::DELTA: { - cloned_delta->prev.Set(ptr.delta); - break; - } - case PreviousPtr::Type::VERTEX: { - // What if the vertex is already moved to garbage collection... - // Make test when you have deleted vertex - auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); - cloned_delta->prev.Set(cloned_vertex); - break; - } - case PreviousPtr::Type::EDGE: { - // TODO Case when there are no properties on edge is not handled - auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); - cloned_delta->prev.Set(&cloned_edge->second); - break; - } - }; - - cloned_delta = cloned_delta->next; - delta = delta->next; - } - - ++delta_it; - ++cloned_delta_it; - } - MG_ASSERT(delta_it == transaction.deltas.end() && cloned_delta_it == cloned_transaction.deltas.end(), - "Both iterators must be exhausted!"); -} - -void Shard::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { - for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, - cloned_vertices, cloned_edges); - } -} - -std::map<uint64_t, Transaction> Shard::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, 
- VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges) { - std::map<uint64_t, Transaction> transactions; - for (const auto commit_start : collected_transactions_start_id) { - // If it does not contain then the transaction has commited, and we ignore it - if (start_logical_id_to_transaction_.contains(commit_start)) { - transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); - } - } - // It is necessary to clone all the transactions first so we have new addresses - // for deltas, before doing alignment of deltas and prev_ptr - AlignClonedTransactions(transactions, cloned_vertices, cloned_edges); - return transactions; -} - -SplitData Shard::PerformSplit(const PrimaryKey &split_key) { - SplitData data; - std::set<uint64_t> collected_transactions_start_id; - data.vertices = CollectVertices(collected_transactions_start_id, split_key); - data.edges = CollectEdges(collected_transactions_start_id, data.vertices, split_key); - data.transactions = CollectTransactions(collected_transactions_start_id, data.vertices, *data.edges); - - // TODO indices wont work since timestamp cannot be replicated - // data.indices_info = {indices_.label_index.ListIndices(), indices_.label_property_index.ListIndices()}; - - return data; -} +SplitData Shard::PerformSplit(const PrimaryKey &split_key) { return shard_splitter_.SplitShard(split_key); } bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { return vertex_id.primary_label == primary_label_ && vertex_id.primary_key >= min_primary_key_ && diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 3079f89ce..5b18eeda2 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -37,6 +37,7 @@ #include "storage/v3/result.hpp" #include "storage/v3/schema_validator.hpp" #include "storage/v3/schemas.hpp" +#include "storage/v3/splitter.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" #include "storage/v3/vertex_accessor.hpp" @@ 
-179,15 +180,6 @@ struct SplitInfo { PrimaryKey split_point; }; -// If edge properties-on-edges is false then we don't need to send edges but -// only vertices, since they will contain those edges -struct SplitData { - VertexContainer vertices; - std::optional<EdgeContainer> edges; - IndicesInfo indices_info; - std::map<uint64_t, Transaction> transactions; -}; - /// Structure used to return information about the storage. struct StorageInfo { uint64_t vertex_count; @@ -379,44 +371,6 @@ class Shard final { SplitData PerformSplit(const PrimaryKey &split_key); private: - template <typename TObj> - requires utils::SameAsAnyOf<TObj, Edge, VertexData> - void AdjustSplittedDataDeltas(TObj &delta_holder, const std::map<int64_t, Transaction> &transactions) { - auto *delta_chain = delta_holder.delta; - Delta *new_delta_chain{nullptr}; - while (delta_chain != nullptr) { - auto &transaction = transactions.at(delta_chain->command_id); - // This is the address of corresponding delta - const auto transaction_delta_it = std::ranges::find_if( - transaction->deltas, [delta_uuid = delta_chain->uuid](const auto &elem) { return elem.uuid == delta_uuid; }); - // Add this delta to the new chain - if (new_delta_chain == nullptr) { - new_delta_chain = &*transaction_delta_it; - } else { - new_delta_chain->next = &*transaction_delta_it; - } - delta_chain = delta_chain->next; - } - delta_holder.delta = new_delta_chain; - } - - void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) const; - - void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); - - void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); - - std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, 
- VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); - - VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); - - std::optional<EdgeContainer> CollectEdges(std::set<uint64_t> &collected_transactions_start_id, - const VertexContainer &split_vertices, const PrimaryKey &split_key); - Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); uint64_t CommitTimestamp(std::optional<uint64_t> desired_commit_timestamp = {}); @@ -456,6 +410,7 @@ class Shard final { // Holds all of the (in progress, committed and aborted) transactions that are read or write to this shard, but // haven't been cleaned up yet std::map<uint64_t, std::unique_ptr<Transaction>> start_logical_id_to_transaction_{}; + Splitter shard_splitter_; bool has_any_transaction_aborted_since_last_gc{false}; }; diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp new file mode 100644 index 000000000..8969bfa08 --- /dev/null +++ b/src/storage/v3/splitter.cpp @@ -0,0 +1,183 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "storage/v3/splitter.hpp" + +#include <map> +#include <memory> +#include <set> + +#include "storage/v3/config.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/transaction.hpp" + +namespace memgraph::storage::v3 { + +Splitter::Splitter(VertexContainer &vertices, EdgeContainer &edges, + std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Config &config) + : vertices_(vertices), + edges_(edges), + start_logical_id_to_transaction_(start_logical_id_to_transaction), + config_(config) {} + +SplitData Splitter::SplitShard(const PrimaryKey &split_key) { + SplitData data; + std::set<uint64_t> collected_transactions_start_id; + data.vertices = CollectVertices(collected_transactions_start_id, split_key); + data.edges = CollectEdges(collected_transactions_start_id, data.vertices, split_key); + data.transactions = CollectTransactions(collected_transactions_start_id, data.vertices, *data.edges); + // TODO Indices + + return data; +} + +void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) { + while (delta != nullptr) { + collected_transactions_start_id.insert(delta->commit_info->start_or_commit_timestamp.logical_id); + delta = delta->next; + } +} + +VertexContainer Splitter::CollectVertices(std::set<uint64_t> &collected_transactions_start_id, + const PrimaryKey &split_key) { + VertexContainer splitted_data; + + auto split_key_it = vertices_.find(split_key); + while (split_key_it != vertices_.end()) { + // Go through deltas and pick up transactions start_id + ScanDeltas(collected_transactions_start_id, split_key_it->second.delta); + auto next_it = std::next(split_key_it); + splitted_data.insert(vertices_.extract(split_key_it->first)); + split_key_it = next_it; + } + return splitted_data; +} + +std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collected_transactions_start_id, + const VertexContainer &split_vertices, + const PrimaryKey &split_key) { + if 
(!config_.items.properties_on_edges) { + return std::nullopt; + } + EdgeContainer splitted_edges; + const auto split_edges_from_vertex = [&](const auto &edges_ref) { + // This is safe since if properties_on_edges is true, the this must be a + // ptr + for (const auto &edge_ref : edges_ref) { + auto *edge = std::get<2>(edge_ref).ptr; + const auto &other_vtx = std::get<1>(edge_ref); + ScanDeltas(collected_transactions_start_id, edge->delta); + // Check if src and dest edge are both on splitted shard + // so we know if we should remove orphan edge + if (other_vtx.primary_key >= split_key) { + // Remove edge from shard + splitted_edges.insert(edges_.extract(edge->gid)); + } else { + splitted_edges.insert({edge->gid, Edge{edge->gid, edge->delta}}); + } + } + }; + + for (const auto &vertex : split_vertices) { + split_edges_from_vertex(vertex.second.in_edges); + split_edges_from_vertex(vertex.second.out_edges); + } + return splitted_edges; +} + +std::map<uint64_t, Transaction> Splitter::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, + VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges) { + std::map<uint64_t, Transaction> transactions; + for (const auto commit_start : collected_transactions_start_id) { + // If it does not contain then the transaction has commited, and we ignore it + if (start_logical_id_to_transaction_.contains(commit_start)) { + transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); + } + } + // It is necessary to clone all the transactions first so we have new addresses + // for deltas, before doing alignment of deltas and prev_ptr + AlignClonedTransactions(transactions, cloned_vertices, cloned_edges); + return transactions; +} + +void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + // Align next and 
prev in deltas + // NOTE It is important that the order of delta lists is in same order + auto delta_it = transaction.deltas.begin(); + auto cloned_delta_it = cloned_transaction.deltas.begin(); + while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { + MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); + // Find appropriate prev and delta->next for cloned deltas + // auto *prev = &delta_it->prev; + // auto *cloned_prev = &cloned_delta_it->prev; + + const auto *delta = &*delta_it; + auto *cloned_delta = &*cloned_delta_it; + while (delta != nullptr) { + // Align delta, while ignoring deltas whose transactions have commited, + // or aborted + if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { + cloned_delta->next = &*std::ranges::find_if( + cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, + [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + } else { + delta = delta->next; + continue; + } + // Align prev ptr + auto ptr = delta->prev.Get(); + switch (ptr.type) { + case PreviousPtr::Type::NULLPTR: { + // noop + break; + } + case PreviousPtr::Type::DELTA: { + cloned_delta->prev.Set(ptr.delta); + break; + } + case PreviousPtr::Type::VERTEX: { + // What if the vertex is already moved to garbage collection... 
+ // Make test when you have deleted vertex + auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); + cloned_delta->prev.Set(cloned_vertex); + break; + } + case PreviousPtr::Type::EDGE: { + // TODO Case when there are no properties on edge is not handled + auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); + cloned_delta->prev.Set(&cloned_edge->second); + break; + } + }; + + cloned_delta = cloned_delta->next; + delta = delta->next; + } + + ++delta_it; + ++cloned_delta_it; + } + MG_ASSERT(delta_it == transaction.deltas.end() && cloned_delta_it == cloned_transaction.deltas.end(), + "Both iterators must be exhausted!"); +} + +void Splitter::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + for (auto &[commit_start, cloned_transaction] : cloned_transactions) { + AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, + cloned_vertices, cloned_edges); + } +} + +} // namespace memgraph::storage::v3 diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp new file mode 100644 index 000000000..bb5909ef1 --- /dev/null +++ b/src/storage/v3/splitter.hpp @@ -0,0 +1,70 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include <map> +#include <memory> +#include <optional> +#include <set> + +#include "storage/v3/config.hpp" +#include "storage/v3/delta.hpp" +#include "storage/v3/edge.hpp" +#include "storage/v3/transaction.hpp" +#include "storage/v3/vertex.hpp" + +namespace memgraph::storage::v3 { + +// If edge properties-on-edges is false then we don't need to send edges but +// only vertices, since they will contain those edges +struct SplitData { + VertexContainer vertices; + std::optional<EdgeContainer> edges; + std::map<uint64_t, Transaction> transactions; +}; + +class Splitter final { + public: + Splitter(VertexContainer &vertices, EdgeContainer &edges, + std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Config &config); + + Splitter(const Splitter &) = delete; + Splitter(Splitter &&) noexcept = delete; + Splitter &operator=(const Splitter &) = delete; + Splitter operator=(Splitter &&) noexcept = delete; + ~Splitter() = default; + + SplitData SplitShard(const PrimaryKey &split_key); + + private: + static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); + + void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); + + void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); + + std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + + VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); + + std::optional<EdgeContainer> CollectEdges(std::set<uint64_t> &collected_transactions_start_id, + const VertexContainer &split_vertices, const PrimaryKey &split_key); + + VertexContainer 
&vertices_; + EdgeContainer &edges_; + std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction_; + Config &config_; +}; + +} // namespace memgraph::storage::v3 From 859dfb28ebce69bb44a7a3a5017cfb235489ad36 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 18 Jan 2023 13:41:54 +0100 Subject: [PATCH 16/79] Add index split --- src/storage/v3/indices.hpp | 16 ++++++- src/storage/v3/shard.cpp | 2 +- src/storage/v3/splitter.cpp | 96 ++++++++++++++++++++++++++++++++----- src/storage/v3/splitter.hpp | 37 +++++++++----- 4 files changed, 125 insertions(+), 26 deletions(-) diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 4c4e70bdf..394d98ce5 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -30,6 +30,7 @@ namespace memgraph::storage::v3 { struct Indices; class LabelIndex { + public: struct Entry { Vertex *vertex; uint64_t timestamp; @@ -40,12 +41,15 @@ class LabelIndex { bool operator==(const Entry &rhs) const { return vertex == rhs.vertex && timestamp == rhs.timestamp; } }; - public: using LabelIndexContainer = std::set<Entry>; LabelIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) : indices_(indices), config_(config), vertex_validator_{&vertex_validator} {} + LabelIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator, + std::map<LabelId, LabelIndexContainer> &data) + : index_{std::move(data)}, indices_(indices), config_(config), vertex_validator_{&vertex_validator} {} + /// @throw std::bad_alloc void UpdateOnAddLabel(LabelId label, Vertex *vertex, const Transaction &tx); @@ -114,6 +118,10 @@ class LabelIndex { void Clear() { index_.clear(); } + [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } + + std::map<LabelId, LabelIndexContainer> &GetIndex() { return index_; } + private: std::map<LabelId, LabelIndexContainer> index_; Indices *indices_; @@ -122,6 +130,7 @@ class LabelIndex { }; 
class LabelPropertyIndex { + public: struct Entry { PropertyValue value; Vertex *vertex; @@ -134,7 +143,6 @@ class LabelPropertyIndex { bool operator==(const PropertyValue &rhs) const; }; - public: using LabelPropertyIndexContainer = std::set<Entry>; LabelPropertyIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) @@ -229,6 +237,10 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } + [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } + + std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndexContainer> &GetIndex() { return index_; } + private: std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndexContainer> index_; Indices *indices_; diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 7e99beaea..3d5d579f1 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -333,7 +333,7 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, config_{config}, - shard_splitter_(vertices_, edges_, start_logical_id_to_transaction_, config_) { + shard_splitter_(vertices_, edges_, start_logical_id_to_transaction_, indices_, config_) { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 8969bfa08..bc076a18b 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -13,28 +13,33 @@ #include <map> #include <memory> +#include <optional> #include <set> #include "storage/v3/config.hpp" +#include "storage/v3/indices.hpp" #include "storage/v3/key_store.hpp" #include "storage/v3/transaction.hpp" +#include "storage/v3/vertex.hpp" namespace memgraph::storage::v3 { Splitter::Splitter(VertexContainer &vertices, EdgeContainer &edges, - std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Config &config) + std::map<uint64_t, 
std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, + Config &config) : vertices_(vertices), edges_(edges), start_logical_id_to_transaction_(start_logical_id_to_transaction), + indices_(indices), config_(config) {} SplitData Splitter::SplitShard(const PrimaryKey &split_key) { SplitData data; + std::set<uint64_t> collected_transactions_start_id; - data.vertices = CollectVertices(collected_transactions_start_id, split_key); + data.vertices = CollectVertices(data, collected_transactions_start_id, split_key); data.edges = CollectEdges(collected_transactions_start_id, data.vertices, split_key); data.transactions = CollectTransactions(collected_transactions_start_id, data.vertices, *data.edges); - // TODO Indices return data; } @@ -46,16 +51,85 @@ void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, D } } -VertexContainer Splitter::CollectVertices(std::set<uint64_t> &collected_transactions_start_id, - const PrimaryKey &split_key) { - VertexContainer splitted_data; +std::map<LabelId, LabelIndex::LabelIndexContainer> Splitter::CollectLabelIndices( + const PrimaryKey &split_key, + std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> &vertex_entry_map) { + if (indices_.label_index.Empty()) { + return {}; + } + // Space O(i * n/2 * 2), i number of indexes, n number of vertices + std::map<LabelId, LabelIndex::LabelIndexContainer> cloned_indices; + for (auto &[label, index] : indices_.label_index.GetIndex()) { + for (const auto &entry : index) { + if (entry.vertex->first > split_key) { + [[maybe_unused]] auto [it, inserted, node] = cloned_indices[label].insert(index.extract(entry)); + vertex_entry_map[label].insert({entry.vertex, &node.value()}); + } + } + } + + return cloned_indices; +} + +std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> +Splitter::CollectLabelPropertyIndices( + const PrimaryKey &split_key, + std::map<std::pair<LabelId, PropertyId>, 
std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> + &vertex_entry_map) { + if (indices_.label_property_index.Empty()) { + return {}; + } + + std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> cloned_indices; + for (auto &[label_prop_pair, index] : indices_.label_property_index.GetIndex()) { + cloned_indices[label_prop_pair] = LabelPropertyIndex::LabelPropertyIndexContainer{}; + for (const auto &entry : index) { + if (entry.vertex->first > split_key) { + // We get this entry + [[maybe_unused]] const auto [it, inserted, node] = cloned_indices[label_prop_pair].insert(index.extract(entry)); + vertex_entry_map[label_prop_pair].insert({entry.vertex, &node.value()}); + } + } + } + + return cloned_indices; +} + +VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, + const PrimaryKey &split_key) { + // Collection of indices is here since it heavily depends on vertices + // Old vertex pointer new entry pointer + std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> label_index_vertex_entry_map; + std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> + label_property_vertex_entry_map; + data.label_indices = CollectLabelIndices(split_key, label_index_vertex_entry_map); + data.label_property_indices = CollectLabelPropertyIndices(split_key, label_property_vertex_entry_map); + const auto update_indices = [](auto &index_map, const auto *old_vertex_ptr, auto &splitted_vertex_it) { + for (auto &[label, vertex_entry_mappings] : index_map) { + auto [it, end] = vertex_entry_mappings.equal_range(old_vertex_ptr); + while (it != end) { + it->second->vertex = &*splitted_vertex_it; + ++it; + } + } + }; + + VertexContainer splitted_data; auto split_key_it = vertices_.find(split_key); while (split_key_it != vertices_.end()) { // Go through deltas and pick up transactions start_id ScanDeltas(collected_transactions_start_id, 
split_key_it->second.delta); + + const auto *old_vertex_ptr = &*split_key_it; auto next_it = std::next(split_key_it); - splitted_data.insert(vertices_.extract(split_key_it->first)); + + const auto &[splitted_vertex_it, inserted, node] = splitted_data.insert(vertices_.extract(split_key_it->first)); + + // Update indices + update_indices(label_index_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); + update_indices(label_property_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); + split_key_it = next_it; } return splitted_data; @@ -68,7 +142,7 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte return std::nullopt; } EdgeContainer splitted_edges; - const auto split_edges_from_vertex = [&](const auto &edges_ref) { + const auto split_vertex_edges = [&](const auto &edges_ref) { // This is safe since if properties_on_edges is true, the this must be a // ptr for (const auto &edge_ref : edges_ref) { @@ -87,8 +161,8 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte }; for (const auto &vertex : split_vertices) { - split_edges_from_vertex(vertex.second.in_edges); - split_edges_from_vertex(vertex.second.out_edges); + split_vertex_edges(vertex.second.in_edges); + split_vertex_edges(vertex.second.out_edges); } return splitted_edges; } @@ -119,8 +193,6 @@ void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Tra while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); // Find appropriate prev and delta->next for cloned deltas - // auto *prev = &delta_it->prev; - // auto *cloned_prev = &cloned_delta_it->prev; const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index bb5909ef1..858071160 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ 
-17,6 +17,7 @@ #include "storage/v3/config.hpp" #include "storage/v3/delta.hpp" #include "storage/v3/edge.hpp" +#include "storage/v3/indices.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" @@ -28,12 +29,15 @@ struct SplitData { VertexContainer vertices; std::optional<EdgeContainer> edges; std::map<uint64_t, Transaction> transactions; + std::map<LabelId, LabelIndex::LabelIndexContainer> label_indices; + std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> label_property_indices; }; class Splitter final { public: Splitter(VertexContainer &vertices, EdgeContainer &edges, - std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Config &config); + std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, + Config &config); Splitter(const Splitter &) = delete; Splitter(Splitter &&) noexcept = delete; @@ -44,26 +48,37 @@ class Splitter final { SplitData SplitShard(const PrimaryKey &split_key); private: - static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); - - void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); - - void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); - std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); - VertexContainer CollectVertices(std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); + VertexContainer CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, + const PrimaryKey &split_key); std::optional<EdgeContainer> 
CollectEdges(std::set<uint64_t> &collected_transactions_start_id, const VertexContainer &split_vertices, const PrimaryKey &split_key); + std::map<LabelId, LabelIndex::LabelIndexContainer> CollectLabelIndices( + const PrimaryKey &split_key, + std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> &vertex_entry_map); + + std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> CollectLabelPropertyIndices( + const PrimaryKey &split_key, + std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> + &vertex_entry_map); + + static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); + + static void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + + void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); + VertexContainer &vertices_; EdgeContainer &edges_; std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction_; + Indices &indices_; Config &config_; }; From 7d0e885f9a111a65b37848389d583f2cb3d62bd4 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 18 Jan 2023 14:54:27 +0100 Subject: [PATCH 17/79] Unify CollectEntry method --- src/storage/v3/indices.cpp | 8 +++--- src/storage/v3/indices.hpp | 30 +++++++++++----------- src/storage/v3/splitter.cpp | 51 +++---------------------------------- src/storage/v3/splitter.hpp | 35 ++++++++++++++++++------- 4 files changed, 49 insertions(+), 75 deletions(-) diff --git a/src/storage/v3/indices.cpp b/src/storage/v3/indices.cpp index e0a42e176..931dd83af 100644 --- a/src/storage/v3/indices.cpp +++ b/src/storage/v3/indices.cpp @@ -325,7 +325,7 @@ void LabelIndex::RemoveObsoleteEntries(const uint64_t 
clean_up_before_timestamp) } } -LabelIndex::Iterable::Iterator::Iterator(Iterable *self, LabelIndexContainer::iterator index_iterator) +LabelIndex::Iterable::Iterator::Iterator(Iterable *self, IndexContainer::iterator index_iterator) : self_(self), index_iterator_(index_iterator), current_vertex_accessor_(nullptr, nullptr, nullptr, self_->config_, *self_->vertex_validator_), @@ -353,7 +353,7 @@ void LabelIndex::Iterable::Iterator::AdvanceUntilValid() { } } -LabelIndex::Iterable::Iterable(LabelIndexContainer &index_container, LabelId label, View view, Transaction *transaction, +LabelIndex::Iterable::Iterable(IndexContainer &index_container, LabelId label, View view, Transaction *transaction, Indices *indices, Config::Items config, const VertexValidator &vertex_validator) : index_container_(&index_container), label_(label), @@ -465,7 +465,7 @@ void LabelPropertyIndex::RemoveObsoleteEntries(const uint64_t clean_up_before_ti } } -LabelPropertyIndex::Iterable::Iterator::Iterator(Iterable *self, LabelPropertyIndexContainer::iterator index_iterator) +LabelPropertyIndex::Iterable::Iterator::Iterator(Iterable *self, IndexContainer::iterator index_iterator) : self_(self), index_iterator_(index_iterator), current_vertex_accessor_(nullptr, nullptr, nullptr, self_->config_, *self_->vertex_validator_), @@ -526,7 +526,7 @@ const PropertyValue kSmallestMap = PropertyValue(std::map<std::string, PropertyV const PropertyValue kSmallestTemporalData = PropertyValue(TemporalData{static_cast<TemporalType>(0), std::numeric_limits<int64_t>::min()}); -LabelPropertyIndex::Iterable::Iterable(LabelPropertyIndexContainer &index_container, LabelId label, PropertyId property, +LabelPropertyIndex::Iterable::Iterable(IndexContainer &index_container, LabelId label, PropertyId property, const std::optional<utils::Bound<PropertyValue>> &lower_bound, const std::optional<utils::Bound<PropertyValue>> &upper_bound, View view, Transaction *transaction, Indices *indices, Config::Items config, diff --git 
a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 394d98ce5..00d228df1 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -41,13 +41,13 @@ class LabelIndex { bool operator==(const Entry &rhs) const { return vertex == rhs.vertex && timestamp == rhs.timestamp; } }; - using LabelIndexContainer = std::set<Entry>; + using IndexContainer = std::set<Entry>; LabelIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) : indices_(indices), config_(config), vertex_validator_{&vertex_validator} {} LabelIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator, - std::map<LabelId, LabelIndexContainer> &data) + std::map<LabelId, IndexContainer> &data) : index_{std::move(data)}, indices_(indices), config_(config), vertex_validator_{&vertex_validator} {} /// @throw std::bad_alloc @@ -67,12 +67,12 @@ class LabelIndex { class Iterable { public: - Iterable(LabelIndexContainer &index_container, LabelId label, View view, Transaction *transaction, Indices *indices, + Iterable(IndexContainer &index_container, LabelId label, View view, Transaction *transaction, Indices *indices, Config::Items config, const VertexValidator &vertex_validator); class Iterator { public: - Iterator(Iterable *self, LabelIndexContainer::iterator index_iterator); + Iterator(Iterable *self, IndexContainer::iterator index_iterator); VertexAccessor operator*() const { return current_vertex_accessor_; } @@ -85,7 +85,7 @@ class LabelIndex { void AdvanceUntilValid(); Iterable *self_; - LabelIndexContainer::iterator index_iterator_; + IndexContainer::iterator index_iterator_; VertexAccessor current_vertex_accessor_; Vertex *current_vertex_; }; @@ -94,7 +94,7 @@ class LabelIndex { Iterator end() { return {this, index_container_->end()}; } private: - LabelIndexContainer *index_container_; + IndexContainer *index_container_; LabelId label_; View view_; Transaction *transaction_; @@ -120,10 +120,10 @@ class LabelIndex { 
[[nodiscard]] bool Empty() const noexcept { return index_.empty(); } - std::map<LabelId, LabelIndexContainer> &GetIndex() { return index_; } + std::map<LabelId, IndexContainer> &GetIndex() { return index_; } private: - std::map<LabelId, LabelIndexContainer> index_; + std::map<LabelId, IndexContainer> index_; Indices *indices_; Config::Items config_; const VertexValidator *vertex_validator_; @@ -143,7 +143,7 @@ class LabelPropertyIndex { bool operator==(const PropertyValue &rhs) const; }; - using LabelPropertyIndexContainer = std::set<Entry>; + using IndexContainer = std::set<Entry>; LabelPropertyIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) : indices_(indices), config_(config), vertex_validator_{&vertex_validator} {} @@ -167,14 +167,14 @@ class LabelPropertyIndex { class Iterable { public: - Iterable(LabelPropertyIndexContainer &index_container, LabelId label, PropertyId property, + Iterable(IndexContainer &index_container, LabelId label, PropertyId property, const std::optional<utils::Bound<PropertyValue>> &lower_bound, const std::optional<utils::Bound<PropertyValue>> &upper_bound, View view, Transaction *transaction, Indices *indices, Config::Items config, const VertexValidator &vertex_validator); class Iterator { public: - Iterator(Iterable *self, LabelPropertyIndexContainer::iterator index_iterator); + Iterator(Iterable *self, IndexContainer::iterator index_iterator); VertexAccessor operator*() const { return current_vertex_accessor_; } @@ -187,7 +187,7 @@ class LabelPropertyIndex { void AdvanceUntilValid(); Iterable *self_; - LabelPropertyIndexContainer::iterator index_iterator_; + IndexContainer::iterator index_iterator_; VertexAccessor current_vertex_accessor_; Vertex *current_vertex_; }; @@ -196,7 +196,7 @@ class LabelPropertyIndex { Iterator end(); private: - LabelPropertyIndexContainer *index_container_; + IndexContainer *index_container_; LabelId label_; PropertyId property_; 
std::optional<utils::Bound<PropertyValue>> lower_bound_; @@ -239,10 +239,10 @@ class LabelPropertyIndex { [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } - std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndexContainer> &GetIndex() { return index_; } + std::map<std::pair<LabelId, PropertyId>, IndexContainer> &GetIndex() { return index_; } private: - std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndexContainer> index_; + std::map<std::pair<LabelId, PropertyId>, IndexContainer> index_; Indices *indices_; Config::Items config_; const VertexValidator *vertex_validator_; diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index bc076a18b..b7eb65ba8 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -51,51 +51,6 @@ void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, D } } -std::map<LabelId, LabelIndex::LabelIndexContainer> Splitter::CollectLabelIndices( - const PrimaryKey &split_key, - std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> &vertex_entry_map) { - if (indices_.label_index.Empty()) { - return {}; - } - - // Space O(i * n/2 * 2), i number of indexes, n number of vertices - std::map<LabelId, LabelIndex::LabelIndexContainer> cloned_indices; - for (auto &[label, index] : indices_.label_index.GetIndex()) { - for (const auto &entry : index) { - if (entry.vertex->first > split_key) { - [[maybe_unused]] auto [it, inserted, node] = cloned_indices[label].insert(index.extract(entry)); - vertex_entry_map[label].insert({entry.vertex, &node.value()}); - } - } - } - - return cloned_indices; -} - -std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> -Splitter::CollectLabelPropertyIndices( - const PrimaryKey &split_key, - std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> - &vertex_entry_map) { - if (indices_.label_property_index.Empty()) { - return {}; - } - - 
std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> cloned_indices; - for (auto &[label_prop_pair, index] : indices_.label_property_index.GetIndex()) { - cloned_indices[label_prop_pair] = LabelPropertyIndex::LabelPropertyIndexContainer{}; - for (const auto &entry : index) { - if (entry.vertex->first > split_key) { - // We get this entry - [[maybe_unused]] const auto [it, inserted, node] = cloned_indices[label_prop_pair].insert(index.extract(entry)); - vertex_entry_map[label_prop_pair].insert({entry.vertex, &node.value()}); - } - } - } - - return cloned_indices; -} - VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key) { // Collection of indices is here since it heavily depends on vertices @@ -103,8 +58,10 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> label_index_vertex_entry_map; std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> label_property_vertex_entry_map; - data.label_indices = CollectLabelIndices(split_key, label_index_vertex_entry_map); - data.label_property_indices = CollectLabelPropertyIndices(split_key, label_property_vertex_entry_map); + data.label_indices = + CollectIndexEntries<LabelIndex, LabelId>(indices_.label_index, split_key, label_index_vertex_entry_map); + data.label_property_indices = CollectIndexEntries<LabelPropertyIndex, std::pair<LabelId, PropertyId>>( + indices_.label_property_index, split_key, label_property_vertex_entry_map); const auto update_indices = [](auto &index_map, const auto *old_vertex_ptr, auto &splitted_vertex_it) { for (auto &[label, vertex_entry_mappings] : index_map) { auto [it, end] = vertex_entry_mappings.equal_range(old_vertex_ptr); diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 858071160..6ae550635 100644 --- 
a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -20,6 +20,7 @@ #include "storage/v3/indices.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" +#include "utils/concepts.hpp" namespace memgraph::storage::v3 { @@ -29,8 +30,8 @@ struct SplitData { VertexContainer vertices; std::optional<EdgeContainer> edges; std::map<uint64_t, Transaction> transactions; - std::map<LabelId, LabelIndex::LabelIndexContainer> label_indices; - std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> label_property_indices; + std::map<LabelId, LabelIndex::IndexContainer> label_indices; + std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::IndexContainer> label_property_indices; }; class Splitter final { @@ -57,14 +58,30 @@ class Splitter final { std::optional<EdgeContainer> CollectEdges(std::set<uint64_t> &collected_transactions_start_id, const VertexContainer &split_vertices, const PrimaryKey &split_key); - std::map<LabelId, LabelIndex::LabelIndexContainer> CollectLabelIndices( - const PrimaryKey &split_key, - std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> &vertex_entry_map); + template <typename IndexMap, typename IndexType> + requires utils::SameAsAnyOf<IndexMap, LabelPropertyIndex, LabelIndex> + std::map<IndexType, typename IndexMap::IndexContainer> CollectIndexEntries( + IndexMap &index, const PrimaryKey &split_key, + std::map<IndexType, std::multimap<const Vertex *, typename IndexMap::Entry *>> &vertex_entry_map) { + if (index.Empty()) { + return {}; + } - std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::LabelPropertyIndexContainer> CollectLabelPropertyIndices( - const PrimaryKey &split_key, - std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> - &vertex_entry_map); + std::map<IndexType, typename IndexMap::IndexContainer> cloned_indices; + for (auto &[label_prop_pair, index] : index.GetIndex()) { + 
cloned_indices[label_prop_pair] = typename IndexMap::IndexContainer{}; + for (const auto &entry : index) { + if (entry.vertex->first > split_key) { + // We get this entry + [[maybe_unused]] const auto [it, inserted, node] = + cloned_indices[label_prop_pair].insert(index.extract(entry)); + vertex_entry_map[label_prop_pair].insert({entry.vertex, &node.value()}); + } + } + } + + return cloned_indices; + } static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); From 28624aaa88ed04c692232077208daf0ae0ec1108 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 18 Jan 2023 16:08:18 +0100 Subject: [PATCH 18/79] Copy all relevant transactions Copy all transactions which are relevant for entity MVCC value resolution. --- src/storage/v3/splitter.cpp | 47 ++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index b7eb65ba8..c6ba37abe 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -36,22 +36,22 @@ Splitter::Splitter(VertexContainer &vertices, EdgeContainer &edges, SplitData Splitter::SplitShard(const PrimaryKey &split_key) { SplitData data; - std::set<uint64_t> collected_transactions_start_id; - data.vertices = CollectVertices(data, collected_transactions_start_id, split_key); - data.edges = CollectEdges(collected_transactions_start_id, data.vertices, split_key); - data.transactions = CollectTransactions(collected_transactions_start_id, data.vertices, *data.edges); + std::set<uint64_t> collected_transactions_; + data.vertices = CollectVertices(data, collected_transactions_, split_key); + data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); + data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges); return data; } -void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta) { +void 
Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, Delta *delta) { while (delta != nullptr) { - collected_transactions_start_id.insert(delta->commit_info->start_or_commit_timestamp.logical_id); + collected_transactions_.insert(delta->commit_info->start_or_commit_timestamp.logical_id); delta = delta->next; } } -VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, +VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_, const PrimaryKey &split_key) { // Collection of indices is here since it heavily depends on vertices // Old vertex pointer new entry pointer @@ -76,7 +76,7 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c auto split_key_it = vertices_.find(split_key); while (split_key_it != vertices_.end()) { // Go through deltas and pick up transactions start_id - ScanDeltas(collected_transactions_start_id, split_key_it->second.delta); + ScanDeltas(collected_transactions_, split_key_it->second.delta); const auto *old_vertex_ptr = &*split_key_it; auto next_it = std::next(split_key_it); @@ -92,7 +92,7 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c return splitted_data; } -std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collected_transactions_start_id, +std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collected_transactions_, const VertexContainer &split_vertices, const PrimaryKey &split_key) { if (!config_.items.properties_on_edges) { @@ -105,7 +105,7 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte for (const auto &edge_ref : edges_ref) { auto *edge = std::get<2>(edge_ref).ptr; const auto &other_vtx = std::get<1>(edge_ref); - ScanDeltas(collected_transactions_start_id, edge->delta); + ScanDeltas(collected_transactions_, edge->delta); // Check if src and dest edge are both on splitted shard // so 
we know if we should remove orphan edge if (other_vtx.primary_key >= split_key) { @@ -124,22 +124,33 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte return splitted_edges; } -std::map<uint64_t, Transaction> Splitter::CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, +std::map<uint64_t, Transaction> Splitter::CollectTransactions(const std::set<uint64_t> &collected_transactions_, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { std::map<uint64_t, Transaction> transactions; - for (const auto commit_start : collected_transactions_start_id) { - // If it does not contain then the transaction has commited, and we ignore it - if (start_logical_id_to_transaction_.contains(commit_start)) { + + for (const auto &[commit_start, transaction] : start_logical_id_to_transaction_) { + // We need all transaction whose deltas need to be resolved for any of the + // entities + if (collected_transactions_.contains(transaction->commit_info->start_or_commit_timestamp.logical_id)) { transactions.insert({commit_start, start_logical_id_to_transaction_[commit_start]->Clone()}); } } + // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr AlignClonedTransactions(transactions, cloned_vertices, cloned_edges); return transactions; } +void Splitter::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + for (auto &[commit_start, cloned_transaction] : cloned_transactions) { + AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, + cloned_vertices, cloned_edges); + } +} + void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) 
{ @@ -201,12 +212,4 @@ void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Tra "Both iterators must be exhausted!"); } -void Splitter::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { - for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, - cloned_vertices, cloned_edges); - } -} - } // namespace memgraph::storage::v3 From 0ae535739933188e5b9c6de9f0096a18edc820ec Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 19 Jan 2023 17:28:19 +0100 Subject: [PATCH 19/79] Add delta check to tests --- src/storage/v3/delta.hpp | 37 +++++--- src/storage/v3/splitter.cpp | 7 +- tests/unit/storage_v3_shard_split.cpp | 121 ++++++++++++++++++-------- 3 files changed, 113 insertions(+), 52 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 35c793482..cc8132dc3 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -132,11 +132,12 @@ inline bool operator==(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer inline bool operator!=(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer &b) { return !(a == b); } -struct Delta { - // Needed for splits - // TODO Replace this with int identifier - boost::uuids::uuid uuid{boost::uuids::uuid()}; +inline uint64_t GetNextDeltaUUID() noexcept { + static uint64_t uuid{0}; + return ++uuid; +} +struct Delta { enum class Action : uint8_t { // Used for both Vertex and Edge DELETE_OBJECT, @@ -166,24 +167,37 @@ struct Delta { struct RemoveOutEdgeTag {}; Delta(DeleteObjectTag /*unused*/, CommitInfo *commit_info, uint64_t command_id) - : action(Action::DELETE_OBJECT), commit_info(commit_info), command_id(command_id) {} + : action(Action::DELETE_OBJECT), uuid(GetNextDeltaUUID()), commit_info(commit_info), 
command_id(command_id) {} Delta(RecreateObjectTag /*unused*/, CommitInfo *commit_info, uint64_t command_id) - : action(Action::RECREATE_OBJECT), commit_info(commit_info), command_id(command_id) {} + : action(Action::RECREATE_OBJECT), uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id) {} Delta(AddLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t command_id) - : action(Action::ADD_LABEL), commit_info(commit_info), command_id(command_id), label(label) {} + : action(Action::ADD_LABEL), + uuid(GetNextDeltaUUID()), + commit_info(commit_info), + command_id(command_id), + label(label) {} Delta(RemoveLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t command_id) - : action(Action::REMOVE_LABEL), commit_info(commit_info), command_id(command_id), label(label) {} + : action(Action::REMOVE_LABEL), + uuid(GetNextDeltaUUID()), + commit_info(commit_info), + command_id(command_id), + label(label) {} Delta(SetPropertyTag /*unused*/, PropertyId key, const PropertyValue &value, CommitInfo *commit_info, uint64_t command_id) - : action(Action::SET_PROPERTY), commit_info(commit_info), command_id(command_id), property({key, value}) {} + : action(Action::SET_PROPERTY), + uuid(GetNextDeltaUUID()), + commit_info(commit_info), + command_id(command_id), + property({key, value}) {} Delta(AddInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t command_id) : action(Action::ADD_IN_EDGE), + uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -191,6 +205,7 @@ struct Delta { Delta(AddOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t command_id) : action(Action::ADD_OUT_EDGE), + uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -198,6 +213,7 @@ struct 
Delta { Delta(RemoveInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t command_id) : action(Action::REMOVE_IN_EDGE), + uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -205,6 +221,7 @@ struct Delta { Delta(RemoveOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t command_id) : action(Action::REMOVE_OUT_EDGE), + uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -234,7 +251,7 @@ struct Delta { } Action action; - + uint64_t uuid; // TODO: optimize with in-place copy CommitInfo *commit_info; uint64_t command_id; diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index c6ba37abe..d84469d5a 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -159,18 +159,17 @@ void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Tra auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { - MG_ASSERT(delta_it->uuid == cloned_delta_it->uuid, "The order of deltas is not correct"); - // Find appropriate prev and delta->next for cloned deltas - const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta != nullptr) { // Align delta, while ignoring deltas whose transactions have commited, // or aborted if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { - cloned_delta->next = &*std::ranges::find_if( + auto *found_delta_it = &*std::ranges::find_if( cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, [delta](const auto &elem) { return elem.uuid == delta->uuid; }); + MG_ASSERT(found_delta_it, 
"Delta with given uuid must exist!"); + cloned_delta->next = &*found_delta_it; } else { delta = delta->next; continue; diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 54068955d..e6c9b897e 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -9,27 +9,41 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -#include <gtest/gtest.h> #include <cstdint> +#include <gmock/gmock-matchers.h> +#include <gmock/gmock.h> +#include <gtest/gtest.h> + #include "query/v2/requests.hpp" +#include "storage/v3/delta.hpp" #include "storage/v3/id_types.hpp" #include "storage/v3/key_store.hpp" +#include "storage/v3/mvcc.hpp" #include "storage/v3/property_value.hpp" #include "storage/v3/shard.hpp" +#include "storage/v3/vertex.hpp" #include "storage/v3/vertex_id.hpp" +using testing::Pair; +using testing::UnorderedElementsAre; + namespace memgraph::storage::v3::tests { class ShardSplitTest : public testing::Test { protected: - void SetUp() override { storage.StoreMapping({{1, "label"}, {2, "property"}, {3, "edge_property"}}); } + void SetUp() override { + storage.StoreMapping( + {{1, "label"}, {2, "property"}, {3, "edge_property"}, {4, "secondary_label"}, {5, "secondary_prop"}}); + } const PropertyId primary_property{PropertyId::FromUint(2)}; + const PropertyId secondary_property{PropertyId::FromUint(5)}; std::vector<storage::v3::SchemaProperty> schema_property_vector = { storage::v3::SchemaProperty{primary_property, common::SchemaType::INT}}; const std::vector<PropertyValue> min_pk{PropertyValue{0}}; const LabelId primary_label{LabelId::FromUint(1)}; + const LabelId secondary_label{LabelId::FromUint(4)}; const EdgeTypeId edge_type_id{EdgeTypeId::FromUint(3)}; Shard storage{primary_label, min_pk, std::nullopt /*max_primary_key*/, schema_property_vector}; @@ -42,21 +56,74 @@ class ShardSplitTest : public testing::Test { } }; +void AssertEqVertexContainer(const 
VertexContainer &expected, const VertexContainer &actual) { + ASSERT_EQ(expected.size(), actual.size()); + + auto expected_it = expected.begin(); + auto actual_it = actual.begin(); + while (expected_it != expected.end()) { + EXPECT_EQ(expected_it->first, actual_it->first); + EXPECT_EQ(expected_it->second.deleted, actual_it->second.deleted); + EXPECT_EQ(expected_it->second.in_edges, actual_it->second.in_edges); + EXPECT_EQ(expected_it->second.out_edges, actual_it->second.out_edges); + EXPECT_EQ(expected_it->second.labels, actual_it->second.labels); + + auto *expected_delta = expected_it->second.delta; + auto *actual_delta = actual_it->second.delta; + while (expected_delta != nullptr) { + EXPECT_EQ(expected_delta->action, actual_delta->action); + expected_delta = expected_delta->next; + actual_delta = actual_delta->next; + } + EXPECT_EQ(expected_delta, nullptr); + EXPECT_EQ(actual_delta, nullptr); + ++expected_it; + ++actual_it; + } +} + +void AddDeltaToDeltaChain(Vertex *object, Delta *new_delta) { + auto *delta_holder = GetDeltaHolder(object); + + new_delta->next = delta_holder->delta; + new_delta->prev.Set(object); + if (delta_holder->delta) { + delta_holder->delta->prev.Set(new_delta); + } + delta_holder->delta = new_delta; +} + TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { auto acc = storage.Access(GetNextHlc()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(1)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(5)}, {}).HasError()); 
EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); - acc.Commit(GetNextHlc()); - storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); + auto current_hlc = GetNextHlc(); + acc.Commit(current_hlc); auto splitted_data = storage.PerformSplit({PropertyValue(4)}); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 0); - EXPECT_EQ(splitted_data.transactions.size(), 0); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); + + CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 2}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 3}; + Delta delta_add_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 4}; + VertexContainer expected_vertices; + expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); + auto [it, inserted] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); + expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); + it->second.labels.push_back(secondary_label); + AddDeltaToDeltaChain(&*it, &delta_add_label); + + AssertEqVertexContainer(expected_vertices, splitted_data.vertices); } TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { @@ -79,12 +146,13 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { .HasError()); acc.Commit(GetNextHlc()); - storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); auto splitted_data = storage.PerformSplit({PropertyValue(4)}); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); - EXPECT_EQ(splitted_data.transactions.size(), 0); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + 
EXPECT_EQ(splitted_data.label_property_indices.size(), 0); } TEST_F(ShardSplitTest, TestBasicSplitBeforeCommit) { @@ -110,36 +178,11 @@ TEST_F(ShardSplitTest, TestBasicSplitBeforeCommit) { EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); } -TEST_F(ShardSplitTest, TestBasicSplitAfterCommit) { - auto acc = storage.Access(GetNextHlc()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); - - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) - .HasError()); - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) - .HasError()); - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) - .HasError()); - - acc.Commit(GetNextHlc()); - - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); - EXPECT_EQ(splitted_data.vertices.size(), 3); - EXPECT_EQ(splitted_data.edges->size(), 2); - EXPECT_EQ(splitted_data.transactions.size(), 0); -} - -TEST_F(ShardSplitTest, TestBasicSplitAfterCommit2) { +TEST_F(ShardSplitTest, TestBasicSplitWithCommitedAndOngoingTransactions) { { auto acc 
= storage.Access(GetNextHlc()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); @@ -165,7 +208,9 @@ TEST_F(ShardSplitTest, TestBasicSplitAfterCommit2) { auto splitted_data = storage.PerformSplit({PropertyValue(4)}); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); - EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.transactions.size(), 2); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); } } // namespace memgraph::storage::v3::tests From 6ab76fb9ece657c637e7a1a162ce34387bc922c5 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 19 Jan 2023 17:50:04 +0100 Subject: [PATCH 20/79] Assert delta tag --- src/storage/v3/transaction.hpp | 1 - tests/unit/storage_v3_shard_split.cpp | 26 +++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 6c396d969..69953be53 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -65,7 +65,6 @@ struct Transaction { ~Transaction() {} std::list<Delta> CopyDeltas(CommitInfo *commit_info) const { - // TODO This does not solve the next and prev deltas that also need to be set std::list<Delta> copied_deltas; for (const auto &delta : deltas) { switch (delta.action) { diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index e6c9b897e..6c98fa924 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -72,6 +72,26 @@ void AssertEqVertexContainer(const VertexContainer &expected, const VertexContai auto *actual_delta = actual_it->second.delta; while (expected_delta != nullptr) { EXPECT_EQ(expected_delta->action, actual_delta->action); + switch (expected_delta->action) { + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: { + 
EXPECT_EQ(expected_delta->label, actual_delta->label); + break; + } + case Delta::Action::SET_PROPERTY: { + EXPECT_EQ(expected_delta->property.key, actual_delta->property.key); + EXPECT_EQ(expected_delta->property.value, actual_delta->property.value); + break; + } + case Delta::Action::ADD_IN_EDGE: + case Delta::Action::ADD_OUT_EDGE: + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::RECREATE_OBJECT: + case Delta::Action::DELETE_OBJECT: + case Delta::Action::REMOVE_OUT_EDGE: { + break; + } + } expected_delta = expected_delta->next; actual_delta = actual_delta->next; } @@ -99,7 +119,9 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(5)}, {{secondary_property, PropertyValue(121)}}) + .HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); auto current_hlc = GetNextHlc(); acc.Commit(current_hlc); @@ -116,11 +138,13 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 2}; Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 3}; Delta delta_add_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 4}; + Delta delta_add_property{Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 4}; VertexContainer expected_vertices; expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); auto [it, inserted] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); 
it->second.labels.push_back(secondary_label); + AddDeltaToDeltaChain(&*it, &delta_add_property); AddDeltaToDeltaChain(&*it, &delta_add_label); AssertEqVertexContainer(expected_vertices, splitted_data.vertices); From 97002a50d5e44b8a06d1413d10ba6b2e4323922a Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 23 Jan 2023 13:23:21 +0100 Subject: [PATCH 21/79] Create new shard --- src/storage/v3/shard.cpp | 47 ++++++++++++++++++++++++++++++--- src/storage/v3/shard.hpp | 19 +++++++++++--- src/storage/v3/splitter.cpp | 48 ++++++++++++++++++++-------------- src/storage/v3/splitter.hpp | 29 ++++++++++++-------- src/storage/v3/transaction.hpp | 6 ++--- 5 files changed, 109 insertions(+), 40 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 3d5d579f1..d26afc76b 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -333,7 +333,46 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, config_{config}, - shard_splitter_(vertices_, edges_, start_logical_id_to_transaction_, indices_, config_) { + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { + CreateSchema(primary_label_, schema); + StoreMapping(std::move(id_to_name)); +} + +Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, + std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config, + std::unordered_map<uint64_t, std::string> id_to_name) + : primary_label_{primary_label}, + min_primary_key_{min_primary_key}, + max_primary_key_{max_primary_key}, + vertices_(std::move(vertices)), + edges_(std::move(edges)), + schema_validator_{schemas_, name_id_mapper_}, + 
vertex_validator_{schema_validator_, primary_label}, + indices_{config.items, vertex_validator_}, + isolation_level_{config.transaction.isolation_level}, + config_{config}, + start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { + CreateSchema(primary_label_, schema); + StoreMapping(std::move(id_to_name)); +} + +Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, + std::vector<SchemaProperty> schema, VertexContainer &&vertices, + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config, + std::unordered_map<uint64_t, std::string> id_to_name) + : primary_label_{primary_label}, + min_primary_key_{min_primary_key}, + max_primary_key_{max_primary_key}, + vertices_(std::move(vertices)), + schema_validator_{schemas_, name_id_mapper_}, + vertex_validator_{schema_validator_, primary_label}, + indices_{config.items, vertex_validator_}, + isolation_level_{config.transaction.isolation_level}, + config_{config}, + start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } @@ -434,7 +473,7 @@ ShardResult<std::optional<std::pair<VertexAccessor, std::vector<EdgeAccessor>>>> } std::vector<EdgeAccessor> deleted_edges; - const VertexId vertex_id{shard_->primary_label_, *vertex->PrimaryKey(View::OLD)}; // TODO Replace + const VertexId vertex_id{shard_->primary_label_, *vertex->PrimaryKey(View::OLD)}; for (const auto &item : in_edges) { auto [edge_type, from_vertex, edge] = item; EdgeAccessor e(edge, edge_type, from_vertex, vertex_id, transaction_, &shard_->indices_, config_); @@ -1057,7 +1096,9 @@ std::optional<SplitInfo> Shard::ShouldSplit() 
const noexcept { return std::nullopt; } -SplitData Shard::PerformSplit(const PrimaryKey &split_key) { return shard_splitter_.SplitShard(split_key); } +std::unique_ptr<Shard> Shard::PerformSplit(const PrimaryKey &split_key) { + return shard_splitter_.SplitShard(split_key, max_primary_key_); +} bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { return vertex_id.primary_label == primary_label_ && vertex_id.primary_key >= min_primary_key_ && diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 5b18eeda2..146320b7b 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -14,6 +14,7 @@ #include <cstdint> #include <filesystem> #include <map> +#include <memory> #include <numeric> #include <optional> #include <shared_mutex> @@ -192,9 +193,19 @@ class Shard final { public: /// @throw std::system_error /// @throw std::bad_alloc - explicit Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, - std::vector<SchemaProperty> schema, Config config = Config(), - std::unordered_map<uint64_t, std::string> id_to_name = {}); + Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, + std::vector<SchemaProperty> schema, Config config = Config(), + std::unordered_map<uint64_t, std::string> id_to_name = {}); + + Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, + std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config = Config(), + std::unordered_map<uint64_t, std::string> id_to_name = {}); + + Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, + std::vector<SchemaProperty> schema, VertexContainer &&vertices, + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config = Config(), + 
std::unordered_map<uint64_t, std::string> id_to_name = {}); Shard(const Shard &) = delete; Shard(Shard &&) noexcept = delete; @@ -368,7 +379,7 @@ class Shard final { std::optional<SplitInfo> ShouldSplit() const noexcept; - SplitData PerformSplit(const PrimaryKey &split_key); + std::unique_ptr<Shard> PerformSplit(const PrimaryKey &split_key); private: Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index d84469d5a..b9d6304e1 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -17,23 +17,29 @@ #include <set> #include "storage/v3/config.hpp" +#include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" #include "storage/v3/key_store.hpp" +#include "storage/v3/schemas.hpp" +#include "storage/v3/shard.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" namespace memgraph::storage::v3 { -Splitter::Splitter(VertexContainer &vertices, EdgeContainer &edges, +Splitter::Splitter(const LabelId primary_label, VertexContainer &vertices, EdgeContainer &edges, std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, - Config &config) - : vertices_(vertices), + Config &config, const std::vector<SchemaProperty> &schema) + : primary_label_(primary_label), + vertices_(vertices), edges_(edges), start_logical_id_to_transaction_(start_logical_id_to_transaction), indices_(indices), - config_(config) {} + config_(config), + schema_(schema) {} -SplitData Splitter::SplitShard(const PrimaryKey &split_key) { +std::unique_ptr<Shard> Splitter::SplitShard(const PrimaryKey &split_key, + const std::optional<PrimaryKey> &max_primary_key) { SplitData data; std::set<uint64_t> collected_transactions_; @@ -41,7 +47,12 @@ SplitData Splitter::SplitShard(const PrimaryKey &split_key) { data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); data.transactions = 
CollectTransactions(collected_transactions_, data.vertices, *data.edges); - return data; + if (data.edges) { + return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), + std::move(*data.edges), std::move(data.transactions), config_); + } + return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), + std::move(data.transactions), config_); } void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, Delta *delta) { @@ -124,10 +135,9 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte return splitted_edges; } -std::map<uint64_t, Transaction> Splitter::CollectTransactions(const std::set<uint64_t> &collected_transactions_, - VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges) { - std::map<uint64_t, Transaction> transactions; +std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( + const std::set<uint64_t> &collected_transactions_, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + std::map<uint64_t, std::unique_ptr<Transaction>> transactions; for (const auto &[commit_start, transaction] : start_logical_id_to_transaction_) { // We need all transaction whose deltas need to be resolved for any of the @@ -139,21 +149,21 @@ std::map<uint64_t, Transaction> Splitter::CollectTransactions(const std::set<uin // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr - AlignClonedTransactions(transactions, cloned_vertices, cloned_edges); + AdjustClonedTransactions(transactions, cloned_vertices, cloned_edges); return transactions; } -void Splitter::AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { +void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, 
+ VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - AlignClonedTransaction(cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, - cloned_vertices, cloned_edges); + AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, + cloned_vertices, cloned_edges); } } -void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { +void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { // Align next and prev in deltas // NOTE It is important that the order of delta lists is in same order auto delta_it = transaction.deltas.begin(); @@ -166,7 +176,7 @@ void Splitter::AlignClonedTransaction(Transaction &cloned_transaction, const Tra // or aborted if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { auto *found_delta_it = &*std::ranges::find_if( - cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id).deltas, + cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas, [delta](const auto &elem) { return elem.uuid == delta->uuid; }); MG_ASSERT(found_delta_it, "Delta with given uuid must exist!"); cloned_delta->next = &*found_delta_it; diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 6ae550635..9822aa8f0 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -8,6 +8,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0, included in the file // 
licenses/APL.txt. +#pragma once #include <map> #include <memory> @@ -17,7 +18,10 @@ #include "storage/v3/config.hpp" #include "storage/v3/delta.hpp" #include "storage/v3/edge.hpp" +#include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/schemas.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" #include "utils/concepts.hpp" @@ -29,16 +33,16 @@ namespace memgraph::storage::v3 { struct SplitData { VertexContainer vertices; std::optional<EdgeContainer> edges; - std::map<uint64_t, Transaction> transactions; + std::map<uint64_t, std::unique_ptr<Transaction>> transactions; std::map<LabelId, LabelIndex::IndexContainer> label_indices; std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::IndexContainer> label_property_indices; }; class Splitter final { public: - Splitter(VertexContainer &vertices, EdgeContainer &edges, + Splitter(LabelId primary_label, VertexContainer &vertices, EdgeContainer &edges, std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, - Config &config); + Config &config, const std::vector<SchemaProperty> &schema); Splitter(const Splitter &) = delete; Splitter(Splitter &&) noexcept = delete; @@ -46,11 +50,12 @@ class Splitter final { Splitter operator=(Splitter &&) noexcept = delete; ~Splitter() = default; - SplitData SplitShard(const PrimaryKey &split_key); + std::unique_ptr<Shard> SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key); private: - std::map<uint64_t, Transaction> CollectTransactions(const std::set<uint64_t> &collected_transactions_start_id, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( + const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); VertexContainer CollectVertices(SplitData &data, 
std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); @@ -85,18 +90,20 @@ class Splitter final { static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); - static void AlignClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, Transaction> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); - void AlignClonedTransactions(std::map<uint64_t, Transaction> &cloned_transactions, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); + void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + LabelId primary_label_; VertexContainer &vertices_; EdgeContainer &edges_; std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction_; Indices &indices_; Config &config_; + std::vector<SchemaProperty> schema_; }; } // namespace memgraph::storage::v3 diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 69953be53..9b13d9c7b 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -106,9 +106,9 @@ struct Transaction { } // This does not solve the whole problem of copying deltas - Transaction Clone() const { - return {start_timestamp, *commit_info, CopyDeltas(commit_info.get()), command_id, must_abort, - is_aborted, isolation_level}; + std::unique_ptr<Transaction> Clone() const { + return std::make_unique<Transaction>(start_timestamp, *commit_info, CopyDeltas(commit_info.get()), command_id, + must_abort, is_aborted, isolation_level); } coordinator::Hlc start_timestamp; From 
45521bdba828e090d9947e20fb1af04cbbec4218 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 23 Jan 2023 18:04:48 +0100 Subject: [PATCH 22/79] Add from SplitData --- src/storage/v3/shard.cpp | 34 +++++++++++++------ src/storage/v3/shard.hpp | 12 ++++--- src/storage/v3/splitter.cpp | 30 +++++++++++------ src/storage/v3/splitter.hpp | 27 ++++++++++----- tests/unit/storage_v3_shard_split.cpp | 48 ++++++++++++++++++--------- 5 files changed, 100 insertions(+), 51 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index d26afc76b..566029b9c 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -333,15 +333,16 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, config_{config}, - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, + name_id_mapper_) { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, - std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config, - std::unordered_map<uint64_t, std::string> id_to_name) + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, + const std::unordered_map<uint64_t, std::string> &id_to_name) : primary_label_{primary_label}, min_primary_key_{min_primary_key}, max_primary_key_{max_primary_key}, @@ -353,15 +354,16 @@ Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<Pr isolation_level_{config.transaction.isolation_level}, config_{config}, 
start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, + name_id_mapper_) { CreateSchema(primary_label_, schema); - StoreMapping(std::move(id_to_name)); + StoreMapping(id_to_name); } Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, - std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config, - std::unordered_map<uint64_t, std::string> id_to_name) + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, + const std::unordered_map<uint64_t, std::string> &id_to_name) : primary_label_{primary_label}, min_primary_key_{min_primary_key}, max_primary_key_{max_primary_key}, @@ -372,13 +374,25 @@ Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<Pr isolation_level_{config.transaction.isolation_level}, config_{config}, start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema) { + shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, + name_id_mapper_) { CreateSchema(primary_label_, schema); - StoreMapping(std::move(id_to_name)); + StoreMapping(id_to_name); } Shard::~Shard() {} +std::unique_ptr<Shard> Shard::FromSplitData(SplitData &&split_data) { + if (split_data.config.items.properties_on_edges) [[likely]] { + return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, + split_data.schema, std::move(split_data.vertices), std::move(*split_data.edges), + 
std::move(split_data.transactions), split_data.config, split_data.id_to_name); + } + return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, + split_data.schema, std::move(split_data.vertices), std::move(split_data.transactions), + split_data.config, split_data.id_to_name); +} + Shard::Accessor::Accessor(Shard &shard, Transaction &transaction) : shard_(&shard), transaction_(&transaction), config_(shard_->config_.items) {} @@ -1096,7 +1110,7 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { return std::nullopt; } -std::unique_ptr<Shard> Shard::PerformSplit(const PrimaryKey &split_key) { +SplitData Shard::PerformSplit(const PrimaryKey &split_key) { return shard_splitter_.SplitShard(split_key, max_primary_key_); } diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 146320b7b..02ba7a2da 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -199,13 +199,13 @@ class Shard final { Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, - std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config = Config(), - std::unordered_map<uint64_t, std::string> id_to_name = {}); + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, + const std::unordered_map<uint64_t, std::string> &id_to_name); Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, - std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, Config config = Config(), - std::unordered_map<uint64_t, std::string> id_to_name = {}); + std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, + const 
std::unordered_map<uint64_t, std::string> &id_to_name); Shard(const Shard &) = delete; Shard(Shard &&) noexcept = delete; @@ -213,6 +213,8 @@ class Shard final { Shard operator=(Shard &&) noexcept = delete; ~Shard(); + static std::unique_ptr<Shard> FromSplitData(SplitData &&split_data); + class Accessor final { private: friend class Shard; @@ -379,7 +381,7 @@ class Shard final { std::optional<SplitInfo> ShouldSplit() const noexcept; - std::unique_ptr<Shard> PerformSplit(const PrimaryKey &split_key); + SplitData PerformSplit(const PrimaryKey &split_key); private: Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index b9d6304e1..5cbc84f50 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -20,6 +20,7 @@ #include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" #include "storage/v3/key_store.hpp" +#include "storage/v3/name_id_mapper.hpp" #include "storage/v3/schemas.hpp" #include "storage/v3/shard.hpp" #include "storage/v3/transaction.hpp" @@ -29,30 +30,37 @@ namespace memgraph::storage::v3 { Splitter::Splitter(const LabelId primary_label, VertexContainer &vertices, EdgeContainer &edges, std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, - Config &config, const std::vector<SchemaProperty> &schema) + const Config &config, const std::vector<SchemaProperty> &schema, const NameIdMapper &name_id_mapper) : primary_label_(primary_label), vertices_(vertices), edges_(edges), start_logical_id_to_transaction_(start_logical_id_to_transaction), indices_(indices), config_(config), - schema_(schema) {} + schema_(schema), + name_id_mapper_(name_id_mapper) {} -std::unique_ptr<Shard> Splitter::SplitShard(const PrimaryKey &split_key, - const std::optional<PrimaryKey> &max_primary_key) { - SplitData data; +SplitData Splitter::SplitShard(const PrimaryKey &split_key, const 
std::optional<PrimaryKey> &max_primary_key) { + SplitData data{.primary_label = primary_label_, + .min_primary_key = split_key, + .max_primary_key = max_primary_key, + .schema = schema_, + .config = config_, + .id_to_name = name_id_mapper_.GetIdToNameMap()}; std::set<uint64_t> collected_transactions_; data.vertices = CollectVertices(data, collected_transactions_, split_key); data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges); - if (data.edges) { - return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), - std::move(*data.edges), std::move(data.transactions), config_); - } - return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), - std::move(data.transactions), config_); + // if (data.edges) { + // return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), + // std::move(*data.edges), std::move(data.transactions), config_, + // name_id_mapper_.GetIdToNameMap()); + // } + // return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), + // std::move(data.transactions), config_, name_id_mapper_.GetIdToNameMap()); + return data; } void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, Delta *delta) { diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 9822aa8f0..c8fc3fcd2 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -21,6 +21,7 @@ #include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" #include "storage/v3/key_store.hpp" +#include "storage/v3/name_id_mapper.hpp" #include "storage/v3/schemas.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" @@ -31,6 +32,13 @@ namespace memgraph::storage::v3 { // If edge properties-on-edges is false then 
we don't need to send edges but // only vertices, since they will contain those edges struct SplitData { + LabelId primary_label; + PrimaryKey min_primary_key; + std::optional<PrimaryKey> max_primary_key; + std::vector<SchemaProperty> schema; + Config config; + std::unordered_map<uint64_t, std::string> id_to_name; + VertexContainer vertices; std::optional<EdgeContainer> edges; std::map<uint64_t, std::unique_ptr<Transaction>> transactions; @@ -42,7 +50,7 @@ class Splitter final { public: Splitter(LabelId primary_label, VertexContainer &vertices, EdgeContainer &edges, std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction, Indices &indices, - Config &config, const std::vector<SchemaProperty> &schema); + const Config &config, const std::vector<SchemaProperty> &schema, const NameIdMapper &name_id_mapper_); Splitter(const Splitter &) = delete; Splitter(Splitter &&) noexcept = delete; @@ -50,19 +58,19 @@ class Splitter final { Splitter operator=(Splitter &&) noexcept = delete; ~Splitter() = default; - std::unique_ptr<Shard> SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key); + SplitData SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key); private: - std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( - const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); - VertexContainer CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, const PrimaryKey &split_key); std::optional<EdgeContainer> CollectEdges(std::set<uint64_t> &collected_transactions_start_id, const VertexContainer &split_vertices, const PrimaryKey &split_key); + std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( + const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, + EdgeContainer &cloned_edges); + template <typename IndexMap, typename 
IndexType> requires utils::SameAsAnyOf<IndexMap, LabelPropertyIndex, LabelIndex> std::map<IndexType, typename IndexMap::IndexContainer> CollectIndexEntries( @@ -97,13 +105,14 @@ class Splitter final { void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); - LabelId primary_label_; + const LabelId primary_label_; VertexContainer &vertices_; EdgeContainer &edges_; std::map<uint64_t, std::unique_ptr<Transaction>> &start_logical_id_to_transaction_; Indices &indices_; - Config &config_; - std::vector<SchemaProperty> schema_; + const Config &config_; + const std::vector<SchemaProperty> schema_; + const NameIdMapper &name_id_mapper_; }; } // namespace memgraph::storage::v3 diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 6c98fa924..f1c52dad6 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -56,31 +56,29 @@ class ShardSplitTest : public testing::Test { } }; -void AssertEqVertexContainer(const VertexContainer &expected, const VertexContainer &actual) { - ASSERT_EQ(expected.size(), actual.size()); +void AssertEqVertexContainer(const VertexContainer &actual, const VertexContainer &expected) { + ASSERT_EQ(actual.size(), expected.size()); auto expected_it = expected.begin(); auto actual_it = actual.begin(); while (expected_it != expected.end()) { - EXPECT_EQ(expected_it->first, actual_it->first); - EXPECT_EQ(expected_it->second.deleted, actual_it->second.deleted); - EXPECT_EQ(expected_it->second.in_edges, actual_it->second.in_edges); - EXPECT_EQ(expected_it->second.out_edges, actual_it->second.out_edges); - EXPECT_EQ(expected_it->second.labels, actual_it->second.labels); + EXPECT_EQ(actual_it->first, expected_it->first); + EXPECT_EQ(actual_it->second.deleted, expected_it->second.deleted); + EXPECT_EQ(actual_it->second.labels, expected_it->second.labels); auto *expected_delta 
= expected_it->second.delta; auto *actual_delta = actual_it->second.delta; while (expected_delta != nullptr) { - EXPECT_EQ(expected_delta->action, actual_delta->action); + EXPECT_EQ(actual_delta->action, expected_delta->action); switch (expected_delta->action) { case Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: { - EXPECT_EQ(expected_delta->label, actual_delta->label); + EXPECT_EQ(actual_delta->label, expected_delta->label); break; } case Delta::Action::SET_PROPERTY: { - EXPECT_EQ(expected_delta->property.key, actual_delta->property.key); - EXPECT_EQ(expected_delta->property.value, actual_delta->property.value); + EXPECT_EQ(actual_delta->property.key, expected_delta->property.key); + EXPECT_EQ(actual_delta->property.value, expected_delta->property.value); break; } case Delta::Action::ADD_IN_EDGE: @@ -147,7 +145,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { AddDeltaToDeltaChain(&*it, &delta_add_property); AddDeltaToDeltaChain(&*it, &delta_add_label); - AssertEqVertexContainer(expected_vertices, splitted_data.vertices); + AssertEqVertexContainer(splitted_data.vertices, expected_vertices); } TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { @@ -159,9 +157,6 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) - .HasError()); EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) .HasError()); @@ -169,7 +164,8 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) .HasError()); - 
acc.Commit(GetNextHlc()); + auto current_hlc = GetNextHlc(); + acc.Commit(current_hlc); auto splitted_data = storage.PerformSplit({PropertyValue(4)}); EXPECT_EQ(splitted_data.vertices.size(), 3); @@ -177,6 +173,26 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.transactions.size(), 1); EXPECT_EQ(splitted_data.label_indices.size(), 0); EXPECT_EQ(splitted_data.label_property_indices.size(), 0); + + CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 1}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 1}; + Delta delta_add_in_edge1{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(1)}}, + EdgeRef{Gid::FromUint(1)}, &commit_info, 1}; + Delta delta_add_out_edge2{Delta::RemoveOutEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(6)}}, + EdgeRef{Gid::FromUint(2)}, &commit_info, 1}; + Delta delta_add_in_edge2{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(4)}}, + EdgeRef{Gid::FromUint(2)}, &commit_info, 1}; + VertexContainer expected_vertices; + auto [vtx4, inserted4] = expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); + auto [vtx5, inserted5] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); + auto [vtx6, inserted6] = expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); + AddDeltaToDeltaChain(&*vtx4, &delta_add_out_edge2); + AddDeltaToDeltaChain(&*vtx5, &delta_add_in_edge1); + AddDeltaToDeltaChain(&*vtx6, &delta_add_in_edge2); + + AssertEqVertexContainer(splitted_data.vertices, expected_vertices); } TEST_F(ShardSplitTest, TestBasicSplitBeforeCommit) { From 742393cd70021c5b6745f2686e6541da0d4c5e31 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 24 Jan 2023 20:49:04 +0100 Subject: 
[PATCH 23/79] Fix index split --- src/storage/v3/splitter.cpp | 31 ++++++++++-------- src/storage/v3/splitter.hpp | 25 ++++++++++----- tests/unit/storage_v3_shard_split.cpp | 46 +++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 22 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 5cbc84f50..24472f29d 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -25,6 +25,7 @@ #include "storage/v3/shard.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex.hpp" +#include "utils/logging.hpp" namespace memgraph::storage::v3 { @@ -53,13 +54,6 @@ SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional< data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges); - // if (data.edges) { - // return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), - // std::move(*data.edges), std::move(data.transactions), config_, - // name_id_mapper_.GetIdToNameMap()); - // } - // return std::make_unique<Shard>(primary_label_, split_key, max_primary_key, schema_, std::move(data.vertices), - // std::move(data.transactions), config_, name_id_mapper_.GetIdToNameMap()); return data; } @@ -74,18 +68,26 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c const PrimaryKey &split_key) { // Collection of indices is here since it heavily depends on vertices // Old vertex pointer new entry pointer - std::map<LabelId, std::multimap<const Vertex *, LabelIndex::Entry *>> label_index_vertex_entry_map; - std::map<std::pair<LabelId, PropertyId>, std::multimap<const Vertex *, LabelPropertyIndex::Entry *>> + + std::map<LabelId, std::multimap<const Vertex *, const LabelIndex::IndexContainer::iterator>> + label_index_vertex_entry_map; + std::map<std::pair<LabelId, PropertyId>, + std::multimap<const Vertex *, 
const LabelPropertyIndex::IndexContainer::iterator>> label_property_vertex_entry_map; + data.label_indices = CollectIndexEntries<LabelIndex, LabelId>(indices_.label_index, split_key, label_index_vertex_entry_map); data.label_property_indices = CollectIndexEntries<LabelPropertyIndex, std::pair<LabelId, PropertyId>>( indices_.label_property_index, split_key, label_property_vertex_entry_map); - const auto update_indices = [](auto &index_map, const auto *old_vertex_ptr, auto &splitted_vertex_it) { - for (auto &[label, vertex_entry_mappings] : index_map) { + const auto update_indices = [](auto &entry_vertex_map, auto &updating_index, const auto *old_vertex_ptr, + auto &new_vertex_ptr) { + for ([[maybe_unused]] auto &[index_type, vertex_entry_mappings] : entry_vertex_map) { auto [it, end] = vertex_entry_mappings.equal_range(old_vertex_ptr); while (it != end) { - it->second->vertex = &*splitted_vertex_it; + auto entry_to_update = *it->second; + entry_to_update.vertex = &*new_vertex_ptr; + updating_index.at(index_type).erase(it->second); + updating_index.at(index_type).insert(std::move(entry_to_update)); ++it; } } @@ -101,10 +103,11 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c auto next_it = std::next(split_key_it); const auto &[splitted_vertex_it, inserted, node] = splitted_data.insert(vertices_.extract(split_key_it->first)); + MG_ASSERT(inserted, "Failed to extract vertex!"); // Update indices - update_indices(label_index_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); - update_indices(label_property_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); + update_indices(label_index_vertex_entry_map, data.label_indices, old_vertex_ptr, splitted_vertex_it); + // update_indices(label_property_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); split_key_it = next_it; } diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index c8fc3fcd2..cc9ca88fa 100644 --- a/src/storage/v3/splitter.hpp +++ 
b/src/storage/v3/splitter.hpp @@ -75,21 +75,30 @@ class Splitter final { requires utils::SameAsAnyOf<IndexMap, LabelPropertyIndex, LabelIndex> std::map<IndexType, typename IndexMap::IndexContainer> CollectIndexEntries( IndexMap &index, const PrimaryKey &split_key, - std::map<IndexType, std::multimap<const Vertex *, typename IndexMap::Entry *>> &vertex_entry_map) { + std::map<IndexType, std::multimap<const Vertex *, const typename IndexMap::IndexContainer::iterator>> + &vertex_entry_map) { if (index.Empty()) { return {}; } std::map<IndexType, typename IndexMap::IndexContainer> cloned_indices; - for (auto &[label_prop_pair, index] : index.GetIndex()) { - cloned_indices[label_prop_pair] = typename IndexMap::IndexContainer{}; - for (const auto &entry : index) { - if (entry.vertex->first > split_key) { + for (auto &[index_type_val, index] : index.GetIndex()) { + // cloned_indices[index_type_val] = typename IndexMap::IndexContainer{}; + + auto entry_it = index.begin(); + while (entry_it != index.end()) { + // We need to save the next pointer since the current one will be + // invalidated after extract + auto next_entry_it = std::next(entry_it); + if (entry_it->vertex->first > split_key) { // We get this entry - [[maybe_unused]] const auto [it, inserted, node] = - cloned_indices[label_prop_pair].insert(index.extract(entry)); - vertex_entry_map[label_prop_pair].insert({entry.vertex, &node.value()}); + [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = + cloned_indices[index_type_val].insert(index.extract(entry_it)); + MG_ASSERT(inserted, "Failed to extract index entry!"); + + vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); } + entry_it = next_entry_it; } } diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index f1c52dad6..3afda33ad 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -253,4 +253,50 @@ TEST_F(ShardSplitTest, 
TestBasicSplitWithCommitedAndOngoingTransactions) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); } +TEST_F(ShardSplitTest, TestBasicSplitWithLabelIndex) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(6)}, {}).HasError()); + acc.Commit(GetNextHlc()); + storage.CreateIndex(secondary_label); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 0); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 1); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); +} + +TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(1)}, {{secondary_property, PropertyValue(1)}}) + .HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(5)}, {{secondary_property, PropertyValue(21)}}) + .HasError()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(6)}, {{secondary_property, PropertyValue(22)}}) + .HasError()); + acc.Commit(GetNextHlc()); + 
storage.CreateIndex(secondary_label, secondary_property); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 0); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 1); +} + } // namespace memgraph::storage::v3::tests From 1db7447ac9a0b42baf5a86647b854392fdb3c95f Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 24 Jan 2023 21:02:33 +0100 Subject: [PATCH 24/79] Add big test --- src/storage/v3/indices.hpp | 4 ++-- tests/unit/storage_v3_shard_split.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 00d228df1..761157040 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -30,7 +30,6 @@ namespace memgraph::storage::v3 { struct Indices; class LabelIndex { - public: struct Entry { Vertex *vertex; uint64_t timestamp; @@ -41,6 +40,7 @@ class LabelIndex { bool operator==(const Entry &rhs) const { return vertex == rhs.vertex && timestamp == rhs.timestamp; } }; + public: using IndexContainer = std::set<Entry>; LabelIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) @@ -130,7 +130,6 @@ class LabelIndex { }; class LabelPropertyIndex { - public: struct Entry { PropertyValue value; Vertex *vertex; @@ -143,6 +142,7 @@ class LabelPropertyIndex { bool operator==(const PropertyValue &rhs) const; }; + public: using IndexContainer = std::set<Entry>; LabelPropertyIndex(Indices *indices, Config::Items config, const VertexValidator &vertex_validator) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 3afda33ad..2b5fc575d 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -299,4 +299,30 @@ 
TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { EXPECT_EQ(splitted_data.label_property_indices.size(), 1); } +TEST_F(ShardSplitTest, TestBigSplit) { + int pk{0}; + for (size_t i{0}; i < 100000; ++i) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, PropertyValue(pk)}}) + .HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(pk - 2)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(pk - 1)}}, edge_type_id, + Gid::FromUint(pk)) + .HasError()); + acc.Commit(GetNextHlc()); + } + storage.CreateIndex(secondary_label, secondary_property); + + auto splitted_data = storage.PerformSplit({PropertyValue(pk / 2)}); + + EXPECT_EQ(splitted_data.vertices.size(), 100000); + EXPECT_EQ(splitted_data.edges->size(), 50000); + EXPECT_EQ(splitted_data.transactions.size(), 50000); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 1); +} + } // namespace memgraph::storage::v3::tests From bd26af42711f8d218ec8e610d3eff0c2ef392a7d Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 10:35:23 +0100 Subject: [PATCH 25/79] Assert splitted part --- tests/unit/storage_v3_shard_split.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 2b5fc575d..548670480 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -15,6 +15,7 @@ #include <gmock/gmock.h> #include <gtest/gtest.h> +#include "coordinator/hybrid_logical_clock.hpp" #include "query/v2/requests.hpp" #include "storage/v3/delta.hpp" #include "storage/v3/id_types.hpp" @@ -54,6 +55,17 @@ class ShardSplitTest : public testing::Test { 
last_hlc.coordinator_wall_clock += std::chrono::seconds(1); return last_hlc; } + + void AssertSplittedShard(SplitData &&splitted_data, const int split_value) { + auto shard = Shard::FromSplitData(std::move(splitted_data)); + auto acc = shard->Access(GetNextHlc()); + for (int i{0}; i < split_value; ++i) { + EXPECT_FALSE(acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD).has_value()); + } + for (int i{split_value}; i < split_value * 2; ++i) { + EXPECT_TRUE(acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD).has_value()); + } + } }; void AssertEqVertexContainer(const VertexContainer &actual, const VertexContainer &expected) { @@ -301,10 +313,10 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { TEST_F(ShardSplitTest, TestBigSplit) { int pk{0}; - for (size_t i{0}; i < 100000; ++i) { + for (int64_t i{0}; i < 100000; ++i) { auto acc = storage.Access(GetNextHlc()); EXPECT_FALSE( - acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, PropertyValue(pk)}}) + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, PropertyValue(i)}}) .HasError()); EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, {}).HasError()); @@ -316,13 +328,16 @@ TEST_F(ShardSplitTest, TestBigSplit) { } storage.CreateIndex(secondary_label, secondary_property); - auto splitted_data = storage.PerformSplit({PropertyValue(pk / 2)}); + const auto split_value = pk / 2; + auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}); EXPECT_EQ(splitted_data.vertices.size(), 100000); EXPECT_EQ(splitted_data.edges->size(), 50000); EXPECT_EQ(splitted_data.transactions.size(), 50000); EXPECT_EQ(splitted_data.label_indices.size(), 0); EXPECT_EQ(splitted_data.label_property_indices.size(), 1); + + AssertSplittedShard(std::move(splitted_data), split_value); } } // namespace memgraph::storage::v3::tests From 3e23437f146364f85eeea952c6ecd45fc99da81d Mon Sep 17 00:00:00 2001 From: jbajic 
<jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 10:43:05 +0100 Subject: [PATCH 26/79] Add comments --- src/storage/v3/splitter.cpp | 19 +++++++++---------- src/storage/v3/splitter.hpp | 5 ++--- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 24472f29d..4b6a09d0a 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -68,7 +68,6 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c const PrimaryKey &split_key) { // Collection of indices is here since it heavily depends on vertices // Old vertex pointer new entry pointer - std::map<LabelId, std::multimap<const Vertex *, const LabelIndex::IndexContainer::iterator>> label_index_vertex_entry_map; std::map<std::pair<LabelId, PropertyId>, @@ -79,6 +78,7 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c CollectIndexEntries<LabelIndex, LabelId>(indices_.label_index, split_key, label_index_vertex_entry_map); data.label_property_indices = CollectIndexEntries<LabelPropertyIndex, std::pair<LabelId, PropertyId>>( indices_.label_property_index, split_key, label_property_vertex_entry_map); + // This is needed to replace old vertex pointers in index entries with new ones const auto update_indices = [](auto &entry_vertex_map, auto &updating_index, const auto *old_vertex_ptr, auto &new_vertex_ptr) { for ([[maybe_unused]] auto &[index_type, vertex_entry_mappings] : entry_vertex_map) { @@ -96,7 +96,7 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c VertexContainer splitted_data; auto split_key_it = vertices_.find(split_key); while (split_key_it != vertices_.end()) { - // Go through deltas and pick up transactions start_id + // Go through deltas and pick up transactions start_id/commit_id ScanDeltas(collected_transactions_, split_key_it->second.delta); const auto *old_vertex_ptr = &*split_key_it; @@ -107,7 +107,7 @@ VertexContainer 
Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c // Update indices update_indices(label_index_vertex_entry_map, data.label_indices, old_vertex_ptr, splitted_vertex_it); - // update_indices(label_property_vertex_entry_map, old_vertex_ptr, splitted_vertex_it); + update_indices(label_property_vertex_entry_map, data.label_property_indices, old_vertex_ptr, splitted_vertex_it); split_key_it = next_it; } @@ -122,14 +122,13 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte } EdgeContainer splitted_edges; const auto split_vertex_edges = [&](const auto &edges_ref) { - // This is safe since if properties_on_edges is true, the this must be a - // ptr + // This is safe since if properties_on_edges is true, the this must be a ptr for (const auto &edge_ref : edges_ref) { auto *edge = std::get<2>(edge_ref).ptr; const auto &other_vtx = std::get<1>(edge_ref); ScanDeltas(collected_transactions_, edge->delta); - // Check if src and dest edge are both on splitted shard - // so we know if we should remove orphan edge + // Check if src and dest edge are both on splitted shard so we know if we + // should remove orphan edge, or make a clone if (other_vtx.primary_key >= split_key) { // Remove edge from shard splitted_edges.insert(edges_.extract(edge->gid)); @@ -183,8 +182,7 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta != nullptr) { - // Align delta, while ignoring deltas whose transactions have commited, - // or aborted + // Align deltas which belong to cloned transaction, skip others if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { auto *found_delta_it = &*std::ranges::find_if( cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas, @@ -208,7 +206,8 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr } case 
PreviousPtr::Type::VERTEX: { // What if the vertex is already moved to garbage collection... - // Make test when you have deleted vertex + // TODO(jbajic) Maybe revisit when we apply Garbage collection with new + // transaction management system auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); cloned_delta->prev.Set(cloned_vertex); break; diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index cc9ca88fa..2549a280c 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -81,17 +81,16 @@ class Splitter final { return {}; } + // Cloned index entries will contain new index entry iterators, but old + // vertices address which need to be adjusted after extracting vertices std::map<IndexType, typename IndexMap::IndexContainer> cloned_indices; for (auto &[index_type_val, index] : index.GetIndex()) { - // cloned_indices[index_type_val] = typename IndexMap::IndexContainer{}; - auto entry_it = index.begin(); while (entry_it != index.end()) { // We need to save the next pointer since the current one will be // invalidated after extract auto next_entry_it = std::next(entry_it); if (entry_it->vertex->first > split_key) { - // We get this entry [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = cloned_indices[index_type_val].insert(index.extract(entry_it)); MG_ASSERT(inserted, "Failed to extract index entry!"); From 0e517bb9f84219c39ac8c056359452ecaabd3aae Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 11:54:02 +0100 Subject: [PATCH 27/79] Make split shard benchmark --- src/storage/v3/splitter.cpp | 2 +- tests/benchmark/CMakeLists.txt | 3 + tests/benchmark/storage_v3_split.cpp | 100 +++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 tests/benchmark/storage_v3_split.cpp diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 4b6a09d0a..eb5a29d34 100644 --- a/src/storage/v3/splitter.cpp +++ 
b/src/storage/v3/splitter.cpp @@ -213,7 +213,7 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr break; } case PreviousPtr::Type::EDGE: { - // TODO Case when there are no properties on edge is not handled + // TODO(jbajic) Case when there are no properties on edge is not handled auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); cloned_delta->prev.Set(&cloned_edge->second); break; diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt index c7cededd8..306cf9661 100644 --- a/tests/benchmark/CMakeLists.txt +++ b/tests/benchmark/CMakeLists.txt @@ -79,3 +79,6 @@ target_link_libraries(${test_prefix}data_structures_contains mg-utils mg-storage add_benchmark(data_structures_remove.cpp) target_link_libraries(${test_prefix}data_structures_remove mg-utils mg-storage-v3) + +add_benchmark(storage_v3_split.cpp) +target_link_libraries(${test_prefix}storage_v3_split mg-storage-v3 mg-query-v2) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp new file mode 100644 index 000000000..9b4ed5c0e --- /dev/null +++ b/tests/benchmark/storage_v3_split.cpp @@ -0,0 +1,100 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include <cstdint> +#include <optional> +#include <vector> + +#include <benchmark/benchmark.h> +#include <gflags/gflags.h> + +#include "storage/v3/id_types.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/property_value.hpp" +#include "storage/v3/shard.hpp" +#include "storage/v3/vertex.hpp" +#include "storage/v3/vertex_id.hpp" + +namespace memgraph::benchmark { + +class ShardSplitBenchmark : public ::benchmark::Fixture { + protected: + using PrimaryKey = storage::v3::PrimaryKey; + using PropertyId = storage::v3::PropertyId; + using PropertyValue = storage::v3::PropertyValue; + using LabelId = storage::v3::LabelId; + using EdgeTypeId = storage::v3::EdgeTypeId; + using Shard = storage::v3::Shard; + using VertexId = storage::v3::VertexId; + using Gid = storage::v3::Gid; + + void SetUp(const ::benchmark::State &state) override { + storage.emplace(primary_label, min_pk, std::nullopt, schema_property_vector); + storage->StoreMapping( + {{1, "label"}, {2, "property"}, {3, "edge_property"}, {4, "secondary_label"}, {5, "secondary_prop"}}); + } + + void TearDown(const ::benchmark::State &) override { storage = std::nullopt; } + + const PropertyId primary_property{PropertyId::FromUint(2)}; + const PropertyId secondary_property{PropertyId::FromUint(5)}; + std::vector<storage::v3::SchemaProperty> schema_property_vector = { + storage::v3::SchemaProperty{primary_property, common::SchemaType::INT}}; + const std::vector<PropertyValue> min_pk{PropertyValue{0}}; + const LabelId primary_label{LabelId::FromUint(1)}; + const LabelId secondary_label{LabelId::FromUint(4)}; + const EdgeTypeId edge_type_id{EdgeTypeId::FromUint(3)}; + std::optional<Shard> storage; + + coordinator::Hlc last_hlc{0, io::Time{}}; + + coordinator::Hlc GetNextHlc() { + ++last_hlc.logical_id; + last_hlc.coordinator_wall_clock += std::chrono::seconds(1); + return last_hlc; + } +}; + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) { + std::random_device r; + 
std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + + for (int64_t i{0}; i < state.range(0); ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}); + } +} + +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) + ->RangeMultiplier(10) + ->Range(100'000, 100'000'000) + ->Unit(::benchmark::kMillisecond); + +} // namespace memgraph::benchmark + +BENCHMARK_MAIN(); From 1b64a45f10327ca4c77d71be4aa96e33dbd5479a Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 12:03:36 +0100 Subject: [PATCH 28/79] Add split parameter into config --- src/coordinator/shard_map.cpp | 2 +- src/coordinator/shard_map.hpp | 2 +- src/query/v2/request_router.hpp | 2 +- src/storage/v3/bindings/db_accessor.hpp | 2 +- src/storage/v3/config.hpp | 4 ++++ src/storage/v3/mvcc.hpp | 2 +- src/storage/v3/property_value.hpp | 2 +- src/storage/v3/shard.cpp | 2 +- 8 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/coordinator/shard_map.cpp b/src/coordinator/shard_map.cpp index e9757953e..f38e6f823 100644 --- a/src/coordinator/shard_map.cpp +++ b/src/coordinator/shard_map.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/coordinator/shard_map.hpp b/src/coordinator/shard_map.hpp index aa59bbf6c..fc408e965 100644 --- a/src/coordinator/shard_map.hpp +++ b/src/coordinator/shard_map.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/v2/request_router.hpp b/src/query/v2/request_router.hpp index 44a423ef0..3dd2f164b 100644 --- a/src/query/v2/request_router.hpp +++ b/src/query/v2/request_router.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/storage/v3/bindings/db_accessor.hpp b/src/storage/v3/bindings/db_accessor.hpp index 852724b85..6392ed18f 100644 --- a/src/storage/v3/bindings/db_accessor.hpp +++ b/src/storage/v3/bindings/db_accessor.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/storage/v3/config.hpp b/src/storage/v3/config.hpp index 05179c6b0..868e82f21 100644 --- a/src/storage/v3/config.hpp +++ b/src/storage/v3/config.hpp @@ -30,6 +30,10 @@ struct Config { io::Duration reclamation_interval{}; } gc; + struct Split { + uint64_t max_shard_vertex_size{500'000}; + } split; + struct Items { bool properties_on_edges{true}; } items; diff --git a/src/storage/v3/mvcc.hpp b/src/storage/v3/mvcc.hpp index 9cd5ca3ff..6ce058d62 100644 --- a/src/storage/v3/mvcc.hpp +++ b/src/storage/v3/mvcc.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/storage/v3/property_value.hpp b/src/storage/v3/property_value.hpp index 80fbb409a..56902bed5 100644 --- a/src/storage/v3/property_value.hpp +++ b/src/storage/v3/property_value.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 566029b9c..d2ede3603 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1100,7 +1100,7 @@ void Shard::StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name) { } std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { - if (vertices_.size() > 10000000) { + if (vertices_.size() > config_.split.max_shard_vertex_size) { // Why should we care if the selected vertex is deleted auto mid_elem = vertices_.begin(); // mid_elem->first From 87718aa0845e62e0f5223df79233672e40dd2fc8 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 12:14:30 +0100 Subject: [PATCH 29/79] Remove changes on license date --- src/storage/v3/delta.hpp | 2 -- src/storage/v3/indices.hpp | 4 ++-- src/storage/v3/shard_worker.hpp | 10 +--------- tests/simulation/request_router.cpp | 2 +- tests/simulation/sharded_map.cpp | 2 +- tests/simulation/test_cluster.hpp | 2 +- tests/unit/high_density_shard_create_scan.cpp | 2 +- tests/unit/machine_manager.cpp | 2 +- tests/unit/storage_v3_indices.cpp | 2 +- 9 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index cc8132dc3..40bbb521d 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -14,8 +14,6 @@ #include <cstdint> #include <memory> -#include <boost/uuid/uuid.hpp> - #include "storage/v3/edge_ref.hpp" #include "storage/v3/id_types.hpp" #include "storage/v3/property_value.hpp" diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 761157040..78a3ee072 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -120,7 +120,7 @@ class LabelIndex { [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } - 
std::map<LabelId, IndexContainer> &GetIndex() { return index_; } + std::map<LabelId, IndexContainer> &GetIndex() noexcept { return index_; } private: std::map<LabelId, IndexContainer> index_; @@ -239,7 +239,7 @@ class LabelPropertyIndex { [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } - std::map<std::pair<LabelId, PropertyId>, IndexContainer> &GetIndex() { return index_; } + std::map<std::pair<LabelId, PropertyId>, IndexContainer> &GetIndex() noexcept { return index_; } private: std::map<std::pair<LabelId, PropertyId>, IndexContainer> index_; diff --git a/src/storage/v3/shard_worker.hpp b/src/storage/v3/shard_worker.hpp index e3d57964f..547aa0a6f 100644 --- a/src/storage/v3/shard_worker.hpp +++ b/src/storage/v3/shard_worker.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -173,14 +173,6 @@ class ShardWorker { auto &rsm = rsm_map_.at(uuid); Time next_for_uuid = rsm.Cron(); - // Check if shard should split - // if (const auto split_info = rsm.ShouldSplit(); split_info) { - // Request split from coordinator - // split_point => middle pk - // shard_id => uuid - // shard_version => - // } - cron_schedule_.pop(); cron_schedule_.push(std::make_pair(next_for_uuid, uuid)); } else { diff --git a/tests/simulation/request_router.cpp b/tests/simulation/request_router.cpp index 037674b66..4248e7876 100644 --- a/tests/simulation/request_router.cpp +++ b/tests/simulation/request_router.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/simulation/sharded_map.cpp b/tests/simulation/sharded_map.cpp index 0b7d10d3d..d27858abc 100644 --- a/tests/simulation/sharded_map.cpp +++ b/tests/simulation/sharded_map.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/simulation/test_cluster.hpp b/tests/simulation/test_cluster.hpp index 791b45faa..2e8bdf92f 100644 --- a/tests/simulation/test_cluster.hpp +++ b/tests/simulation/test_cluster.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/unit/high_density_shard_create_scan.cpp b/tests/unit/high_density_shard_create_scan.cpp index 4a90b98b2..9fabf6ccc 100644 --- a/tests/unit/high_density_shard_create_scan.cpp +++ b/tests/unit/high_density_shard_create_scan.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/unit/machine_manager.cpp b/tests/unit/machine_manager.cpp index 6cc6a3ff1..74b7d3863 100644 --- a/tests/unit/machine_manager.cpp +++ b/tests/unit/machine_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/unit/storage_v3_indices.cpp b/tests/unit/storage_v3_indices.cpp index 2f84f1558..620878fcf 100644 --- a/tests/unit/storage_v3_indices.cpp +++ b/tests/unit/storage_v3_indices.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2022 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source From 412b8c862f288624bcb8c4952f51178416556c8c Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 13:24:41 +0100 Subject: [PATCH 30/79] Add benchmark with gc --- tests/benchmark/storage_v3_split.cpp | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 9b4ed5c0e..4204bbb2c 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -90,9 +90,42 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) } } +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State &state) { + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + + for (int64_t i{0}; i < state.range(0); ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, 
{PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}); + } +} + BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) ->RangeMultiplier(10) - ->Range(100'000, 100'000'000) + ->Range(100'000, 1'000'000) + ->Unit(::benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) + ->RangeMultiplier(10) + ->Range(100'000, 1'000'000) ->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark From 185b87ec85b5a8aaeb6f66d9ae696c9708d01bf6 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 13:33:58 +0100 Subject: [PATCH 31/79] Add final comments --- src/storage/v3/shard.cpp | 3 +-- src/storage/v3/splitter.cpp | 6 +++--- src/storage/v3/splitter.hpp | 6 +++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index d2ede3603..cb46efc34 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1101,9 +1101,7 @@ void Shard::StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name) { std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { if (vertices_.size() > config_.split.max_shard_vertex_size) { - // Why should we care if the selected vertex is deleted auto mid_elem = vertices_.begin(); - // mid_elem->first std::ranges::advance(mid_elem, static_cast<VertexContainer::difference_type>(vertices_.size() / 2)); return SplitInfo{shard_version_, mid_elem->first}; } @@ -1111,6 +1109,7 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { } SplitData Shard::PerformSplit(const PrimaryKey &split_key) { + ++shard_version_; return shard_splitter_.SplitShard(split_key, max_primary_key_); } diff --git a/src/storage/v3/splitter.cpp 
b/src/storage/v3/splitter.cpp index eb5a29d34..d89392e13 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -206,14 +206,14 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr } case PreviousPtr::Type::VERTEX: { // What if the vertex is already moved to garbage collection... - // TODO(jbajic) Maybe revisit when we apply Garbage collection with new - // transaction management system + // TODO(jbajic) Maybe revisit when we apply Garbage collection with + // new transaction management system auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); cloned_delta->prev.Set(cloned_vertex); break; } case PreviousPtr::Type::EDGE: { - // TODO(jbajic) Case when there are no properties on edge is not handled + // We can never be here if we have properties on edge disabled auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); cloned_delta->prev.Set(&cloned_edge->second); break; diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 2549a280c..d584ab6d0 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -106,9 +106,9 @@ class Splitter final { static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); - static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); From 75b5da9f0707ce9bbe07b8dfffe8f10dc6145e97 Mon Sep 17 00:00:00 2001 From: jbajic 
<jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 15:58:56 +0100 Subject: [PATCH 32/79] Add split in shard rsm --- src/query/v2/requests.hpp | 11 +++++++++++ src/storage/v3/shard.cpp | 20 ++++++++++++-------- src/storage/v3/shard.hpp | 8 ++++---- src/storage/v3/shard_rsm.hpp | 16 ++++++++++++++-- src/storage/v3/splitter.cpp | 7 +++++-- src/storage/v3/splitter.hpp | 5 ++++- tests/benchmark/storage_v3_split.cpp | 4 ++-- tests/unit/storage_v3_shard_split.cpp | 14 +++++++------- 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/query/v2/requests.hpp b/src/query/v2/requests.hpp index 2335fea7d..9a0fca2ce 100644 --- a/src/query/v2/requests.hpp +++ b/src/query/v2/requests.hpp @@ -12,6 +12,7 @@ #pragma once #include <chrono> +#include <cstdint> #include <iostream> #include <map> #include <memory> @@ -570,6 +571,16 @@ struct CommitResponse { std::optional<ShardError> error; }; +struct SplitInfo { + PrimaryKey split_key; + uint64_t shard_version; +}; + +struct PerformSplitDataInfo { + PrimaryKey split_key; + uint64_t shard_version; +}; + using ReadRequests = std::variant<ExpandOneRequest, GetPropertiesRequest, ScanVerticesRequest>; using ReadResponses = std::variant<ExpandOneResponse, GetPropertiesResponse, ScanVerticesResponse>; diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index cb46efc34..f8a7e3862 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -342,12 +342,13 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, - const std::unordered_map<uint64_t, std::string> &id_to_name) + const std::unordered_map<uint64_t, std::string> &id_to_name, const uint64_t shard_version) : 
primary_label_{primary_label}, min_primary_key_{min_primary_key}, max_primary_key_{max_primary_key}, vertices_(std::move(vertices)), edges_(std::move(edges)), + shard_version_(shard_version), schema_validator_{schemas_, name_id_mapper_}, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, @@ -363,11 +364,12 @@ Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<Pr Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, - const std::unordered_map<uint64_t, std::string> &id_to_name) + const std::unordered_map<uint64_t, std::string> &id_to_name, const uint64_t shard_version) : primary_label_{primary_label}, min_primary_key_{min_primary_key}, max_primary_key_{max_primary_key}, vertices_(std::move(vertices)), + shard_version_(shard_version), schema_validator_{schemas_, name_id_mapper_}, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, @@ -386,11 +388,12 @@ std::unique_ptr<Shard> Shard::FromSplitData(SplitData &&split_data) { if (split_data.config.items.properties_on_edges) [[likely]] { return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, split_data.schema, std::move(split_data.vertices), std::move(*split_data.edges), - std::move(split_data.transactions), split_data.config, split_data.id_to_name); + std::move(split_data.transactions), split_data.config, split_data.id_to_name, + split_data.shard_version); } return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, split_data.schema, std::move(split_data.vertices), std::move(split_data.transactions), - split_data.config, split_data.id_to_name); + split_data.config, 
split_data.id_to_name, split_data.shard_version); } Shard::Accessor::Accessor(Shard &shard, Transaction &transaction) @@ -1103,14 +1106,15 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { if (vertices_.size() > config_.split.max_shard_vertex_size) { auto mid_elem = vertices_.begin(); std::ranges::advance(mid_elem, static_cast<VertexContainer::difference_type>(vertices_.size() / 2)); - return SplitInfo{shard_version_, mid_elem->first}; + return SplitInfo{mid_elem->first, shard_version_}; } return std::nullopt; } -SplitData Shard::PerformSplit(const PrimaryKey &split_key) { - ++shard_version_; - return shard_splitter_.SplitShard(split_key, max_primary_key_); +SplitData Shard::PerformSplit(const PrimaryKey &split_key, const uint64_t shard_version) { + shard_version_ = shard_version; + max_primary_key_ = split_key; + return shard_splitter_.SplitShard(split_key, max_primary_key_, shard_version); } bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 02ba7a2da..8072b1bff 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -177,8 +177,8 @@ struct SchemasInfo { }; struct SplitInfo { - uint64_t shard_version; PrimaryKey split_point; + uint64_t shard_version; }; /// Structure used to return information about the storage. 
@@ -200,12 +200,12 @@ class Shard final { Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, EdgeContainer &&edges, std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, - const std::unordered_map<uint64_t, std::string> &id_to_name); + const std::unordered_map<uint64_t, std::string> &id_to_name, uint64_t shard_version); Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<PrimaryKey> max_primary_key, std::vector<SchemaProperty> schema, VertexContainer &&vertices, std::map<uint64_t, std::unique_ptr<Transaction>> &&start_logical_id_to_transaction, const Config &config, - const std::unordered_map<uint64_t, std::string> &id_to_name); + const std::unordered_map<uint64_t, std::string> &id_to_name, uint64_t shard_version); Shard(const Shard &) = delete; Shard(Shard &&) noexcept = delete; @@ -381,7 +381,7 @@ class Shard final { std::optional<SplitInfo> ShouldSplit() const noexcept; - SplitData PerformSplit(const PrimaryKey &split_key); + SplitData PerformSplit(const PrimaryKey &split_key, uint64_t shard_version); private: Transaction &GetTransaction(coordinator::Hlc start_timestamp, IsolationLevel isolation_level); diff --git a/src/storage/v3/shard_rsm.hpp b/src/storage/v3/shard_rsm.hpp index ba284a3ca..8a5533c3e 100644 --- a/src/storage/v3/shard_rsm.hpp +++ b/src/storage/v3/shard_rsm.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -18,6 +18,7 @@ #include <openssl/ec.h> #include "query/v2/requests.hpp" #include "storage/v3/shard.hpp" +#include "storage/v3/value_conversions.hpp" #include "storage/v3/vertex_accessor.hpp" namespace memgraph::storage::v3 { @@ -42,7 +43,18 @@ class ShardRsm { public: explicit ShardRsm(std::unique_ptr<Shard> &&shard) : shard_(std::move(shard)){}; - std::optional<SplitInfo> ShouldSplit() const noexcept { return shard_->ShouldSplit(); } + std::optional<msgs::SplitInfo> ShouldSplit() const noexcept { + auto split_info = shard_->ShouldSplit(); + if (split_info) { + return msgs::SplitInfo{conversions::ConvertValueVector(split_info->split_point), split_info->shard_version}; + } + return std::nullopt; + } + + std::unique_ptr<Shard> PerformSplit(msgs::PerformSplitDataInfo perform_split) const noexcept { + return Shard::FromSplitData( + shard_->PerformSplit(conversions::ConvertPropertyVector(perform_split.split_key), perform_split.shard_version)); + } // NOLINTNEXTLINE(readability-convert-member-functions-to-static) msgs::ReadResponses Read(msgs::ReadRequests requests) { diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index d89392e13..c57de0f6e 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -11,6 +11,7 @@ #include "storage/v3/splitter.hpp" +#include <cstdint> #include <map> #include <memory> #include <optional> @@ -41,13 +42,15 @@ Splitter::Splitter(const LabelId primary_label, VertexContainer &vertices, EdgeC schema_(schema), name_id_mapper_(name_id_mapper) {} -SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key) { +SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key, + const uint64_t shard_version) { SplitData 
data{.primary_label = primary_label_, .min_primary_key = split_key, .max_primary_key = max_primary_key, .schema = schema_, .config = config_, - .id_to_name = name_id_mapper_.GetIdToNameMap()}; + .id_to_name = name_id_mapper_.GetIdToNameMap(), + .shard_version = shard_version}; std::set<uint64_t> collected_transactions_; data.vertices = CollectVertices(data, collected_transactions_, split_key); diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index d584ab6d0..a726b11da 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -10,6 +10,7 @@ // licenses/APL.txt. #pragma once +#include <cstdint> #include <map> #include <memory> #include <optional> @@ -38,6 +39,7 @@ struct SplitData { std::vector<SchemaProperty> schema; Config config; std::unordered_map<uint64_t, std::string> id_to_name; + uint64_t shard_version; VertexContainer vertices; std::optional<EdgeContainer> edges; @@ -58,7 +60,8 @@ class Splitter final { Splitter operator=(Splitter &&) noexcept = delete; ~Splitter() = default; - SplitData SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key); + SplitData SplitShard(const PrimaryKey &split_key, const std::optional<PrimaryKey> &max_primary_key, + uint64_t shard_version); private: VertexContainer CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_start_id, diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 4204bbb2c..fce34ef4a 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -86,7 +86,7 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) acc.Commit(GetNextHlc()); } for (auto _ : state) { - auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}); + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); } } @@ -114,7 +114,7 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, 
BigDataSplitWithGc)(::benchmark::State & } storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); for (auto _ : state) { - auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}); + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); } } diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 548670480..7307fdce0 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -136,7 +136,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { auto current_hlc = GetNextHlc(); acc.Commit(current_hlc); - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 0); EXPECT_EQ(splitted_data.transactions.size(), 1); @@ -179,7 +179,7 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { auto current_hlc = GetNextHlc(); acc.Commit(current_hlc); - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); EXPECT_EQ(splitted_data.transactions.size(), 1); @@ -226,7 +226,7 @@ TEST_F(ShardSplitTest, TestBasicSplitBeforeCommit) { VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) .HasError()); - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); EXPECT_EQ(splitted_data.transactions.size(), 1); @@ -257,7 +257,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithCommitedAndOngoingTransactions) { VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) .HasError()); - auto splitted_data = 
storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 2); EXPECT_EQ(splitted_data.transactions.size(), 2); @@ -276,7 +276,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelIndex) { acc.Commit(GetNextHlc()); storage.CreateIndex(secondary_label); - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 0); @@ -302,7 +302,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { acc.Commit(GetNextHlc()); storage.CreateIndex(secondary_label, secondary_property); - auto splitted_data = storage.PerformSplit({PropertyValue(4)}); + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 3); EXPECT_EQ(splitted_data.edges->size(), 0); @@ -329,7 +329,7 @@ TEST_F(ShardSplitTest, TestBigSplit) { storage.CreateIndex(secondary_label, secondary_property); const auto split_value = pk / 2; - auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}); + auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); EXPECT_EQ(splitted_data.vertices.size(), 100000); EXPECT_EQ(splitted_data.edges->size(), 50000); From fbf4c0adee12955ac95770832d8d32d5594be943 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 16:01:44 +0100 Subject: [PATCH 33/79] Assert edges --- tests/unit/storage_v3_shard_split.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 7307fdce0..fd32eb0ef 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -63,7 +63,9 @@ class ShardSplitTest : public testing::Test { 
EXPECT_FALSE(acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD).has_value()); } for (int i{split_value}; i < split_value * 2; ++i) { - EXPECT_TRUE(acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD).has_value()); + const auto vtx = acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD); + ASSERT_TRUE(vtx.has_value()); + EXPECT_TRUE(vtx->InEdges(View::OLD)->size() == 1 || vtx->OutEdges(View::OLD)->size() == 1); } } }; From a30095854e63d9c3a581d89a7fba9d197811340f Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 25 Jan 2023 16:29:16 +0100 Subject: [PATCH 34/79] Add benchmark with specific number of transactions --- tests/benchmark/storage_v3_split.cpp | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index fce34ef4a..dd32d6cdc 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -118,6 +118,37 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State & } } +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::benchmark::State &state) { + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + + for (int64_t i{0}; i < state.range(0); ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + if (i == state.range(0) - state.range(1)) { + 
storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + } + } + + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); + } +} + BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) ->RangeMultiplier(10) ->Range(100'000, 1'000'000) @@ -128,6 +159,13 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) ->Range(100'000, 1'000'000) ->Unit(::benchmark::kMillisecond); +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) + ->Args({100'000, 1'000}) + ->Args({100'000, 10'000}) + ->Args({1'000'000, 1'000}) + ->Args({1'000'000, 10'000}) + ->Unit(::benchmark::kMillisecond); + } // namespace memgraph::benchmark BENCHMARK_MAIN(); From dee88fd7a310dcd77f6e27f3a5a66674ee27aa2c Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 26 Jan 2023 14:26:24 +0100 Subject: [PATCH 35/79] Fix delta not cloning id --- src/storage/v3/delta.hpp | 54 ++++++++++++--------------- src/storage/v3/mvcc.hpp | 6 +-- src/storage/v3/splitter.cpp | 9 +++-- src/storage/v3/transaction.hpp | 19 +++++----- tests/unit/storage_v3_shard_split.cpp | 50 +++++++++++++++++-------- 5 files changed, 76 insertions(+), 62 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 40bbb521d..c4ffc33e1 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -28,6 +28,11 @@ struct Edge; struct Delta; struct CommitInfo; +inline uint64_t GetNextDeltaUUID() noexcept { + static uint64_t uuid{0}; + return ++uuid; +} + // This class stores one of three pointers (`Delta`, `Vertex` and `Edge`) // without using additional memory for storing the type. The type is stored in // the pointer itself in the lower bits. 
All of those structures contain large @@ -130,11 +135,6 @@ inline bool operator==(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer inline bool operator!=(const PreviousPtr::Pointer &a, const PreviousPtr::Pointer &b) { return !(a == b); } -inline uint64_t GetNextDeltaUUID() noexcept { - static uint64_t uuid{0}; - return ++uuid; -} - struct Delta { enum class Action : uint8_t { // Used for both Vertex and Edge @@ -164,62 +164,54 @@ struct Delta { struct RemoveInEdgeTag {}; struct RemoveOutEdgeTag {}; - Delta(DeleteObjectTag /*unused*/, CommitInfo *commit_info, uint64_t command_id) - : action(Action::DELETE_OBJECT), uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id) {} + Delta(DeleteObjectTag /*unused*/, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) + : action(Action::DELETE_OBJECT), uuid(delta_id), commit_info(commit_info), command_id(command_id) {} - Delta(RecreateObjectTag /*unused*/, CommitInfo *commit_info, uint64_t command_id) - : action(Action::RECREATE_OBJECT), uuid(GetNextDeltaUUID()), commit_info(commit_info), command_id(command_id) {} + Delta(RecreateObjectTag /*unused*/, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) + : action(Action::RECREATE_OBJECT), uuid(delta_id), commit_info(commit_info), command_id(command_id) {} - Delta(AddLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t command_id) - : action(Action::ADD_LABEL), - uuid(GetNextDeltaUUID()), - commit_info(commit_info), - command_id(command_id), - label(label) {} + Delta(AddLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) + : action(Action::ADD_LABEL), uuid(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} - Delta(RemoveLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t command_id) - : action(Action::REMOVE_LABEL), - uuid(GetNextDeltaUUID()), - commit_info(commit_info), - command_id(command_id), - label(label) {} + 
Delta(RemoveLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) + : action(Action::REMOVE_LABEL), uuid(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} Delta(SetPropertyTag /*unused*/, PropertyId key, const PropertyValue &value, CommitInfo *commit_info, - uint64_t command_id) + uint64_t delta_id, uint64_t command_id) : action(Action::SET_PROPERTY), - uuid(GetNextDeltaUUID()), + uuid(delta_id), commit_info(commit_info), command_id(command_id), property({key, value}) {} Delta(AddInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, - uint64_t command_id) + uint64_t delta_id, uint64_t command_id) : action(Action::ADD_IN_EDGE), - uuid(GetNextDeltaUUID()), + uuid(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} Delta(AddOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, - uint64_t command_id) + uint64_t delta_id, uint64_t command_id) : action(Action::ADD_OUT_EDGE), - uuid(GetNextDeltaUUID()), + uuid(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} Delta(RemoveInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, - uint64_t command_id) + uint64_t delta_id, uint64_t command_id) : action(Action::REMOVE_IN_EDGE), - uuid(GetNextDeltaUUID()), + uuid(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} Delta(RemoveOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, - uint64_t command_id) + uint64_t delta_id, uint64_t command_id) : action(Action::REMOVE_OUT_EDGE), - uuid(GetNextDeltaUUID()), + uuid(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), 
edge}) {} diff --git a/src/storage/v3/mvcc.hpp b/src/storage/v3/mvcc.hpp index 6ce058d62..f4e4cb81e 100644 --- a/src/storage/v3/mvcc.hpp +++ b/src/storage/v3/mvcc.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -108,7 +108,7 @@ inline bool PrepareForWrite(Transaction *transaction, TObj *object) { /// a `DELETE_OBJECT` delta). /// @throw std::bad_alloc inline Delta *CreateDeleteObjectDelta(Transaction *transaction) { - return &transaction->deltas.emplace_back(Delta::DeleteObjectTag(), transaction->commit_info.get(), + return &transaction->deltas.emplace_back(Delta::DeleteObjectTag(), transaction->commit_info.get(), GetNextDeltaUUID(), transaction->command_id); } @@ -119,7 +119,7 @@ template <typename TObj, class... Args> requires utils::SameAsAnyOf<TObj, Edge, Vertex> inline void CreateAndLinkDelta(Transaction *transaction, TObj *object, Args &&...args) { auto delta = &transaction->deltas.emplace_back(std::forward<Args>(args)..., transaction->commit_info.get(), - transaction->command_id); + GetNextDeltaUUID(), transaction->command_id); auto *delta_holder = GetDeltaHolder(object); // The operations are written in such order so that both `next` and `prev` diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index c57de0f6e..375d3aa60 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -181,16 +181,19 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr // NOTE It is important that the order of delta lists is in same order auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); - while (delta_it != transaction.deltas.end() && cloned_delta_it != cloned_transaction.deltas.end()) { + while (delta_it != 
transaction.deltas.end()) { const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta != nullptr) { // Align deltas which belong to cloned transaction, skip others if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { - auto *found_delta_it = &*std::ranges::find_if( + const auto end_it = + cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas.end(); + auto found_delta_it = std::ranges::find_if( cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas, [delta](const auto &elem) { return elem.uuid == delta->uuid; }); - MG_ASSERT(found_delta_it, "Delta with given uuid must exist!"); + MG_ASSERT(found_delta_it != end_it, "Delta with given uuid must exist!"); + cloned_delta->next = &*found_delta_it; } else { delta = delta->next; diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 9b13d9c7b..66269e935 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -69,36 +69,37 @@ struct Transaction { for (const auto &delta : deltas) { switch (delta.action) { case Delta::Action::DELETE_OBJECT: - copied_deltas.emplace_back(Delta::DeleteObjectTag{}, commit_info, command_id); + copied_deltas.emplace_back(Delta::DeleteObjectTag{}, commit_info, delta.uuid, command_id); break; case Delta::Action::RECREATE_OBJECT: - copied_deltas.emplace_back(Delta::RecreateObjectTag{}, commit_info, command_id); + copied_deltas.emplace_back(Delta::RecreateObjectTag{}, commit_info, delta.uuid, command_id); break; case Delta::Action::ADD_LABEL: - copied_deltas.emplace_back(Delta::AddLabelTag{}, delta.label, commit_info, command_id); + copied_deltas.emplace_back(Delta::AddLabelTag{}, delta.label, commit_info, delta.uuid, command_id); break; case Delta::Action::REMOVE_LABEL: - copied_deltas.emplace_back(Delta::RemoveLabelTag{}, delta.label, commit_info, command_id); + 
copied_deltas.emplace_back(Delta::RemoveLabelTag{}, delta.label, commit_info, delta.uuid, command_id); break; case Delta::Action::ADD_IN_EDGE: copied_deltas.emplace_back(Delta::AddInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, command_id); + delta.vertex_edge.edge, commit_info, delta.uuid, command_id); break; case Delta::Action::ADD_OUT_EDGE: copied_deltas.emplace_back(Delta::AddOutEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, command_id); + delta.vertex_edge.edge, commit_info, delta.uuid, command_id); break; case Delta::Action::REMOVE_IN_EDGE: copied_deltas.emplace_back(Delta::RemoveInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, command_id); + delta.vertex_edge.edge, commit_info, delta.uuid, command_id); break; case Delta::Action::REMOVE_OUT_EDGE: copied_deltas.emplace_back(Delta::RemoveOutEdgeTag{}, delta.vertex_edge.edge_type, - delta.vertex_edge.vertex_id, delta.vertex_edge.edge, commit_info, command_id); + delta.vertex_edge.vertex_id, delta.vertex_edge.edge, commit_info, delta.uuid, + command_id); break; case Delta::Action::SET_PROPERTY: copied_deltas.emplace_back(Delta::SetPropertyTag{}, delta.property.key, delta.property.value, commit_info, - command_id); + delta.uuid, command_id); break; } } diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index fd32eb0ef..5878ec0ce 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -83,7 +83,9 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine auto *expected_delta = expected_it->second.delta; auto *actual_delta = actual_it->second.delta; while (expected_delta != nullptr) { + // TODO Enable this then fix delta id generator EXPECT_EQ(actual_delta->action, expected_delta->action); + // EXPECT_EQ(actual_delta->uuid, 
expected_delta->uuid); switch (expected_delta->action) { case Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: { @@ -146,18 +148,19 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; - Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 1}; - Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 2}; - Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 3}; - Delta delta_add_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 4}; - Delta delta_add_property{Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 4}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 5, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 6, 2}; + Delta delta_remove_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 8, 4}; + Delta delta_set_property{Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 7, 4}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 9, 3}; + VertexContainer expected_vertices; expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); auto [it, inserted] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); it->second.labels.push_back(secondary_label); - AddDeltaToDeltaChain(&*it, &delta_add_property); - AddDeltaToDeltaChain(&*it, &delta_add_label); + AddDeltaToDeltaChain(&*it, &delta_set_property); + AddDeltaToDeltaChain(&*it, &delta_remove_label); AssertEqVertexContainer(splitted_data.vertices, expected_vertices); } @@ -189,15 +192,30 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; - Delta 
delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 1}; - Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 1}; - Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 1}; - Delta delta_add_in_edge1{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(1)}}, - EdgeRef{Gid::FromUint(1)}, &commit_info, 1}; - Delta delta_add_out_edge2{Delta::RemoveOutEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(6)}}, - EdgeRef{Gid::FromUint(2)}, &commit_info, 1}; - Delta delta_add_in_edge2{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(4)}}, - EdgeRef{Gid::FromUint(2)}, &commit_info, 1}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 3, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 4, 1}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 5, 1}; + Delta delta_add_in_edge1{Delta::RemoveInEdgeTag{}, + edge_type_id, + VertexId{primary_label, {PropertyValue(1)}}, + EdgeRef{Gid::FromUint(1)}, + &commit_info, + 13, + 1}; + Delta delta_add_out_edge2{Delta::RemoveOutEdgeTag{}, + edge_type_id, + VertexId{primary_label, {PropertyValue(6)}}, + EdgeRef{Gid::FromUint(2)}, + &commit_info, + 20, + 1}; + Delta delta_add_in_edge2{Delta::RemoveInEdgeTag{}, + edge_type_id, + VertexId{primary_label, {PropertyValue(4)}}, + EdgeRef{Gid::FromUint(2)}, + &commit_info, + 15, + 1}; VertexContainer expected_vertices; auto [vtx4, inserted4] = expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); auto [vtx5, inserted5] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); From c1639ef77004dbaae5489d650271c01c74bbeab0 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 26 Jan 2023 14:57:30 +0100 Subject: [PATCH 36/79] Verify delta uuid --- tests/unit/storage_v3_shard_split.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 5878ec0ce..3d5954002 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -83,9 +83,8 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine auto *expected_delta = expected_it->second.delta; auto *actual_delta = actual_it->second.delta; while (expected_delta != nullptr) { - // TODO Enable this then fix delta id generator EXPECT_EQ(actual_delta->action, expected_delta->action); - // EXPECT_EQ(actual_delta->uuid, expected_delta->uuid); + EXPECT_EQ(actual_delta->uuid, expected_delta->uuid); switch (expected_delta->action) { case Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: { @@ -192,15 +191,15 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; - Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 3, 1}; - Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 4, 1}; - Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 5, 1}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 13, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 14, 1}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 15, 1}; Delta delta_add_in_edge1{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(1)}}, EdgeRef{Gid::FromUint(1)}, &commit_info, - 13, + 18, 1}; Delta delta_add_out_edge2{Delta::RemoveOutEdgeTag{}, edge_type_id, @@ -214,7 +213,7 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { VertexId{primary_label, {PropertyValue(4)}}, EdgeRef{Gid::FromUint(2)}, &commit_info, - 15, + 21, 1}; VertexContainer expected_vertices; auto [vtx4, inserted4] = expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); From be9600835ffdfcb3bc7bd5805f36418f37f03881 Mon Sep 17 00:00:00 
2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 26 Jan 2023 15:18:30 +0100 Subject: [PATCH 37/79] Make AdjustClonedTransaction static --- src/storage/v3/splitter.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index a726b11da..17e8265eb 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -109,9 +109,9 @@ class Splitter final { static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); - void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); From 78b5731a0e5bd68d877d609852738dab84ea1027 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 30 Jan 2023 10:54:44 +0100 Subject: [PATCH 38/79] Protect delta id --- src/storage/v3/delta.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index c4ffc33e1..95bd5d18a 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -20,6 +20,7 @@ #include "storage/v3/vertex.hpp" #include "storage/v3/vertex_id.hpp" #include "utils/logging.hpp" +#include "utils/synchronized.hpp" namespace memgraph::storage::v3 { @@ -29,8 +30,8 @@ struct Delta; struct CommitInfo; inline uint64_t GetNextDeltaUUID() noexcept { - static uint64_t uuid{0}; - return ++uuid; + static utils::Synchronized<uint64_t, utils::SpinLock> 
delta_id{0}; + return delta_id.WithLock([](auto id) { return id++; }); } // This class stores one of three pointers (`Delta`, `Vertex` and `Edge`) From 96ac7bd1c5c9a9e78d4183cc07f339313066ab50 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 30 Jan 2023 12:06:09 +0100 Subject: [PATCH 39/79] Move out index splitting --- src/storage/v3/indices.hpp | 67 ++++++++++++++++++++++++++++++++++--- src/storage/v3/splitter.cpp | 7 ++-- src/storage/v3/splitter.hpp | 33 ------------------ 3 files changed, 66 insertions(+), 41 deletions(-) diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 78a3ee072..56555c31e 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -18,6 +18,8 @@ #include <utility> #include "storage/v3/config.hpp" +#include "storage/v3/id_types.hpp" +#include "storage/v3/key_store.hpp" #include "storage/v3/property_value.hpp" #include "storage/v3/transaction.hpp" #include "storage/v3/vertex_accessor.hpp" @@ -40,6 +42,8 @@ class LabelIndex { bool operator==(const Entry &rhs) const { return vertex == rhs.vertex && timestamp == rhs.timestamp; } }; + using IndexType = LabelId; + public: using IndexContainer = std::set<Entry>; @@ -118,9 +122,36 @@ class LabelIndex { void Clear() { index_.clear(); } - [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } + std::map<IndexType, IndexContainer> SplitIndexEntries( + const PrimaryKey &split_key, + std::map<IndexType, std::multimap<const Vertex *, const IndexContainer::iterator>> &vertex_entry_map) { + if (index_.empty()) { + return {}; + } - std::map<LabelId, IndexContainer> &GetIndex() noexcept { return index_; } + // Cloned index entries will contain new index entry iterators, but old + // vertices address which need to be adjusted after extracting vertices + std::map<IndexType, IndexContainer> cloned_indices; + for (auto &[index_type_val, index] : index_) { + auto entry_it = index.begin(); + auto &cloned_indices_container = 
cloned_indices[index_type_val]; + while (entry_it != index.end()) { + // We need to save the next pointer since the current one will be + // invalidated after extract + auto next_entry_it = std::next(entry_it); + if (entry_it->vertex->first > split_key) { + [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = + cloned_indices_container.insert(index.extract(entry_it)); + MG_ASSERT(inserted, "Failed to extract index entry!"); + + vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); + } + entry_it = next_entry_it; + } + } + + return cloned_indices; + } private: std::map<LabelId, IndexContainer> index_; @@ -141,6 +172,7 @@ class LabelPropertyIndex { bool operator<(const PropertyValue &rhs) const; bool operator==(const PropertyValue &rhs) const; }; + using IndexType = std::pair<LabelId, PropertyId>; public: using IndexContainer = std::set<Entry>; @@ -237,9 +269,36 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } - [[nodiscard]] bool Empty() const noexcept { return index_.empty(); } + std::map<IndexType, IndexContainer> SplitIndexEntries( + const PrimaryKey &split_key, + std::map<IndexType, std::multimap<const Vertex *, const IndexContainer::iterator>> &vertex_entry_map) { + if (index_.empty()) { + return {}; + } - std::map<std::pair<LabelId, PropertyId>, IndexContainer> &GetIndex() noexcept { return index_; } + // Cloned index entries will contain new index entry iterators, but old + // vertices address which need to be adjusted after extracting vertices + std::map<IndexType, IndexContainer> cloned_indices; + for (auto &[index_type_val, index] : index_) { + auto entry_it = index.begin(); + auto &cloned_index_container = cloned_indices[index_type_val]; + while (entry_it != index.end()) { + // We need to save the next pointer since the current one will be + // invalidated after extract + auto next_entry_it = std::next(entry_it); + if (entry_it->vertex->first > split_key) { + [[maybe_unused]] const auto 
&[inserted_entry_it, inserted, node] = + cloned_index_container.insert(index.extract(entry_it)); + MG_ASSERT(inserted, "Failed to extract index entry!"); + + vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); + } + entry_it = next_entry_it; + } + } + + return cloned_indices; + } private: std::map<std::pair<LabelId, PropertyId>, IndexContainer> index_; diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 375d3aa60..1d36f4ff8 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -77,10 +77,9 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c std::multimap<const Vertex *, const LabelPropertyIndex::IndexContainer::iterator>> label_property_vertex_entry_map; - data.label_indices = - CollectIndexEntries<LabelIndex, LabelId>(indices_.label_index, split_key, label_index_vertex_entry_map); - data.label_property_indices = CollectIndexEntries<LabelPropertyIndex, std::pair<LabelId, PropertyId>>( - indices_.label_property_index, split_key, label_property_vertex_entry_map); + data.label_indices = indices_.label_index.SplitIndexEntries(split_key, label_index_vertex_entry_map); + data.label_property_indices = + indices_.label_property_index.SplitIndexEntries(split_key, label_property_vertex_entry_map); // This is needed to replace old vertex pointers in index entries with new ones const auto update_indices = [](auto &entry_vertex_map, auto &updating_index, const auto *old_vertex_ptr, auto &new_vertex_ptr) { diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 17e8265eb..7afa3422c 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -74,39 +74,6 @@ class Splitter final { const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); - template <typename IndexMap, typename IndexType> - requires utils::SameAsAnyOf<IndexMap, LabelPropertyIndex, LabelIndex> - 
std::map<IndexType, typename IndexMap::IndexContainer> CollectIndexEntries( - IndexMap &index, const PrimaryKey &split_key, - std::map<IndexType, std::multimap<const Vertex *, const typename IndexMap::IndexContainer::iterator>> - &vertex_entry_map) { - if (index.Empty()) { - return {}; - } - - // Cloned index entries will contain new index entry iterators, but old - // vertices address which need to be adjusted after extracting vertices - std::map<IndexType, typename IndexMap::IndexContainer> cloned_indices; - for (auto &[index_type_val, index] : index.GetIndex()) { - auto entry_it = index.begin(); - while (entry_it != index.end()) { - // We need to save the next pointer since the current one will be - // invalidated after extract - auto next_entry_it = std::next(entry_it); - if (entry_it->vertex->first > split_key) { - [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = - cloned_indices[index_type_val].insert(index.extract(entry_it)); - MG_ASSERT(inserted, "Failed to extract index entry!"); - - vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); - } - entry_it = next_entry_it; - } - } - - return cloned_indices; - } - static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, From c2ec41cb37547fe4b642830b2efa99352b834418 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 30 Jan 2023 12:39:22 +0100 Subject: [PATCH 40/79] Rename ptr to iterator --- src/storage/v3/splitter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 1d36f4ff8..3fb865ee8 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -82,12 +82,12 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c indices_.label_property_index.SplitIndexEntries(split_key, 
label_property_vertex_entry_map); // This is needed to replace old vertex pointers in index entries with new ones const auto update_indices = [](auto &entry_vertex_map, auto &updating_index, const auto *old_vertex_ptr, - auto &new_vertex_ptr) { + auto &new_vertex_it) { for ([[maybe_unused]] auto &[index_type, vertex_entry_mappings] : entry_vertex_map) { auto [it, end] = vertex_entry_mappings.equal_range(old_vertex_ptr); while (it != end) { auto entry_to_update = *it->second; - entry_to_update.vertex = &*new_vertex_ptr; + entry_to_update.vertex = &*new_vertex_it; updating_index.at(index_type).erase(it->second); updating_index.at(index_type).insert(std::move(entry_to_update)); ++it; From 97cab6650b0be34807b294be5b8fe733486232c5 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 31 Jan 2023 15:21:39 +0100 Subject: [PATCH 41/79] Remove redundant index update --- src/storage/v3/delta.hpp | 2 +- src/storage/v3/indices.hpp | 13 ++++------- src/storage/v3/splitter.cpp | 32 ++------------------------- tests/unit/storage_v3_shard_split.cpp | 22 +++++++++--------- 4 files changed, 18 insertions(+), 51 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 95bd5d18a..7b8916f6d 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -31,7 +31,7 @@ struct CommitInfo; inline uint64_t GetNextDeltaUUID() noexcept { static utils::Synchronized<uint64_t, utils::SpinLock> delta_id{0}; - return delta_id.WithLock([](auto id) { return id++; }); + return delta_id.WithLock([](auto &id) { return id++; }); } // This class stores one of three pointers (`Delta`, `Vertex` and `Edge`) diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 56555c31e..99023183b 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -122,9 +122,7 @@ class LabelIndex { void Clear() { index_.clear(); } - std::map<IndexType, IndexContainer> SplitIndexEntries( - const PrimaryKey &split_key, - std::map<IndexType, 
std::multimap<const Vertex *, const IndexContainer::iterator>> &vertex_entry_map) { + std::map<IndexType, IndexContainer> SplitIndexEntries(const PrimaryKey &split_key) { if (index_.empty()) { return {}; } @@ -144,7 +142,7 @@ class LabelIndex { cloned_indices_container.insert(index.extract(entry_it)); MG_ASSERT(inserted, "Failed to extract index entry!"); - vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); + // vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); } entry_it = next_entry_it; } @@ -269,9 +267,7 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } - std::map<IndexType, IndexContainer> SplitIndexEntries( - const PrimaryKey &split_key, - std::map<IndexType, std::multimap<const Vertex *, const IndexContainer::iterator>> &vertex_entry_map) { + std::map<IndexType, IndexContainer> SplitIndexEntries(const PrimaryKey &split_key) { if (index_.empty()) { return {}; } @@ -290,8 +286,7 @@ class LabelPropertyIndex { [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = cloned_index_container.insert(index.extract(entry_it)); MG_ASSERT(inserted, "Failed to extract index entry!"); - - vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); + // vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); } entry_it = next_entry_it; } diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 3fb865ee8..28b8c3a83 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -69,31 +69,8 @@ void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, Delta *de VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &collected_transactions_, const PrimaryKey &split_key) { - // Collection of indices is here since it heavily depends on vertices - // Old vertex pointer new entry pointer - std::map<LabelId, std::multimap<const Vertex *, const 
LabelIndex::IndexContainer::iterator>> - label_index_vertex_entry_map; - std::map<std::pair<LabelId, PropertyId>, - std::multimap<const Vertex *, const LabelPropertyIndex::IndexContainer::iterator>> - label_property_vertex_entry_map; - - data.label_indices = indices_.label_index.SplitIndexEntries(split_key, label_index_vertex_entry_map); - data.label_property_indices = - indices_.label_property_index.SplitIndexEntries(split_key, label_property_vertex_entry_map); - // This is needed to replace old vertex pointers in index entries with new ones - const auto update_indices = [](auto &entry_vertex_map, auto &updating_index, const auto *old_vertex_ptr, - auto &new_vertex_it) { - for ([[maybe_unused]] auto &[index_type, vertex_entry_mappings] : entry_vertex_map) { - auto [it, end] = vertex_entry_mappings.equal_range(old_vertex_ptr); - while (it != end) { - auto entry_to_update = *it->second; - entry_to_update.vertex = &*new_vertex_it; - updating_index.at(index_type).erase(it->second); - updating_index.at(index_type).insert(std::move(entry_to_update)); - ++it; - } - } - }; + data.label_indices = indices_.label_index.SplitIndexEntries(split_key); + data.label_property_indices = indices_.label_property_index.SplitIndexEntries(split_key); VertexContainer splitted_data; auto split_key_it = vertices_.find(split_key); @@ -101,16 +78,11 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c // Go through deltas and pick up transactions start_id/commit_id ScanDeltas(collected_transactions_, split_key_it->second.delta); - const auto *old_vertex_ptr = &*split_key_it; auto next_it = std::next(split_key_it); const auto &[splitted_vertex_it, inserted, node] = splitted_data.insert(vertices_.extract(split_key_it->first)); MG_ASSERT(inserted, "Failed to extract vertex!"); - // Update indices - update_indices(label_index_vertex_entry_map, data.label_indices, old_vertex_ptr, splitted_vertex_it); - update_indices(label_property_vertex_entry_map, 
data.label_property_indices, old_vertex_ptr, splitted_vertex_it); - split_key_it = next_it; } return splitted_data; diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 3d5954002..7e7a917fe 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -147,11 +147,11 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; - Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 5, 1}; - Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 6, 2}; - Delta delta_remove_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 8, 4}; - Delta delta_set_property{Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 7, 4}; - Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 9, 3}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 4, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 5, 2}; + Delta delta_remove_label{Delta::RemoveLabelTag{}, secondary_label, &commit_info, 7, 4}; + Delta delta_set_property{Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 6, 4}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 8, 3}; VertexContainer expected_vertices; expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); @@ -191,29 +191,29 @@ TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { EXPECT_EQ(splitted_data.label_property_indices.size(), 0); CommitInfo commit_info{.start_or_commit_timestamp = current_hlc}; - Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 13, 1}; - Delta delta_delete2{Delta::DeleteObjectTag{}, &commit_info, 14, 1}; - Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 15, 1}; + Delta delta_delete1{Delta::DeleteObjectTag{}, &commit_info, 12, 1}; + Delta delta_delete2{Delta::DeleteObjectTag{}, 
&commit_info, 13, 1}; + Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 14, 1}; Delta delta_add_in_edge1{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(1)}}, EdgeRef{Gid::FromUint(1)}, &commit_info, - 18, + 17, 1}; Delta delta_add_out_edge2{Delta::RemoveOutEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(6)}}, EdgeRef{Gid::FromUint(2)}, &commit_info, - 20, + 19, 1}; Delta delta_add_in_edge2{Delta::RemoveInEdgeTag{}, edge_type_id, VertexId{primary_label, {PropertyValue(4)}}, EdgeRef{Gid::FromUint(2)}, &commit_info, - 21, + 20, 1}; VertexContainer expected_vertices; auto [vtx4, inserted4] = expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); From c9290777f51bfd9f8b28c16b07af4f0e582eb3e9 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 11:04:37 +0100 Subject: [PATCH 42/79] Fix delta assign --- src/storage/v3/splitter.cpp | 58 +++++++++++++++++++++++++++---------- src/storage/v3/splitter.hpp | 3 ++ 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 28b8c3a83..d61b7a0a9 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -152,24 +152,26 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr // NOTE It is important that the order of delta lists is in same order auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); + while (delta_it != transaction.deltas.end()) { const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; - while (delta != nullptr) { - // Align deltas which belong to cloned transaction, skip others - if (cloned_transactions.contains(delta->commit_info->start_or_commit_timestamp.logical_id)) { - const auto end_it = - cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas.end(); - auto found_delta_it = 
std::ranges::find_if( - cloned_transactions.at(delta->commit_info->start_or_commit_timestamp.logical_id)->deltas, - [delta](const auto &elem) { return elem.uuid == delta->uuid; }); - MG_ASSERT(found_delta_it != end_it, "Delta with given uuid must exist!"); + while (delta->next != nullptr) { + // Align next ptr + // Get cloned_delta->next transaction, using delta->next original transaction + AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); + // auto cloned_transaction_it = + // cloned_transactions.find(delta->next->commit_info->start_or_commit_timestamp.logical_id); + // MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + // // Find cloned delta in delta list of cloned transaction + // auto found_cloned_delta_it = std::ranges::find_if( + // cloned_transaction_it->second->deltas, [delta](const auto &elem) { return elem.uuid == delta->next->uuid; + // }); + // MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), + // "Delta with given uuid must exist!"); + + // cloned_delta->next = &*found_cloned_delta_it; - cloned_delta->next = &*found_delta_it; - } else { - delta = delta->next; - continue; - } // Align prev ptr auto ptr = delta->prev.Get(); switch (ptr.type) { @@ -178,8 +180,18 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr break; } case PreviousPtr::Type::DELTA: { - cloned_delta->prev.Set(ptr.delta); - break; + auto cloned_transaction_it = + cloned_transactions.find(ptr.delta->commit_info->start_or_commit_timestamp.logical_id); + MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + // Find cloned delta in delta list of cloned transaction + auto found_cloned_delta_it = + std::ranges::find_if(cloned_transaction_it->second->deltas, + [delta = ptr.delta](const auto &elem) { return elem.uuid == delta->next->uuid; }); + MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), + "Delta 
with given uuid must exist!"); + + // cloned_delta->next = &*found_cloned_delta_it; + ptr.delta.break; } case PreviousPtr::Type::VERTEX: { // What if the vertex is already moved to garbage collection... @@ -208,4 +220,18 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr "Both iterators must be exhausted!"); } +void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { + // Get cloned_delta->next transaction, using delta->next original transaction + auto cloned_transaction_it = + cloned_transactions.find(original.next->commit_info->start_or_commit_timestamp.logical_id); + MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + // Find cloned delta in delta list of cloned transaction + auto found_cloned_delta_it = + std::ranges::find_if(cloned_transaction_it->second->deltas, + [&original](const auto &elem) { return elem.uuid == original.next->uuid; }); + MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), "Delta with given uuid must exist!"); + cloned.next = &*found_cloned_delta_it; +} + } // namespace memgraph::storage::v3 diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 7afa3422c..06af62ffe 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -83,6 +83,9 @@ class Splitter final { void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + static void AdjustDeltaNext(const Delta &original, Delta &cloned, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions); + const LabelId primary_label_; VertexContainer &vertices_; EdgeContainer &edges_; From 852e98fb516f6486d3621a7831d170dc11fbf44b Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 14:00:08 +0100 Subject: [PATCH 43/79] 
Fix split logic --- src/storage/v3/splitter.cpp | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index d61b7a0a9..e5a8b9dd0 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -160,17 +160,6 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr // Align next ptr // Get cloned_delta->next transaction, using delta->next original transaction AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); - // auto cloned_transaction_it = - // cloned_transactions.find(delta->next->commit_info->start_or_commit_timestamp.logical_id); - // MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); - // // Find cloned delta in delta list of cloned transaction - // auto found_cloned_delta_it = std::ranges::find_if( - // cloned_transaction_it->second->deltas, [delta](const auto &elem) { return elem.uuid == delta->next->uuid; - // }); - // MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), - // "Delta with given uuid must exist!"); - - // cloned_delta->next = &*found_cloned_delta_it; // Align prev ptr auto ptr = delta->prev.Get(); @@ -180,18 +169,24 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr break; } case PreviousPtr::Type::DELTA: { - auto cloned_transaction_it = - cloned_transactions.find(ptr.delta->commit_info->start_or_commit_timestamp.logical_id); + // Same as for deltas except don't align next but prev + // auto cloned_transaction_it = + // cloned_transactions.find(ptr.delta->commit_info->start_or_commit_timestamp.logical_id); + auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&ptr](const auto &elem) { + return elem.second->start_timestamp == ptr.delta->commit_info->start_or_commit_timestamp || + elem.second->commit_info->start_or_commit_timestamp == + 
ptr.delta->commit_info->start_or_commit_timestamp; + }); MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); // Find cloned delta in delta list of cloned transaction auto found_cloned_delta_it = std::ranges::find_if(cloned_transaction_it->second->deltas, - [delta = ptr.delta](const auto &elem) { return elem.uuid == delta->next->uuid; }); + [delta = ptr.delta](const auto &elem) { return elem.uuid == delta->uuid; }); MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), "Delta with given uuid must exist!"); - // cloned_delta->next = &*found_cloned_delta_it; - ptr.delta.break; + cloned_delta->prev.Set(&*found_cloned_delta_it); + break; } case PreviousPtr::Type::VERTEX: { // What if the vertex is already moved to garbage collection... @@ -223,8 +218,11 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { // Get cloned_delta->next transaction, using delta->next original transaction - auto cloned_transaction_it = - cloned_transactions.find(original.next->commit_info->start_or_commit_timestamp.logical_id); + auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&original](const auto &elem) { + return elem.second->start_timestamp == original.next->commit_info->start_or_commit_timestamp || + elem.second->commit_info->start_or_commit_timestamp == original.next->commit_info->start_or_commit_timestamp; + }); + MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); // Find cloned delta in delta list of cloned transaction auto found_cloned_delta_it = From 8f660b0d4432b44b82452acf736baa16d33326cf Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 15:05:04 +0100 Subject: [PATCH 44/79] Add noexcept to hlc --- src/coordinator/hybrid_logical_clock.hpp | 
16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coordinator/hybrid_logical_clock.hpp b/src/coordinator/hybrid_logical_clock.hpp index 38571dc20..2fe4880b7 100644 --- a/src/coordinator/hybrid_logical_clock.hpp +++ b/src/coordinator/hybrid_logical_clock.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -23,17 +23,17 @@ namespace memgraph::coordinator { using Time = memgraph::io::Time; /// Hybrid-logical clock -struct Hlc { - uint64_t logical_id = 0; +struct Hlc final { + uint64_t logical_id{0}; Time coordinator_wall_clock = Time::min(); auto operator<=>(const Hlc &other) const { return logical_id <=> other.logical_id; } - bool operator==(const Hlc &other) const = default; - bool operator<(const Hlc &other) const = default; - bool operator==(const uint64_t other) const { return logical_id == other; } - bool operator<(const uint64_t other) const { return logical_id < other; } - bool operator>=(const uint64_t other) const { return logical_id >= other; } + bool operator==(const Hlc &other) const noexcept = default; + bool operator<(const Hlc &other) const noexcept = default; + bool operator==(const uint64_t other) const noexcept { return logical_id == other; } + bool operator<(const uint64_t other) const noexcept { return logical_id < other; } + bool operator>=(const uint64_t other) const noexcept { return logical_id >= other; } friend std::ostream &operator<<(std::ostream &in, const Hlc &hlc) { auto wall_clock = std::chrono::system_clock::to_time_t(hlc.coordinator_wall_clock); From bf44618b7a18b43b10b6b9e78cc21cd5b0c0b4c8 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 15:50:03 +0100 Subject: [PATCH 45/79] Compare prev ptr as well --- src/storage/v3/splitter.cpp | 4 +- 
tests/unit/storage_v3_shard_split.cpp | 115 +++++++++++++++++++------- 2 files changed, 87 insertions(+), 32 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index e5a8b9dd0..363da3e5f 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -154,6 +154,8 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end()) { + // Only start iterating through deltas that are head of delta chain + // => they have prev pointer to vertex/edge const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta->next != nullptr) { @@ -170,8 +172,6 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr } case PreviousPtr::Type::DELTA: { // Same as for deltas except don't align next but prev - // auto cloned_transaction_it = - // cloned_transactions.find(ptr.delta->commit_info->start_or_commit_timestamp.logical_id); auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&ptr](const auto &elem) { return elem.second->start_timestamp == ptr.delta->commit_info->start_or_commit_timestamp || elem.second->commit_info->start_or_commit_timestamp == diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 7e7a917fe..22e849d23 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -82,9 +82,12 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine auto *expected_delta = expected_it->second.delta; auto *actual_delta = actual_it->second.delta; + // This asserts delta chain while (expected_delta != nullptr) { EXPECT_EQ(actual_delta->action, expected_delta->action); EXPECT_EQ(actual_delta->uuid, expected_delta->uuid); + EXPECT_NE(&actual_delta, &expected_delta) << "Deltas must be different objects!"; + switch (expected_delta->action) { case 
Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: { @@ -105,6 +108,33 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine break; } } + + const auto expected_prev = expected_delta->prev.Get(); + const auto actual_prev = actual_delta->prev.Get(); + switch (expected_prev.type) { + case PreviousPtr::Type::NULLPTR: { + ASSERT_EQ(actual_prev.type, PreviousPtr::Type::NULLPTR) << "Expected type is nullptr!"; + break; + } + case PreviousPtr::Type::DELTA: { + ASSERT_EQ(actual_prev.type, PreviousPtr::Type::DELTA) << "Expected type is delta!"; + EXPECT_EQ(actual_prev.delta->action, expected_prev.delta->action); + EXPECT_EQ(actual_prev.delta->uuid, expected_prev.delta->uuid); + EXPECT_NE(actual_prev.delta, expected_prev.delta) << "Prev deltas must be different objects!"; + break; + } + case v3::PreviousPtr::Type::EDGE: { + ASSERT_EQ(actual_prev.type, PreviousPtr::Type::EDGE) << "Expected type is edge!"; + EXPECT_EQ(actual_prev.edge->gid, expected_prev.edge->gid); + break; + } + case v3::PreviousPtr::Type::VERTEX: { + ASSERT_EQ(actual_prev.type, PreviousPtr::Type::VERTEX) << "Expected type is vertex!"; + EXPECT_EQ(actual_prev.vertex->first, expected_prev.vertex->first); + break; + } + } + expected_delta = expected_delta->next; actual_delta = actual_delta->next; } @@ -115,6 +145,17 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine } } +void AssertEqDeltaLists(const std::list<Delta> &actual, const std::list<Delta> &expected) { + ASSERT_EQ(actual.size(), expected.size()); + auto actual_it = actual.begin(); + auto expected_it = expected.begin(); + while (actual_it != actual.end()) { + EXPECT_EQ(actual_it->action, expected_it->action); + EXPECT_EQ(actual_it->uuid, expected_it->uuid); + EXPECT_NE(&*actual_it, &*expected_it) << "Deltas must be different objects!"; + } +} + void AddDeltaToDeltaChain(Vertex *object, Delta *new_delta) { auto *delta_holder = GetDeltaHolder(object); @@ -154,14 +195,27 @@ 
TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { Delta delta_delete3{Delta::DeleteObjectTag{}, &commit_info, 8, 3}; VertexContainer expected_vertices; - expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); - auto [it, inserted] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); - expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); - it->second.labels.push_back(secondary_label); - AddDeltaToDeltaChain(&*it, &delta_set_property); - AddDeltaToDeltaChain(&*it, &delta_remove_label); + auto [it_4, inserted1] = expected_vertices.emplace(PrimaryKey{PropertyValue{4}}, VertexData(&delta_delete1)); + delta_delete1.prev.Set(&*it_4); + auto [it_5, inserted2] = expected_vertices.emplace(PrimaryKey{PropertyValue{5}}, VertexData(&delta_delete2)); + delta_delete2.prev.Set(&*it_5); + auto [it_6, inserted3] = expected_vertices.emplace(PrimaryKey{PropertyValue{6}}, VertexData(&delta_delete3)); + delta_delete3.prev.Set(&*it_6); + it_5->second.labels.push_back(secondary_label); + AddDeltaToDeltaChain(&*it_5, &delta_set_property); + AddDeltaToDeltaChain(&*it_5, &delta_remove_label); AssertEqVertexContainer(splitted_data.vertices, expected_vertices); + + // This is to ensure that the transaction that we have don't point to invalid + // object on the other shard + std::list<Delta> expected_deltas; + expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 4, 1); + expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 5, 2); + expected_deltas.emplace_back(Delta::RemoveLabelTag{}, secondary_label, &commit_info, 7, 4); + expected_deltas.emplace_back(Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 6, 4); + expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 8, 3); + // AssertEqDeltaLists(splitted_data.transactions.begin()->second->deltas, expected_deltas); } TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { @@ 
-330,33 +384,34 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { EXPECT_EQ(splitted_data.label_property_indices.size(), 1); } -TEST_F(ShardSplitTest, TestBigSplit) { - int pk{0}; - for (int64_t i{0}; i < 100000; ++i) { - auto acc = storage.Access(GetNextHlc()); - EXPECT_FALSE( - acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, PropertyValue(i)}}) - .HasError()); - EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, {}).HasError()); +// TEST_F(ShardSplitTest, TestBigSplit) { +// int pk{0}; +// for (int64_t i{0}; i < 10'000; ++i) { +// auto acc = storage.Access(GetNextHlc()); +// EXPECT_FALSE( +// acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, +// PropertyValue(i)}}) +// .HasError()); +// EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, {}).HasError()); - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(pk - 2)}}, - VertexId{primary_label, PrimaryKey{PropertyValue(pk - 1)}}, edge_type_id, - Gid::FromUint(pk)) - .HasError()); - acc.Commit(GetNextHlc()); - } - storage.CreateIndex(secondary_label, secondary_property); +// EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(pk - 2)}}, +// VertexId{primary_label, PrimaryKey{PropertyValue(pk - 1)}}, edge_type_id, +// Gid::FromUint(pk)) +// .HasError()); +// acc.Commit(GetNextHlc()); +// } +// storage.CreateIndex(secondary_label, secondary_property); - const auto split_value = pk / 2; - auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); +// const auto split_value = pk / 2; +// auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); - EXPECT_EQ(splitted_data.vertices.size(), 100000); - EXPECT_EQ(splitted_data.edges->size(), 50000); - EXPECT_EQ(splitted_data.transactions.size(), 50000); - EXPECT_EQ(splitted_data.label_indices.size(), 0); - 
EXPECT_EQ(splitted_data.label_property_indices.size(), 1); +// EXPECT_EQ(splitted_data.vertices.size(), 100000); +// EXPECT_EQ(splitted_data.edges->size(), 50000); +// EXPECT_EQ(splitted_data.transactions.size(), 50000); +// EXPECT_EQ(splitted_data.label_indices.size(), 0); +// EXPECT_EQ(splitted_data.label_property_indices.size(), 1); - AssertSplittedShard(std::move(splitted_data), split_value); -} +// AssertSplittedShard(std::move(splitted_data), split_value); +// } } // namespace memgraph::storage::v3::tests From 4619a87e98033471562b5910624e0c261419d37b Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 16:07:16 +0100 Subject: [PATCH 46/79] Only follow head of chain --- src/storage/v3/splitter.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 363da3e5f..87e672da4 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -18,6 +18,7 @@ #include <set> #include "storage/v3/config.hpp" +#include "storage/v3/delta.hpp" #include "storage/v3/id_types.hpp" #include "storage/v3/indices.hpp" #include "storage/v3/key_store.hpp" @@ -154,6 +155,13 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end()) { + // We can safely ignore deltas which are not head of delta chain + if (delta_it->prev.Get().type == PreviousPtr::Type::DELTA || + delta_it->prev.Get().type == PreviousPtr::Type::NULLPTR) { + ++delta_it; + ++cloned_delta_it; + continue; + } // Only start iterating through deltas that are head of delta chain // => they have prev pointer to vertex/edge const auto *delta = &*delta_it; From a95bac65c6acdd851ac09ef1a521f7c0b52898c9 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 2 Feb 2023 16:39:00 +0100 Subject: [PATCH 47/79] Extract logic of checking delta skips --- src/storage/v3/splitter.cpp 
| 36 +++++++++++++++++++++++++----------- src/storage/v3/splitter.hpp | 8 +++++--- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 87e672da4..d54f8d1a1 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -56,7 +56,7 @@ SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional< std::set<uint64_t> collected_transactions_; data.vertices = CollectVertices(data, collected_transactions_, split_key); data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); - data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges); + data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges, split_key); return data; } @@ -121,7 +121,8 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte } std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( - const std::set<uint64_t> &collected_transactions_, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + const std::set<uint64_t> &collected_transactions_, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key) { std::map<uint64_t, std::unique_ptr<Transaction>> transactions; for (const auto &[commit_start, transaction] : start_logical_id_to_transaction_) { @@ -134,36 +135,49 @@ std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr - AdjustClonedTransactions(transactions, cloned_vertices, cloned_edges); + AdjustClonedTransactions(transactions, cloned_vertices, cloned_edges, split_key); return transactions; } void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, 
EdgeContainer &cloned_edges) { + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key) { for (auto &[commit_start, cloned_transaction] : cloned_transactions) { AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, - cloned_vertices, cloned_edges); + cloned_vertices, cloned_edges, split_key); } } +inline bool IsDeltaHeadOfChain(const PreviousPtr::Type &delta_type) { + return delta_type == PreviousPtr::Type::VERTEX || delta_type == PreviousPtr::Type::EDGE; +} + +bool IsDelta(const PreviousPtr::Type &delta_type) { + return delta_type == PreviousPtr::Type::VERTEX || delta_type == PreviousPtr::Type::EDGE; +} + +bool DoesPrevPtrPointsToSplittedData(const PreviousPtr::Pointer &prev_ptr, const PrimaryKey &split_key) { + return prev_ptr.type == PreviousPtr::Type::VERTEX && prev_ptr.vertex->first < split_key; +} + void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges) { + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key) { // Align next and prev in deltas - // NOTE It is important that the order of delta lists is in same order auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end()) { // We can safely ignore deltas which are not head of delta chain - if (delta_it->prev.Get().type == PreviousPtr::Type::DELTA || - delta_it->prev.Get().type == PreviousPtr::Type::NULLPTR) { + // Dont' adjust delta chain that points to irrelevant data vertices/edges + if (const auto delta_prev = delta_it->prev.Get(); + !IsDeltaHeadOfChain(delta_prev.type) && !DoesPrevPtrPointsToSplittedData(delta_prev, split_key)) { ++delta_it; ++cloned_delta_it; continue; } - // Only start iterating 
through deltas that are head of delta chain - // => they have prev pointer to vertex/edge + const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; while (delta->next != nullptr) { diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 06af62ffe..2053303e3 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -72,16 +72,18 @@ class Splitter final { std::map<uint64_t, std::unique_ptr<Transaction>> CollectTransactions( const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges); + EdgeContainer &cloned_edges, const PrimaryKey &split_key); static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key); void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key); static void AdjustDeltaNext(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions); From 90bcdc4e2ba3f95741b3e7f7ec2ac227dcff1286 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 3 Feb 2023 13:42:28 +0100 Subject: [PATCH 48/79] Rename uuid to id --- src/storage/v3/delta.hpp | 22 +++++------ src/storage/v3/mvcc.hpp | 4 +- src/storage/v3/transaction.hpp | 18 ++++----- tests/unit/storage_v3_shard_split.cpp | 57 +++++++++++++-------------- 4 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp 
index 7b8916f6d..b8bea789d 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -29,7 +29,7 @@ struct Edge; struct Delta; struct CommitInfo; -inline uint64_t GetNextDeltaUUID() noexcept { +inline uint64_t GetNextDeltaId() noexcept { static utils::Synchronized<uint64_t, utils::SpinLock> delta_id{0}; return delta_id.WithLock([](auto &id) { return id++; }); } @@ -166,21 +166,21 @@ struct Delta { struct RemoveOutEdgeTag {}; Delta(DeleteObjectTag /*unused*/, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) - : action(Action::DELETE_OBJECT), uuid(delta_id), commit_info(commit_info), command_id(command_id) {} + : action(Action::DELETE_OBJECT), id(delta_id), commit_info(commit_info), command_id(command_id) {} Delta(RecreateObjectTag /*unused*/, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) - : action(Action::RECREATE_OBJECT), uuid(delta_id), commit_info(commit_info), command_id(command_id) {} + : action(Action::RECREATE_OBJECT), id(delta_id), commit_info(commit_info), command_id(command_id) {} Delta(AddLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) - : action(Action::ADD_LABEL), uuid(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} + : action(Action::ADD_LABEL), id(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} Delta(RemoveLabelTag /*unused*/, LabelId label, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) - : action(Action::REMOVE_LABEL), uuid(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} + : action(Action::REMOVE_LABEL), id(delta_id), commit_info(commit_info), command_id(command_id), label(label) {} Delta(SetPropertyTag /*unused*/, PropertyId key, const PropertyValue &value, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) : action(Action::SET_PROPERTY), - uuid(delta_id), + id(delta_id), commit_info(commit_info), command_id(command_id), 
property({key, value}) {} @@ -188,7 +188,7 @@ struct Delta { Delta(AddInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) : action(Action::ADD_IN_EDGE), - uuid(delta_id), + id(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -196,7 +196,7 @@ struct Delta { Delta(AddOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) : action(Action::ADD_OUT_EDGE), - uuid(delta_id), + id(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -204,7 +204,7 @@ struct Delta { Delta(RemoveInEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) : action(Action::REMOVE_IN_EDGE), - uuid(delta_id), + id(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -212,7 +212,7 @@ struct Delta { Delta(RemoveOutEdgeTag /*unused*/, EdgeTypeId edge_type, VertexId vertex_id, EdgeRef edge, CommitInfo *commit_info, uint64_t delta_id, uint64_t command_id) : action(Action::REMOVE_OUT_EDGE), - uuid(delta_id), + id(delta_id), commit_info(commit_info), command_id(command_id), vertex_edge({edge_type, std::move(vertex_id), edge}) {} @@ -242,7 +242,7 @@ struct Delta { } Action action; - uint64_t uuid; + uint64_t id; // TODO: optimize with in-place copy CommitInfo *commit_info; uint64_t command_id; diff --git a/src/storage/v3/mvcc.hpp b/src/storage/v3/mvcc.hpp index f4e4cb81e..797339f8b 100644 --- a/src/storage/v3/mvcc.hpp +++ b/src/storage/v3/mvcc.hpp @@ -108,7 +108,7 @@ inline bool PrepareForWrite(Transaction *transaction, TObj *object) { /// a `DELETE_OBJECT` delta). 
/// @throw std::bad_alloc inline Delta *CreateDeleteObjectDelta(Transaction *transaction) { - return &transaction->deltas.emplace_back(Delta::DeleteObjectTag(), transaction->commit_info.get(), GetNextDeltaUUID(), + return &transaction->deltas.emplace_back(Delta::DeleteObjectTag(), transaction->commit_info.get(), GetNextDeltaId(), transaction->command_id); } @@ -119,7 +119,7 @@ template <typename TObj, class... Args> requires utils::SameAsAnyOf<TObj, Edge, Vertex> inline void CreateAndLinkDelta(Transaction *transaction, TObj *object, Args &&...args) { auto delta = &transaction->deltas.emplace_back(std::forward<Args>(args)..., transaction->commit_info.get(), - GetNextDeltaUUID(), transaction->command_id); + GetNextDeltaId(), transaction->command_id); auto *delta_holder = GetDeltaHolder(object); // The operations are written in such order so that both `next` and `prev` diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 66269e935..8cb4225ee 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -69,37 +69,37 @@ struct Transaction { for (const auto &delta : deltas) { switch (delta.action) { case Delta::Action::DELETE_OBJECT: - copied_deltas.emplace_back(Delta::DeleteObjectTag{}, commit_info, delta.uuid, command_id); + copied_deltas.emplace_back(Delta::DeleteObjectTag{}, commit_info, delta.id, command_id); break; case Delta::Action::RECREATE_OBJECT: - copied_deltas.emplace_back(Delta::RecreateObjectTag{}, commit_info, delta.uuid, command_id); + copied_deltas.emplace_back(Delta::RecreateObjectTag{}, commit_info, delta.id, command_id); break; case Delta::Action::ADD_LABEL: - copied_deltas.emplace_back(Delta::AddLabelTag{}, delta.label, commit_info, delta.uuid, command_id); + copied_deltas.emplace_back(Delta::AddLabelTag{}, delta.label, commit_info, delta.id, command_id); break; case Delta::Action::REMOVE_LABEL: - copied_deltas.emplace_back(Delta::RemoveLabelTag{}, delta.label, commit_info, delta.uuid, 
command_id); + copied_deltas.emplace_back(Delta::RemoveLabelTag{}, delta.label, commit_info, delta.id, command_id); break; case Delta::Action::ADD_IN_EDGE: copied_deltas.emplace_back(Delta::AddInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, delta.uuid, command_id); + delta.vertex_edge.edge, commit_info, delta.id, command_id); break; case Delta::Action::ADD_OUT_EDGE: copied_deltas.emplace_back(Delta::AddOutEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, delta.uuid, command_id); + delta.vertex_edge.edge, commit_info, delta.id, command_id); break; case Delta::Action::REMOVE_IN_EDGE: copied_deltas.emplace_back(Delta::RemoveInEdgeTag{}, delta.vertex_edge.edge_type, delta.vertex_edge.vertex_id, - delta.vertex_edge.edge, commit_info, delta.uuid, command_id); + delta.vertex_edge.edge, commit_info, delta.id, command_id); break; case Delta::Action::REMOVE_OUT_EDGE: copied_deltas.emplace_back(Delta::RemoveOutEdgeTag{}, delta.vertex_edge.edge_type, - delta.vertex_edge.vertex_id, delta.vertex_edge.edge, commit_info, delta.uuid, + delta.vertex_edge.vertex_id, delta.vertex_edge.edge, commit_info, delta.id, command_id); break; case Delta::Action::SET_PROPERTY: copied_deltas.emplace_back(Delta::SetPropertyTag{}, delta.property.key, delta.property.value, commit_info, - delta.uuid, command_id); + delta.id, command_id); break; } } diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 22e849d23..69a69e52e 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -85,8 +85,7 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine // This asserts delta chain while (expected_delta != nullptr) { EXPECT_EQ(actual_delta->action, expected_delta->action); - EXPECT_EQ(actual_delta->uuid, expected_delta->uuid); - EXPECT_NE(&actual_delta, &expected_delta) << "Deltas 
must be different objects!"; + EXPECT_EQ(actual_delta->id, expected_delta->id); switch (expected_delta->action) { case Delta::Action::ADD_LABEL: @@ -119,8 +118,7 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine case PreviousPtr::Type::DELTA: { ASSERT_EQ(actual_prev.type, PreviousPtr::Type::DELTA) << "Expected type is delta!"; EXPECT_EQ(actual_prev.delta->action, expected_prev.delta->action); - EXPECT_EQ(actual_prev.delta->uuid, expected_prev.delta->uuid); - EXPECT_NE(actual_prev.delta, expected_prev.delta) << "Prev deltas must be different objects!"; + EXPECT_EQ(actual_prev.delta->id, expected_prev.delta->id); break; } case v3::PreviousPtr::Type::EDGE: { @@ -151,7 +149,7 @@ void AssertEqDeltaLists(const std::list<Delta> &actual, const std::list<Delta> & auto expected_it = expected.begin(); while (actual_it != actual.end()) { EXPECT_EQ(actual_it->action, expected_it->action); - EXPECT_EQ(actual_it->uuid, expected_it->uuid); + EXPECT_EQ(actual_it->id, expected_it->id); EXPECT_NE(&*actual_it, &*expected_it) << "Deltas must be different objects!"; } } @@ -384,34 +382,33 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { EXPECT_EQ(splitted_data.label_property_indices.size(), 1); } -// TEST_F(ShardSplitTest, TestBigSplit) { -// int pk{0}; -// for (int64_t i{0}; i < 10'000; ++i) { -// auto acc = storage.Access(GetNextHlc()); -// EXPECT_FALSE( -// acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, -// PropertyValue(i)}}) -// .HasError()); -// EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, {}).HasError()); +TEST_F(ShardSplitTest, TestBigSplit) { + int pk{0}; + for (int64_t i{0}; i < 10'000; ++i) { + auto acc = storage.Access(GetNextHlc()); + EXPECT_FALSE( + acc.CreateVertexAndValidate({secondary_label}, {PropertyValue(pk++)}, {{secondary_property, PropertyValue(i)}}) + .HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(pk++)}, 
{}).HasError()); -// EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(pk - 2)}}, -// VertexId{primary_label, PrimaryKey{PropertyValue(pk - 1)}}, edge_type_id, -// Gid::FromUint(pk)) -// .HasError()); -// acc.Commit(GetNextHlc()); -// } -// storage.CreateIndex(secondary_label, secondary_property); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(pk - 2)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(pk - 1)}}, edge_type_id, + Gid::FromUint(pk)) + .HasError()); + acc.Commit(GetNextHlc()); + } + storage.CreateIndex(secondary_label, secondary_property); -// const auto split_value = pk / 2; -// auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); + const auto split_value = pk / 2; + auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); -// EXPECT_EQ(splitted_data.vertices.size(), 100000); -// EXPECT_EQ(splitted_data.edges->size(), 50000); -// EXPECT_EQ(splitted_data.transactions.size(), 50000); -// EXPECT_EQ(splitted_data.label_indices.size(), 0); -// EXPECT_EQ(splitted_data.label_property_indices.size(), 1); + // EXPECT_EQ(splitted_data.vertices.size(), 100000); + // EXPECT_EQ(splitted_data.edges->size(), 50000); + // EXPECT_EQ(splitted_data.transactions.size(), 50000); + // EXPECT_EQ(splitted_data.label_indices.size(), 0); + // EXPECT_EQ(splitted_data.label_property_indices.size(), 1); -// AssertSplittedShard(std::move(splitted_data), split_value); -// } + AssertSplittedShard(std::move(splitted_data), split_value); +} } // namespace memgraph::storage::v3::tests From 954df64d1d7ee9b7f233c4373497bc8e19dd7f9a Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 3 Feb 2023 13:42:38 +0100 Subject: [PATCH 49/79] Add detla pruning --- src/storage/v3/splitter.cpp | 53 ++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 
d54f8d1a1..3e387fe4d 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -11,6 +11,7 @@ #include "storage/v3/splitter.hpp" +#include <algorithm> #include <cstdint> #include <map> #include <memory> @@ -139,9 +140,45 @@ std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( return transactions; } +void PruneDeltas(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, const PrimaryKey &split_key) { + // Remove delta chains wh + auto cloned_transaction_it = cloned_transactions.begin(); + while (cloned_transaction_it != cloned_transactions.end()) { + auto cloned_delta_it = cloned_transaction_it->second->deltas.begin(); + + if (const auto prev = cloned_delta_it->prev.Get(); + prev.type == PreviousPtr::Type::VERTEX && prev.vertex->first < split_key) { + // We can remove this delta chain + auto *current_next_delta = cloned_delta_it->next; + cloned_transaction_it->second->deltas.remove_if( + [cloned_delta_it](const auto &delta) { return delta.id == cloned_delta_it->id; }); + + while (current_next_delta != nullptr) { + auto *next_delta = current_next_delta->next; + // Find next delta transaction delta list + auto current_transaction_it = std::ranges::find_if( + cloned_transactions, [&start_or_commit_timestamp = cloned_delta_it->commit_info->start_or_commit_timestamp]( + const auto &transaction) { + return transaction.second->start_timestamp == start_or_commit_timestamp || + transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; + }); + // Remove it + current_transaction_it->second->deltas.remove_if( + [current_next_delta](const auto &delta) { return delta.id == current_next_delta->id; }); + + current_next_delta = next_delta; + } + } + + // while(cloned_delta_it != ) + } +} + void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key) 
{ + // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard + PruneDeltas(cloned_transactions, split_key); for (auto &[commit_start, cloned_transaction] : cloned_transactions) { AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, cloned_vertices, cloned_edges, split_key); @@ -152,10 +189,6 @@ inline bool IsDeltaHeadOfChain(const PreviousPtr::Type &delta_type) { return delta_type == PreviousPtr::Type::VERTEX || delta_type == PreviousPtr::Type::EDGE; } -bool IsDelta(const PreviousPtr::Type &delta_type) { - return delta_type == PreviousPtr::Type::VERTEX || delta_type == PreviousPtr::Type::EDGE; -} - bool DoesPrevPtrPointsToSplittedData(const PreviousPtr::Pointer &prev_ptr, const PrimaryKey &split_key) { return prev_ptr.type == PreviousPtr::Type::VERTEX && prev_ptr.vertex->first < split_key; } @@ -203,17 +236,14 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr // Find cloned delta in delta list of cloned transaction auto found_cloned_delta_it = std::ranges::find_if(cloned_transaction_it->second->deltas, - [delta = ptr.delta](const auto &elem) { return elem.uuid == delta->uuid; }); + [delta = ptr.delta](const auto &elem) { return elem.id == delta->id; }); MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), - "Delta with given uuid must exist!"); + "Delta with given id must exist!"); cloned_delta->prev.Set(&*found_cloned_delta_it); break; } case PreviousPtr::Type::VERTEX: { - // What if the vertex is already moved to garbage collection... 
- // TODO(jbajic) Maybe revisit when we apply Garbage collection with - // new transaction management system auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); cloned_delta->prev.Set(cloned_vertex); break; @@ -247,9 +277,8 @@ void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); // Find cloned delta in delta list of cloned transaction - auto found_cloned_delta_it = - std::ranges::find_if(cloned_transaction_it->second->deltas, - [&original](const auto &elem) { return elem.uuid == original.next->uuid; }); + auto found_cloned_delta_it = std::ranges::find_if( + cloned_transaction_it->second->deltas, [&original](const auto &elem) { return elem.id == original.next->id; }); MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), "Delta with given uuid must exist!"); cloned.next = &*found_cloned_delta_it; } From de15c9719cb15a02b7d164f8ff7e8c4a2f6806b6 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 6 Feb 2023 17:38:55 +0100 Subject: [PATCH 50/79] Fix delta chain prev pointer --- src/storage/v3/delta.hpp | 2 + src/storage/v3/indices.hpp | 3 - src/storage/v3/splitter.cpp | 155 +++++++++++++++----------- src/storage/v3/splitter.hpp | 4 + tests/unit/storage_v3_shard_split.cpp | 21 ++-- 5 files changed, 104 insertions(+), 81 deletions(-) diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index b8bea789d..8c1b85ab5 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -241,6 +241,8 @@ struct Delta { } } + friend bool operator==(const Delta &lhs, const Delta &rhs) noexcept { return lhs.id == rhs.id; } + Action action; uint64_t id; // TODO: optimize with in-place copy diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 99023183b..6023ea9e3 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -141,8 +141,6 @@ class LabelIndex { 
[[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = cloned_indices_container.insert(index.extract(entry_it)); MG_ASSERT(inserted, "Failed to extract index entry!"); - - // vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); } entry_it = next_entry_it; } @@ -286,7 +284,6 @@ class LabelPropertyIndex { [[maybe_unused]] const auto &[inserted_entry_it, inserted, node] = cloned_index_container.insert(index.extract(entry_it)); MG_ASSERT(inserted, "Failed to extract index entry!"); - // vertex_entry_map[index_type_val].insert({inserted_entry_it->vertex, inserted_entry_it}); } entry_it = next_entry_it; } diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 3e387fe4d..40c55c154 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -140,37 +140,50 @@ std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( return transactions; } -void PruneDeltas(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, const PrimaryKey &split_key) { - // Remove delta chains wh - auto cloned_transaction_it = cloned_transactions.begin(); - while (cloned_transaction_it != cloned_transactions.end()) { - auto cloned_delta_it = cloned_transaction_it->second->deltas.begin(); +void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + const PrimaryKey &split_key) { + // Remove delta chains that don't point to objects on splitted shard + auto cloned_delta_it = cloned_transaction.deltas.begin(); - if (const auto prev = cloned_delta_it->prev.Get(); - prev.type == PreviousPtr::Type::VERTEX && prev.vertex->first < split_key) { - // We can remove this delta chain - auto *current_next_delta = cloned_delta_it->next; - cloned_transaction_it->second->deltas.remove_if( - [cloned_delta_it](const auto &delta) { return delta.id == cloned_delta_it->id; }); + while (cloned_delta_it != cloned_transaction.deltas.end()) { 
+ const auto prev = cloned_delta_it->prev.Get(); + switch (prev.type) { + case PreviousPtr::Type::DELTA: + case PreviousPtr::Type::NULLPTR: + ++cloned_delta_it; + break; + case PreviousPtr::Type::VERTEX: { + if (prev.vertex->first < split_key) { + // We can remove this delta chain + auto *current_next_delta = cloned_delta_it->next; + cloned_delta_it = cloned_transaction.deltas.erase(cloned_delta_it); - while (current_next_delta != nullptr) { - auto *next_delta = current_next_delta->next; - // Find next delta transaction delta list - auto current_transaction_it = std::ranges::find_if( - cloned_transactions, [&start_or_commit_timestamp = cloned_delta_it->commit_info->start_or_commit_timestamp]( - const auto &transaction) { - return transaction.second->start_timestamp == start_or_commit_timestamp || - transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; - }); - // Remove it - current_transaction_it->second->deltas.remove_if( - [current_next_delta](const auto &delta) { return delta.id == current_next_delta->id; }); + while (current_next_delta != nullptr) { + auto *next_delta = current_next_delta->next; + // Find next delta transaction delta list + auto current_transaction_it = std::ranges::find_if( + cloned_transactions, + [&start_or_commit_timestamp = + current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { + return transaction.second->start_timestamp == start_or_commit_timestamp || + transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; + }); + MG_ASSERT(current_transaction_it != cloned_transactions.end(), "Error when pruning deltas!"); + // Remove it + current_transaction_it->second->deltas.remove_if( + [¤t_next_delta = *current_next_delta](const auto &delta) { return delta == current_next_delta; }); - current_next_delta = next_delta; + current_next_delta = next_delta; + } + } else { + ++cloned_delta_it; + } + break; } + case PreviousPtr::Type::EDGE: + 
++cloned_delta_it; + break; } - - // while(cloned_delta_it != ) } } @@ -178,11 +191,13 @@ void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Trans VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key) { // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard - PruneDeltas(cloned_transactions, split_key); for (auto &[commit_start, cloned_transaction] : cloned_transactions) { AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, cloned_vertices, cloned_edges, split_key); } + for (auto &[commit_start, cloned_transaction] : cloned_transactions) { + PruneDeltas(*cloned_transaction, cloned_transactions, split_key); + } } inline bool IsDeltaHeadOfChain(const PreviousPtr::Type &delta_type) { @@ -196,16 +211,14 @@ bool DoesPrevPtrPointsToSplittedData(const PreviousPtr::Pointer &prev_ptr, const void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key) { - // Align next and prev in deltas + const PrimaryKey & /*split_key*/) { auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); while (delta_it != transaction.deltas.end()) { // We can safely ignore deltas which are not head of delta chain // Dont' adjust delta chain that points to irrelevant data vertices/edges - if (const auto delta_prev = delta_it->prev.Get(); - !IsDeltaHeadOfChain(delta_prev.type) && !DoesPrevPtrPointsToSplittedData(delta_prev, split_key)) { + if (const auto delta_prev = delta_it->prev.Get(); !IsDeltaHeadOfChain(delta_prev.type)) { ++delta_it; ++cloned_delta_it; continue; @@ -215,50 +228,16 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr auto *cloned_delta = 
&*cloned_delta_it; while (delta->next != nullptr) { // Align next ptr - // Get cloned_delta->next transaction, using delta->next original transaction AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); // Align prev ptr - auto ptr = delta->prev.Get(); - switch (ptr.type) { - case PreviousPtr::Type::NULLPTR: { - // noop - break; - } - case PreviousPtr::Type::DELTA: { - // Same as for deltas except don't align next but prev - auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&ptr](const auto &elem) { - return elem.second->start_timestamp == ptr.delta->commit_info->start_or_commit_timestamp || - elem.second->commit_info->start_or_commit_timestamp == - ptr.delta->commit_info->start_or_commit_timestamp; - }); - MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); - // Find cloned delta in delta list of cloned transaction - auto found_cloned_delta_it = - std::ranges::find_if(cloned_transaction_it->second->deltas, - [delta = ptr.delta](const auto &elem) { return elem.id == delta->id; }); - MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), - "Delta with given id must exist!"); - - cloned_delta->prev.Set(&*found_cloned_delta_it); - break; - } - case PreviousPtr::Type::VERTEX: { - auto *cloned_vertex = &*cloned_vertices.find(ptr.vertex->first); - cloned_delta->prev.Set(cloned_vertex); - break; - } - case PreviousPtr::Type::EDGE: { - // We can never be here if we have properties on edge disabled - auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); - cloned_delta->prev.Set(&cloned_edge->second); - break; - } - }; + AdjustDeltaPrevPtr(*delta, *cloned_delta, cloned_transactions, cloned_vertices, cloned_edges); cloned_delta = cloned_delta->next; delta = delta->next; } + // Align prev ptr + AdjustDeltaPrevPtr(*delta, *cloned_delta, cloned_transactions, cloned_vertices, cloned_edges); ++delta_it; ++cloned_delta_it; @@ -283,4 +262,44 @@ void Splitter::AdjustDeltaNext(const 
Delta &original, Delta &cloned, cloned.next = &*found_cloned_delta_it; } +void Splitter::AdjustDeltaPrevPtr(const Delta &original, Delta &cloned, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer & /*cloned_vertices*/, EdgeContainer &cloned_edges) { + auto ptr = original.prev.Get(); + switch (ptr.type) { + case PreviousPtr::Type::NULLPTR: { + // noop + break; + } + case PreviousPtr::Type::DELTA: { + // Same as for deltas except don't align next but prev + auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&ptr](const auto &elem) { + return elem.second->start_timestamp == ptr.delta->commit_info->start_or_commit_timestamp || + elem.second->commit_info->start_or_commit_timestamp == ptr.delta->commit_info->start_or_commit_timestamp; + }); + MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + // Find cloned delta in delta list of cloned transaction + auto found_cloned_delta_it = + std::ranges::find_if(cloned_transaction_it->second->deltas, + [delta = ptr.delta](const auto &elem) { return elem.id == delta->id; }); + MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), + "Delta with given id must exist!"); + + cloned.prev.Set(&*found_cloned_delta_it); + break; + } + case PreviousPtr::Type::VERTEX: { + // The vertex was extracted and it is safe to reuse address + cloned.prev.Set(ptr.vertex); + break; + } + case PreviousPtr::Type::EDGE: { + // We can never be here if we have properties on edge disabled + auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); + cloned.prev.Set(&cloned_edge->second); + break; + } + }; +} + } // namespace memgraph::storage::v3 diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 2053303e3..193b9d98b 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -88,6 +88,10 @@ class Splitter final { static void AdjustDeltaNext(const Delta &original, Delta &cloned, 
std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions); + static void AdjustDeltaPrevPtr(const Delta &original, Delta &cloned, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + const LabelId primary_label_; VertexContainer &vertices_; EdgeContainer &edges_; diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 69a69e52e..90bd599d6 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -144,13 +144,14 @@ void AssertEqVertexContainer(const VertexContainer &actual, const VertexContaine } void AssertEqDeltaLists(const std::list<Delta> &actual, const std::list<Delta> &expected) { - ASSERT_EQ(actual.size(), expected.size()); + EXPECT_EQ(actual.size(), expected.size()); auto actual_it = actual.begin(); auto expected_it = expected.begin(); while (actual_it != actual.end()) { - EXPECT_EQ(actual_it->action, expected_it->action); EXPECT_EQ(actual_it->id, expected_it->id); - EXPECT_NE(&*actual_it, &*expected_it) << "Deltas must be different objects!"; + EXPECT_EQ(actual_it->action, expected_it->action); + ++actual_it; + ++expected_it; } } @@ -210,10 +211,10 @@ TEST_F(ShardSplitTest, TestBasicSplitWithVertices) { std::list<Delta> expected_deltas; expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 4, 1); expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 5, 2); - expected_deltas.emplace_back(Delta::RemoveLabelTag{}, secondary_label, &commit_info, 7, 4); expected_deltas.emplace_back(Delta::SetPropertyTag{}, secondary_property, PropertyValue(), &commit_info, 6, 4); + expected_deltas.emplace_back(Delta::RemoveLabelTag{}, secondary_label, &commit_info, 7, 4); expected_deltas.emplace_back(Delta::DeleteObjectTag{}, &commit_info, 8, 3); - // AssertEqDeltaLists(splitted_data.transactions.begin()->second->deltas, expected_deltas); + 
AssertEqDeltaLists(splitted_data.transactions.begin()->second->deltas, expected_deltas); } TEST_F(ShardSplitTest, TestBasicSplitVerticesAndEdges) { @@ -402,11 +403,11 @@ TEST_F(ShardSplitTest, TestBigSplit) { const auto split_value = pk / 2; auto splitted_data = storage.PerformSplit({PropertyValue(split_value)}, 2); - // EXPECT_EQ(splitted_data.vertices.size(), 100000); - // EXPECT_EQ(splitted_data.edges->size(), 50000); - // EXPECT_EQ(splitted_data.transactions.size(), 50000); - // EXPECT_EQ(splitted_data.label_indices.size(), 0); - // EXPECT_EQ(splitted_data.label_property_indices.size(), 1); + EXPECT_EQ(splitted_data.vertices.size(), 10000); + EXPECT_EQ(splitted_data.edges->size(), 5000); + EXPECT_EQ(splitted_data.transactions.size(), 5000); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 1); AssertSplittedShard(std::move(splitted_data), split_value); } From 82c7c85428ca20ed503e0791a6e2a85521cd9f40 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 7 Feb 2023 11:18:25 +0100 Subject: [PATCH 51/79] Add additional tests --- tests/unit/storage_v3_shard_split.cpp | 100 ++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/tests/unit/storage_v3_shard_split.cpp b/tests/unit/storage_v3_shard_split.cpp index 90bd599d6..c91a11004 100644 --- a/tests/unit/storage_v3_shard_split.cpp +++ b/tests/unit/storage_v3_shard_split.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. 
#include <cstdint> +#include <memory> #include <gmock/gmock-matchers.h> #include <gmock/gmock.h> @@ -56,13 +57,12 @@ class ShardSplitTest : public testing::Test { return last_hlc; } - void AssertSplittedShard(SplitData &&splitted_data, const int split_value) { - auto shard = Shard::FromSplitData(std::move(splitted_data)); - auto acc = shard->Access(GetNextHlc()); - for (int i{0}; i < split_value; ++i) { + void AssertShardState(auto &shard, const int split_min, const int split_max) { + auto acc = shard.Access(GetNextHlc()); + for (int i{0}; i < split_min; ++i) { EXPECT_FALSE(acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD).has_value()); } - for (int i{split_value}; i < split_value * 2; ++i) { + for (int i{split_min}; i < split_max; ++i) { const auto vtx = acc.FindVertex(PrimaryKey{{PropertyValue(i)}}, View::OLD); ASSERT_TRUE(vtx.has_value()); EXPECT_TRUE(vtx->InEdges(View::OLD)->size() == 1 || vtx->OutEdges(View::OLD)->size() == 1); @@ -322,7 +322,7 @@ TEST_F(ShardSplitTest, TestBasicSplitWithCommitedAndOngoingTransactions) { EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) .HasError()); - EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(3)}}, VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) .HasError()); EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, @@ -383,6 +383,91 @@ TEST_F(ShardSplitTest, TestBasicSplitWithLabelPropertyIndex) { EXPECT_EQ(splitted_data.label_property_indices.size(), 1); } +TEST_F(ShardSplitTest, TestSplittingShardsWithGcDestroyOriginalShard) { + const auto split_value{4}; + PrimaryKey splitted_value{{PropertyValue(4)}}; + std::unique_ptr<Shard> splitted_shard; + + { + Shard storage2{primary_label, min_pk, std::nullopt 
/*max_primary_key*/, schema_property_vector}; + auto acc = storage2.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(3)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) + .HasError()); + acc.Commit(GetNextHlc()); + + auto splitted_data = storage2.PerformSplit({PropertyValue(split_value)}, 2); + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); + + // Create a new shard + splitted_shard = Shard::FromSplitData(std::move(splitted_data)); + // Call gc on old shard + storage2.CollectGarbage(GetNextHlc().coordinator_wall_clock); + // Destroy original + } + + splitted_shard->CollectGarbage(GetNextHlc().coordinator_wall_clock); + AssertShardState(*splitted_shard, 4, 6); +} + +TEST_F(ShardSplitTest, TestSplittingShardsWithGcDestroySplittedShard) { + PrimaryKey splitted_value{{PropertyValue(4)}}; + + auto acc = 
storage.Access(GetNextHlc()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(1)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(2)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(3)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(4)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(5)}, {}).HasError()); + EXPECT_FALSE(acc.CreateVertexAndValidate({}, {PropertyValue(6)}, {}).HasError()); + + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(1)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(2)}}, edge_type_id, Gid::FromUint(0)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(3)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(5)}}, edge_type_id, Gid::FromUint(1)) + .HasError()); + EXPECT_FALSE(acc.CreateEdge(VertexId{primary_label, PrimaryKey{PropertyValue(4)}}, + VertexId{primary_label, PrimaryKey{PropertyValue(6)}}, edge_type_id, Gid::FromUint(2)) + .HasError()); + acc.Commit(GetNextHlc()); + + auto splitted_data = storage.PerformSplit({PropertyValue(4)}, 2); + EXPECT_EQ(splitted_data.vertices.size(), 3); + EXPECT_EQ(splitted_data.edges->size(), 2); + EXPECT_EQ(splitted_data.transactions.size(), 1); + EXPECT_EQ(splitted_data.label_indices.size(), 0); + EXPECT_EQ(splitted_data.label_property_indices.size(), 0); + + { + // Create a new shard + auto splitted_shard = Shard::FromSplitData(std::move(splitted_data)); + // Call gc on new shard + splitted_shard->CollectGarbage(GetNextHlc().coordinator_wall_clock); + // Destroy splitted shard + } + + storage.CollectGarbage(GetNextHlc().coordinator_wall_clock); + AssertShardState(storage, 1, 3); +} + TEST_F(ShardSplitTest, TestBigSplit) { int pk{0}; for (int64_t i{0}; i < 10'000; ++i) { @@ -409,7 +494,8 @@ TEST_F(ShardSplitTest, TestBigSplit) { EXPECT_EQ(splitted_data.label_indices.size(), 0); 
EXPECT_EQ(splitted_data.label_property_indices.size(), 1); - AssertSplittedShard(std::move(splitted_data), split_value); + auto shard = Shard::FromSplitData(std::move(splitted_data)); + AssertShardState(*shard, split_value, split_value * 2); } } // namespace memgraph::storage::v3::tests From dc48b4ae9b371749790448f3b493cee6b57a6935 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 7 Feb 2023 13:31:05 +0100 Subject: [PATCH 52/79] Fix edge in vertex_Edge --- src/storage/v3/splitter.cpp | 34 +++++++++++++++++++++++++++++++++- src/storage/v3/splitter.hpp | 10 ++++++---- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 40c55c154..72527c048 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -190,11 +190,12 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key) { - // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard for (auto &[commit_start, cloned_transaction] : cloned_transactions) { AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, cloned_vertices, cloned_edges, split_key); } + // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard + // Prune must be after ajdust, since next, and prev are not set and we cannot follow the chain for (auto &[commit_start, cloned_transaction] : cloned_transactions) { PruneDeltas(*cloned_transaction, cloned_transactions, split_key); } @@ -226,6 +227,7 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; + AdjustEdgeRef(*cloned_delta, 
cloned_edges); while (delta->next != nullptr) { // Align next ptr AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); @@ -246,6 +248,36 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr "Both iterators must be exhausted!"); } +void Splitter::AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) const { + switch (cloned_delta.action) { + case Delta::Action::ADD_IN_EDGE: + case Delta::Action::ADD_OUT_EDGE: + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::REMOVE_OUT_EDGE: { + // Find edge + if (config_.items.properties_on_edges) { + // Only case when not finding is when the edge is not on splitted shard + // TODO Do this after prune an move condition into assert + if (const auto cloned_edge_it = + std::ranges::find_if(cloned_edges, [edge_ptr = cloned_delta.vertex_edge.edge.ptr]( + const auto &elem) { return elem.second.gid == edge_ptr->gid; }); + cloned_edge_it != cloned_edges.end()) { + cloned_delta.vertex_edge.edge = EdgeRef{&cloned_edge_it->second}; + } + } + break; + } + case Delta::Action::DELETE_OBJECT: + case Delta::Action::RECREATE_OBJECT: + case Delta::Action::SET_PROPERTY: + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: { + // noop + break; + } + } +} + void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { // Get cloned_delta->next transaction, using delta->next original transaction diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 193b9d98b..4425ad74f 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -76,15 +76,17 @@ class Splitter final { static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); - static void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, - std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer 
&cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key); + void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, + VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, + const PrimaryKey &split_key); void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key); + void AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) const; + static void AdjustDeltaNext(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions); From a71447e6a020fef01ee023042547f3bcacfdb6e4 Mon Sep 17 00:00:00 2001 From: Jure Bajic <jure.bajic@memgraph.com> Date: Tue, 7 Feb 2023 14:12:21 +0100 Subject: [PATCH 53/79] Update src/storage/v3/shard.cpp Co-authored-by: Tyler Neely <tyler.neely@memgraph.io> --- src/storage/v3/shard.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index f8a7e3862..da3f5b4ec 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -1105,6 +1105,8 @@ void Shard::StoreMapping(std::unordered_map<uint64_t, std::string> id_to_name) { std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { if (vertices_.size() > config_.split.max_shard_vertex_size) { auto mid_elem = vertices_.begin(); + // TODO(tyler) the first time we calculate the split point, we should store it so that we don't have to + // iterate over half of the entire index each time Cron is run until the split succeeds. 
std::ranges::advance(mid_elem, static_cast<VertexContainer::difference_type>(vertices_.size() / 2)); return SplitInfo{mid_elem->first, shard_version_}; } From 5ee920eaf453b6fed8c954f56dc2f3181dbf1aa2 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 7 Feb 2023 15:32:08 +0100 Subject: [PATCH 54/79] Add edges as parameter to benchmark tests --- src/storage/v3/shard.hpp | 2 +- tests/benchmark/storage_v3_split.cpp | 41 ++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 8072b1bff..69a38b7cd 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -391,7 +391,7 @@ class Shard final { // Main object storage NameIdMapper name_id_mapper_; LabelId primary_label_; - // The shard's range is [min, max> + // The shard's range is [min, max) PrimaryKey min_primary_key_; std::optional<PrimaryKey> max_primary_key_; VertexContainer vertices_; diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index dd32d6cdc..1f1d7565a 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -123,13 +123,18 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::bench std::default_random_engine e1(r()); std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); - for (int64_t i{0}; i < state.range(0); ++i) { + const auto max_transactions_needed = std::max(state.range(0), state.range(1)); + for (int64_t vertex_counter{state.range(0)}, edge_counter{state.range(1)}, i{0}; + vertex_counter > 0 || edge_counter > 0; --vertex_counter, --edge_counter, ++i) { auto acc = storage->Access(GetNextHlc()); - MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, - {{secondary_property, PropertyValue(i)}}) - .HasValue(), - "Failed creating with pk {}", i); - if (i > 1) { + if (vertex_counter > 0) { + 
MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + ++i; + } + if (edge_counter > 0 && i > 1) { const auto vtx1 = uniform_dist(e1) % i; const auto vtx2 = uniform_dist(e1) % i; @@ -138,32 +143,46 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::bench .HasValue(), "Failed on {} and {}", vtx1, vtx2); } + acc.Commit(GetNextHlc()); - if (i == state.range(0) - state.range(1)) { + if (i >= max_transactions_needed - state.range(2)) { storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); } } for (auto _ : state) { + // Don't create shard since shard deallocation can take some time as well auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); } } +// Range: +// Number of vertices +// This run is pessimistic, number of vertices corresponds with number if transactions BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) ->RangeMultiplier(10) ->Range(100'000, 1'000'000) ->Unit(::benchmark::kMillisecond); +// Range: +// Number of vertices +// This run is optimistic, in this run there are no transactions BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) ->RangeMultiplier(10) ->Range(100'000, 1'000'000) ->Unit(::benchmark::kMillisecond); +// Args: +// Number of vertices +// Number of edges +// Number of transaction BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) - ->Args({100'000, 1'000}) - ->Args({100'000, 10'000}) - ->Args({1'000'000, 1'000}) - ->Args({1'000'000, 10'000}) + ->Args({100'000, 100'000, 1'000}) + ->Args({100'000, 100'000, 10'000}) + ->Args({1'000'000, 100'000, 1'000}) + ->Args({1'000'000, 100'000, 10'000}) + ->Args({100'000, 1'000'000, 1'000}) + ->Args({1'000'000, 1'00'000, 10'000}) ->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark From e6c80e7dc9e92cba802d0493e9c4555433c3f881 Mon Sep 17 00:00:00 2001 From: 
jbajic <jure.bajic@memgraph.com> Date: Tue, 7 Feb 2023 22:56:10 +0100 Subject: [PATCH 55/79] Fix when delta chain breaks --- src/storage/v3/splitter.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 72527c048..ee9b63b21 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -227,19 +227,32 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; - AdjustEdgeRef(*cloned_delta, cloned_edges); + Delta *cloned_delta_prev_ptr = cloned_delta; while (delta->next != nullptr) { + AdjustEdgeRef(*cloned_delta, cloned_edges); + // Align next ptr AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); // Align prev ptr - AdjustDeltaPrevPtr(*delta, *cloned_delta, cloned_transactions, cloned_vertices, cloned_edges); + if (cloned_delta_prev_ptr != nullptr) { + AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, cloned_transactions, cloned_vertices, cloned_edges); + } - cloned_delta = cloned_delta->next; + // TODO Next delta might not belong to the cloned transaction and thats + // why we skip this delta of the delta chain + if (cloned_delta->next != nullptr) { + cloned_delta = cloned_delta->next; + cloned_delta_prev_ptr = cloned_delta; + } else { + cloned_delta_prev_ptr = nullptr; + } delta = delta->next; } // Align prev ptr - AdjustDeltaPrevPtr(*delta, *cloned_delta, cloned_transactions, cloned_vertices, cloned_edges); + if (cloned_delta_prev_ptr != nullptr) { + AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, cloned_transactions, cloned_vertices, cloned_edges); + } ++delta_it; ++cloned_delta_it; @@ -285,8 +298,9 @@ void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, return elem.second->start_timestamp == original.next->commit_info->start_or_commit_timestamp || elem.second->commit_info->start_or_commit_timestamp == 
original.next->commit_info->start_or_commit_timestamp; }); - - MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + // TODO(jbajic) What if next in delta chain does not belong to cloned transaction? + // MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); + if (cloned_transaction_it == cloned_transactions.end()) return; // Find cloned delta in delta list of cloned transaction auto found_cloned_delta_it = std::ranges::find_if( cloned_transaction_it->second->deltas, [&original](const auto &elem) { return elem.id == original.next->id; }); From 177de2fa2eb4d107963207b6ad1d96c68f657a12 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 8 Feb 2023 15:24:23 +0100 Subject: [PATCH 56/79] Fix benchmark test --- src/storage/v3/shard.cpp | 7 +++--- tests/benchmark/storage_v3_split.cpp | 34 ++++++++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index da3f5b4ec..2d7841e95 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -386,12 +386,12 @@ Shard::~Shard() {} std::unique_ptr<Shard> Shard::FromSplitData(SplitData &&split_data) { if (split_data.config.items.properties_on_edges) [[likely]] { - return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, + return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.max_primary_key, split_data.schema, std::move(split_data.vertices), std::move(*split_data.edges), std::move(split_data.transactions), split_data.config, split_data.id_to_name, split_data.shard_version); } - return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.min_primary_key, + return std::make_unique<Shard>(split_data.primary_label, split_data.min_primary_key, split_data.max_primary_key, split_data.schema, 
std::move(split_data.vertices), std::move(split_data.transactions), split_data.config, split_data.id_to_name, split_data.shard_version); } @@ -1115,8 +1115,9 @@ std::optional<SplitInfo> Shard::ShouldSplit() const noexcept { SplitData Shard::PerformSplit(const PrimaryKey &split_key, const uint64_t shard_version) { shard_version_ = shard_version; + const auto old_max_key = max_primary_key_; max_primary_key_ = split_key; - return shard_splitter_.SplitShard(split_key, max_primary_key_, shard_version); + return shard_splitter_.SplitShard(split_key, old_max_key, shard_version); } bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 1f1d7565a..4e46ee943 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -64,11 +64,12 @@ class ShardSplitBenchmark : public ::benchmark::Fixture { }; BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) { + const auto number_of_vertices{state.range(0)}; std::random_device r; std::default_random_engine e1(r()); - std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); - for (int64_t i{0}; i < state.range(0); ++i) { + for (int64_t i{0}; i < number_of_vertices; ++i) { auto acc = storage->Access(GetNextHlc()); MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, {{secondary_property, PropertyValue(i)}}) @@ -86,16 +87,17 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) acc.Commit(GetNextHlc()); } for (auto _ : state) { - auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); } } BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State &state) { + const auto 
number_of_vertices{state.range(0)}; std::random_device r; std::default_random_engine e1(r()); - std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); - for (int64_t i{0}; i < state.range(0); ++i) { + for (int64_t i{0}; i < number_of_vertices; ++i) { auto acc = storage->Access(GetNextHlc()); MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, {{secondary_property, PropertyValue(i)}}) @@ -114,17 +116,20 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State & } storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); for (auto _ : state) { - auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); } } BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::benchmark::State &state) { + const auto number_of_vertices = state.range(0); + const auto number_of_edges = state.range(1); + const auto number_of_transactions = state.range(2); std::random_device r; std::default_random_engine e1(r()); - std::uniform_int_distribution<int> uniform_dist(0, state.range(0)); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); - const auto max_transactions_needed = std::max(state.range(0), state.range(1)); - for (int64_t vertex_counter{state.range(0)}, edge_counter{state.range(1)}, i{0}; + const auto max_transactions_needed = std::max(number_of_vertices, number_of_edges); + for (int64_t vertex_counter{number_of_vertices}, edge_counter{number_of_edges}, i{0}; vertex_counter > 0 || edge_counter > 0; --vertex_counter, --edge_counter, ++i) { auto acc = storage->Access(GetNextHlc()); if (vertex_counter > 0) { @@ -132,11 +137,10 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::bench {{secondary_property, PropertyValue(i)}}) .HasValue(), "Failed creating 
with pk {}", i); - ++i; } if (edge_counter > 0 && i > 1) { - const auto vtx1 = uniform_dist(e1) % i; - const auto vtx2 = uniform_dist(e1) % i; + const auto vtx1 = uniform_dist(e1) % std::min(i, number_of_vertices); + const auto vtx2 = uniform_dist(e1) % std::min(i, number_of_vertices); MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) @@ -145,14 +149,14 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::bench } acc.Commit(GetNextHlc()); - if (i >= max_transactions_needed - state.range(2)) { + if (i == max_transactions_needed - number_of_transactions) { storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); } } for (auto _ : state) { // Don't create shard since shard deallocation can take some time as well - auto data = storage->PerformSplit(PrimaryKey{PropertyValue{state.range(0) / 2}}, 2); + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); } } @@ -161,7 +165,7 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions)(::bench // This run is pessimistic, number of vertices corresponds with number if transactions BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) ->RangeMultiplier(10) - ->Range(100'000, 1'000'000) + ->Range(100'000, 100'000) ->Unit(::benchmark::kMillisecond); // Range: From cb1a77db98fff46974c1e11c9ca511d2a1a62c39 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 21 Feb 2023 14:29:13 +0100 Subject: [PATCH 57/79] Add noexcept to spaceship operator in hlc --- src/coordinator/hybrid_logical_clock.hpp | 2 +- src/storage/v3/delta.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coordinator/hybrid_logical_clock.hpp b/src/coordinator/hybrid_logical_clock.hpp index 2fe4880b7..e8ea30e6b 100644 --- a/src/coordinator/hybrid_logical_clock.hpp +++ b/src/coordinator/hybrid_logical_clock.hpp @@ -27,7 +27,7 @@ struct 
Hlc final { uint64_t logical_id{0}; Time coordinator_wall_clock = Time::min(); - auto operator<=>(const Hlc &other) const { return logical_id <=> other.logical_id; } + auto operator<=>(const Hlc &other) const noexcept { return logical_id <=> other.logical_id; } bool operator==(const Hlc &other) const noexcept = default; bool operator<(const Hlc &other) const noexcept = default; diff --git a/src/storage/v3/delta.hpp b/src/storage/v3/delta.hpp index 8c1b85ab5..555a6a151 100644 --- a/src/storage/v3/delta.hpp +++ b/src/storage/v3/delta.hpp @@ -29,7 +29,7 @@ struct Edge; struct Delta; struct CommitInfo; -inline uint64_t GetNextDeltaId() noexcept { +inline uint64_t GetNextDeltaId() { static utils::Synchronized<uint64_t, utils::SpinLock> delta_id{0}; return delta_id.WithLock([](auto &id) { return id++; }); } From bea2ae7cbcb0c620845535fe66a1cbb8a7253ee4 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 21 Feb 2023 14:29:56 +0100 Subject: [PATCH 58/79] Remove redundan check in indices --- src/storage/v3/indices.hpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/storage/v3/indices.hpp b/src/storage/v3/indices.hpp index 6023ea9e3..882f2e894 100644 --- a/src/storage/v3/indices.hpp +++ b/src/storage/v3/indices.hpp @@ -123,18 +123,12 @@ class LabelIndex { void Clear() { index_.clear(); } std::map<IndexType, IndexContainer> SplitIndexEntries(const PrimaryKey &split_key) { - if (index_.empty()) { - return {}; - } - - // Cloned index entries will contain new index entry iterators, but old - // vertices address which need to be adjusted after extracting vertices std::map<IndexType, IndexContainer> cloned_indices; for (auto &[index_type_val, index] : index_) { auto entry_it = index.begin(); auto &cloned_indices_container = cloned_indices[index_type_val]; while (entry_it != index.end()) { - // We need to save the next pointer since the current one will be + // We need to save the next iterator since the current one 
will be // invalidated after extract auto next_entry_it = std::next(entry_it); if (entry_it->vertex->first > split_key) { @@ -266,18 +260,12 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } std::map<IndexType, IndexContainer> SplitIndexEntries(const PrimaryKey &split_key) { - if (index_.empty()) { - return {}; - } - - // Cloned index entries will contain new index entry iterators, but old - // vertices address which need to be adjusted after extracting vertices std::map<IndexType, IndexContainer> cloned_indices; for (auto &[index_type_val, index] : index_) { auto entry_it = index.begin(); auto &cloned_index_container = cloned_indices[index_type_val]; while (entry_it != index.end()) { - // We need to save the next pointer since the current one will be + // We need to save the next iterator since the current one will be // invalidated after extract auto next_entry_it = std::next(entry_it); if (entry_it->vertex->first > split_key) { From 8955850bbcec423b9bc6eabee01377031b5f7e86 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 21 Feb 2023 14:42:42 +0100 Subject: [PATCH 59/79] Remove splitter as member variable --- src/storage/v3/shard.cpp | 18 ++++++++---------- src/storage/v3/shard.hpp | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/storage/v3/shard.cpp b/src/storage/v3/shard.cpp index 2d7841e95..afd13fa88 100644 --- a/src/storage/v3/shard.cpp +++ b/src/storage/v3/shard.cpp @@ -332,9 +332,7 @@ Shard::Shard(const LabelId primary_label, const PrimaryKey min_primary_key, vertex_validator_{schema_validator_, primary_label}, indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, - config_{config}, - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, - name_id_mapper_) { + config_{config} { CreateSchema(primary_label_, schema); StoreMapping(std::move(id_to_name)); } @@ -354,9 +352,7 @@ Shard::Shard(LabelId 
primary_label, PrimaryKey min_primary_key, std::optional<Pr indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, config_{config}, - start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, - name_id_mapper_) { + start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)) { CreateSchema(primary_label_, schema); StoreMapping(id_to_name); } @@ -375,9 +371,7 @@ Shard::Shard(LabelId primary_label, PrimaryKey min_primary_key, std::optional<Pr indices_{config.items, vertex_validator_}, isolation_level_{config.transaction.isolation_level}, config_{config}, - start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)), - shard_splitter_(primary_label, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, schema, - name_id_mapper_) { + start_logical_id_to_transaction_(std::move(start_logical_id_to_transaction)) { CreateSchema(primary_label_, schema); StoreMapping(id_to_name); } @@ -1117,7 +1111,11 @@ SplitData Shard::PerformSplit(const PrimaryKey &split_key, const uint64_t shard_ shard_version_ = shard_version; const auto old_max_key = max_primary_key_; max_primary_key_ = split_key; - return shard_splitter_.SplitShard(split_key, old_max_key, shard_version); + const auto *schema = GetSchema(primary_label_); + MG_ASSERT(schema, "Shard must know about schema of primary label!"); + Splitter shard_splitter(primary_label_, vertices_, edges_, start_logical_id_to_transaction_, indices_, config_, + schema->second, name_id_mapper_); + return shard_splitter.SplitShard(split_key, old_max_key, shard_version); } bool Shard::IsVertexBelongToShard(const VertexId &vertex_id) const { diff --git a/src/storage/v3/shard.hpp b/src/storage/v3/shard.hpp index 69a38b7cd..9496a6084 100644 --- a/src/storage/v3/shard.hpp +++ b/src/storage/v3/shard.hpp @@ -423,7 +423,6 @@ class 
Shard final { // Holds all of the (in progress, committed and aborted) transactions that are read or write to this shard, but // haven't been cleaned up yet std::map<uint64_t, std::unique_ptr<Transaction>> start_logical_id_to_transaction_{}; - Splitter shard_splitter_; bool has_any_transaction_aborted_since_last_gc{false}; }; From e00a2907b6e258fb54456e68d90bfb83bbe12de5 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Tue, 21 Feb 2023 16:39:06 +0100 Subject: [PATCH 60/79] Fix commit info of deltas --- src/storage/v3/transaction.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index 8cb4225ee..aa3fa3d1a 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -31,8 +31,8 @@ struct CommitInfo { }; struct Transaction { - Transaction(coordinator::Hlc start_timestamp, CommitInfo new_commit_info, std::list<Delta> deltas, - uint64_t command_id, bool must_abort, bool is_aborted, IsolationLevel isolation_level) + Transaction(coordinator::Hlc start_timestamp, CommitInfo new_commit_info, uint64_t command_id, bool must_abort, + bool is_aborted, IsolationLevel isolation_level) : start_timestamp{start_timestamp}, commit_info{std::make_unique<CommitInfo>(new_commit_info)}, command_id(command_id), @@ -108,8 +108,10 @@ struct Transaction { // This does not solve the whole problem of copying deltas std::unique_ptr<Transaction> Clone() const { - return std::make_unique<Transaction>(start_timestamp, *commit_info, CopyDeltas(commit_info.get()), command_id, - must_abort, is_aborted, isolation_level); + auto transaction_ptr = std::make_unique<Transaction>(start_timestamp, *commit_info, command_id, must_abort, + is_aborted, isolation_level); + transaction_ptr->deltas = CopyDeltas(transaction_ptr->commit_info.get()); + return transaction_ptr; } coordinator::Hlc start_timestamp; From a919048fae5322504b5ba1cf9a58253862c1b6a2 Mon Sep 17 00:00:00 2001 
From: jbajic <jure.bajic@memgraph.com> Date: Wed, 22 Feb 2023 10:55:24 +0100 Subject: [PATCH 61/79] Add missing const --- src/storage/v3/splitter.cpp | 4 ++-- src/storage/v3/splitter.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index ee9b63b21..3ecfe178b 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -62,7 +62,7 @@ SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional< return data; } -void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, Delta *delta) { +void Splitter::ScanDeltas(std::set<uint64_t> &collected_transactions_, const Delta *delta) { while (delta != nullptr) { collected_transactions_.insert(delta->commit_info->start_or_commit_timestamp.logical_id); delta = delta->next; @@ -195,7 +195,7 @@ void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Trans cloned_vertices, cloned_edges, split_key); } // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard - // Prune must be after ajdust, since next, and prev are not set and we cannot follow the chain + // Prune must be after adjust, since next, and prev are not set and we cannot follow the chain for (auto &[commit_start, cloned_transaction] : cloned_transactions) { PruneDeltas(*cloned_transaction, cloned_transactions, split_key); } diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 4425ad74f..71c8aa901 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -74,7 +74,7 @@ class Splitter final { const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key); - static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, Delta *delta); + static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, const Delta *delta); void 
AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, From 8f1406fb5fec1bdf86d61f68a3f364da19de3ded Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 22 Feb 2023 11:03:15 +0100 Subject: [PATCH 62/79] Fix transaction ctor --- src/storage/v3/transaction.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/storage/v3/transaction.hpp b/src/storage/v3/transaction.hpp index aa3fa3d1a..0428e0c30 100644 --- a/src/storage/v3/transaction.hpp +++ b/src/storage/v3/transaction.hpp @@ -36,7 +36,6 @@ struct Transaction { : start_timestamp{start_timestamp}, commit_info{std::make_unique<CommitInfo>(new_commit_info)}, command_id(command_id), - deltas(std::move(deltas)), must_abort(must_abort), is_aborted(is_aborted), isolation_level(isolation_level){}; From 0fa0f2cc3e866708bcc171333f84e87f887aa03d Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 22 Feb 2023 15:50:18 +0100 Subject: [PATCH 63/79] Fix pruning delta on edge --- src/storage/v3/splitter.cpp | 60 +++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 3ecfe178b..b3f1ae5f3 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -141,10 +141,33 @@ std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( } void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - const PrimaryKey &split_key) { + const PrimaryKey &split_key, EdgeContainer &cloned_edges) { // Remove delta chains that don't point to objects on splitted shard auto cloned_delta_it = cloned_transaction.deltas.begin(); + const auto remove_from_delta_chain = [&cloned_transaction, &cloned_transactions](auto &cloned_delta_it) { + auto *current_next_delta = cloned_delta_it->next; + cloned_delta_it 
= cloned_transaction.deltas.erase(cloned_delta_it); + + while (current_next_delta != nullptr) { + auto *next_delta = current_next_delta->next; + // Find next delta transaction delta list + auto current_transaction_it = std::ranges::find_if( + cloned_transactions, + [&start_or_commit_timestamp = + current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { + return transaction.second->start_timestamp == start_or_commit_timestamp || + transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; + }); + MG_ASSERT(current_transaction_it != cloned_transactions.end(), "Error when pruning deltas!"); + // Remove it + current_transaction_it->second->deltas.remove_if( + [¤t_next_delta = *current_next_delta](const auto &delta) { return delta == current_next_delta; }); + + current_next_delta = next_delta; + } + }; + while (cloned_delta_it != cloned_transaction.deltas.end()) { const auto prev = cloned_delta_it->prev.Get(); switch (prev.type) { @@ -155,34 +178,21 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique case PreviousPtr::Type::VERTEX: { if (prev.vertex->first < split_key) { // We can remove this delta chain - auto *current_next_delta = cloned_delta_it->next; - cloned_delta_it = cloned_transaction.deltas.erase(cloned_delta_it); - - while (current_next_delta != nullptr) { - auto *next_delta = current_next_delta->next; - // Find next delta transaction delta list - auto current_transaction_it = std::ranges::find_if( - cloned_transactions, - [&start_or_commit_timestamp = - current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { - return transaction.second->start_timestamp == start_or_commit_timestamp || - transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; - }); - MG_ASSERT(current_transaction_it != cloned_transactions.end(), "Error when pruning deltas!"); - // Remove it - 
current_transaction_it->second->deltas.remove_if( - [¤t_next_delta = *current_next_delta](const auto &delta) { return delta == current_next_delta; }); - - current_next_delta = next_delta; - } + remove_from_delta_chain(cloned_delta_it); } else { ++cloned_delta_it; } break; } - case PreviousPtr::Type::EDGE: - ++cloned_delta_it; - break; + case PreviousPtr::Type::EDGE: { + if (const auto edge_gid = prev.edge->gid; !cloned_edges.contains(edge_gid)) { + // We can remove this delta chain + remove_from_delta_chain(cloned_delta_it); + } else { + ++cloned_delta_it; + break; + } + } } } } @@ -197,7 +207,7 @@ void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Trans // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard // Prune must be after adjust, since next, and prev are not set and we cannot follow the chain for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - PruneDeltas(*cloned_transaction, cloned_transactions, split_key); + PruneDeltas(*cloned_transaction, cloned_transactions, split_key, cloned_edges); } } From bc46a9f54bccf9db837cdc77cbe0415ef381fa25 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Wed, 22 Feb 2023 19:18:23 +0100 Subject: [PATCH 64/79] Optimize search for deltas --- src/storage/v3/splitter.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index b3f1ae5f3..c0f6fac09 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -281,9 +281,7 @@ void Splitter::AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) c if (config_.items.properties_on_edges) { // Only case when not finding is when the edge is not on splitted shard // TODO Do this after prune an move condition into assert - if (const auto cloned_edge_it = - std::ranges::find_if(cloned_edges, [edge_ptr = cloned_delta.vertex_edge.edge.ptr]( - const auto &elem) { return 
elem.second.gid == edge_ptr->gid; }); + if (const auto cloned_edge_it = cloned_edges.find(cloned_delta.vertex_edge.edge.ptr->gid); cloned_edge_it != cloned_edges.end()) { cloned_delta.vertex_edge.edge = EdgeRef{&cloned_edge_it->second}; } @@ -304,10 +302,15 @@ void Splitter::AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) c void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { // Get cloned_delta->next transaction, using delta->next original transaction - auto cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&original](const auto &elem) { - return elem.second->start_timestamp == original.next->commit_info->start_or_commit_timestamp || - elem.second->commit_info->start_or_commit_timestamp == original.next->commit_info->start_or_commit_timestamp; - }); + // cloned_transactions key is start_timestamp + auto cloned_transaction_it = + cloned_transactions.find(original.next->commit_info->start_or_commit_timestamp.logical_id); + if (cloned_transaction_it == cloned_transactions.end()) { + cloned_transaction_it = std::ranges::find_if(cloned_transactions, [&original](const auto &elem) { + return elem.second->commit_info->start_or_commit_timestamp == + original.next->commit_info->start_or_commit_timestamp; + }); + } // TODO(jbajic) What if next in delta chain does not belong to cloned transaction? 
// MG_ASSERT(cloned_transaction_it != cloned_transactions.end(), "Cloned transaction not found"); if (cloned_transaction_it == cloned_transactions.end()) return; From fd18ff01967a5156bec549918ceee7fa43a00861 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Feb 2023 14:18:59 +0100 Subject: [PATCH 65/79] Improve ajdusting of prev ptr --- src/storage/v3/splitter.cpp | 24 ++++++++++++------------ src/storage/v3/splitter.hpp | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index c0f6fac09..e7de81f79 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -238,16 +238,15 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr const auto *delta = &*delta_it; auto *cloned_delta = &*cloned_delta_it; Delta *cloned_delta_prev_ptr = cloned_delta; + // The head of delta chains contain either vertex/edge as prev ptr so we adjust + // it just at the beginning of delta chain + AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, cloned_transactions, cloned_edges); + while (delta->next != nullptr) { AdjustEdgeRef(*cloned_delta, cloned_edges); - // Align next ptr - AdjustDeltaNext(*delta, *cloned_delta, cloned_transactions); - - // Align prev ptr - if (cloned_delta_prev_ptr != nullptr) { - AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, cloned_transactions, cloned_vertices, cloned_edges); - } + // Align next ptr and prev ptr + AdjustDeltaNextAndPrev(*delta, *cloned_delta, cloned_transactions); // TODO Next delta might not belong to the cloned transaction and thats // why we skip this delta of the delta chain @@ -261,7 +260,7 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr } // Align prev ptr if (cloned_delta_prev_ptr != nullptr) { - AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, cloned_transactions, cloned_vertices, cloned_edges); + AdjustDeltaPrevPtr(*delta, *cloned_delta_prev_ptr, 
cloned_transactions, cloned_edges); } ++delta_it; @@ -299,8 +298,8 @@ void Splitter::AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) c } } -void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, - std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { +void Splitter::AdjustDeltaNextAndPrev(const Delta &original, Delta &cloned, + std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions) { // Get cloned_delta->next transaction, using delta->next original transaction // cloned_transactions key is start_timestamp auto cloned_transaction_it = @@ -319,15 +318,16 @@ void Splitter::AdjustDeltaNext(const Delta &original, Delta &cloned, cloned_transaction_it->second->deltas, [&original](const auto &elem) { return elem.id == original.next->id; }); MG_ASSERT(found_cloned_delta_it != cloned_transaction_it->second->deltas.end(), "Delta with given uuid must exist!"); cloned.next = &*found_cloned_delta_it; + found_cloned_delta_it->prev.Set(&cloned); } void Splitter::AdjustDeltaPrevPtr(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer & /*cloned_vertices*/, EdgeContainer &cloned_edges) { + EdgeContainer &cloned_edges) { auto ptr = original.prev.Get(); switch (ptr.type) { case PreviousPtr::Type::NULLPTR: { - // noop + MG_ASSERT(false, "PreviousPtr cannot be a nullptr!"); break; } case PreviousPtr::Type::DELTA: { diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 71c8aa901..be27291e1 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -87,12 +87,12 @@ class Splitter final { void AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) const; - static void AdjustDeltaNext(const Delta &original, Delta &cloned, - std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions); + static void AdjustDeltaNextAndPrev(const Delta &original, Delta &cloned, + std::map<uint64_t, 
std::unique_ptr<Transaction>> &cloned_transactions); static void AdjustDeltaPrevPtr(const Delta &original, Delta &cloned, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges); + EdgeContainer &cloned_edges); const LabelId primary_label_; VertexContainer &vertices_; From eb75215ef5613a1b66bf99b7b3a8efe1be1f4014 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Feb 2023 14:33:54 +0100 Subject: [PATCH 66/79] Add note --- src/storage/v3/splitter.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index be27291e1..5e5867127 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -48,6 +48,7 @@ struct SplitData { std::map<std::pair<LabelId, PropertyId>, LabelPropertyIndex::IndexContainer> label_property_indices; }; +// TODO(jbajic) Handle deleted_vertices_ and deleted_edges_ after the finishing GC class Splitter final { public: Splitter(LabelId primary_label, VertexContainer &vertices, EdgeContainer &edges, From 39cfcca2e5e1dfc158b7d6ccee2355f4afe496d7 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 11:16:46 +0100 Subject: [PATCH 67/79] Fix ASAN bug --- src/storage/v3/splitter.cpp | 110 ++++++++++++++++++++++++++++++++---- src/storage/v3/splitter.hpp | 3 + 2 files changed, 103 insertions(+), 10 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index e7de81f79..0bd69d8c5 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -145,8 +145,10 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique // Remove delta chains that don't point to objects on splitted shard auto cloned_delta_it = cloned_transaction.deltas.begin(); - const auto remove_from_delta_chain = [&cloned_transaction, &cloned_transactions](auto &cloned_delta_it) { + // Erases the delta chain + const 
auto erase_delta_chain = [&cloned_transaction, &cloned_transactions](auto &cloned_delta_it) { auto *current_next_delta = cloned_delta_it->next; + // We need to keep track of cloned_delta_it in the delta list of current transaction cloned_delta_it = cloned_transaction.deltas.erase(cloned_delta_it); while (current_next_delta != nullptr) { @@ -160,9 +162,21 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; }); MG_ASSERT(current_transaction_it != cloned_transactions.end(), "Error when pruning deltas!"); - // Remove it - current_transaction_it->second->deltas.remove_if( - [¤t_next_delta = *current_next_delta](const auto &delta) { return delta == current_next_delta; }); + // Remove the delta + const auto delta_it = + std::ranges::find_if(current_transaction_it->second->deltas, + [current_next_delta](const auto &elem) { return elem.id == current_next_delta->id; }); + if (delta_it != current_transaction_it->second->deltas.end()) { + // If the next delta is next in transaction list replace current_transaction_it + // with the next one + if (current_transaction_it->second->start_timestamp == cloned_transaction.start_timestamp && + current_transaction_it == std::next(current_transaction_it)) { + // TODO Dont do this if the delta_it is not next in line in transaction list + cloned_delta_it = current_transaction_it->second->deltas.erase(delta_it); + } else { + current_transaction_it->second->deltas.erase(delta_it); + } + } current_next_delta = next_delta; } @@ -178,7 +192,7 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique case PreviousPtr::Type::VERTEX: { if (prev.vertex->first < split_key) { // We can remove this delta chain - remove_from_delta_chain(cloned_delta_it); + erase_delta_chain(cloned_delta_it); } else { ++cloned_delta_it; } @@ -187,11 +201,81 @@ void PruneDeltas(Transaction &cloned_transaction, 
std::map<uint64_t, std::unique case PreviousPtr::Type::EDGE: { if (const auto edge_gid = prev.edge->gid; !cloned_edges.contains(edge_gid)) { // We can remove this delta chain - remove_from_delta_chain(cloned_delta_it); + erase_delta_chain(cloned_delta_it); } else { ++cloned_delta_it; - break; } + break; + } + } + } +} + +void Splitter::PruneOriginalDeltas(Transaction &transaction, + std::map<uint64_t, std::unique_ptr<Transaction>> &transactions, + const PrimaryKey &split_key) { + // Remove delta chains that don't point to objects on splitted shard + auto delta_it = transaction.deltas.begin(); + + const auto erase_delta_chain = [&transaction, &transactions](auto &cloned_delta_it) { + auto *current_next_delta = cloned_delta_it->next; + // We need to keep track of cloned_delta_it in the delta list of current transaction + cloned_delta_it = transaction.deltas.erase(cloned_delta_it); + + while (current_next_delta != nullptr) { + auto *next_delta = current_next_delta->next; + // Find next delta transaction delta list + auto current_transaction_it = std::ranges::find_if( + transactions, [&start_or_commit_timestamp = + current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { + return transaction.second->start_timestamp == start_or_commit_timestamp || + transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; + }); + MG_ASSERT(current_transaction_it != transactions.end(), "Error when pruning deltas!"); + // Remove the delta + const auto delta_it = + std::ranges::find_if(current_transaction_it->second->deltas, + [current_next_delta](const auto &elem) { return elem.id == current_next_delta->id; }); + if (delta_it != current_transaction_it->second->deltas.end()) { + // If the next delta is next in transaction list replace current_transaction_it + // with the next one + if (current_transaction_it->second->start_timestamp == transaction.start_timestamp && + current_transaction_it == std::next(current_transaction_it)) 
{ + // TODO Dont do this if the delta_it is not next in line in transaction list + cloned_delta_it = current_transaction_it->second->deltas.erase(delta_it); + } else { + current_transaction_it->second->deltas.erase(delta_it); + } + } + + current_next_delta = next_delta; + } + }; + + while (delta_it != transaction.deltas.end()) { + const auto prev = delta_it->prev.Get(); + switch (prev.type) { + case PreviousPtr::Type::DELTA: + case PreviousPtr::Type::NULLPTR: + ++delta_it; + break; + case PreviousPtr::Type::VERTEX: { + if (prev.vertex->first >= split_key) { + // We can remove this delta chain + erase_delta_chain(delta_it); + } else { + ++delta_it; + } + break; + } + case PreviousPtr::Type::EDGE: { + if (const auto edge_gid = prev.edge->gid; !edges_.contains(edge_gid)) { + // We can remove this delta chain + erase_delta_chain(delta_it); + } else { + ++delta_it; + } + break; } } } @@ -200,15 +284,19 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, const PrimaryKey &split_key) { - for (auto &[commit_start, cloned_transaction] : cloned_transactions) { - AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[commit_start], cloned_transactions, + for (auto &[start_id, cloned_transaction] : cloned_transactions) { + AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[start_id], cloned_transactions, cloned_vertices, cloned_edges, split_key); } // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard // Prune must be after adjust, since next, and prev are not set and we cannot follow the chain - for (auto &[commit_start, cloned_transaction] : cloned_transactions) { + for (auto &[start_id, cloned_transaction] : cloned_transactions) { PruneDeltas(*cloned_transaction, 
cloned_transactions, split_key, cloned_edges); } + // Also we need to remove deltas from original transactions + for (auto &[start_id, original_transaction] : start_logical_id_to_transaction_) { + PruneOriginalDeltas(*original_transaction, start_logical_id_to_transaction_, split_key); + } } inline bool IsDeltaHeadOfChain(const PreviousPtr::Type &delta_type) { @@ -350,11 +438,13 @@ void Splitter::AdjustDeltaPrevPtr(const Delta &original, Delta &cloned, case PreviousPtr::Type::VERTEX: { // The vertex was extracted and it is safe to reuse address cloned.prev.Set(ptr.vertex); + ptr.vertex->second.delta = &cloned; break; } case PreviousPtr::Type::EDGE: { // We can never be here if we have properties on edge disabled auto *cloned_edge = &*cloned_edges.find(ptr.edge->gid); + ptr.edge->delta = &cloned; cloned.prev.Set(&cloned_edge->second); break; } diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 5e5867127..72869e9cb 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -77,6 +77,9 @@ class Splitter final { static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, const Delta *delta); + void PruneOriginalDeltas(Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &transactions, + const PrimaryKey &split_key); + void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, From f0e3d5e9ab0305fd394561b9eedb63983f941b2b Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 11:49:55 +0100 Subject: [PATCH 68/79] Move out EraseDeltaChain --- src/storage/v3/splitter.cpp | 118 ++++++++++++------------------------ 1 file changed, 39 insertions(+), 79 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 0bd69d8c5..1c48907d0 100644 --- 
a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -140,48 +140,45 @@ std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( return transactions; } +void EraseDeltaChain(auto &transaction, auto &transactions, auto &delta_head_it) { + auto *current_next_delta = delta_head_it->next; + // We need to keep track of delta_head_it in the delta list of current transaction + delta_head_it = transaction.deltas.erase(delta_head_it); + + while (current_next_delta != nullptr) { + auto *next_delta = current_next_delta->next; + // Find next delta transaction delta list + auto current_transaction_it = std::ranges::find_if( + transactions, [&start_or_commit_timestamp = + current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { + return transaction.second->start_timestamp == start_or_commit_timestamp || + transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; + }); + MG_ASSERT(current_transaction_it != transactions.end(), "Error when pruning deltas!"); + // Remove the delta + const auto delta_it = + std::ranges::find_if(current_transaction_it->second->deltas, + [current_next_delta](const auto &elem) { return elem.id == current_next_delta->id; }); + if (delta_it != current_transaction_it->second->deltas.end()) { + // If the next delta is next in transaction list replace current_transaction_it + // with the next one + if (current_transaction_it->second->start_timestamp == transaction.start_timestamp && + current_transaction_it == std::next(current_transaction_it)) { + delta_head_it = current_transaction_it->second->deltas.erase(delta_it); + } else { + current_transaction_it->second->deltas.erase(delta_it); + } + } + + current_next_delta = next_delta; + } +} + void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, const PrimaryKey &split_key, EdgeContainer &cloned_edges) { // Remove delta chains that don't point to 
objects on splitted shard auto cloned_delta_it = cloned_transaction.deltas.begin(); - // Erases the delta chain - const auto erase_delta_chain = [&cloned_transaction, &cloned_transactions](auto &cloned_delta_it) { - auto *current_next_delta = cloned_delta_it->next; - // We need to keep track of cloned_delta_it in the delta list of current transaction - cloned_delta_it = cloned_transaction.deltas.erase(cloned_delta_it); - - while (current_next_delta != nullptr) { - auto *next_delta = current_next_delta->next; - // Find next delta transaction delta list - auto current_transaction_it = std::ranges::find_if( - cloned_transactions, - [&start_or_commit_timestamp = - current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { - return transaction.second->start_timestamp == start_or_commit_timestamp || - transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; - }); - MG_ASSERT(current_transaction_it != cloned_transactions.end(), "Error when pruning deltas!"); - // Remove the delta - const auto delta_it = - std::ranges::find_if(current_transaction_it->second->deltas, - [current_next_delta](const auto &elem) { return elem.id == current_next_delta->id; }); - if (delta_it != current_transaction_it->second->deltas.end()) { - // If the next delta is next in transaction list replace current_transaction_it - // with the next one - if (current_transaction_it->second->start_timestamp == cloned_transaction.start_timestamp && - current_transaction_it == std::next(current_transaction_it)) { - // TODO Dont do this if the delta_it is not next in line in transaction list - cloned_delta_it = current_transaction_it->second->deltas.erase(delta_it); - } else { - current_transaction_it->second->deltas.erase(delta_it); - } - } - - current_next_delta = next_delta; - } - }; - while (cloned_delta_it != cloned_transaction.deltas.end()) { const auto prev = cloned_delta_it->prev.Get(); switch (prev.type) { @@ -192,7 +189,7 @@ void 
PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique case PreviousPtr::Type::VERTEX: { if (prev.vertex->first < split_key) { // We can remove this delta chain - erase_delta_chain(cloned_delta_it); + EraseDeltaChain(cloned_transaction, cloned_transactions, cloned_delta_it); } else { ++cloned_delta_it; } @@ -201,7 +198,7 @@ void PruneDeltas(Transaction &cloned_transaction, std::map<uint64_t, std::unique case PreviousPtr::Type::EDGE: { if (const auto edge_gid = prev.edge->gid; !cloned_edges.contains(edge_gid)) { // We can remove this delta chain - erase_delta_chain(cloned_delta_it); + EraseDeltaChain(cloned_transaction, cloned_transactions, cloned_delta_it); } else { ++cloned_delta_it; } @@ -217,41 +214,6 @@ void Splitter::PruneOriginalDeltas(Transaction &transaction, // Remove delta chains that don't point to objects on splitted shard auto delta_it = transaction.deltas.begin(); - const auto erase_delta_chain = [&transaction, &transactions](auto &cloned_delta_it) { - auto *current_next_delta = cloned_delta_it->next; - // We need to keep track of cloned_delta_it in the delta list of current transaction - cloned_delta_it = transaction.deltas.erase(cloned_delta_it); - - while (current_next_delta != nullptr) { - auto *next_delta = current_next_delta->next; - // Find next delta transaction delta list - auto current_transaction_it = std::ranges::find_if( - transactions, [&start_or_commit_timestamp = - current_next_delta->commit_info->start_or_commit_timestamp](const auto &transaction) { - return transaction.second->start_timestamp == start_or_commit_timestamp || - transaction.second->commit_info->start_or_commit_timestamp == start_or_commit_timestamp; - }); - MG_ASSERT(current_transaction_it != transactions.end(), "Error when pruning deltas!"); - // Remove the delta - const auto delta_it = - std::ranges::find_if(current_transaction_it->second->deltas, - [current_next_delta](const auto &elem) { return elem.id == current_next_delta->id; }); - if 
(delta_it != current_transaction_it->second->deltas.end()) { - // If the next delta is next in transaction list replace current_transaction_it - // with the next one - if (current_transaction_it->second->start_timestamp == transaction.start_timestamp && - current_transaction_it == std::next(current_transaction_it)) { - // TODO Dont do this if the delta_it is not next in line in transaction list - cloned_delta_it = current_transaction_it->second->deltas.erase(delta_it); - } else { - current_transaction_it->second->deltas.erase(delta_it); - } - } - - current_next_delta = next_delta; - } - }; - while (delta_it != transaction.deltas.end()) { const auto prev = delta_it->prev.Get(); switch (prev.type) { @@ -262,7 +224,7 @@ void Splitter::PruneOriginalDeltas(Transaction &transaction, case PreviousPtr::Type::VERTEX: { if (prev.vertex->first >= split_key) { // We can remove this delta chain - erase_delta_chain(delta_it); + EraseDeltaChain(transaction, transactions, delta_it); } else { ++delta_it; } @@ -271,7 +233,7 @@ void Splitter::PruneOriginalDeltas(Transaction &transaction, case PreviousPtr::Type::EDGE: { if (const auto edge_gid = prev.edge->gid; !edges_.contains(edge_gid)) { // We can remove this delta chain - erase_delta_chain(delta_it); + EraseDeltaChain(transaction, transactions, delta_it); } else { ++delta_it; } @@ -336,7 +298,7 @@ void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Tr // Align next ptr and prev ptr AdjustDeltaNextAndPrev(*delta, *cloned_delta, cloned_transactions); - // TODO Next delta might not belong to the cloned transaction and thats + // Next delta might not belong to the cloned transaction and thats // why we skip this delta of the delta chain if (cloned_delta->next != nullptr) { cloned_delta = cloned_delta->next; @@ -366,8 +328,6 @@ void Splitter::AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) c case Delta::Action::REMOVE_OUT_EDGE: { // Find edge if (config_.items.properties_on_edges) { - // Only 
case when not finding is when the edge is not on splitted shard - // TODO Do this after prune an move condition into assert if (const auto cloned_edge_it = cloned_edges.find(cloned_delta.vertex_edge.edge.ptr->gid); cloned_edge_it != cloned_edges.end()) { cloned_delta.vertex_edge.edge = EdgeRef{&cloned_edge_it->second}; From 5fef2f9a2471e7a34ed4502c70eebbda31392e9e Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 14:11:39 +0100 Subject: [PATCH 69/79] Fix clang tidy errors --- src/storage/v3/splitter.cpp | 15 ++++++--------- src/storage/v3/splitter.hpp | 10 ++++------ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 1c48907d0..58c2d6a9e 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -57,7 +57,7 @@ SplitData Splitter::SplitShard(const PrimaryKey &split_key, const std::optional< std::set<uint64_t> collected_transactions_; data.vertices = CollectVertices(data, collected_transactions_, split_key); data.edges = CollectEdges(collected_transactions_, data.vertices, split_key); - data.transactions = CollectTransactions(collected_transactions_, data.vertices, *data.edges, split_key); + data.transactions = CollectTransactions(collected_transactions_, *data.edges, split_key); return data; } @@ -122,8 +122,7 @@ std::optional<EdgeContainer> Splitter::CollectEdges(std::set<uint64_t> &collecte } std::map<uint64_t, std::unique_ptr<Transaction>> Splitter::CollectTransactions( - const std::set<uint64_t> &collected_transactions_, VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key) { + const std::set<uint64_t> &collected_transactions_, EdgeContainer &cloned_edges, const PrimaryKey &split_key) { std::map<uint64_t, std::unique_ptr<Transaction>> transactions; for (const auto &[commit_start, transaction] : start_logical_id_to_transaction_) { @@ -136,7 +135,7 @@ std::map<uint64_t, 
std::unique_ptr<Transaction>> Splitter::CollectTransactions( // It is necessary to clone all the transactions first so we have new addresses // for deltas, before doing alignment of deltas and prev_ptr - AdjustClonedTransactions(transactions, cloned_vertices, cloned_edges, split_key); + AdjustClonedTransactions(transactions, cloned_edges, split_key); return transactions; } @@ -244,11 +243,10 @@ void Splitter::PruneOriginalDeltas(Transaction &transaction, } void Splitter::AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key) { + EdgeContainer &cloned_edges, const PrimaryKey &split_key) { for (auto &[start_id, cloned_transaction] : cloned_transactions) { AdjustClonedTransaction(*cloned_transaction, *start_logical_id_to_transaction_[start_id], cloned_transactions, - cloned_vertices, cloned_edges, split_key); + cloned_edges); } // Prune deltas whose delta chain points to vertex/edge that should not belong on that shard // Prune must be after adjust, since next, and prev are not set and we cannot follow the chain @@ -271,8 +269,7 @@ bool DoesPrevPtrPointsToSplittedData(const PreviousPtr::Pointer &prev_ptr, const void Splitter::AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey & /*split_key*/) { + EdgeContainer &cloned_edges) { auto delta_it = transaction.deltas.begin(); auto cloned_delta_it = cloned_transaction.deltas.begin(); diff --git a/src/storage/v3/splitter.hpp b/src/storage/v3/splitter.hpp index 72869e9cb..8cd0f7ae4 100644 --- a/src/storage/v3/splitter.hpp +++ b/src/storage/v3/splitter.hpp @@ -72,8 +72,8 @@ class Splitter final { const VertexContainer &split_vertices, const PrimaryKey &split_key); std::map<uint64_t, 
std::unique_ptr<Transaction>> CollectTransactions( - const std::set<uint64_t> &collected_transactions_start_id, VertexContainer &cloned_vertices, - EdgeContainer &cloned_edges, const PrimaryKey &split_key); + const std::set<uint64_t> &collected_transactions_start_id, EdgeContainer &cloned_edges, + const PrimaryKey &split_key); static void ScanDeltas(std::set<uint64_t> &collected_transactions_start_id, const Delta *delta); @@ -82,12 +82,10 @@ class Splitter final { void AdjustClonedTransaction(Transaction &cloned_transaction, const Transaction &transaction, std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key); + EdgeContainer &cloned_edges); void AdjustClonedTransactions(std::map<uint64_t, std::unique_ptr<Transaction>> &cloned_transactions, - VertexContainer &cloned_vertices, EdgeContainer &cloned_edges, - const PrimaryKey &split_key); + EdgeContainer &cloned_edges, const PrimaryKey &split_key); void AdjustEdgeRef(Delta &cloned_delta, EdgeContainer &cloned_edges) const; From fe80c7ea94e67d1b7c5f24f079c2a6e82907b830 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 15:17:54 +0100 Subject: [PATCH 70/79] Extend benchmark --- tests/benchmark/storage_v3_split.cpp | 36 ++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 4e46ee943..5836ee086 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -176,18 +176,44 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) ->Range(100'000, 1'000'000) ->Unit(::benchmark::kMillisecond); +// Args: +// Number of vertices +// Number of edges +// Number of transaction +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) +// ->Args({100'000, 100'000, 1'000}) +// ->Args({100'000, 100'000, 
10'000}) +// ->Args({1'000'000, 100'000, 1'000}) +// ->Args({1'000'000, 100'000, 10'000}) +// ->Args({100'000, 1'000'000, 1'000}) +// ->Args({1'000'000, 1'00'000, 10'000}) +// ->Unit(::benchmark::kMillisecond); + // Args: // Number of vertices // Number of edges // Number of transaction BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) + ->Args({100'000, 100'000, 100}) + ->Args({500'000, 100'000, 100}) + ->Args({1'000'000, 100'000, 100}) + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseVertices"); + +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) + ->Args({100'000, 100'000, 100}) + ->Args({100'000, 500'000, 100}) + ->Args({100'000, 1'000'000, 100}) + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseEdges"); + +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) + ->Args({100'000, 100'000, 1}) + ->Args({100'000, 100'000, 100}) ->Args({100'000, 100'000, 1'000}) ->Args({100'000, 100'000, 10'000}) - ->Args({1'000'000, 100'000, 1'000}) - ->Args({1'000'000, 100'000, 10'000}) - ->Args({100'000, 1'000'000, 1'000}) - ->Args({1'000'000, 1'00'000, 10'000}) - ->Unit(::benchmark::kMillisecond); + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseTransactions"); } // namespace memgraph::benchmark From 460ecc5340ad57230578b8a6560e185bb8dd7c3a Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 16:52:06 +0100 Subject: [PATCH 71/79] Make transactinal data more consistent in benchmark --- tests/benchmark/storage_v3_split.cpp | 78 ++++++++++++++-------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 5836ee086..8938c68c0 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -120,38 +120,49 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State & } } -BENCHMARK_DEFINE_F(ShardSplitBenchmark, 
BigDataSplitWithFewTransactions)(::benchmark::State &state) { +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices)(::benchmark::State &state) { const auto number_of_vertices = state.range(0); const auto number_of_edges = state.range(1); const auto number_of_transactions = state.range(2); std::random_device r; std::default_random_engine e1(r()); - std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices - number_of_transactions); - const auto max_transactions_needed = std::max(number_of_vertices, number_of_edges); - for (int64_t vertex_counter{number_of_vertices}, edge_counter{number_of_edges}, i{0}; - vertex_counter > 0 || edge_counter > 0; --vertex_counter, --edge_counter, ++i) { + // Create Vertices + int64_t vertex_count{0}; + for (; vertex_count < number_of_vertices - number_of_transactions; ++vertex_count) { auto acc = storage->Access(GetNextHlc()); - if (vertex_counter > 0) { - MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, - {{secondary_property, PropertyValue(i)}}) - .HasValue(), - "Failed creating with pk {}", i); - } - if (edge_counter > 0 && i > 1) { - const auto vtx1 = uniform_dist(e1) % std::min(i, number_of_vertices); - const auto vtx2 = uniform_dist(e1) % std::min(i, number_of_vertices); - - MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, - VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) - .HasValue(), - "Failed on {} and {}", vtx1, vtx2); - } - + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + acc.Commit(GetNextHlc()); + } + + // Create Edges + for (int64_t i{0}; i < number_of_edges; ++i) { + auto acc = storage->Access(GetNextHlc()); + + const auto vtx1 = uniform_dist(e1); 
+ const auto vtx2 = uniform_dist(e1); + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + acc.Commit(GetNextHlc()); + } + // Clean up transactional data + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + + // Create rest of the objects and leave transactions + for (; vertex_count < number_of_vertices; ++vertex_count) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); acc.Commit(GetNextHlc()); - if (i == max_transactions_needed - number_of_transactions) { - storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); - } } for (auto _ : state) { @@ -180,34 +191,21 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) // Number of vertices // Number of edges // Number of transaction -// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) -// ->Args({100'000, 100'000, 1'000}) -// ->Args({100'000, 100'000, 10'000}) -// ->Args({1'000'000, 100'000, 1'000}) -// ->Args({1'000'000, 100'000, 10'000}) -// ->Args({100'000, 1'000'000, 1'000}) -// ->Args({1'000'000, 1'00'000, 10'000}) -// ->Unit(::benchmark::kMillisecond); - -// Args: -// Number of vertices -// Number of edges -// Number of transaction -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) ->Args({500'000, 100'000, 100}) ->Args({1'000'000, 100'000, 100}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseVertices"); -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) +BENCHMARK_REGISTER_F(ShardSplitBenchmark, 
BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) ->Args({100'000, 500'000, 100}) ->Args({100'000, 1'000'000, 100}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseEdges"); -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactions) +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 1}) ->Args({100'000, 100'000, 100}) ->Args({100'000, 100'000, 1'000}) From d1b3cc35ea60ad574ea5b10a725481b05832f740 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Fri, 17 Mar 2023 17:13:50 +0100 Subject: [PATCH 72/79] Fix edge creation --- tests/benchmark/storage_v3_split.cpp | 53 ++++++++++++++-------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 8938c68c0..3b16c8444 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -126,30 +126,29 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertice const auto number_of_transactions = state.range(2); std::random_device r; std::default_random_engine e1(r()); - std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices - number_of_transactions); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices - number_of_transactions - 1); // Create Vertices int64_t vertex_count{0}; - for (; vertex_count < number_of_vertices - number_of_transactions; ++vertex_count) { + { auto acc = storage->Access(GetNextHlc()); - MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, - {{secondary_property, PropertyValue(vertex_count)}}) - .HasValue(), - "Failed creating with pk {}", vertex_count); - acc.Commit(GetNextHlc()); - } + for (; vertex_count < number_of_vertices - number_of_transactions; ++vertex_count) { + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, 
PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + } - // Create Edges - for (int64_t i{0}; i < number_of_edges; ++i) { - auto acc = storage->Access(GetNextHlc()); + // Create Edges + for (int64_t i{0}; i < number_of_edges; ++i) { + const auto vtx1 = uniform_dist(e1); + const auto vtx2 = uniform_dist(e1); - const auto vtx1 = uniform_dist(e1); - const auto vtx2 = uniform_dist(e1); - - MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, - VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) - .HasValue(), - "Failed on {} and {}", vtx1, vtx2); + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } acc.Commit(GetNextHlc()); } // Clean up transactional data @@ -174,18 +173,18 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertice // Range: // Number of vertices // This run is pessimistic, number of vertices corresponds with number if transactions -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) - ->RangeMultiplier(10) - ->Range(100'000, 100'000) - ->Unit(::benchmark::kMillisecond); +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) +// ->RangeMultiplier(10) +// ->Range(100'000, 100'000) +// ->Unit(::benchmark::kMillisecond); // Range: // Number of vertices // This run is optimistic, in this run there are no transactions -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) - ->RangeMultiplier(10) - ->Range(100'000, 1'000'000) - ->Unit(::benchmark::kMillisecond); +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) +// ->RangeMultiplier(10) +// ->Range(100'000, 1'000'000) +// ->Unit(::benchmark::kMillisecond); // Args: // Number of vertices @@ -195,6 +194,7 @@ 
BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti ->Args({100'000, 100'000, 100}) ->Args({500'000, 100'000, 100}) ->Args({1'000'000, 100'000, 100}) + ->Args({5'000'000, 100'000, 100}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseVertices"); @@ -202,6 +202,7 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti ->Args({100'000, 100'000, 100}) ->Args({100'000, 500'000, 100}) ->Args({100'000, 1'000'000, 100}) + ->Args({100'000, 5'000'000, 100}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseEdges"); From 20c6d66fb05fbf737295bac92142299c4af36aa5 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Mar 2023 09:55:29 +0100 Subject: [PATCH 73/79] Extend benchamrks --- tests/benchmark/storage_v3_split.cpp | 33 ++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 3b16c8444..29ecbd669 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -192,25 +192,54 @@ BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertice // Number of transaction BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) + ->Args({200'000, 100'000, 100}) + ->Args({300'000, 100'000, 100}) + ->Args({400'000, 100'000, 100}) ->Args({500'000, 100'000, 100}) + ->Args({600'000, 100'000, 100}) + ->Args({700'000, 100'000, 100}) + ->Args({800'000, 100'000, 100}) + ->Args({900'000, 100'000, 100}) ->Args({1'000'000, 100'000, 100}) - ->Args({5'000'000, 100'000, 100}) + ->Args({2'000'000, 100'000, 100}) + ->Args({3'000'000, 100'000, 100}) + ->Args({4'000'000, 100'000, 100}) + ->Args({6'000'000, 100'000, 100}) + ->Args({7'000'000, 100'000, 100}) + ->Args({8'000'000, 100'000, 100}) + ->Args({9'000'000, 100'000, 100}) + ->Args({10'000'000, 100'000, 100}) 
->Unit(::benchmark::kMillisecond) ->Name("IncreaseVertices"); BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) + ->Args({100'000, 200'000, 100}) + ->Args({100'000, 300'000, 100}) + ->Args({100'000, 400'000, 100}) ->Args({100'000, 500'000, 100}) + ->Args({100'000, 600'000, 100}) + ->Args({100'000, 700'000, 100}) + ->Args({100'000, 800'000, 100}) + ->Args({100'000, 900'000, 100}) ->Args({100'000, 1'000'000, 100}) + ->Args({100'000, 2'000'000, 100}) + ->Args({100'000, 3'000'000, 100}) + ->Args({100'000, 4'000'000, 100}) ->Args({100'000, 5'000'000, 100}) + ->Args({100'000, 6'000'000, 100}) + ->Args({100'000, 7'000'000, 100}) + ->Args({100'000, 8'000'000, 100}) + ->Args({100'000, 9'000'000, 100}) + ->Args({100'000, 10'000'000, 100}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseEdges"); BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) - ->Args({100'000, 100'000, 1}) ->Args({100'000, 100'000, 100}) ->Args({100'000, 100'000, 1'000}) ->Args({100'000, 100'000, 10'000}) + ->Args({100'000, 100'000, 100'000}) ->Unit(::benchmark::kMillisecond) ->Name("IncreaseTransactions"); From 967a8f46cf92f16654ffba499389625d2ed6af96 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Mar 2023 11:05:05 +0100 Subject: [PATCH 74/79] Benchmarking --- tests/benchmark/CMakeLists.txt | 6 + tests/benchmark/storage_v3_split.cpp | 22 ++ tests/benchmark/storage_v3_split_1.cpp | 270 +++++++++++++++++++++++++ tests/benchmark/storage_v3_split_2.cpp | 270 +++++++++++++++++++++++++ 4 files changed, 568 insertions(+) create mode 100644 tests/benchmark/storage_v3_split_1.cpp create mode 100644 tests/benchmark/storage_v3_split_2.cpp diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt index 306cf9661..954809ffc 100644 --- a/tests/benchmark/CMakeLists.txt +++ b/tests/benchmark/CMakeLists.txt @@ -82,3 +82,9 @@ 
target_link_libraries(${test_prefix}data_structures_remove mg-utils mg-storage-v add_benchmark(storage_v3_split.cpp) target_link_libraries(${test_prefix}storage_v3_split mg-storage-v3 mg-query-v2) + +add_benchmark(storage_v3_split_1.cpp) +target_link_libraries(${test_prefix}storage_v3_split_1 mg-storage-v3 mg-query-v2) + +add_benchmark(storage_v3_split_2.cpp) +target_link_libraries(${test_prefix}storage_v3_split_2 mg-storage-v3 mg-query-v2) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 29ecbd669..82204e1b3 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -212,6 +212,28 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti ->Unit(::benchmark::kMillisecond) ->Name("IncreaseVertices"); +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) + ->Args({100'000, 100'000, 100}) + ->Args({200'000, 100'000, 100}) + ->Args({300'000, 100'000, 100}) + ->Args({400'000, 100'000, 100}) + ->Args({500'000, 100'000, 100}) + ->Args({600'000, 100'000, 100}) + ->Args({700'000, 100'000, 100}) + ->Args({800'000, 100'000, 100}) + ->Args({900'000, 100'000, 100}) + ->Args({1'000'000, 100'000, 100}) + ->Args({2'000'000, 100'000, 100}) + ->Args({3'000'000, 100'000, 100}) + ->Args({4'000'000, 100'000, 100}) + ->Args({6'000'000, 100'000, 100}) + ->Args({7'000'000, 100'000, 100}) + ->Args({8'000'000, 100'000, 100}) + ->Args({9'000'000, 100'000, 100}) + ->Args({10'000'000, 100'000, 100}) + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseVertices"); + BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) ->Args({100'000, 200'000, 100}) diff --git a/tests/benchmark/storage_v3_split_1.cpp b/tests/benchmark/storage_v3_split_1.cpp new file mode 100644 index 000000000..9fdcd1abb --- /dev/null +++ b/tests/benchmark/storage_v3_split_1.cpp @@ -0,0 +1,270 @@ +// Copyright 2023 Memgraph 
Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include <cstdint> +#include <optional> +#include <vector> + +#include <benchmark/benchmark.h> +#include <gflags/gflags.h> + +#include "storage/v3/id_types.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/property_value.hpp" +#include "storage/v3/shard.hpp" +#include "storage/v3/vertex.hpp" +#include "storage/v3/vertex_id.hpp" + +namespace memgraph::benchmark { + +class ShardSplitBenchmark : public ::benchmark::Fixture { + protected: + using PrimaryKey = storage::v3::PrimaryKey; + using PropertyId = storage::v3::PropertyId; + using PropertyValue = storage::v3::PropertyValue; + using LabelId = storage::v3::LabelId; + using EdgeTypeId = storage::v3::EdgeTypeId; + using Shard = storage::v3::Shard; + using VertexId = storage::v3::VertexId; + using Gid = storage::v3::Gid; + + void SetUp(const ::benchmark::State &state) override { + storage.emplace(primary_label, min_pk, std::nullopt, schema_property_vector); + storage->StoreMapping( + {{1, "label"}, {2, "property"}, {3, "edge_property"}, {4, "secondary_label"}, {5, "secondary_prop"}}); + } + + void TearDown(const ::benchmark::State &) override { storage = std::nullopt; } + + const PropertyId primary_property{PropertyId::FromUint(2)}; + const PropertyId secondary_property{PropertyId::FromUint(5)}; + std::vector<storage::v3::SchemaProperty> schema_property_vector = { + storage::v3::SchemaProperty{primary_property, common::SchemaType::INT}}; + const std::vector<PropertyValue> 
min_pk{PropertyValue{0}}; + const LabelId primary_label{LabelId::FromUint(1)}; + const LabelId secondary_label{LabelId::FromUint(4)}; + const EdgeTypeId edge_type_id{EdgeTypeId::FromUint(3)}; + std::optional<Shard> storage; + + coordinator::Hlc last_hlc{0, io::Time{}}; + + coordinator::Hlc GetNextHlc() { + ++last_hlc.logical_id; + last_hlc.coordinator_wall_clock += std::chrono::seconds(1); + return last_hlc; + } +}; + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) { + const auto number_of_vertices{state.range(0)}; + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); + + for (int64_t i{0}; i < number_of_vertices; ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State &state) { + const auto number_of_vertices{state.range(0)}; + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); + + for (int64_t i{0}; i < number_of_vertices; ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", 
i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices)(::benchmark::State &state) { + const auto number_of_vertices = state.range(0); + const auto number_of_edges = state.range(1); + const auto number_of_transactions = state.range(2); + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices - number_of_transactions - 1); + + // Create Vertices + int64_t vertex_count{0}; + { + auto acc = storage->Access(GetNextHlc()); + for (; vertex_count < number_of_vertices - number_of_transactions; ++vertex_count) { + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + } + + // Create Edges + for (int64_t i{0}; i < number_of_edges; ++i) { + const auto vtx1 = uniform_dist(e1); + const auto vtx2 = uniform_dist(e1); + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + // Clean up transactional data + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + + // Create rest of the objects and leave transactions + for (; vertex_count < number_of_vertices; ++vertex_count) { + auto acc = 
storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + acc.Commit(GetNextHlc()); + } + + for (auto _ : state) { + // Don't create shard since shard deallocation can take some time as well + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +// Range: +// Number of vertices +// This run is pessimistic, number of vertices corresponds with number if transactions +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) +// ->RangeMultiplier(10) +// ->Range(100'000, 100'000) +// ->Unit(::benchmark::kMillisecond); + +// Range: +// Number of vertices +// This run is optimistic, in this run there are no transactions +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) +// ->RangeMultiplier(10) +// ->Range(100'000, 1'000'000) +// ->Unit(::benchmark::kMillisecond); + +// Args: +// Number of vertices +// Number of edges +// Number of transaction +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) + // ->Args({100'000, 100'000, 100}) + // ->Args({200'000, 100'000, 100}) + // ->Args({300'000, 100'000, 100}) + // ->Args({400'000, 100'000, 100}) + // ->Args({500'000, 100'000, 100}) + // ->Args({600'000, 100'000, 100}) + ->Args({700'000, 100'000, 100}) + // ->Args({800'000, 100'000, 100}) + // ->Args({900'000, 100'000, 100}) + // ->Args({1'000'000, 100'000, 100}) + // ->Args({2'000'000, 100'000, 100}) + // ->Args({3'000'000, 100'000, 100}) + // ->Args({4'000'000, 100'000, 100}) + // ->Args({6'000'000, 100'000, 100}) + // ->Args({7'000'000, 100'000, 100}) + // ->Args({8'000'000, 100'000, 100}) + // ->Args({9'000'000, 100'000, 100}) + // ->Args({10'000'000, 100'000, 100}) + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseVertices"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, 
BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({200'000, 100'000, 100}) +// ->Args({300'000, 100'000, 100}) +// ->Args({400'000, 100'000, 100}) +// ->Args({500'000, 100'000, 100}) +// ->Args({600'000, 100'000, 100}) +// ->Args({700'000, 100'000, 100}) +// ->Args({800'000, 100'000, 100}) +// ->Args({900'000, 100'000, 100}) +// ->Args({1'000'000, 100'000, 100}) +// ->Args({2'000'000, 100'000, 100}) +// ->Args({3'000'000, 100'000, 100}) +// ->Args({4'000'000, 100'000, 100}) +// ->Args({6'000'000, 100'000, 100}) +// ->Args({7'000'000, 100'000, 100}) +// ->Args({8'000'000, 100'000, 100}) +// ->Args({9'000'000, 100'000, 100}) +// ->Args({10'000'000, 100'000, 100}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseVertices"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({100'000, 200'000, 100}) +// ->Args({100'000, 300'000, 100}) +// ->Args({100'000, 400'000, 100}) +// ->Args({100'000, 500'000, 100}) +// ->Args({100'000, 600'000, 100}) +// ->Args({100'000, 700'000, 100}) +// ->Args({100'000, 800'000, 100}) +// ->Args({100'000, 900'000, 100}) +// ->Args({100'000, 1'000'000, 100}) +// ->Args({100'000, 2'000'000, 100}) +// ->Args({100'000, 3'000'000, 100}) +// ->Args({100'000, 4'000'000, 100}) +// ->Args({100'000, 5'000'000, 100}) +// ->Args({100'000, 6'000'000, 100}) +// ->Args({100'000, 7'000'000, 100}) +// ->Args({100'000, 8'000'000, 100}) +// ->Args({100'000, 9'000'000, 100}) +// ->Args({100'000, 10'000'000, 100}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseEdges"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({100'000, 100'000, 1'000}) +// ->Args({100'000, 100'000, 10'000}) +// ->Args({100'000, 100'000, 100'000}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseTransactions"); + +} // namespace memgraph::benchmark 
+ +BENCHMARK_MAIN(); diff --git a/tests/benchmark/storage_v3_split_2.cpp b/tests/benchmark/storage_v3_split_2.cpp new file mode 100644 index 000000000..34b588607 --- /dev/null +++ b/tests/benchmark/storage_v3_split_2.cpp @@ -0,0 +1,270 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include <cstdint> +#include <optional> +#include <vector> + +#include <benchmark/benchmark.h> +#include <gflags/gflags.h> + +#include "storage/v3/id_types.hpp" +#include "storage/v3/key_store.hpp" +#include "storage/v3/property_value.hpp" +#include "storage/v3/shard.hpp" +#include "storage/v3/vertex.hpp" +#include "storage/v3/vertex_id.hpp" + +namespace memgraph::benchmark { + +class ShardSplitBenchmark : public ::benchmark::Fixture { + protected: + using PrimaryKey = storage::v3::PrimaryKey; + using PropertyId = storage::v3::PropertyId; + using PropertyValue = storage::v3::PropertyValue; + using LabelId = storage::v3::LabelId; + using EdgeTypeId = storage::v3::EdgeTypeId; + using Shard = storage::v3::Shard; + using VertexId = storage::v3::VertexId; + using Gid = storage::v3::Gid; + + void SetUp(const ::benchmark::State &state) override { + storage.emplace(primary_label, min_pk, std::nullopt, schema_property_vector); + storage->StoreMapping( + {{1, "label"}, {2, "property"}, {3, "edge_property"}, {4, "secondary_label"}, {5, "secondary_prop"}}); + } + + void TearDown(const ::benchmark::State &) override { storage = std::nullopt; } + + const PropertyId 
primary_property{PropertyId::FromUint(2)}; + const PropertyId secondary_property{PropertyId::FromUint(5)}; + std::vector<storage::v3::SchemaProperty> schema_property_vector = { + storage::v3::SchemaProperty{primary_property, common::SchemaType::INT}}; + const std::vector<PropertyValue> min_pk{PropertyValue{0}}; + const LabelId primary_label{LabelId::FromUint(1)}; + const LabelId secondary_label{LabelId::FromUint(4)}; + const EdgeTypeId edge_type_id{EdgeTypeId::FromUint(3)}; + std::optional<Shard> storage; + + coordinator::Hlc last_hlc{0, io::Time{}}; + + coordinator::Hlc GetNextHlc() { + ++last_hlc.logical_id; + last_hlc.coordinator_wall_clock += std::chrono::seconds(1); + return last_hlc; + } +}; + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplit)(::benchmark::State &state) { + const auto number_of_vertices{state.range(0)}; + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices); + + for (int64_t i{0}; i < number_of_vertices; ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithGc)(::benchmark::State &state) { + const auto number_of_vertices{state.range(0)}; + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, 
number_of_vertices); + + for (int64_t i{0}; i < number_of_vertices; ++i) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(i)}, + {{secondary_property, PropertyValue(i)}}) + .HasValue(), + "Failed creating with pk {}", i); + if (i > 1) { + const auto vtx1 = uniform_dist(e1) % i; + const auto vtx2 = uniform_dist(e1) % i; + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + "Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + for (auto _ : state) { + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +BENCHMARK_DEFINE_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices)(::benchmark::State &state) { + const auto number_of_vertices = state.range(0); + const auto number_of_edges = state.range(1); + const auto number_of_transactions = state.range(2); + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution<int> uniform_dist(0, number_of_vertices - number_of_transactions - 1); + + // Create Vertices + int64_t vertex_count{0}; + { + auto acc = storage->Access(GetNextHlc()); + for (; vertex_count < number_of_vertices - number_of_transactions; ++vertex_count) { + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + } + + // Create Edges + for (int64_t i{0}; i < number_of_edges; ++i) { + const auto vtx1 = uniform_dist(e1); + const auto vtx2 = uniform_dist(e1); + + MG_ASSERT(acc.CreateEdge(VertexId{primary_label, {PropertyValue(vtx1)}}, + VertexId{primary_label, {PropertyValue(vtx2)}}, edge_type_id, Gid::FromUint(i)) + .HasValue(), + 
"Failed on {} and {}", vtx1, vtx2); + } + acc.Commit(GetNextHlc()); + } + // Clean up transactional data + storage->CollectGarbage(GetNextHlc().coordinator_wall_clock); + + // Create rest of the objects and leave transactions + for (; vertex_count < number_of_vertices; ++vertex_count) { + auto acc = storage->Access(GetNextHlc()); + MG_ASSERT(acc.CreateVertexAndValidate({secondary_label}, PrimaryKey{PropertyValue(vertex_count)}, + {{secondary_property, PropertyValue(vertex_count)}}) + .HasValue(), + "Failed creating with pk {}", vertex_count); + acc.Commit(GetNextHlc()); + } + + for (auto _ : state) { + // Don't create shard since shard deallocation can take some time as well + auto data = storage->PerformSplit(PrimaryKey{PropertyValue{number_of_vertices / 2}}, 2); + } +} + +// Range: +// Number of vertices +// This run is pessimistic, number of vertices corresponds with number of transactions +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplit) +// ->RangeMultiplier(10) +// ->Range(100'000, 100'000) +// ->Unit(::benchmark::kMillisecond); + +// Range: +// Number of vertices +// This run is optimistic, in this run there are no transactions +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithGc) +// ->RangeMultiplier(10) +// ->Range(100'000, 1'000'000) +// ->Unit(::benchmark::kMillisecond); + +// Args: +// Number of vertices +// Number of edges +// Number of transactions +BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) + // ->Args({100'000, 100'000, 100}) + // ->Args({200'000, 100'000, 100}) + // ->Args({300'000, 100'000, 100}) + // ->Args({400'000, 100'000, 100}) + // ->Args({500'000, 100'000, 100}) + // ->Args({600'000, 100'000, 100}) + // ->Args({700'000, 100'000, 100}) + ->Args({800'000, 100'000, 100}) + // ->Args({900'000, 100'000, 100}) + // ->Args({1'000'000, 100'000, 100}) + // ->Args({2'000'000, 100'000, 100}) + // ->Args({3'000'000, 100'000, 100}) + // ->Args({4'000'000, 100'000, 100}) + //
->Args({6'000'000, 100'000, 100}) + // ->Args({7'000'000, 100'000, 100}) + // ->Args({8'000'000, 100'000, 100}) + // ->Args({9'000'000, 100'000, 100}) + // ->Args({10'000'000, 100'000, 100}) + ->Unit(::benchmark::kMillisecond) + ->Name("IncreaseVertices"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({200'000, 100'000, 100}) +// ->Args({300'000, 100'000, 100}) +// ->Args({400'000, 100'000, 100}) +// ->Args({500'000, 100'000, 100}) +// ->Args({600'000, 100'000, 100}) +// ->Args({700'000, 100'000, 100}) +// ->Args({800'000, 100'000, 100}) +// ->Args({900'000, 100'000, 100}) +// ->Args({1'000'000, 100'000, 100}) +// ->Args({2'000'000, 100'000, 100}) +// ->Args({3'000'000, 100'000, 100}) +// ->Args({4'000'000, 100'000, 100}) +// ->Args({6'000'000, 100'000, 100}) +// ->Args({7'000'000, 100'000, 100}) +// ->Args({8'000'000, 100'000, 100}) +// ->Args({9'000'000, 100'000, 100}) +// ->Args({10'000'000, 100'000, 100}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseVertices"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({100'000, 200'000, 100}) +// ->Args({100'000, 300'000, 100}) +// ->Args({100'000, 400'000, 100}) +// ->Args({100'000, 500'000, 100}) +// ->Args({100'000, 600'000, 100}) +// ->Args({100'000, 700'000, 100}) +// ->Args({100'000, 800'000, 100}) +// ->Args({100'000, 900'000, 100}) +// ->Args({100'000, 1'000'000, 100}) +// ->Args({100'000, 2'000'000, 100}) +// ->Args({100'000, 3'000'000, 100}) +// ->Args({100'000, 4'000'000, 100}) +// ->Args({100'000, 5'000'000, 100}) +// ->Args({100'000, 6'000'000, 100}) +// ->Args({100'000, 7'000'000, 100}) +// ->Args({100'000, 8'000'000, 100}) +// ->Args({100'000, 9'000'000, 100}) +// ->Args({100'000, 10'000'000, 100}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseEdges"); + +// BENCHMARK_REGISTER_F(ShardSplitBenchmark, 
BigDataSplitWithFewTransactionsOnVertices) +// ->Args({100'000, 100'000, 100}) +// ->Args({100'000, 100'000, 1'000}) +// ->Args({100'000, 100'000, 10'000}) +// ->Args({100'000, 100'000, 100'000}) +// ->Unit(::benchmark::kMillisecond) +// ->Name("IncreaseTransactions"); + +} // namespace memgraph::benchmark + +BENCHMARK_MAIN(); From 3c545704dfaeddba8f8954c6bb6a0e137f28c380 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Mar 2023 15:15:18 +0100 Subject: [PATCH 75/79] Add hint on insert --- src/storage/v3/splitter.cpp | 4 ++-- tests/benchmark/storage_v3_split.cpp | 22 ---------------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 58c2d6a9e..0ac00ec85 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -82,8 +82,8 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c auto next_it = std::next(split_key_it); - const auto &[splitted_vertex_it, inserted, node] = splitted_data.insert(vertices_.extract(split_key_it->first)); - MG_ASSERT(inserted, "Failed to extract vertex!"); + const auto new_it = splitted_data.insert(splitted_data.end(), vertices_.extract(split_key_it->first)); + MG_ASSERT(new_it != splitted_data.end(), "Failed to extract vertex!"); split_key_it = next_it; } diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 82204e1b3..29ecbd669 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -212,28 +212,6 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti ->Unit(::benchmark::kMillisecond) ->Name("IncreaseVertices"); -BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) - ->Args({100'000, 100'000, 100}) - ->Args({200'000, 100'000, 100}) - ->Args({300'000, 100'000, 100}) - ->Args({400'000, 100'000, 100}) - ->Args({500'000, 100'000, 100}) - 
->Args({600'000, 100'000, 100}) - ->Args({700'000, 100'000, 100}) - ->Args({800'000, 100'000, 100}) - ->Args({900'000, 100'000, 100}) - ->Args({1'000'000, 100'000, 100}) - ->Args({2'000'000, 100'000, 100}) - ->Args({3'000'000, 100'000, 100}) - ->Args({4'000'000, 100'000, 100}) - ->Args({6'000'000, 100'000, 100}) - ->Args({7'000'000, 100'000, 100}) - ->Args({8'000'000, 100'000, 100}) - ->Args({9'000'000, 100'000, 100}) - ->Args({10'000'000, 100'000, 100}) - ->Unit(::benchmark::kMillisecond) - ->Name("IncreaseVertices"); - BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVertices) ->Args({100'000, 100'000, 100}) ->Args({100'000, 200'000, 100}) From 825b567c8187a11ec09c67d944dac1e703aaa250 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Mar 2023 15:36:00 +0100 Subject: [PATCH 76/79] Test 1m --- tests/benchmark/storage_v3_split_1.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/benchmark/storage_v3_split_1.cpp b/tests/benchmark/storage_v3_split_1.cpp index 9fdcd1abb..5f5abe287 100644 --- a/tests/benchmark/storage_v3_split_1.cpp +++ b/tests/benchmark/storage_v3_split_1.cpp @@ -197,10 +197,10 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti // ->Args({400'000, 100'000, 100}) // ->Args({500'000, 100'000, 100}) // ->Args({600'000, 100'000, 100}) - ->Args({700'000, 100'000, 100}) + // ->Args({700'000, 100'000, 100}) // ->Args({800'000, 100'000, 100}) // ->Args({900'000, 100'000, 100}) - // ->Args({1'000'000, 100'000, 100}) + ->Args({1'000'000, 100'000, 100}) // ->Args({2'000'000, 100'000, 100}) // ->Args({3'000'000, 100'000, 100}) // ->Args({4'000'000, 100'000, 100}) From cfb3841fd74a82d986667fc961232ac5e082a84f Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Thu, 23 Mar 2023 16:19:15 +0100 Subject: [PATCH 77/79] Bench --- tests/benchmark/storage_v3_split_1.cpp | 4 ++-- tests/benchmark/storage_v3_split_2.cpp | 4 ++-- 2 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/benchmark/storage_v3_split_1.cpp b/tests/benchmark/storage_v3_split_1.cpp index 5f5abe287..ebfe97c05 100644 --- a/tests/benchmark/storage_v3_split_1.cpp +++ b/tests/benchmark/storage_v3_split_1.cpp @@ -199,8 +199,8 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti // ->Args({600'000, 100'000, 100}) // ->Args({700'000, 100'000, 100}) // ->Args({800'000, 100'000, 100}) - // ->Args({900'000, 100'000, 100}) - ->Args({1'000'000, 100'000, 100}) + ->Args({900'000, 100'000, 100}) + // ->Args({1'000'000, 100'000, 100}) // ->Args({2'000'000, 100'000, 100}) // ->Args({3'000'000, 100'000, 100}) // ->Args({4'000'000, 100'000, 100}) diff --git a/tests/benchmark/storage_v3_split_2.cpp b/tests/benchmark/storage_v3_split_2.cpp index 34b588607..39c5c1ead 100644 --- a/tests/benchmark/storage_v3_split_2.cpp +++ b/tests/benchmark/storage_v3_split_2.cpp @@ -198,9 +198,9 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti // ->Args({500'000, 100'000, 100}) // ->Args({600'000, 100'000, 100}) // ->Args({700'000, 100'000, 100}) - ->Args({800'000, 100'000, 100}) + // ->Args({800'000, 100'000, 100}) // ->Args({900'000, 100'000, 100}) - // ->Args({1'000'000, 100'000, 100}) + ->Args({1'000'000, 100'000, 100}) // ->Args({2'000'000, 100'000, 100}) // ->Args({3'000'000, 100'000, 100}) // ->Args({4'000'000, 100'000, 100}) From f566db446d5c4c889a2f9687ef4c74061dfc86c0 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 27 Mar 2023 13:54:27 +0200 Subject: [PATCH 78/79] Extract by iterator --- src/storage/v3/splitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage/v3/splitter.cpp b/src/storage/v3/splitter.cpp index 0ac00ec85..6178b67e6 100644 --- a/src/storage/v3/splitter.cpp +++ b/src/storage/v3/splitter.cpp @@ -82,7 +82,7 @@ VertexContainer Splitter::CollectVertices(SplitData &data, std::set<uint64_t> &c auto next_it = 
std::next(split_key_it); - const auto new_it = splitted_data.insert(splitted_data.end(), vertices_.extract(split_key_it->first)); + const auto new_it = splitted_data.insert(splitted_data.end(), vertices_.extract(split_key_it)); MG_ASSERT(new_it != splitted_data.end(), "Failed to extract vertex!"); split_key_it = next_it; From 62115f07933cfd327f1db33d4bc86e92c39eb67e Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 27 Mar 2023 14:11:56 +0200 Subject: [PATCH 79/79] Bring back 5m test case --- tests/benchmark/storage_v3_split.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/benchmark/storage_v3_split.cpp b/tests/benchmark/storage_v3_split.cpp index 29ecbd669..de8735f8e 100644 --- a/tests/benchmark/storage_v3_split.cpp +++ b/tests/benchmark/storage_v3_split.cpp @@ -204,6 +204,7 @@ BENCHMARK_REGISTER_F(ShardSplitBenchmark, BigDataSplitWithFewTransactionsOnVerti ->Args({2'000'000, 100'000, 100}) ->Args({3'000'000, 100'000, 100}) ->Args({4'000'000, 100'000, 100}) + ->Args({5'000'000, 100'000, 100}) ->Args({6'000'000, 100'000, 100}) ->Args({7'000'000, 100'000, 100}) ->Args({8'000'000, 100'000, 100})