Merge branch 'master' into add-logs-to-ci

This commit is contained in:
Marko Barišić 2024-02-07 11:33:27 +01:00 committed by GitHub
commit 28f49a3b90
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
70 changed files with 1419 additions and 573 deletions

View File

@ -57,16 +57,19 @@ struct UpdateAuthData : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during Auth execution */ void DoDurability() override { /* Done during Auth execution */
} }
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch, bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override { memgraph::system::Transaction const &txn) const override {
auto check_response = [](const replication::UpdateAuthDataRes &response) { return response.success; }; auto check_response = [](const replication::UpdateAuthDataRes &response) { return response.success; };
if (user_) { if (user_) {
return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>( return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), *user_); check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
*user_);
} }
if (role_) { if (role_) {
return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>( return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), *role_); check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
*role_);
} }
// Should never get here // Should never get here
MG_ASSERT(false, "Trying to update auth data that is not a user nor a role"); MG_ASSERT(false, "Trying to update auth data that is not a user nor a role");
@ -88,7 +91,8 @@ struct DropAuthData : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during Auth execution */ void DoDurability() override { /* Done during Auth execution */
} }
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch, bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override { memgraph::system::Transaction const &txn) const override {
auto check_response = [](const replication::DropAuthDataRes &response) { return response.success; }; auto check_response = [](const replication::DropAuthDataRes &response) { return response.success; };
@ -102,7 +106,8 @@ struct DropAuthData : memgraph::system::ISystemAction {
break; break;
} }
return client.SteamAndFinalizeDelta<replication::DropAuthDataRpc>( return client.SteamAndFinalizeDelta<replication::DropAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), type, name_); check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
type, name_);
} }
void PostReplication(replication::RoleMainData &mainData) const override {} void PostReplication(replication::RoleMainData &mainData) const override {}

View File

@ -17,8 +17,15 @@
namespace memgraph::auth { namespace memgraph::auth {
/// Logs an error reporting that an incoming RPC carries a main UUID different from the one
/// currently stored for the main.
///
/// @param current_main_uuid main UUID currently stored; printed as an empty string when unset.
/// @param main_req_id main UUID carried by the incoming request.
/// @param rpc_req name of the RPC type, used as the prefix of the log message.
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
                  std::string_view rpc_req) {
  // Pass the arguments straight to spdlog instead of pre-building the message with fmt::format;
  // the emitted text is unchanged and no intermediate string is allocated for the full message.
  spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
                current_main_uuid.has_value() ? std::string(current_main_uuid.value()) : "");
}
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth, void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder) { slk::Reader *req_reader, slk::Builder *res_builder) {
replication::UpdateAuthDataReq req; replication::UpdateAuthDataReq req;
memgraph::slk::Load(&req, req_reader); memgraph::slk::Load(&req, req_reader);
@ -26,6 +33,12 @@ void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system
using memgraph::replication::UpdateAuthDataRes; using memgraph::replication::UpdateAuthDataRes;
UpdateAuthDataRes res(false); UpdateAuthDataRes res(false);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, replication::UpdateAuthDataReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot // Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot
// of the set of databases. Hence no history exists to maintain regarding epoch change. // of the set of databases. Hence no history exists to maintain regarding epoch change.
// If MAIN has changed we need to check this new group_timestamp is consistent with // If MAIN has changed we need to check this new group_timestamp is consistent with
@ -53,7 +66,8 @@ void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::slk::Save(res, res_builder); memgraph::slk::Save(res, res_builder);
} }
void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth, void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder) { slk::Reader *req_reader, slk::Builder *res_builder) {
replication::DropAuthDataReq req; replication::DropAuthDataReq req;
memgraph::slk::Load(&req, req_reader); memgraph::slk::Load(&req, req_reader);
@ -61,6 +75,12 @@ void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_s
using memgraph::replication::DropAuthDataRes; using memgraph::replication::DropAuthDataRes;
DropAuthDataRes res(false); DropAuthDataRes res(false);
// Only accept the request when it carries the main UUID this handler was given as current.
// `res` was constructed with `false` above, so an early return sends that value back.
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
  // Log the request type name (what was actually received), matching UpdateAuthDataHandler,
  // which logs UpdateAuthDataReq::kType.name in the same situation.
  LogWrongMain(current_main_uuid, req.main_uuid, replication::DropAuthDataReq::kType.name);
  memgraph::slk::Save(res, res_builder);
  return;
}
// Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot // Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot
// of the set of databases. Hence no history exists to maintain regarding epoch change. // of the set of databases. Hence no history exists to maintain regarding epoch change.
// If MAIN has changed we need to check this new group_timestamp is consistent with // If MAIN has changed we need to check this new group_timestamp is consistent with
@ -155,14 +175,14 @@ void Register(replication::RoleReplicaData const &data, system::ReplicaHandlerAc
auth::SynchedAuth &auth) { auth::SynchedAuth &auth) {
// NOTE: Register even without license as the user could add a license at run-time // NOTE: Register even without license as the user could add a license at run-time
data.server->rpc_server_.Register<replication::UpdateAuthDataRpc>( data.server->rpc_server_.Register<replication::UpdateAuthDataRpc>(
[system_state_access, &auth](auto *req_reader, auto *res_builder) mutable { [&data, system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received UpdateAuthDataRpc"); spdlog::debug("Received UpdateAuthDataRpc");
UpdateAuthDataHandler(system_state_access, auth, req_reader, res_builder); UpdateAuthDataHandler(system_state_access, data.uuid_, auth, req_reader, res_builder);
}); });
data.server->rpc_server_.Register<replication::DropAuthDataRpc>( data.server->rpc_server_.Register<replication::DropAuthDataRpc>(
[system_state_access, &auth](auto *req_reader, auto *res_builder) mutable { [&data, system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received DropAuthDataRpc"); spdlog::debug("Received DropAuthDataRpc");
DropAuthDataHandler(system_state_access, auth, req_reader, res_builder); DropAuthDataHandler(system_state_access, data.uuid_, auth, req_reader, res_builder);
}); });
} }
#endif #endif

View File

@ -17,10 +17,16 @@
#include "system/state.hpp" #include "system/state.hpp"
namespace memgraph::auth { namespace memgraph::auth {
/// Logs an error stating that an RPC (`rpc_req`) was received whose main UUID (`main_req_id`)
/// differs from `current_main_uuid`; an unset `current_main_uuid` is printed as an empty string.
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req);
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void UpdateAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth, void UpdateAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder); slk::Reader *req_reader, slk::Builder *res_builder);
void DropAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth, void DropAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder); slk::Reader *req_reader, slk::Builder *res_builder);
bool SystemRecoveryHandler(auth::SynchedAuth &auth, auth::Auth::Config auth_config, bool SystemRecoveryHandler(auth::SynchedAuth &auth, auth::Auth::Config auth_config,

View File

@ -89,6 +89,7 @@ void Load(auth::Auth::Config *self, memgraph::slk::Reader *reader) {
// Serialize code for UpdateAuthDataReq // Serialize code for UpdateAuthDataReq
void Save(const memgraph::replication::UpdateAuthDataReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::replication::UpdateAuthDataReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.epoch_id, builder); memgraph::slk::Save(self.epoch_id, builder);
memgraph::slk::Save(self.expected_group_timestamp, builder); memgraph::slk::Save(self.expected_group_timestamp, builder);
memgraph::slk::Save(self.new_group_timestamp, builder); memgraph::slk::Save(self.new_group_timestamp, builder);
@ -96,6 +97,7 @@ void Save(const memgraph::replication::UpdateAuthDataReq &self, memgraph::slk::B
memgraph::slk::Save(self.role, builder); memgraph::slk::Save(self.role, builder);
} }
void Load(memgraph::replication::UpdateAuthDataReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::replication::UpdateAuthDataReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->epoch_id, reader); memgraph::slk::Load(&self->epoch_id, reader);
memgraph::slk::Load(&self->expected_group_timestamp, reader); memgraph::slk::Load(&self->expected_group_timestamp, reader);
memgraph::slk::Load(&self->new_group_timestamp, reader); memgraph::slk::Load(&self->new_group_timestamp, reader);
@ -113,6 +115,7 @@ void Load(memgraph::replication::UpdateAuthDataRes *self, memgraph::slk::Reader
// Serialize code for DropAuthDataReq // Serialize code for DropAuthDataReq
void Save(const memgraph::replication::DropAuthDataReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::replication::DropAuthDataReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.epoch_id, builder); memgraph::slk::Save(self.epoch_id, builder);
memgraph::slk::Save(self.expected_group_timestamp, builder); memgraph::slk::Save(self.expected_group_timestamp, builder);
memgraph::slk::Save(self.new_group_timestamp, builder); memgraph::slk::Save(self.new_group_timestamp, builder);
@ -120,6 +123,7 @@ void Save(const memgraph::replication::DropAuthDataReq &self, memgraph::slk::Bui
memgraph::slk::Save(self.name, builder); memgraph::slk::Save(self.name, builder);
} }
void Load(memgraph::replication::DropAuthDataReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::replication::DropAuthDataReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->epoch_id, reader); memgraph::slk::Load(&self->epoch_id, reader);
memgraph::slk::Load(&self->expected_group_timestamp, reader); memgraph::slk::Load(&self->expected_group_timestamp, reader);
memgraph::slk::Load(&self->new_group_timestamp, reader); memgraph::slk::Load(&self->new_group_timestamp, reader);

View File

@ -27,17 +27,22 @@ struct UpdateAuthDataReq {
static void Load(UpdateAuthDataReq *self, memgraph::slk::Reader *reader); static void Load(UpdateAuthDataReq *self, memgraph::slk::Reader *reader);
static void Save(const UpdateAuthDataReq &self, memgraph::slk::Builder *builder); static void Save(const UpdateAuthDataReq &self, memgraph::slk::Builder *builder);
UpdateAuthDataReq() = default; UpdateAuthDataReq() = default;
UpdateAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, auth::User user) UpdateAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
: epoch_id{std::move(epoch_id)}, auth::User user)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts}, expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts}, new_group_timestamp{new_ts},
user{std::move(user)} {} user{std::move(user)} {}
UpdateAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, auth::Role role) UpdateAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
: epoch_id{std::move(epoch_id)}, auth::Role role)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts}, expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts}, new_group_timestamp{new_ts},
role{std::move(role)} {} role{std::move(role)} {}
utils::UUID main_uuid;
std::string epoch_id; std::string epoch_id;
uint64_t expected_group_timestamp; uint64_t expected_group_timestamp;
uint64_t new_group_timestamp; uint64_t new_group_timestamp;
@ -69,13 +74,16 @@ struct DropAuthDataReq {
enum class DataType { USER, ROLE }; enum class DataType { USER, ROLE };
DropAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, DataType type, std::string_view name) DropAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
: epoch_id{std::move(epoch_id)}, DataType type, std::string_view name)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts}, expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts}, new_group_timestamp{new_ts},
type{type}, type{type},
name{name} {} name{name} {}
utils::UUID main_uuid;
std::string epoch_id; std::string epoch_id;
uint64_t expected_group_timestamp; uint64_t expected_group_timestamp;
uint64_t new_group_timestamp; uint64_t new_group_timestamp;

View File

@ -9,6 +9,7 @@
// by the Apache License, Version 2.0, included in the file // by the Apache License, Version 2.0, included in the file
// licenses/APL.txt. // licenses/APL.txt.
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include "coordination/coordinator_client.hpp" #include "coordination/coordinator_client.hpp"
@ -71,16 +72,17 @@ auto CoordinatorClient::SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCal
auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; } auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; }
auto CoordinatorClient::SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool { auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid,
ReplicationClientsInfo replication_clients_info) const -> bool {
try { try {
auto stream{rpc_client_.Stream<PromoteReplicaToMainRpc>(std::move(replication_clients_info))}; auto stream{rpc_client_.Stream<PromoteReplicaToMainRpc>(uuid, std::move(replication_clients_info))};
if (!stream.AwaitResponse().success) { if (!stream.AwaitResponse().success) {
spdlog::error("Failed to receive successful RPC failover response!"); spdlog::error("Failed to receive successful PromoteReplicaToMainRpc response!");
return false; return false;
} }
return true; return true;
} catch (rpc::RpcFailedException const &) { } catch (rpc::RpcFailedException const &) {
spdlog::error("RPC error occurred while sending failover RPC!"); spdlog::error("RPC error occurred while sending PromoteReplicaToMainRpc!");
} }
return false; return false;
} }
@ -101,5 +103,19 @@ auto CoordinatorClient::DemoteToReplica() const -> bool {
return false; return false;
} }
/// Sends a SwapMainUUIDRpc carrying `uuid` over this client's RPC connection.
///
/// @param uuid UUID forwarded as the RPC payload.
/// @return true only when a response arrives and reports success; false when the response
///         reports failure or the RPC throws rpc::RpcFailedException (both cases are logged).
auto CoordinatorClient::SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool {
  try {
    auto swap_stream{rpc_client_.Stream<replication_coordination_glue::SwapMainUUIDRpc>(uuid)};
    const bool swapped = swap_stream.AwaitResponse().success;
    if (swapped) {
      return true;
    }
    spdlog::error("Failed to receive successful RPC swapping of uuid response!");
    return false;
  } catch (const rpc::RpcFailedException &) {
    // Transport-level failure: report it and treat the swap as not performed.
    spdlog::error("RPC error occurred while sending swapping uuid RPC!");
    return false;
  }
}
} // namespace memgraph::coordination } // namespace memgraph::coordination
#endif #endif

View File

@ -11,6 +11,7 @@
#include "coordination/coordinator_instance.hpp" #include "coordination/coordinator_instance.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp" #include "coordination/register_main_replica_coordinator_status.hpp"
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include "coordination/coordinator_data.hpp" #include "coordination/coordinator_data.hpp"
@ -32,60 +33,94 @@ CoordinatorData::CoordinatorData() {
return *instance; return *instance;
}; };
replica_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { replica_succ_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_}; auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing replica successful callback", instance_name); spdlog::trace("Instance {} performing replica successful callback", instance_name);
find_instance(coord_data, instance_name).OnSuccessPing(); auto &instance = find_instance(coord_data, instance_name);
if (!instance.GetMainUUID().has_value() || main_uuid_ != instance.GetMainUUID().value()) {
if (!instance.SendSwapAndUpdateUUID(main_uuid_)) {
spdlog::error(
fmt::format("Failed to swap uuid for replica instance {} which is alive", instance.InstanceName()));
return;
}
}
instance.OnSuccessPing();
}; };
replica_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { replica_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_}; auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing replica failure callback", instance_name); spdlog::trace("Instance {} performing replica failure callback", instance_name);
find_instance(coord_data, instance_name).OnFailPing(); auto &instance = find_instance(coord_data, instance_name);
instance.OnFailPing();
// We need to reset the main uuid of the instance since it was "down" for at least a second.
// There is a slight delay: if we chose to use isAlive, the instance could go down and come back up in less than
// our isAlive time difference, which would lead to the instance setting its UUID to nullopt and no longer
// accepting any incoming RPCs from a valid main.
// TODO(antoniofilipovic) This needs more complex logic here:
// we need to get the id of the main the replica is listening to on a successful ping
// and swap it to the correct uuid if it failed
instance.SetNewMainUUID();
}; };
main_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { main_succ_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_}; auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing main successful callback", instance_name); spdlog::trace("Instance {} performing main successful callback", instance_name);
auto &instance = find_instance(coord_data, instance_name); auto &instance = find_instance(coord_data, instance_name);
if (instance.IsAlive() || !coord_data->ClusterHasAliveMain_()) { const auto &instance_uuid = instance.GetMainUUID();
MG_ASSERT(instance_uuid.has_value(), "Instance must have uuid set");
if (main_uuid_ == instance_uuid.value()) {
instance.OnSuccessPing(); instance.OnSuccessPing();
return; return;
} }
// TODO(antoniof) make demoteToReplica idempotent since main can be demoted to replica but
// swapUUID can fail
bool const demoted = instance.DemoteToReplica(coord_data->replica_succ_cb_, coord_data->replica_fail_cb_); bool const demoted = instance.DemoteToReplica(coord_data->replica_succ_cb_, coord_data->replica_fail_cb_);
if (demoted) { if (demoted) {
instance.OnSuccessPing(); instance.OnSuccessPing();
spdlog::info("Instance {} demoted to replica", instance_name); spdlog::info("Instance {} demoted to replica", instance_name);
} else { } else {
spdlog::error("Instance {} failed to become replica", instance_name); spdlog::error("Instance {} failed to become replica", instance_name);
return;
}
if (!instance.SendSwapAndUpdateUUID(main_uuid_)) {
spdlog::error(fmt::format("Failed to swap uuid for demoted main instance {}", instance.InstanceName()));
return;
} }
}; };
main_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { main_fail_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_}; auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing main failure callback", instance_name); spdlog::trace("Instance {} performing main failure callback", instance_name);
find_instance(coord_data, instance_name).OnFailPing(); auto &instance = find_instance(coord_data, instance_name);
instance.OnFailPing();
const auto &instance_uuid = instance.GetMainUUID();
MG_ASSERT(instance_uuid.has_value(), "Instance must have uuid set");
if (!coord_data->ClusterHasAliveMain_()) { if (!instance.IsAlive() && main_uuid_ == instance_uuid.value()) {
spdlog::info("Cluster without main instance, trying automatic failover"); spdlog::info("Cluster without main instance, trying automatic failover");
coord_data->TryFailover(); coord_data->TryFailover();
} }
}; };
} }
auto CoordinatorData::ClusterHasAliveMain_() const -> bool {
auto const alive_main = [](CoordinatorInstance const &instance) { return instance.IsMain() && instance.IsAlive(); };
return std::ranges::any_of(registered_instances_, alive_main);
}
auto CoordinatorData::TryFailover() -> void { auto CoordinatorData::TryFailover() -> void {
auto replica_instances = registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica); std::vector<CoordinatorInstance *> alive_registered_replica_instances{};
std::ranges::transform(registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica) |
ranges::views::filter(&CoordinatorInstance::IsAlive),
std::back_inserter(alive_registered_replica_instances),
[](CoordinatorInstance &instance) { return &instance; });
auto chosen_replica_instance = std::ranges::find_if(replica_instances, &CoordinatorInstance::IsAlive); // TODO(antoniof) more complex logic of choosing replica instance
if (chosen_replica_instance == replica_instances.end()) { CoordinatorInstance *chosen_replica_instance =
!alive_registered_replica_instances.empty() ? alive_registered_replica_instances[0] : nullptr;
if (nullptr == chosen_replica_instance) {
spdlog::warn("Failover failed since all replicas are down!"); spdlog::warn("Failover failed since all replicas are down!");
return; return;
} }
@ -93,21 +128,39 @@ auto CoordinatorData::TryFailover() -> void {
chosen_replica_instance->PauseFrequentCheck(); chosen_replica_instance->PauseFrequentCheck();
utils::OnScopeExit scope_exit{[&chosen_replica_instance] { chosen_replica_instance->ResumeFrequentCheck(); }}; utils::OnScopeExit scope_exit{[&chosen_replica_instance] { chosen_replica_instance->ResumeFrequentCheck(); }};
std::vector<ReplClientInfo> repl_clients_info; utils::UUID potential_new_main_uuid = utils::UUID{};
repl_clients_info.reserve(std::ranges::distance(replica_instances)); spdlog::trace("Generated potential new main uuid");
auto const not_chosen_replica_instance = [&chosen_replica_instance](CoordinatorInstance const &instance) { auto not_chosen_instance = [chosen_replica_instance](auto *instance) {
return instance != *chosen_replica_instance; return *instance != *chosen_replica_instance;
}; };
// If for some replicas swap fails, for others on successful ping we will revert back on next change
// or we will do failover first again and then it will be consistent again
for (auto *other_replica_instance : alive_registered_replica_instances | ranges::views::filter(not_chosen_instance)) {
if (!other_replica_instance->SendSwapAndUpdateUUID(potential_new_main_uuid)) {
spdlog::error(fmt::format("Failed to swap uuid for instance {} which is alive, aborting failover",
other_replica_instance->InstanceName()));
return;
}
}
std::ranges::transform(registered_instances_ | ranges::views::filter(not_chosen_replica_instance), std::vector<ReplClientInfo> repl_clients_info;
repl_clients_info.reserve(registered_instances_.size() - 1);
std::ranges::transform(registered_instances_ | ranges::views::filter([chosen_replica_instance](const auto &instance) {
return *chosen_replica_instance != instance;
}),
std::back_inserter(repl_clients_info), std::back_inserter(repl_clients_info),
[](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); }); [](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); });
if (!chosen_replica_instance->PromoteToMain(std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) { if (!chosen_replica_instance->PromoteToMain(potential_new_main_uuid, std::move(repl_clients_info), main_succ_cb_,
main_fail_cb_)) {
spdlog::warn("Failover failed since promoting replica to main failed!"); spdlog::warn("Failover failed since promoting replica to main failed!");
return; return;
} }
chosen_replica_instance->SetNewMainUUID(potential_new_main_uuid);
main_uuid_ = potential_new_main_uuid;
spdlog::info("Failover successful! Instance {} promoted to main.", chosen_replica_instance->InstanceName()); spdlog::info("Failover successful! Instance {} promoted to main.", chosen_replica_instance->InstanceName());
} }
@ -160,14 +213,28 @@ auto CoordinatorData::SetInstanceToMain(std::string instance_name) -> SetInstanc
auto const is_not_new_main = [&instance_name](CoordinatorInstance const &instance) { auto const is_not_new_main = [&instance_name](CoordinatorInstance const &instance) {
return instance.InstanceName() != instance_name; return instance.InstanceName() != instance_name;
}; };
auto potential_new_main_uuid = utils::UUID{};
spdlog::trace("Generated potential new main uuid");
for (auto &other_instance : registered_instances_ | ranges::views::filter(is_not_new_main)) {
if (!other_instance.SendSwapAndUpdateUUID(potential_new_main_uuid)) {
spdlog::error(
fmt::format("Failed to swap uuid for instance {}, aborting failover", other_instance.InstanceName()));
return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED;
}
}
std::ranges::transform(registered_instances_ | ranges::views::filter(is_not_new_main), std::ranges::transform(registered_instances_ | ranges::views::filter(is_not_new_main),
std::back_inserter(repl_clients_info), std::back_inserter(repl_clients_info),
[](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); }); [](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); });
if (!new_main->PromoteToMain(std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) { if (!new_main->PromoteToMain(potential_new_main_uuid, std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) {
return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN; return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
} }
new_main->SetNewMainUUID(potential_new_main_uuid);
main_uuid_ = potential_new_main_uuid;
spdlog::info("Instance {} promoted to main", instance_name); spdlog::info("Instance {} promoted to main", instance_name);
return SetInstanceToMainCoordinatorStatus::SUCCESS; return SetInstanceToMainCoordinatorStatus::SUCCESS;
} }

View File

@ -16,6 +16,7 @@
#include "coordination/coordinator_rpc.hpp" #include "coordination/coordinator_rpc.hpp"
#include "coordination/include/coordination/coordinator_server.hpp" #include "coordination/include/coordination/coordinator_server.hpp"
#include "replication/state.hpp"
namespace memgraph::dbms { namespace memgraph::dbms {
@ -32,6 +33,29 @@ void CoordinatorHandlers::Register(memgraph::coordination::CoordinatorServer &se
spdlog::info("Received DemoteMainToReplicaRpc from coordinator server"); spdlog::info("Received DemoteMainToReplicaRpc from coordinator server");
CoordinatorHandlers::DemoteMainToReplicaHandler(replication_handler, req_reader, res_builder); CoordinatorHandlers::DemoteMainToReplicaHandler(replication_handler, req_reader, res_builder);
}); });
server.Register<replication_coordination_glue::SwapMainUUIDRpc>(
[&replication_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void {
spdlog::info("Received SwapMainUUIDRPC on coordinator server");
CoordinatorHandlers::SwapMainUUIDHandler(replication_handler, req_reader, res_builder);
});
}
void CoordinatorHandlers::SwapMainUUIDHandler(replication::ReplicationHandler &replication_handler,
slk::Reader *req_reader, slk::Builder *res_builder) {
if (!replication_handler.IsReplica()) {
spdlog::error("Setting main uuid must be performed on replica.");
slk::Save(replication_coordination_glue::SwapMainUUIDRes{false}, res_builder);
return;
}
replication_coordination_glue::SwapMainUUIDReq req;
slk::Load(&req, req_reader);
spdlog::info(fmt::format("Set replica data UUID to main uuid {}", std::string(req.uuid)));
std::get<memgraph::replication::RoleReplicaData>(replication_handler.GetReplState().ReplicationData()).uuid_ =
req.uuid;
slk::Save(replication_coordination_glue::SwapMainUUIDRes{true}, res_builder);
} }
void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler, void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler,
@ -51,7 +75,7 @@ void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHan
.ip_address = req.replication_client_info.replication_ip_address, .ip_address = req.replication_client_info.replication_ip_address,
.port = req.replication_client_info.replication_port}; .port = req.replication_client_info.replication_port};
if (!replication_handler.SetReplicationRoleReplica(clients_config)) { if (!replication_handler.SetReplicationRoleReplica(clients_config, std::nullopt)) {
spdlog::error("Demoting main to replica failed!"); spdlog::error("Demoting main to replica failed!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return; return;
@ -67,18 +91,17 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return; return;
} }
coordination::PromoteReplicaToMainReq req;
slk::Load(&req, req_reader);
// This can fail because of disk. If it does, the cluster state could get inconsistent. // This can fail because of disk. If it does, the cluster state could get inconsistent.
// We don't handle disk issues. // We don't handle disk issues.
if (!replication_handler.DoReplicaToMainPromotion()) { if (const bool success = replication_handler.DoReplicaToMainPromotion(req.main_uuid_); !success) {
spdlog::error("Promoting replica to main failed!"); spdlog::error("Promoting replica to main failed!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return; return;
} }
coordination::PromoteReplicaToMainReq req;
slk::Load(&req, req_reader);
auto const converter = [](const auto &repl_info_config) { auto const converter = [](const auto &repl_info_config) {
return replication::ReplicationClientConfig{ return replication::ReplicationClientConfig{
.name = repl_info_config.instance_name, .name = repl_info_config.instance_name,
@ -90,7 +113,7 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
// registering replicas // registering replicas
for (auto const &config : req.replication_clients_info | ranges::views::transform(converter)) { for (auto const &config : req.replication_clients_info | ranges::views::transform(converter)) {
auto instance_client = replication_handler.RegisterReplica(config); auto instance_client = replication_handler.RegisterReplica(config, false);
if (instance_client.HasError()) { if (instance_client.HasError()) {
using enum memgraph::replication::RegisterReplicaError; using enum memgraph::replication::RegisterReplicaError;
switch (instance_client.GetError()) { switch (instance_client.GetError()) {
@ -109,13 +132,17 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
spdlog::error("Registered replica could not be persisted!"); spdlog::error("Registered replica could not be persisted!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return; return;
case memgraph::query::RegisterReplicaError::ERROR_ACCEPTING_MAIN:
spdlog::error("Replica didn't accept change of main!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
case memgraph::query::RegisterReplicaError::CONNECTION_FAILED: case memgraph::query::RegisterReplicaError::CONNECTION_FAILED:
// Connection failure is not a fatal error // Connection failure is not a fatal error
break; break;
} }
} }
} }
spdlog::error(fmt::format("FICO : Promote replica to main was success {}", std::string(req.main_uuid_)));
slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder); slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder);
} }

View File

@ -49,9 +49,9 @@ auto CoordinatorInstance::IsMain() const -> bool {
return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN; return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN;
} }
auto CoordinatorInstance::PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb, auto CoordinatorInstance::PromoteToMain(utils::UUID uuid, ReplicationClientsInfo repl_clients_info,
HealthCheckCallback main_fail_cb) -> bool { HealthCheckCallback main_succ_cb, HealthCheckCallback main_fail_cb) -> bool {
if (!client_.SendPromoteReplicaToMainRpc(std::move(repl_clients_info))) { if (!client_.SendPromoteReplicaToMainRpc(uuid, std::move(repl_clients_info))) {
return false; return false;
} }
@ -80,5 +80,17 @@ auto CoordinatorInstance::ReplicationClientInfo() const -> CoordinatorClientConf
return client_.ReplicationClientInfo(); return client_.ReplicationClientInfo();
} }
// Gives mutable access to the coordinator client held by this instance.
auto CoordinatorInstance::GetClient() -> CoordinatorClient & {
  return client_;
}
// Replaces the main UUID stored for this instance with the given value, which may be empty.
void CoordinatorInstance::SetNewMainUUID(const std::optional<utils::UUID> &main_uuid) {
  main_uuid_ = main_uuid;
}
// Returns a reference to the main UUID stored for this instance; the optional is empty when none is set.
auto CoordinatorInstance::GetMainUUID() -> const std::optional<utils::UUID> & {
  return main_uuid_;
}
/// Sends SwapMainUUIDRpc carrying main_uuid through this instance's RPC client and, when
/// SendSwapMainUUIDRpc reports success, stores main_uuid as the main UUID of this instance.
///
/// @param main_uuid UUID to send to the instance and to remember on success.
/// @return false if SendSwapMainUUIDRpc failed (the stored UUID is left untouched), true otherwise.
auto CoordinatorInstance::SendSwapAndUpdateUUID(const utils::UUID &main_uuid) -> bool {
  if (!replication_coordination_glue::SendSwapMainUUIDRpc(client_.RpcClient(), main_uuid)) {
    return false;
  }

  // Record the UUID that was just sent. Passing the member main_uuid_ here would assign the
  // member to itself, so the new value would never be stored.
  SetNewMainUUID(main_uuid);
  return true;
}
} // namespace memgraph::coordination } // namespace memgraph::coordination
#endif #endif

View File

@ -77,10 +77,12 @@ void Load(memgraph::coordination::PromoteReplicaToMainRes *self, memgraph::slk::
} }
// Serializes a PromoteReplicaToMainReq. The main UUID is written first, followed by the
// replication clients info; Load must read the fields in the same order.
void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder) {
  const auto &main_uuid = self.main_uuid_;
  const auto &clients_info = self.replication_clients_info;

  memgraph::slk::Save(main_uuid, builder);
  memgraph::slk::Save(clients_info, builder);
}
// Deserializes a PromoteReplicaToMainReq. The main UUID is read first, followed by the
// replication clients info, mirroring the order used by Save.
void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader) {
  auto *main_uuid = &self->main_uuid_;
  auto *clients_info = &self->replication_clients_info;

  memgraph::slk::Load(main_uuid, reader);
  memgraph::slk::Load(clients_info, reader);
}

View File

@ -12,7 +12,7 @@
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include "coordination/coordinator_server.hpp" #include "coordination/coordinator_server.hpp"
#include "replication_coordination_glue/messages.hpp" #include "replication_coordination_glue/handler.hpp"
namespace memgraph::coordination { namespace memgraph::coordination {

View File

@ -11,6 +11,7 @@
#pragma once #pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp" #include "coordination/coordinator_config.hpp"
@ -44,13 +45,20 @@ class CoordinatorClient {
auto InstanceName() const -> std::string; auto InstanceName() const -> std::string;
auto SocketAddress() const -> std::string; auto SocketAddress() const -> std::string;
[[nodiscard]] auto SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool;
[[nodiscard]] auto DemoteToReplica() const -> bool; [[nodiscard]] auto DemoteToReplica() const -> bool;
auto SendPromoteReplicaToMainRpc(const utils::UUID &uuid, ReplicationClientsInfo replication_clients_info) const
-> bool;
auto SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool;
auto ReplicationClientInfo() const -> ReplClientInfo; auto ReplicationClientInfo() const -> ReplClientInfo;
auto SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void; auto SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void;
// Gives mutable access to the underlying RPC client.
auto RpcClient() -> rpc::Client & {
  return rpc_client_;
}
friend bool operator==(CoordinatorClient const &first, CoordinatorClient const &second) { friend bool operator==(CoordinatorClient const &first, CoordinatorClient const &second) {
return first.config_ == second.config_; return first.config_ == second.config_;
} }

View File

@ -11,17 +11,18 @@
#pragma once #pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include <list>
#include "coordination/coordinator_instance.hpp" #include "coordination/coordinator_instance.hpp"
#include "coordination/coordinator_instance_status.hpp" #include "coordination/coordinator_instance_status.hpp"
#include "coordination/coordinator_server.hpp" #include "coordination/coordinator_server.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp" #include "coordination/register_main_replica_coordinator_status.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "utils/rw_lock.hpp" #include "utils/rw_lock.hpp"
#include "utils/thread_pool.hpp" #include "utils/thread_pool.hpp"
#include <list>
namespace memgraph::coordination { namespace memgraph::coordination {
class CoordinatorData { class CoordinatorData {
public: public:
@ -36,12 +37,11 @@ class CoordinatorData {
auto ShowInstances() const -> std::vector<CoordinatorInstanceStatus>; auto ShowInstances() const -> std::vector<CoordinatorInstanceStatus>;
private: private:
auto ClusterHasAliveMain_() const -> bool;
mutable utils::RWLock coord_data_lock_{utils::RWLock::Priority::READ}; mutable utils::RWLock coord_data_lock_{utils::RWLock::Priority::READ};
HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_; HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_;
// NOTE: Must be std::list because we rely on pointer stability // NOTE: Must be std::list because we rely on pointer stability
std::list<CoordinatorInstance> registered_instances_; std::list<CoordinatorInstance> registered_instances_;
utils::UUID main_uuid_;
}; };
struct CoordinatorMainReplicaData { struct CoordinatorMainReplicaData {

View File

@ -31,6 +31,8 @@ class CoordinatorHandlers {
slk::Builder *res_builder); slk::Builder *res_builder);
static void DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader, static void DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader,
slk::Builder *res_builder); slk::Builder *res_builder);
static void SwapMainUUIDHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader, slk::Builder *res_builder);
}; };
} // namespace memgraph::dbms } // namespace memgraph::dbms

View File

@ -16,6 +16,7 @@
#include "coordination/coordinator_client.hpp" #include "coordination/coordinator_client.hpp"
#include "coordination/coordinator_cluster_config.hpp" #include "coordination/coordinator_cluster_config.hpp"
#include "coordination/coordinator_exceptions.hpp" #include "coordination/coordinator_exceptions.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "replication_coordination_glue/role.hpp" #include "replication_coordination_glue/role.hpp"
namespace memgraph::coordination { namespace memgraph::coordination {
@ -44,7 +45,7 @@ class CoordinatorInstance {
auto IsReplica() const -> bool; auto IsReplica() const -> bool;
auto IsMain() const -> bool; auto IsMain() const -> bool;
auto PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb, auto PromoteToMain(utils::UUID main_uuid, ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
HealthCheckCallback main_fail_cb) -> bool; HealthCheckCallback main_fail_cb) -> bool;
auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool; auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool;
@ -53,11 +54,25 @@ class CoordinatorInstance {
auto ReplicationClientInfo() const -> ReplClientInfo; auto ReplicationClientInfo() const -> ReplClientInfo;
auto GetClient() -> CoordinatorClient &;
void SetNewMainUUID(const std::optional<utils::UUID> &main_uuid = std::nullopt);
auto GetMainUUID() -> const std::optional<utils::UUID> &;
auto SendSwapAndUpdateUUID(const utils::UUID &main_uuid) -> bool;
private: private:
CoordinatorClient client_; CoordinatorClient client_;
replication_coordination_glue::ReplicationRole replication_role_; replication_coordination_glue::ReplicationRole replication_role_;
std::chrono::system_clock::time_point last_response_time_{}; std::chrono::system_clock::time_point last_response_time_{};
// TODO: should this be atomic? The value can change between the moment it is read and the moment it is used.
bool is_alive_{false}; bool is_alive_{false};
// For a replica, this is the UUID of the main it currently follows.
// For the main itself, this is the same UUID as the one stored in CoordinatorData.
// It is set to nullopt when the replica is down.
// TL;DR: when a replica goes down and comes back up, the UUID of the main it listens to is reset,
// so the swap-UUID request has to be sent to it again.
std::optional<utils::UUID> main_uuid_;
friend bool operator==(CoordinatorInstance const &first, CoordinatorInstance const &second) { friend bool operator==(CoordinatorInstance const &first, CoordinatorInstance const &second) {
return first.client_ == second.client_ && first.replication_role_ == second.replication_role_; return first.client_ == second.client_ && first.replication_role_ == second.replication_role_;

View File

@ -11,6 +11,7 @@
#pragma once #pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp" #include "coordination/coordinator_config.hpp"
@ -26,10 +27,13 @@ struct PromoteReplicaToMainReq {
static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader); static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder); static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder);
explicit PromoteReplicaToMainReq(std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info) explicit PromoteReplicaToMainReq(const utils::UUID &uuid,
: replication_clients_info(std::move(replication_clients_info)) {} std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info)
: main_uuid_(uuid), replication_clients_info(std::move(replication_clients_info)) {}
PromoteReplicaToMainReq() = default; PromoteReplicaToMainReq() = default;
// UUID sent with the promotion request; the handler passes it to DoReplicaToMainPromotion.
utils::UUID main_uuid_;
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info; std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info;
}; };
@ -83,22 +87,19 @@ using DemoteMainToReplicaRpc = rpc::RequestResponse<DemoteMainToReplicaReq, Demo
// SLK serialization declarations // SLK serialization declarations
namespace memgraph::slk { namespace memgraph::slk {
// PromoteReplicaToMainRpc
void Save(const memgraph::coordination::PromoteReplicaToMainRes &self, memgraph::slk::Builder *builder); void Save(const memgraph::coordination::PromoteReplicaToMainRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::PromoteReplicaToMainRes *self, memgraph::slk::Reader *reader); void Load(memgraph::coordination::PromoteReplicaToMainRes *self, memgraph::slk::Reader *reader);
void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder); void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader); void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
// DemoteMainToReplicaRpc
void Save(const memgraph::coordination::DemoteMainToReplicaRes &self, memgraph::slk::Builder *builder); void Save(const memgraph::coordination::DemoteMainToReplicaRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::DemoteMainToReplicaRes *self, memgraph::slk::Reader *reader); void Load(memgraph::coordination::DemoteMainToReplicaRes *self, memgraph::slk::Reader *reader);
void Save(const memgraph::coordination::DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder); void Save(const memgraph::coordination::DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader); void Load(memgraph::coordination::DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader);
} // namespace memgraph::slk } // namespace memgraph::slk
#endif #endif

View File

@ -30,6 +30,7 @@ enum class SetInstanceToMainCoordinatorStatus : uint8_t {
NOT_COORDINATOR, NOT_COORDINATOR,
SUCCESS, SUCCESS,
COULD_NOT_PROMOTE_TO_MAIN, COULD_NOT_PROMOTE_TO_MAIN,
SWAP_UUID_FAILED
}; };
} // namespace memgraph::coordination } // namespace memgraph::coordination

View File

@ -38,6 +38,8 @@ std::string RegisterReplicaErrorToString(query::RegisterReplicaError error) {
return "CONNECTION_FAILED"; return "CONNECTION_FAILED";
case COULD_NOT_BE_PERSISTED: case COULD_NOT_BE_PERSISTED:
return "COULD_NOT_BE_PERSISTED"; return "COULD_NOT_BE_PERSISTED";
case ERROR_ACCEPTING_MAIN:
return "ERROR_ACCEPTING_MAIN";
} }
} }
@ -52,7 +54,7 @@ void RestoreReplication(replication::RoleMainData &mainData, DatabaseAccess db_a
spdlog::info("Replica {} restoration started for {}.", instance_client.name_, db_acc->name()); spdlog::info("Replica {} restoration started for {}.", instance_client.name_, db_acc->name());
const auto &ret = db_acc->storage()->repl_storage_state_.replication_clients_.WithLock( const auto &ret = db_acc->storage()->repl_storage_state_.replication_clients_.WithLock(
[&, db_acc](auto &storage_clients) mutable -> utils::BasicResult<query::RegisterReplicaError> { [&, db_acc](auto &storage_clients) mutable -> utils::BasicResult<query::RegisterReplicaError> {
auto client = std::make_unique<storage::ReplicationStorageClient>(instance_client); auto client = std::make_unique<storage::ReplicationStorageClient>(instance_client, mainData.uuid_);
auto *storage = db_acc->storage(); auto *storage = db_acc->storage();
client->Start(storage, std::move(db_acc)); client->Start(storage, std::move(db_acc));
// After start the storage <-> replica state should be READY or RECOVERING (if correctly started) // After start the storage <-> replica state should be READY or RECOVERING (if correctly started)
@ -239,14 +241,16 @@ struct DropDatabase : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during DBMS execution */ void DoDurability() override { /* Done during DBMS execution */
} }
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch, bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override { memgraph::system::Transaction const &txn) const override {
auto check_response = [](const storage::replication::DropDatabaseRes &response) { auto check_response = [](const storage::replication::DropDatabaseRes &response) {
return response.result != storage::replication::DropDatabaseRes::Result::FAILURE; return response.result != storage::replication::DropDatabaseRes::Result::FAILURE;
}; };
return client.SteamAndFinalizeDelta<storage::replication::DropDatabaseRpc>( return client.SteamAndFinalizeDelta<storage::replication::DropDatabaseRpc>(
check_response, epoch.id(), txn.last_committed_system_timestamp(), txn.timestamp(), uuid_); check_response, main_uuid, std::string(epoch.id()), txn.last_committed_system_timestamp(), txn.timestamp(),
uuid_);
} }
// Intentionally empty: this action performs no work after replication.
void PostReplication(replication::RoleMainData &mainData) const override {
}
@ -323,14 +327,16 @@ struct CreateDatabase : memgraph::system::ISystemAction {
// Done during dbms execution // Done during dbms execution
} }
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch, bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override { memgraph::system::Transaction const &txn) const override {
auto check_response = [](const storage::replication::CreateDatabaseRes &response) { auto check_response = [](const storage::replication::CreateDatabaseRes &response) {
return response.result != storage::replication::CreateDatabaseRes::Result::FAILURE; return response.result != storage::replication::CreateDatabaseRes::Result::FAILURE;
}; };
return client.SteamAndFinalizeDelta<storage::replication::CreateDatabaseRpc>( return client.SteamAndFinalizeDelta<storage::replication::CreateDatabaseRpc>(
check_response, epoch.id(), txn.last_committed_system_timestamp(), txn.timestamp(), config_); check_response, main_uuid, std::string(epoch.id()), txn.last_committed_system_timestamp(), txn.timestamp(),
config_);
} }
void PostReplication(replication::RoleMainData &mainData) const override { void PostReplication(replication::RoleMainData &mainData) const override {

View File

@ -29,6 +29,7 @@
#include "kvstore/kvstore.hpp" #include "kvstore/kvstore.hpp"
#include "license/license.hpp" #include "license/license.hpp"
#include "replication/replication_client.hpp" #include "replication/replication_client.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "storage/v2/config.hpp" #include "storage/v2/config.hpp"
#include "storage/v2/transaction.hpp" #include "storage/v2/transaction.hpp"
#include "system/system.hpp" #include "system/system.hpp"
@ -261,6 +262,16 @@ class DbmsHandler {
#endif #endif
} }
replication::ReplicationState &ReplicationState() { return repl_state_; }
replication::ReplicationState const &ReplicationState() const { return repl_state_; }
// Forwards to the replication state: true when it reports the MAIN role.
auto IsMain() const -> bool {
  return repl_state_.IsMain();
}
// Forwards to the replication state: true when it reports the REPLICA role.
auto IsReplica() const -> bool {
  return repl_state_.IsReplica();
}
#ifdef MG_ENTERPRISE
// coordination::CoordinatorState &CoordinatorState() { return coordinator_state_; }
#endif
/** /**
* @brief Return the statistics all databases. * @brief Return the statistics all databases.
* *

View File

@ -76,47 +76,84 @@ std::optional<DatabaseAccess> GetDatabaseAccessor(dbms::DbmsHandler *dbms_handle
return std::nullopt; return std::nullopt;
} }
} }
// Logs an error for an RPC whose main UUID differs from the main UUID currently held by this replica.
// An unset current main UUID is printed as an empty string.
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
                  std::string_view rpc_req) {
  std::string current_uuid_text;
  if (current_main_uuid.has_value()) {
    current_uuid_text = std::string(current_main_uuid.value());
  }

  spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
                current_uuid_text);
}
} // namespace } // namespace
void InMemoryReplicationHandlers::Register(dbms::DbmsHandler *dbms_handler, replication::ReplicationServer &server) { void InMemoryReplicationHandlers::Register(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &data) {
server.rpc_server_.Register<storage::replication::HeartbeatRpc>([dbms_handler](auto *req_reader, auto *res_builder) { auto &server = *data.server;
server.rpc_server_.Register<storage::replication::HeartbeatRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received HeartbeatRpc"); spdlog::debug("Received HeartbeatRpc");
InMemoryReplicationHandlers::HeartbeatHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::HeartbeatHandler(dbms_handler, data.uuid_, req_reader, res_builder);
}); });
server.rpc_server_.Register<storage::replication::AppendDeltasRpc>( server.rpc_server_.Register<storage::replication::AppendDeltasRpc>(
[dbms_handler](auto *req_reader, auto *res_builder) { [&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received AppendDeltasRpc"); spdlog::debug("Received AppendDeltasRpc");
InMemoryReplicationHandlers::AppendDeltasHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::AppendDeltasHandler(dbms_handler, data.uuid_, req_reader, res_builder);
}); });
server.rpc_server_.Register<storage::replication::SnapshotRpc>([dbms_handler](auto *req_reader, auto *res_builder) { server.rpc_server_.Register<storage::replication::SnapshotRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SnapshotRpc"); spdlog::debug("Received SnapshotRpc");
InMemoryReplicationHandlers::SnapshotHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::SnapshotHandler(dbms_handler, data.uuid_, req_reader, res_builder);
}); });
server.rpc_server_.Register<storage::replication::WalFilesRpc>([dbms_handler](auto *req_reader, auto *res_builder) { server.rpc_server_.Register<storage::replication::WalFilesRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received WalFilesRpc"); spdlog::debug("Received WalFilesRpc");
InMemoryReplicationHandlers::WalFilesHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::WalFilesHandler(dbms_handler, data.uuid_, req_reader, res_builder);
}); });
server.rpc_server_.Register<storage::replication::CurrentWalRpc>([dbms_handler](auto *req_reader, auto *res_builder) { server.rpc_server_.Register<storage::replication::CurrentWalRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received CurrentWalRpc"); spdlog::debug("Received CurrentWalRpc");
InMemoryReplicationHandlers::CurrentWalHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::CurrentWalHandler(dbms_handler, data.uuid_, req_reader, res_builder);
}); });
server.rpc_server_.Register<storage::replication::TimestampRpc>([dbms_handler](auto *req_reader, auto *res_builder) { server.rpc_server_.Register<storage::replication::TimestampRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received TimestampRpc"); spdlog::debug("Received TimestampRpc");
InMemoryReplicationHandlers::TimestampHandler(dbms_handler, req_reader, res_builder); InMemoryReplicationHandlers::TimestampHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<replication_coordination_glue::SwapMainUUIDRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SwapMainUUIDHandler");
InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms_handler, data, req_reader, res_builder);
}); });
} }
void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { replication::RoleReplicaData &role_replica_data,
slk::Reader *req_reader, slk::Builder *res_builder) {
if (!dbms_handler->IsReplica()) {
spdlog::error("Setting main uuid must be performed on replica.");
slk::Save(replication_coordination_glue::SwapMainUUIDRes{false}, res_builder);
return;
}
replication_coordination_glue::SwapMainUUIDReq req;
slk::Load(&req, req_reader);
spdlog::info(fmt::format("Set replica data UUID to main uuid {}", std::string(req.uuid)));
dbms_handler->ReplicationState().TryPersistRoleReplica(role_replica_data.config, req.uuid);
role_replica_data.uuid_ = req.uuid;
slk::Save(replication_coordination_glue::SwapMainUUIDRes{true}, res_builder);
}
void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::HeartbeatReq req; storage::replication::HeartbeatReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
if (!db_acc) {
if (!current_main_uuid.has_value() || req.main_uuid != *current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::HeartbeatReq::kType.name);
storage::replication::HeartbeatRes res{false, 0, ""}; storage::replication::HeartbeatRes res{false, 0, ""};
slk::Save(res, res_builder); slk::Save(res, res_builder);
return; return;
} }
// TODO: this handler is agnostic of InMemory, move to be reused by on-disk // TODO: this handler is agnostic of InMemory, move to be reused by on-disk
auto const *storage = db_acc->get()->storage(); auto const *storage = db_acc->get()->storage();
storage::replication::HeartbeatRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load(), storage::replication::HeartbeatRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load(),
@ -124,10 +161,19 @@ void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handl
slk::Save(res, res_builder); slk::Save(res, res_builder);
} }
void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::AppendDeltasReq req; storage::replication::AppendDeltasReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::AppendDeltasReq::kType.name);
storage::replication::AppendDeltasRes res{false, 0};
slk::Save(res, res_builder);
return;
}
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
if (!db_acc) { if (!db_acc) {
storage::replication::AppendDeltasRes res{false, 0}; storage::replication::AppendDeltasRes res{false, 0};
@ -187,8 +233,9 @@ void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_ha
spdlog::debug("Replication recovery from append deltas finished, replica is now up to date!"); spdlog::debug("Replication recovery from append deltas finished, replica is now up to date!");
} }
void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::SnapshotReq req; storage::replication::SnapshotReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -197,6 +244,12 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle
slk::Save(res, res_builder); slk::Save(res, res_builder);
return; return;
} }
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::SnapshotReq::kType.name);
storage::replication::SnapshotRes res{false, 0};
slk::Save(res, res_builder);
return;
}
storage::replication::Decoder decoder(req_reader); storage::replication::Decoder decoder(req_reader);
@ -270,8 +323,9 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle
spdlog::debug("Replication recovery from snapshot finished!"); spdlog::debug("Replication recovery from snapshot finished!");
} }
void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::WalFilesReq req; storage::replication::WalFilesReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -280,6 +334,12 @@ void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handle
slk::Save(res, res_builder); slk::Save(res, res_builder);
return; return;
} }
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::WalFilesReq::kType.name);
storage::replication::WalFilesRes res{false, 0};
slk::Save(res, res_builder);
return;
}
const auto wal_file_number = req.file_number; const auto wal_file_number = req.file_number;
spdlog::debug("Received WAL files: {}", wal_file_number); spdlog::debug("Received WAL files: {}", wal_file_number);
@ -298,8 +358,9 @@ void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handle
spdlog::debug("Replication recovery from WAL files ended successfully, replica is now up to date!"); spdlog::debug("Replication recovery from WAL files ended successfully, replica is now up to date!");
} }
void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::CurrentWalReq req; storage::replication::CurrentWalReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -309,6 +370,13 @@ void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_hand
return; return;
} }
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::CurrentWalReq::kType.name);
storage::replication::CurrentWalRes res{false, 0};
slk::Save(res, res_builder);
return;
}
storage::replication::Decoder decoder(req_reader); storage::replication::Decoder decoder(req_reader);
auto *storage = static_cast<storage::InMemoryStorage *>(db_acc->get()->storage()); auto *storage = static_cast<storage::InMemoryStorage *>(db_acc->get()->storage());
@ -370,8 +438,9 @@ void InMemoryReplicationHandlers::LoadWal(storage::InMemoryStorage *storage, sto
} }
} }
void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handler,
slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::TimestampReq req; storage::replication::TimestampReq req;
slk::Load(&req, req_reader); slk::Load(&req, req_reader);
auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid); auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -381,12 +450,20 @@ void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handl
return; return;
} }
// Refuse the request unless it carries the UUID of the MAIN stored locally.
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
  LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::TimestampReq::kType.name);
  // Answer with this RPC's own response type (TimestampRes), the same type the
  // success path of this handler sends; CurrentWalRes belongs to CurrentWalRpc.
  storage::replication::TimestampRes res{false, 0};
  slk::Save(res, res_builder);
  return;
}
// TODO: this handler is agnostic of InMemory, move to be reused by on-disk // TODO: this handler is agnostic of InMemory, move to be reused by on-disk
auto const *storage = db_acc->get()->storage(); auto const *storage = db_acc->get()->storage();
storage::replication::TimestampRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load()}; storage::replication::TimestampRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load()};
slk::Save(res, res_builder); slk::Save(res, res_builder);
} }
// TODO(AF): clarify how this works - does it receive all deltas at once?
uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage *storage, uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage *storage,
storage::durability::BaseDecoder *decoder, storage::durability::BaseDecoder *decoder,
const uint64_t version) { const uint64_t version) {

View File

@ -12,6 +12,7 @@
#pragma once #pragma once
#include "replication/replication_server.hpp" #include "replication/replication_server.hpp"
#include "replication/state.hpp"
#include "storage/v2/replication/serialization.hpp" #include "storage/v2/replication/serialization.hpp"
namespace memgraph::storage { namespace memgraph::storage {
@ -23,21 +24,30 @@ class DbmsHandler;
class InMemoryReplicationHandlers { class InMemoryReplicationHandlers {
public: public:
static void Register(dbms::DbmsHandler *dbms_handler, replication::ReplicationServer &server); static void Register(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &data);
private: private:
// RPC handlers // RPC handlers
static void HeartbeatHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void HeartbeatHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void SnapshotHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void SnapshotHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void WalFilesHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void WalFilesHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void CurrentWalHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void CurrentWalHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void TimestampHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); static void TimestampHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &role_replica_data,
slk::Reader *req_reader, slk::Builder *res_builder);
static void LoadWal(storage::InMemoryStorage *storage, storage::replication::Decoder *decoder); static void LoadWal(storage::InMemoryStorage *storage, storage::replication::Decoder *decoder);

View File

View File

View File

View File

@ -21,7 +21,8 @@ namespace memgraph::dbms {
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder) { const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::storage::replication::CreateDatabaseRes; using memgraph::storage::replication::CreateDatabaseRes;
CreateDatabaseRes res(CreateDatabaseRes::Result::FAILURE); CreateDatabaseRes res(CreateDatabaseRes::Result::FAILURE);
@ -35,6 +36,12 @@ void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::storage::replication::CreateDatabaseReq req; memgraph::storage::replication::CreateDatabaseReq req;
memgraph::slk::Load(&req, req_reader); memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, memgraph::storage::replication::CreateDatabaseReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot // Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot
// of the set of databases. Hence no history exists to maintain regarding epoch change. // of the set of databases. Hence no history exists to maintain regarding epoch change.
// If MAIN has changed we need to check this new group_timestamp is consistent with // If MAIN has changed we need to check this new group_timestamp is consistent with
@ -63,7 +70,8 @@ void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::slk::Save(res, res_builder); memgraph::slk::Save(res, res_builder);
} }
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, DbmsHandler &dbms_handler, void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder) { slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::storage::replication::DropDatabaseRes; using memgraph::storage::replication::DropDatabaseRes;
DropDatabaseRes res(DropDatabaseRes::Result::FAILURE); DropDatabaseRes res(DropDatabaseRes::Result::FAILURE);
@ -78,6 +86,12 @@ void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_s
memgraph::storage::replication::DropDatabaseReq req; memgraph::storage::replication::DropDatabaseReq req;
memgraph::slk::Load(&req, req_reader); memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, memgraph::storage::replication::DropDatabaseReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot // Note: No need to check epoch, recovery mechanism is done by a full uptodate snapshot
// of the set of databases. Hence no history exists to maintain regarding epoch change. // of the set of databases. Hence no history exists to maintain regarding epoch change.
// If MAIN has changed we need to check this new group_timestamp is consistent with // If MAIN has changed we need to check this new group_timestamp is consistent with
@ -177,14 +191,14 @@ void Register(replication::RoleReplicaData const &data, system::ReplicaHandlerAc
dbms::DbmsHandler &dbms_handler) { dbms::DbmsHandler &dbms_handler) {
// NOTE: Register even without license as the user could add a license at run-time // NOTE: Register even without license as the user could add a license at run-time
data.server->rpc_server_.Register<storage::replication::CreateDatabaseRpc>( data.server->rpc_server_.Register<storage::replication::CreateDatabaseRpc>(
[system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable { [&data, system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received CreateDatabaseRpc"); spdlog::debug("Received CreateDatabaseRpc");
CreateDatabaseHandler(system_state_access, dbms_handler, req_reader, res_builder); CreateDatabaseHandler(system_state_access, data.uuid_, dbms_handler, req_reader, res_builder);
}); });
data.server->rpc_server_.Register<storage::replication::DropDatabaseRpc>( data.server->rpc_server_.Register<storage::replication::DropDatabaseRpc>(
[system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable { [&data, system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received DropDatabaseRpc"); spdlog::debug("Received DropDatabaseRpc");
DropDatabaseHandler(system_state_access, dbms_handler, req_reader, res_builder); DropDatabaseHandler(system_state_access, data.uuid_, dbms_handler, req_reader, res_builder);
}); });
} }
#endif #endif

View File

@ -17,11 +17,21 @@
#include "system/state.hpp" #include "system/state.hpp"
namespace memgraph::dbms { namespace memgraph::dbms {
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
/// Logs an error reporting that the main UUID carried by an RPC request differs
/// from the main UUID held locally.
///
/// @param current_main_uuid locally held main UUID; printed as an empty string when it has no value.
/// @param main_req_id main UUID received in the request.
/// @param rpc_req name of the RPC request, printed at the start of the message.
inline void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
                         std::string_view rpc_req) {
  // Render the optional first so the logging call stays a single flat argument list.
  std::string current_main_str;
  if (current_main_uuid) {
    current_main_str = std::string(*current_main_uuid);
  }
  spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
                current_main_str);
}
// RPC handlers // RPC handlers
void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder); slk::Reader *req_reader, slk::Builder *res_builder);
bool SystemRecoveryHandler(DbmsHandler &dbms_handler, const std::vector<storage::SalientConfig> &database_configs); bool SystemRecoveryHandler(DbmsHandler &dbms_handler, const std::vector<storage::SalientConfig> &database_configs);

View File

@ -29,13 +29,15 @@ struct CreateDatabaseReq {
static void Load(CreateDatabaseReq *self, memgraph::slk::Reader *reader); static void Load(CreateDatabaseReq *self, memgraph::slk::Reader *reader);
static void Save(const CreateDatabaseReq &self, memgraph::slk::Builder *builder); static void Save(const CreateDatabaseReq &self, memgraph::slk::Builder *builder);
CreateDatabaseReq() = default; CreateDatabaseReq() = default;
CreateDatabaseReq(std::string_view epoch_id, uint64_t expected_group_timestamp, uint64_t new_group_timestamp, CreateDatabaseReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_group_timestamp,
storage::SalientConfig config) uint64_t new_group_timestamp, storage::SalientConfig config)
: epoch_id(std::string(epoch_id)), : main_uuid(main_uuid),
epoch_id(std::move(epoch_id)),
expected_group_timestamp{expected_group_timestamp}, expected_group_timestamp{expected_group_timestamp},
new_group_timestamp(new_group_timestamp), new_group_timestamp(new_group_timestamp),
config(std::move(config)) {} config(std::move(config)) {}
utils::UUID main_uuid;
std::string epoch_id; std::string epoch_id;
uint64_t expected_group_timestamp; uint64_t expected_group_timestamp;
uint64_t new_group_timestamp; uint64_t new_group_timestamp;
@ -65,13 +67,15 @@ struct DropDatabaseReq {
static void Load(DropDatabaseReq *self, memgraph::slk::Reader *reader); static void Load(DropDatabaseReq *self, memgraph::slk::Reader *reader);
static void Save(const DropDatabaseReq &self, memgraph::slk::Builder *builder); static void Save(const DropDatabaseReq &self, memgraph::slk::Builder *builder);
DropDatabaseReq() = default; DropDatabaseReq() = default;
DropDatabaseReq(std::string_view epoch_id, uint64_t expected_group_timestamp, uint64_t new_group_timestamp, DropDatabaseReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_group_timestamp,
const utils::UUID &uuid) uint64_t new_group_timestamp, const utils::UUID &uuid)
: epoch_id(std::string(epoch_id)), : main_uuid(main_uuid),
epoch_id(std::move(epoch_id)),
expected_group_timestamp{expected_group_timestamp}, expected_group_timestamp{expected_group_timestamp},
new_group_timestamp(new_group_timestamp), new_group_timestamp(new_group_timestamp),
uuid(uuid) {} uuid(uuid) {}
utils::UUID main_uuid;
std::string epoch_id; std::string epoch_id;
uint64_t expected_group_timestamp; uint64_t expected_group_timestamp;
uint64_t new_group_timestamp; uint64_t new_group_timestamp;

0
src/dbms/utils.hpp Normal file
View File

View File

@ -327,7 +327,7 @@ class ReplQueryHandler {
.port = static_cast<uint16_t>(*port), .port = static_cast<uint16_t>(*port),
}; };
if (!handler_->SetReplicationRoleReplica(config)) { if (!handler_->SetReplicationRoleReplica(config, std::nullopt)) {
throw QueryRuntimeException("Couldn't set role to replica!"); throw QueryRuntimeException("Couldn't set role to replica!");
} }
} }
@ -368,7 +368,7 @@ class ReplQueryHandler {
.replica_check_frequency = replica_check_frequency, .replica_check_frequency = replica_check_frequency,
.ssl = std::nullopt}; .ssl = std::nullopt};
const auto error = handler_->TryRegisterReplica(replication_config).HasError(); const auto error = handler_->TryRegisterReplica(replication_config, true).HasError();
if (error) { if (error) {
throw QueryRuntimeException(fmt::format("Couldn't register replica '{}'!", name)); throw QueryRuntimeException(fmt::format("Couldn't register replica '{}'!", name));
@ -518,7 +518,9 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
throw QueryRuntimeException("SET INSTANCE TO MAIN query can only be run on a coordinator!"); throw QueryRuntimeException("SET INSTANCE TO MAIN query can only be run on a coordinator!");
case COULD_NOT_PROMOTE_TO_MAIN: case COULD_NOT_PROMOTE_TO_MAIN:
throw QueryRuntimeException( throw QueryRuntimeException(
"Couldn't set replica instance to main!. Check coordinator and replica for more logs"); "Couldn't set replica instance to main! Check coordinator and replica for more logs");
case SWAP_UUID_FAILED:
throw QueryRuntimeException("Couldn't set replica instance to main. Replicas didn't swap uuid of new main.");
case SUCCESS: case SUCCESS:
break; break;
} }

View File

@ -13,6 +13,7 @@
#include "replication_coordination_glue/role.hpp" #include "replication_coordination_glue/role.hpp"
#include "utils/result.hpp" #include "utils/result.hpp"
#include "utils/uuid.hpp"
// BEGIN fwd declares // BEGIN fwd declares
namespace memgraph::replication { namespace memgraph::replication {
@ -23,7 +24,13 @@ struct ReplicationClientConfig;
namespace memgraph::query { namespace memgraph::query {
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED }; enum class RegisterReplicaError : uint8_t {
NAME_EXISTS,
ENDPOINT_EXISTS,
CONNECTION_FAILED,
COULD_NOT_BE_PERSISTED,
ERROR_ACCEPTING_MAIN
};
enum class UnregisterReplicaResult : uint8_t { enum class UnregisterReplicaResult : uint8_t {
NOT_MAIN, NOT_MAIN,
COULD_NOT_BE_PERSISTED, COULD_NOT_BE_PERSISTED,
@ -39,13 +46,14 @@ struct ReplicationQueryHandler {
virtual bool SetReplicationRoleMain() = 0; virtual bool SetReplicationRoleMain() = 0;
// as MAIN, become REPLICA // as MAIN, become REPLICA
virtual bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) = 0; virtual bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) = 0;
// as MAIN, define and connect to REPLICAs // as MAIN, define and connect to REPLICAs
virtual auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config) virtual auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> utils::BasicResult<RegisterReplicaError> = 0; -> utils::BasicResult<RegisterReplicaError> = 0;
virtual auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config) virtual auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> utils::BasicResult<RegisterReplicaError> = 0; -> utils::BasicResult<RegisterReplicaError> = 0;
// as MAIN, remove a REPLICA connection // as MAIN, remove a REPLICA connection

View File

@ -21,6 +21,6 @@ target_include_directories(mg-replication PUBLIC include)
find_package(fmt REQUIRED) find_package(fmt REQUIRED)
target_link_libraries(mg-replication target_link_libraries(mg-replication
PUBLIC mg::utils mg::kvstore lib::json mg::rpc mg::slk mg::io mg::repl_coord_glue PUBLIC mg::utils mg::kvstore lib::json mg::rpc mg::slk mg::io mg::repl_coord_glue mg-flags
PRIVATE fmt::fmt PRIVATE fmt::fmt
) )

View File

@ -54,7 +54,7 @@ struct ReplicationClient {
} catch (const rpc::RpcFailedException &) { } catch (const rpc::RpcFailedException &) {
// Nothing to do...wait for a reconnect // Nothing to do...wait for a reconnect
// NOTE: Here we are communicating with the instance connection. // NOTE: Here we are communicating with the instance connection.
// We don't have access to the undelying client; so the only thing we can do it // We don't have access to the underlying client; so the only thing we can do it
// tell the callback that this is a reconnection and to check the state // tell the callback that this is a reconnection and to check the state
reconnect = true; reconnect = true;
} }
@ -106,6 +106,9 @@ struct ReplicationClient {
communication::ClientContext rpc_context_; communication::ClientContext rpc_context_;
rpc::Client rpc_client_; rpc::Client rpc_client_;
std::chrono::seconds replica_check_frequency_; std::chrono::seconds replica_check_frequency_;
// True only when we are migrating from V1 or V2 to V3 in replication durability
// and we want to set replica to listen to main
bool try_set_uuid{false};
// TODO: Better, this was the easiest place to put this // TODO: Better, this was the easiest place to put this
enum class State { enum class State {

View File

@ -21,10 +21,12 @@
#include "status.hpp" #include "status.hpp"
#include "utils/result.hpp" #include "utils/result.hpp"
#include "utils/synchronized.hpp" #include "utils/synchronized.hpp"
#include "utils/uuid.hpp"
#include <atomic> #include <atomic>
#include <cstdint> #include <cstdint>
#include <list> #include <list>
#include <optional>
#include <variant> #include <variant>
#include <vector> #include <vector>
@ -37,7 +39,11 @@ enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, COULD_
struct RoleMainData { struct RoleMainData {
RoleMainData() = default; RoleMainData() = default;
explicit RoleMainData(ReplicationEpoch e) : epoch_(std::move(e)) {} explicit RoleMainData(ReplicationEpoch e, std::optional<utils::UUID> uuid = std::nullopt) : epoch_(std::move(e)) {
if (uuid) {
uuid_ = *uuid;
}
}
~RoleMainData() = default; ~RoleMainData() = default;
RoleMainData(RoleMainData const &) = delete; RoleMainData(RoleMainData const &) = delete;
@ -47,11 +53,14 @@ struct RoleMainData {
ReplicationEpoch epoch_; ReplicationEpoch epoch_;
std::list<ReplicationClient> registered_replicas_{}; // TODO: data race issues std::list<ReplicationClient> registered_replicas_{}; // TODO: data race issues
utils::UUID uuid_;
}; };
struct RoleReplicaData { struct RoleReplicaData {
ReplicationServerConfig config; ReplicationServerConfig config;
std::unique_ptr<ReplicationServer> server; std::unique_ptr<ReplicationServer> server;
// UUID of the main that this replica is listening to
std::optional<utils::UUID> uuid_;
}; };
// Global (instance) level object // Global (instance) level object
@ -83,18 +92,19 @@ struct ReplicationState {
bool HasDurability() const { return nullptr != durability_; } bool HasDurability() const { return nullptr != durability_; }
bool TryPersistRoleMain(std::string new_epoch); bool TryPersistRoleMain(std::string new_epoch, utils::UUID main_uuid);
bool TryPersistRoleReplica(const ReplicationServerConfig &config); bool TryPersistRoleReplica(const ReplicationServerConfig &config, const std::optional<utils::UUID> &main_uuid);
bool TryPersistUnregisterReplica(std::string_view name); bool TryPersistUnregisterReplica(std::string_view name);
bool TryPersistRegisteredReplica(const ReplicationClientConfig &config); bool TryPersistRegisteredReplica(const ReplicationClientConfig &config, utils::UUID main_uuid);
// TODO: locked access // TODO: locked access
auto ReplicationData() -> ReplicationData_t & { return replication_data_; } auto ReplicationData() -> ReplicationData_t & { return replication_data_; }
auto ReplicationData() const -> ReplicationData_t const & { return replication_data_; } auto ReplicationData() const -> ReplicationData_t const & { return replication_data_; }
utils::BasicResult<RegisterReplicaError, ReplicationClient *> RegisterReplica(const ReplicationClientConfig &config); utils::BasicResult<RegisterReplicaError, ReplicationClient *> RegisterReplica(const ReplicationClientConfig &config);
bool SetReplicationRoleMain(); bool SetReplicationRoleMain(const utils::UUID &main_uuid);
bool SetReplicationRoleReplica(const ReplicationServerConfig &config); bool SetReplicationRoleReplica(const ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid = std::nullopt);
private: private:
bool HandleVersionMigration(durability::ReplicationRoleEntry &data) const; bool HandleVersionMigration(durability::ReplicationRoleEntry &data) const;

View File

@ -31,25 +31,28 @@ constexpr auto *kReplicationReplicaPrefix{"__replication_replica:"}; // introdu
enum class DurabilityVersion : uint8_t { enum class DurabilityVersion : uint8_t {
V1, // no distinct key for replicas V1, // no distinct key for replicas
V2, // this version, epoch, replica prefix introduced V2, // epoch, replica prefix introduced
V3, // this version, main uuid introduced
}; };
// fragment of key: "__replication_role" // fragment of key: "__replication_role"
struct MainRole { struct MainRole {
ReplicationEpoch epoch{}; ReplicationEpoch epoch{};
std::optional<utils::UUID> main_uuid{};
friend bool operator==(MainRole const &, MainRole const &) = default; friend bool operator==(MainRole const &, MainRole const &) = default;
}; };
// fragment of key: "__replication_role" // fragment of key: "__replication_role"
struct ReplicaRole { struct ReplicaRole {
ReplicationServerConfig config{}; ReplicationServerConfig config{};
std::optional<utils::UUID> main_uuid{};
friend bool operator==(ReplicaRole const &, ReplicaRole const &) = default; friend bool operator==(ReplicaRole const &, ReplicaRole const &) = default;
}; };
// from key: "__replication_role" // from key: "__replication_role"
struct ReplicationRoleEntry { struct ReplicationRoleEntry {
DurabilityVersion version = DurabilityVersion version =
DurabilityVersion::V2; // if not latest then migration required for kReplicationReplicaPrefix DurabilityVersion::V3; // if not latest then migration required for kReplicationReplicaPrefix
std::variant<MainRole, ReplicaRole> role; std::variant<MainRole, ReplicaRole> role;
friend bool operator==(ReplicationRoleEntry const &, ReplicationRoleEntry const &) = default; friend bool operator==(ReplicationRoleEntry const &, ReplicationRoleEntry const &) = default;

View File

View File

@ -10,7 +10,7 @@
// licenses/APL.txt. // licenses/APL.txt.
#include "replication/replication_server.hpp" #include "replication/replication_server.hpp"
#include "replication_coordination_glue/messages.hpp" #include "replication_coordination_glue/handler.hpp"
namespace memgraph::replication { namespace memgraph::replication {
namespace { namespace {

View File

@ -10,12 +10,15 @@
// licenses/APL.txt. // licenses/APL.txt.
#include "replication/state.hpp" #include "replication/state.hpp"
#include <optional>
#include "flags/replication.hpp"
#include "replication/replication_client.hpp" #include "replication/replication_client.hpp"
#include "replication/replication_server.hpp" #include "replication/replication_server.hpp"
#include "replication/status.hpp" #include "replication/status.hpp"
#include "utils/file.hpp" #include "utils/file.hpp"
#include "utils/result.hpp" #include "utils/result.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp" #include "utils/variant_helpers.hpp"
constexpr auto kReplicationDirectory = std::string_view{"replication"}; constexpr auto kReplicationDirectory = std::string_view{"replication"};
@ -36,9 +39,9 @@ ReplicationState::ReplicationState(std::optional<std::filesystem::path> durabili
durability_ = std::make_unique<kvstore::KVStore>(std::move(repl_dir)); durability_ = std::make_unique<kvstore::KVStore>(std::move(repl_dir));
spdlog::info("Replication configuration will be stored and will be automatically restored in case of a crash."); spdlog::info("Replication configuration will be stored and will be automatically restored in case of a crash.");
auto replicationData = FetchReplicationData(); auto fetched_replication_data = FetchReplicationData();
if (replicationData.HasError()) { if (fetched_replication_data.HasError()) {
switch (replicationData.GetError()) { switch (fetched_replication_data.GetError()) {
using enum ReplicationState::FetchReplicationError; using enum ReplicationState::FetchReplicationError;
case NOTHING_FETCHED: { case NOTHING_FETCHED: {
spdlog::debug("Cannot find data needed for restore replication role in persisted metadata."); spdlog::debug("Cannot find data needed for restore replication role in persisted metadata.");
@ -51,15 +54,21 @@ ReplicationState::ReplicationState(std::optional<std::filesystem::path> durabili
} }
} }
} }
replication_data_ = std::move(replicationData).GetValue(); auto replication_data = std::move(fetched_replication_data).GetValue();
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port && std::holds_alternative<RoleReplicaData>(replication_data)) {
std::get<RoleReplicaData>(replication_data).uuid_.reset();
}
#endif
replication_data_ = std::move(replication_data);
} }
bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &config) { bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) {
if (!HasDurability()) return true; if (!HasDurability()) return true;
auto data = durability::ReplicationRoleEntry{.role = durability::ReplicaRole{ auto data =
.config = config, durability::ReplicationRoleEntry{.role = durability::ReplicaRole{.config = config, .main_uuid = main_uuid}};
}};
if (!durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) { if (!durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) {
spdlog::error("Error when saving REPLICA replication role in settings."); spdlog::error("Error when saving REPLICA replication role in settings.");
@ -78,11 +87,11 @@ bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &conf
return true; return true;
} }
bool ReplicationState::TryPersistRoleMain(std::string new_epoch) { bool ReplicationState::TryPersistRoleMain(std::string new_epoch, utils::UUID main_uuid) {
if (!HasDurability()) return true; if (!HasDurability()) return true;
auto data = auto data = durability::ReplicationRoleEntry{
durability::ReplicationRoleEntry{.role = durability::MainRole{.epoch = ReplicationEpoch{std::move(new_epoch)}}}; .role = durability::MainRole{.epoch = ReplicationEpoch{std::move(new_epoch)}, .main_uuid = main_uuid}};
if (durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) { if (durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) {
role_persisted = RolePersisted::YES; role_persisted = RolePersisted::YES;
@ -128,7 +137,8 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
return std::visit( return std::visit(
utils::Overloaded{ utils::Overloaded{
[&](durability::MainRole &&r) -> FetchReplicationResult_t { [&](durability::MainRole &&r) -> FetchReplicationResult_t {
auto res = RoleMainData{std::move(r.epoch)}; auto res =
RoleMainData{std::move(r.epoch), r.main_uuid.has_value() ? r.main_uuid.value() : utils::UUID{}};
auto b = durability_->begin(durability::kReplicationReplicaPrefix); auto b = durability_->begin(durability::kReplicationReplicaPrefix);
auto e = durability_->end(durability::kReplicationReplicaPrefix); auto e = durability_->end(durability::kReplicationReplicaPrefix);
for (; b != e; ++b) { for (; b != e; ++b) {
@ -143,6 +153,8 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
} }
// Instance clients // Instance clients
res.registered_replicas_.emplace_back(data.config); res.registered_replicas_.emplace_back(data.config);
// If no main UUID was persisted, flag this replica so that we try to set its main UUID
res.registered_replicas_.back().try_set_uuid = !r.main_uuid.has_value();
} catch (...) { } catch (...) {
return FetchReplicationError::PARSE_ERROR; return FetchReplicationError::PARSE_ERROR;
} }
@ -150,7 +162,9 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
return {std::move(res)}; return {std::move(res)};
}, },
[&](durability::ReplicaRole &&r) -> FetchReplicationResult_t { [&](durability::ReplicaRole &&r) -> FetchReplicationResult_t {
return {RoleReplicaData{r.config, std::make_unique<ReplicationServer>(r.config)}}; // False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return {RoleReplicaData{r.config, std::make_unique<ReplicationServer>(r.config), r.main_uuid}};
}, },
}, },
std::move(data.role)); std::move(data.role));
@ -192,21 +206,29 @@ bool ReplicationState::HandleVersionMigration(durability::ReplicationRoleEntry &
[[fallthrough]]; [[fallthrough]];
} }
case durability::DurabilityVersion::V2: { case durability::DurabilityVersion::V2: {
// do nothing - add code if V3 ever happens if (std::holds_alternative<durability::MainRole>(data.role)) {
auto &main = std::get<durability::MainRole>(data.role);
main.main_uuid = utils::UUID{};
}
data.version = durability::DurabilityVersion::V3;
break;
}
case durability::DurabilityVersion::V3: {
// do nothing - add code if V4 ever happens
break; break;
} }
} }
return true; return true;
} }
bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig &config) { bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig &config, utils::UUID main_uuid) {
if (!HasDurability()) return true; if (!HasDurability()) return true;
// If any replicas are persisted then Role must be persisted // If any replicas are persisted then Role must be persisted
if (role_persisted != RolePersisted::YES) { if (role_persisted != RolePersisted::YES) {
DMG_ASSERT(IsMain(), "MAIN is expected"); DMG_ASSERT(IsMain(), "MAIN is expected");
auto epoch_str = std::string(std::get<RoleMainData>(replication_data_).epoch_.id()); auto epoch_str = std::string(std::get<RoleMainData>(replication_data_).epoch_.id());
if (!TryPersistRoleMain(std::move(epoch_str))) return false; if (!TryPersistRoleMain(std::move(epoch_str), main_uuid)) return false;
} }
auto data = durability::ReplicationReplicaEntry{.config = config}; auto data = durability::ReplicationReplicaEntry{.config = config};
@ -217,22 +239,28 @@ bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig
return false; return false;
} }
bool ReplicationState::SetReplicationRoleMain() { bool ReplicationState::SetReplicationRoleMain(const utils::UUID &main_uuid) {
auto new_epoch = utils::GenerateUUID(); auto new_epoch = utils::GenerateUUID();
if (!TryPersistRoleMain(new_epoch)) { if (!TryPersistRoleMain(new_epoch, main_uuid)) {
return false; return false;
} }
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}};
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}, main_uuid};
return true; return true;
} }
bool ReplicationState::SetReplicationRoleReplica(const ReplicationServerConfig &config) { bool ReplicationState::SetReplicationRoleReplica(const ReplicationServerConfig &config,
if (!TryPersistRoleReplica(config)) { const std::optional<utils::UUID> &main_uuid) {
// False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
if (!TryPersistRoleReplica(config, main_uuid)) {
return false; return false;
} }
replication_data_ = RoleReplicaData{config, std::make_unique<ReplicationServer>(config)}; // False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
replication_data_ = RoleReplicaData{config, std::make_unique<ReplicationServer>(config), std::nullopt};
return true; return true;
} }
@ -264,7 +292,7 @@ utils::BasicResult<RegisterReplicaError, ReplicationClient *> ReplicationState::
} }
// Durability // Durability
if (!TryPersistRegisteredReplica(config)) { if (!TryPersistRegisteredReplica(config, mainData.uuid_)) {
return RegisterReplicaError::COULD_NOT_BE_PERSISTED; return RegisterReplicaError::COULD_NOT_BE_PERSISTED;
} }

View File

@ -26,21 +26,28 @@ constexpr auto *kSSLCertFile = "replica_ssl_cert_file";
constexpr auto *kReplicationRole = "replication_role"; constexpr auto *kReplicationRole = "replication_role";
constexpr auto *kEpoch = "epoch"; constexpr auto *kEpoch = "epoch";
constexpr auto *kVersion = "durability_version"; constexpr auto *kVersion = "durability_version";
constexpr auto *kMainUUID = "main_uuid";
void to_json(nlohmann::json &j, const ReplicationRoleEntry &p) { void to_json(nlohmann::json &j, const ReplicationRoleEntry &p) {
auto processMAIN = [&](MainRole const &main) { auto processMAIN = [&](MainRole const &main) {
j = nlohmann::json{{kVersion, p.version}, auto common = nlohmann::json{{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN}, {kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN},
{kEpoch, main.epoch.id()}}; {kEpoch, main.epoch.id()}};
if (p.version != DurabilityVersion::V1 && p.version != DurabilityVersion::V2) {
MG_ASSERT(main.main_uuid.has_value(), "Main should have id ready on version >= V3");
common[kMainUUID] = main.main_uuid.value();
}
j = std::move(common);
}; };
auto processREPLICA = [&](ReplicaRole const &replica) { auto processREPLICA = [&](ReplicaRole const &replica) {
j = nlohmann::json{ auto common = nlohmann::json{{kVersion, p.version},
{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA}, {kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA},
{kIpAddress, replica.config.ip_address}, {kIpAddress, replica.config.ip_address},
{kPort, replica.config.port} {kPort, replica.config.port}};
// TODO: SSL if (replica.main_uuid.has_value()) {
}; common[kMainUUID] = replica.main_uuid.value();
}
j = std::move(common);
}; };
std::visit(utils::Overloaded{processMAIN, processREPLICA}, p.role); std::visit(utils::Overloaded{processMAIN, processREPLICA}, p.role);
} }
@ -56,7 +63,12 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) {
auto json_epoch = j.value(kEpoch, std::string{}); auto json_epoch = j.value(kEpoch, std::string{});
auto epoch = ReplicationEpoch{}; auto epoch = ReplicationEpoch{};
if (!json_epoch.empty()) epoch.SetEpoch(json_epoch); if (!json_epoch.empty()) epoch.SetEpoch(json_epoch);
p = ReplicationRoleEntry{.version = version, .role = MainRole{.epoch = std::move(epoch)}}; auto main_role = MainRole{.epoch = std::move(epoch)};
if (j.contains(kMainUUID)) {
main_role.main_uuid = j.at(kMainUUID);
}
p = ReplicationRoleEntry{.version = version, .role = std::move(main_role)};
break; break;
} }
case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: { case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: {
@ -66,7 +78,13 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) {
j.at(kIpAddress).get_to(ip_address); j.at(kIpAddress).get_to(ip_address);
j.at(kPort).get_to(port); j.at(kPort).get_to(port);
auto config = ReplicationServerConfig{.ip_address = std::move(ip_address), .port = port}; auto config = ReplicationServerConfig{.ip_address = std::move(ip_address), .port = port};
p = ReplicationRoleEntry{.version = version, .role = ReplicaRole{.config = std::move(config)}}; auto replica_role = ReplicaRole{.config = std::move(config)};
if (j.contains(kMainUUID)) {
replica_role.main_uuid = j.at(kMainUUID);
}
p = ReplicationRoleEntry{.version = version, .role = std::move(replica_role)};
break; break;
} }
} }

View File

@ -6,6 +6,7 @@ target_sources(mg-repl_coord_glue
messages.hpp messages.hpp
mode.hpp mode.hpp
role.hpp role.hpp
handler.hpp
PRIVATE PRIVATE
messages.cpp messages.cpp

View File

@ -0,0 +1,41 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include "rpc/client.hpp"
#include "utils/uuid.hpp"
#include "messages.hpp"
#include "rpc/messages.hpp"
namespace memgraph::replication_coordination_glue {
/// Sends a SwapMainUUIDRpc carrying `uuid` through `rpc_client_` and waits for the answer.
///
/// @param rpc_client_ RPC client on which the SwapMainUUIDRpc stream is opened.
/// @param uuid        UUID placed in the request.
/// @return true only when the awaited response reports success. A response that reports failure and
///         an RpcFailedException raised while streaming are both logged and yield false.
inline bool SendSwapMainUUIDRpc(memgraph::rpc::Client &rpc_client_, const memgraph::utils::UUID &uuid) {
  try {
    auto stream{rpc_client_.Stream<SwapMainUUIDRpc>(uuid)};
    const bool accepted = stream.AwaitResponse().success;
    if (accepted) {
      return true;
    }
    spdlog::error("Failed to receive successful RPC swapping of uuid response!");
  } catch (const memgraph::rpc::RpcFailedException &) {
    spdlog::error("RPC error occurred while sending swapping uuid RPC!");
  }
  // Both failure paths (negative response, RPC exception) end up here.
  return false;
}
/// Handles a FrequentHeartbeat RPC: consumes the request from `req_reader` and answers with a
/// value-initialised FrequentHeartbeatRes written to `res_builder`.
///
/// @param req_reader  Reader positioned at the incoming FrequentHeartbeatReq.
/// @param res_builder Builder that receives the serialized response.
inline void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) {
  FrequentHeartbeatReq req;
  // Decode the request exactly once. Reading it a second time from the same reader would consume
  // bytes past the end of the message as soon as the request carries any payload.
  FrequentHeartbeatReq::Load(&req, req_reader);
  FrequentHeartbeatRes res{};
  memgraph::slk::Save(res, res_builder);
}
} // namespace memgraph::replication_coordination_glue

View File

@ -29,6 +29,25 @@ void Load(memgraph::replication_coordination_glue::FrequentHeartbeatReq * /*self
/* Nothing to serialize */ /* Nothing to serialize */
} }
// Serialize code for SwapMainUUIDRes
// The response's only data member is `success`, so it is the only value written and read here.

// Writes `self.success` to `builder`.
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDRes &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.success, builder);
}
// Reads `success` from `reader` into `*self`; mirrors the Save overload above.
void Load(memgraph::replication_coordination_glue::SwapMainUUIDRes *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->success, reader);
}
// Serialize code for SwapMainUUIDReq
// Only the `uuid` member of the request is written and read here.

// Writes `self.uuid` to `builder`.
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.uuid, builder);
}
// Reads `uuid` from `reader` into `*self`; mirrors the Save overload above.
void Load(memgraph::replication_coordination_glue::SwapMainUUIDReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->uuid, reader);
}
} // namespace memgraph::slk } // namespace memgraph::slk
namespace memgraph::replication_coordination_glue { namespace memgraph::replication_coordination_glue {
@ -39,6 +58,10 @@ constexpr utils::TypeInfo FrequentHeartbeatReq::kType{utils::TypeId::REP_FREQUEN
constexpr utils::TypeInfo FrequentHeartbeatRes::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_RES, "FrequentHeartbeatRes", constexpr utils::TypeInfo FrequentHeartbeatRes::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_RES, "FrequentHeartbeatRes",
nullptr}; nullptr};
constexpr utils::TypeInfo SwapMainUUIDReq::kType{utils::TypeId::COORD_SWAP_UUID_REQ, "SwapUUIDReq", nullptr};
constexpr utils::TypeInfo SwapMainUUIDRes::kType{utils::TypeId::COORD_SWAP_UUID_RES, "SwapUUIDRes", nullptr};
void FrequentHeartbeatReq::Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) { void FrequentHeartbeatReq::Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self, builder); memgraph::slk::Save(self, builder);
} }
@ -52,12 +75,16 @@ void FrequentHeartbeatRes::Load(FrequentHeartbeatRes *self, memgraph::slk::Reade
memgraph::slk::Load(self, reader); memgraph::slk::Load(self, reader);
} }
void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) { void SwapMainUUIDReq::Save(const SwapMainUUIDReq &self, memgraph::slk::Builder *builder) {
FrequentHeartbeatReq req; memgraph::slk::Save(self, builder);
FrequentHeartbeatReq::Load(&req, req_reader);
memgraph::slk::Load(&req, req_reader);
FrequentHeartbeatRes res{};
memgraph::slk::Save(res, res_builder);
} }
void SwapMainUUIDReq::Load(SwapMainUUIDReq *self, memgraph::slk::Reader *reader) { memgraph::slk::Load(self, reader); }
void SwapMainUUIDRes::Save(const SwapMainUUIDRes &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self, builder);
}
void SwapMainUUIDRes::Load(SwapMainUUIDRes *self, memgraph::slk::Reader *reader) { memgraph::slk::Load(self, reader); }
} // namespace memgraph::replication_coordination_glue } // namespace memgraph::replication_coordination_glue

View File

@ -13,6 +13,7 @@
#include "rpc/messages.hpp" #include "rpc/messages.hpp"
#include "slk/serialization.hpp" #include "slk/serialization.hpp"
#include "utils/uuid.hpp"
namespace memgraph::replication_coordination_glue { namespace memgraph::replication_coordination_glue {
@ -36,7 +37,34 @@ struct FrequentHeartbeatRes {
using FrequentHeartbeatRpc = rpc::RequestResponse<FrequentHeartbeatReq, FrequentHeartbeatRes>; using FrequentHeartbeatRpc = rpc::RequestResponse<FrequentHeartbeatReq, FrequentHeartbeatRes>;
void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder); struct SwapMainUUIDReq {
static const utils::TypeInfo kType;
static const utils::TypeInfo &GetTypeInfo() { return kType; }
static void Load(SwapMainUUIDReq *self, memgraph::slk::Reader *reader);
static void Save(const SwapMainUUIDReq &self, memgraph::slk::Builder *builder);
explicit SwapMainUUIDReq(const utils::UUID &uuid) : uuid(uuid) {}
SwapMainUUIDReq() = default;
utils::UUID uuid;
};
/// Response message of the SwapMainUUID request/response pair.
///
/// Carries a single flag telling the sender whether the request was accepted.
struct SwapMainUUIDRes {
  static const utils::TypeInfo kType;
  static const utils::TypeInfo &GetTypeInfo() { return kType; }

  /// Deserializes a response from `reader` into `*self`.
  static void Load(SwapMainUUIDRes *self, memgraph::slk::Reader *reader);
  /// Serializes `self` into `builder`.
  static void Save(const SwapMainUUIDRes &self, memgraph::slk::Builder *builder);

  explicit SwapMainUUIDRes(bool success) : success(success) {}
  SwapMainUUIDRes() = default;

  /// Defaults to false so that a default-constructed response (e.g. the object a response is
  /// loaded into) never exposes an indeterminate value.
  bool success{false};
};
using SwapMainUUIDRpc = rpc::RequestResponse<SwapMainUUIDReq, SwapMainUUIDRes>;
} // namespace memgraph::replication_coordination_glue } // namespace memgraph::replication_coordination_glue
@ -46,4 +74,10 @@ void Load(memgraph::replication_coordination_glue::FrequentHeartbeatRes *self, m
void Save(const memgraph::replication_coordination_glue::FrequentHeartbeatReq & /*self*/, void Save(const memgraph::replication_coordination_glue::FrequentHeartbeatReq & /*self*/,
memgraph::slk::Builder * /*builder*/); memgraph::slk::Builder * /*builder*/);
void Load(memgraph::replication_coordination_glue::FrequentHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/); void Load(memgraph::replication_coordination_glue::FrequentHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/);
// SwapMainUUIDRpc
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::replication_coordination_glue::SwapMainUUIDReq *self, memgraph::slk::Reader *reader);
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::replication_coordination_glue::SwapMainUUIDRes *self, memgraph::slk::Reader *reader);
} // namespace memgraph::slk } // namespace memgraph::slk

View File

@ -7,8 +7,8 @@ target_sources(mg-replication_handler
include/replication_handler/system_rpc.hpp include/replication_handler/system_rpc.hpp
PRIVATE PRIVATE
replication_handler.cpp
system_replication.cpp system_replication.cpp
replication_handler.cpp
system_rpc.cpp system_rpc.cpp
) )
target_include_directories(mg-replication_handler PUBLIC include) target_include_directories(mg-replication_handler PUBLIC include)

View File

@ -22,10 +22,10 @@ inline std::optional<query::RegisterReplicaError> HandleRegisterReplicaStatus(
utils::BasicResult<replication::RegisterReplicaError, replication::ReplicationClient *> &instance_client); utils::BasicResult<replication::RegisterReplicaError, replication::ReplicationClient *> &instance_client);
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void StartReplicaClient(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler, void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid,
auth::SynchedAuth &auth); system::System *system, auth::SynchedAuth &auth);
#else #else
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler); void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid);
#endif #endif
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
@ -33,8 +33,8 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
// When being called by interpreter no need to gain lock, it should already be under a system transaction // When being called by interpreter no need to gain lock, it should already be under a system transaction
// But concurrently the FrequentCheck is running and will need to lock before reading last_committed_system_timestamp_ // But concurrently the FrequentCheck is running and will need to lock before reading last_committed_system_timestamp_
template <bool REQUIRE_LOCK = false> template <bool REQUIRE_LOCK = false>
void SystemRestore(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler, void SystemRestore(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth) { const utils::UUID &main_uuid, system::System *system, auth::SynchedAuth &auth) {
// Check if system is up to date // Check if system is up to date
if (client.state_.WithLock( if (client.state_.WithLock(
[](auto &state) { return state == memgraph::replication::ReplicationClient::State::READY; })) [](auto &state) { return state == memgraph::replication::ReplicationClient::State::READY; }))
@ -69,12 +69,12 @@ void SystemRestore(replication::ReplicationClient &client, system::System *syste
// Handle only default database is no license // Handle only default database is no license
if (!license::global_license_checker.IsEnterpriseValidFast()) { if (!license::global_license_checker.IsEnterpriseValidFast()) {
return client.rpc_client_.Stream<replication::SystemRecoveryRpc>( return client.rpc_client_.Stream<replication::SystemRecoveryRpc>(
db_info.last_committed_timestamp, std::move(db_info.configs), auth::Auth::Config{}, main_uuid, db_info.last_committed_timestamp, std::move(db_info.configs), auth::Auth::Config{},
std::vector<auth::User>{}, std::vector<auth::Role>{}); std::vector<auth::User>{}, std::vector<auth::Role>{});
} }
return auth.WithLock([&](auto &locked_auth) { return auth.WithLock([&](auto &locked_auth) {
return client.rpc_client_.Stream<replication::SystemRecoveryRpc>( return client.rpc_client_.Stream<replication::SystemRecoveryRpc>(
db_info.last_committed_timestamp, std::move(db_info.configs), locked_auth.GetConfig(), main_uuid, db_info.last_committed_timestamp, std::move(db_info.configs), locked_auth.GetConfig(),
locked_auth.AllUsers(), locked_auth.AllRoles()); locked_auth.AllUsers(), locked_auth.AllRoles());
}); });
}); });
@ -109,28 +109,32 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
bool SetReplicationRoleMain() override; bool SetReplicationRoleMain() override;
// as MAIN, become REPLICA // as MAIN, become REPLICA
bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) override; bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) override;
// as MAIN, define and connect to REPLICAs // as MAIN, define and connect to REPLICAs
auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config) auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override; -> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override;
auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config) auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override; -> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override;
// as MAIN, remove a REPLICA connection // as MAIN, remove a REPLICA connection
auto UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult override; auto UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult override;
bool DoReplicaToMainPromotion(); bool DoReplicaToMainPromotion(const utils::UUID &main_uuid);
// Helper pass-through (TODO: remove) // Helper pass-through (TODO: remove)
auto GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole override; auto GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole override;
bool IsMain() const override; bool IsMain() const override;
bool IsReplica() const override; bool IsReplica() const override;
auto GetReplState() const -> const memgraph::replication::ReplicationState &;
auto GetReplState() -> memgraph::replication::ReplicationState &;
private: private:
template <bool HandleFailure> template <bool HandleFailure>
auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config) auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> { -> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
MG_ASSERT(repl_state_.IsMain(), "Only main instance can register a replica!"); MG_ASSERT(repl_state_.IsMain(), "Only main instance can register a replica!");
@ -154,10 +158,19 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
if (!memgraph::dbms::allow_mt_repl && dbms_handler_.All().size() > 1) { if (!memgraph::dbms::allow_mt_repl && dbms_handler_.All().size() > 1) {
spdlog::warn("Multi-tenant replication is currently not supported!"); spdlog::warn("Multi-tenant replication is currently not supported!");
} }
const auto main_uuid =
std::get<memgraph::replication::RoleMainData>(dbms_handler_.ReplicationState().ReplicationData()).uuid_;
if (send_swap_uuid) {
if (!memgraph::replication_coordination_glue::SendSwapMainUUIDRpc(maybe_client.GetValue()->rpc_client_,
main_uuid)) {
return memgraph::query::RegisterReplicaError::ERROR_ACCEPTING_MAIN;
}
}
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
// Update system before enabling individual storage <-> replica clients // Update system before enabling individual storage <-> replica clients
SystemRestore(*maybe_client.GetValue(), system_, dbms_handler_, auth_); SystemRestore(*maybe_client.GetValue(), dbms_handler_, main_uuid, system_, auth_);
#endif #endif
const auto dbms_error = HandleRegisterReplicaStatus(maybe_client); const auto dbms_error = HandleRegisterReplicaStatus(maybe_client);
@ -177,8 +190,9 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
if (storage->storage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return; if (storage->storage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return;
all_clients_good &= storage->repl_storage_state_.replication_clients_.WithLock( all_clients_good &= storage->repl_storage_state_.replication_clients_.WithLock(
[storage, &instance_client_ptr, db_acc = std::move(db_acc)](auto &storage_clients) mutable { // NOLINT [storage, &instance_client_ptr, db_acc = std::move(db_acc),
auto client = std::make_unique<storage::ReplicationStorageClient>(*instance_client_ptr); main_uuid](auto &storage_clients) mutable { // NOLINT
auto client = std::make_unique<storage::ReplicationStorageClient>(*instance_client_ptr, main_uuid);
// All good, start replica client // All good, start replica client
client->Start(storage, std::move(db_acc)); client->Start(storage, std::move(db_acc));
// After start the storage <-> replica state should be READY or RECOVERING (if correctly started) // After start the storage <-> replica state should be READY or RECOVERING (if correctly started)
@ -201,9 +215,9 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
// No client error, start instance level client // No client error, start instance level client
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
StartReplicaClient(*instance_client_ptr, system_, dbms_handler_, auth_); StartReplicaClient(*instance_client_ptr, dbms_handler_, main_uuid, system_, auth_);
#else #else
StartReplicaClient(*instance_client_ptr, dbms_handler_); StartReplicaClient(*instance_client_ptr, dbms_handler_, main_uuid);
#endif #endif
return {}; return {};
} }

View File

@ -17,15 +17,23 @@
#include "system/state.hpp" #include "system/state.hpp"
namespace memgraph::replication { namespace memgraph::replication {
/// Logs an error saying that an RPC arrived with a main id different from the one currently held.
///
/// @param current_main_uuid UUID currently held, if any; printed as an empty string when absent.
/// @param main_req_id       UUID that came with the request.
/// @param rpc_req           Name of the request, used as the first placeholder of the message.
inline void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
                         std::string_view rpc_req) {
  const std::string current_as_text = current_main_uuid ? std::string(*current_main_uuid) : std::string{};
  spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
                current_as_text);
}
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void SystemHeartbeatHandler(uint64_t ts, slk::Reader *req_reader, slk::Builder *res_builder); void SystemHeartbeatHandler(uint64_t ts, const std::optional<utils::UUID> &current_main_uuid, slk::Reader *req_reader,
void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth, slk::Reader *req_reader,
slk::Builder *res_builder); slk::Builder *res_builder);
void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
std::optional<utils::UUID> &current_main_uuid, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth, slk::Reader *req_reader, slk::Builder *res_builder);
void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth); void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth);
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data, auth::SynchedAuth &auth); bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data, auth::SynchedAuth &auth);
#else #else
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data); bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data);
#endif #endif
} // namespace memgraph::replication } // namespace memgraph::replication

View File

@ -27,6 +27,8 @@ struct SystemHeartbeatReq {
static void Load(SystemHeartbeatReq *self, memgraph::slk::Reader *reader); static void Load(SystemHeartbeatReq *self, memgraph::slk::Reader *reader);
static void Save(const SystemHeartbeatReq &self, memgraph::slk::Builder *builder); static void Save(const SystemHeartbeatReq &self, memgraph::slk::Builder *builder);
SystemHeartbeatReq() = default; SystemHeartbeatReq() = default;
explicit SystemHeartbeatReq(const utils::UUID &main_uuid) : main_uuid(main_uuid) {}
utils::UUID main_uuid;
}; };
struct SystemHeartbeatRes { struct SystemHeartbeatRes {
@ -50,14 +52,17 @@ struct SystemRecoveryReq {
static void Load(SystemRecoveryReq *self, memgraph::slk::Reader *reader); static void Load(SystemRecoveryReq *self, memgraph::slk::Reader *reader);
static void Save(const SystemRecoveryReq &self, memgraph::slk::Builder *builder); static void Save(const SystemRecoveryReq &self, memgraph::slk::Builder *builder);
SystemRecoveryReq() = default; SystemRecoveryReq() = default;
SystemRecoveryReq(uint64_t forced_group_timestamp, std::vector<storage::SalientConfig> database_configs, SystemRecoveryReq(const utils::UUID &main_uuid, uint64_t forced_group_timestamp,
auth::Auth::Config auth_config, std::vector<auth::User> users, std::vector<auth::Role> roles) std::vector<storage::SalientConfig> database_configs, auth::Auth::Config auth_config,
: forced_group_timestamp{forced_group_timestamp}, std::vector<auth::User> users, std::vector<auth::Role> roles)
: main_uuid(main_uuid),
forced_group_timestamp{forced_group_timestamp},
database_configs(std::move(database_configs)), database_configs(std::move(database_configs)),
auth_config(std::move(auth_config)), auth_config(std::move(auth_config)),
users{std::move(users)}, users{std::move(users)},
roles{std::move(roles)} {} roles{std::move(roles)} {}
utils::UUID main_uuid;
uint64_t forced_group_timestamp; uint64_t forced_group_timestamp;
std::vector<storage::SalientConfig> database_configs; std::vector<storage::SalientConfig> database_configs;
auth::Auth::Config auth_config; auth::Auth::Config auth_config;

View File

@ -24,14 +24,18 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
*/ */
// Startup replication state (if recovered at startup) // Startup replication state (if recovered at startup)
auto replica = [&dbms_handler, &auth](memgraph::replication::RoleReplicaData const &data) { auto replica = [&dbms_handler, &auth](memgraph::replication::RoleReplicaData &data) {
return memgraph::replication::StartRpcServer(dbms_handler, data, auth); return StartRpcServer(dbms_handler, data, auth);
}; };
// Replication recovery and frequent check start // Replication recovery and frequent check start
auto main = [system, &dbms_handler, &auth](memgraph::replication::RoleMainData &mainData) { auto main = [system, &dbms_handler, &auth](memgraph::replication::RoleMainData &mainData) {
for (auto &client : mainData.registered_replicas_) { for (auto &client : mainData.registered_replicas_) {
memgraph::replication::SystemRestore(client, system, dbms_handler, auth); if (client.try_set_uuid &&
replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, mainData.uuid_)) {
client.try_set_uuid = false;
}
SystemRestore(client, dbms_handler, mainData.uuid_, system, auth);
} }
// DBMS here // DBMS here
dbms_handler.ForEach([&mainData](memgraph::dbms::DatabaseAccess db_acc) { dbms_handler.ForEach([&mainData](memgraph::dbms::DatabaseAccess db_acc) {
@ -39,7 +43,7 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
}); });
for (auto &client : mainData.registered_replicas_) { for (auto &client : mainData.registered_replicas_) {
memgraph::replication::StartReplicaClient(client, system, dbms_handler, auth); StartReplicaClient(client, dbms_handler, mainData.uuid_, system, auth);
} }
// Warning // Warning
@ -62,7 +66,7 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
void RecoverReplication(memgraph::replication::ReplicationState &repl_state, void RecoverReplication(memgraph::replication::ReplicationState &repl_state,
memgraph::dbms::DbmsHandler &dbms_handler) { memgraph::dbms::DbmsHandler &dbms_handler) {
// Startup replication state (if recovered at startup) // Startup replication state (if recovered at startup)
auto replica = [&dbms_handler](memgraph::replication::RoleReplicaData const &data) { auto replica = [&dbms_handler](memgraph::replication::RoleReplicaData &data) {
return memgraph::replication::StartRpcServer(dbms_handler, data); return memgraph::replication::StartRpcServer(dbms_handler, data);
}; };
@ -71,7 +75,11 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state,
dbms::DbmsHandler::RecoverStorageReplication(dbms_handler.Get(), mainData); dbms::DbmsHandler::RecoverStorageReplication(dbms_handler.Get(), mainData);
for (auto &client : mainData.registered_replicas_) { for (auto &client : mainData.registered_replicas_) {
memgraph::replication::StartReplicaClient(client, dbms_handler); if (client.try_set_uuid &&
replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, mainData.uuid_)) {
client.try_set_uuid = false;
}
memgraph::replication::StartReplicaClient(client, dbms_handler, mainData.uuid_);
} }
// Warning // Warning
@ -112,10 +120,11 @@ inline std::optional<query::RegisterReplicaError> HandleRegisterReplicaStatus(
} }
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void StartReplicaClient(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler, void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid,
auth::SynchedAuth &auth) { system::System *system, auth::SynchedAuth &auth) {
#else #else
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler) { void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler,
utils::UUID main_uuid) {
#endif #endif
// No client error, start instance level client // No client error, start instance level client
auto const &endpoint = client.rpc_client_.Endpoint(); auto const &endpoint = client.rpc_client_.Endpoint();
@ -124,8 +133,12 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
system = system, system = system,
#endif #endif
license = license::global_license_checker.IsEnterpriseValidFast()]( license = license::global_license_checker.IsEnterpriseValidFast(),
bool reconnect, replication::ReplicationClient &client) mutable { main_uuid](bool reconnect, replication::ReplicationClient &client) mutable {
if (client.try_set_uuid &&
memgraph::replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, main_uuid)) {
client.try_set_uuid = false;
}
// Working connection // Working connection
// Check if system needs restoration // Check if system needs restoration
if (reconnect) { if (reconnect) {
@ -138,7 +151,7 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
client.state_.WithLock([](auto &state) { state = memgraph::replication::ReplicationClient::State::BEHIND; }); client.state_.WithLock([](auto &state) { state = memgraph::replication::ReplicationClient::State::BEHIND; });
} }
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
SystemRestore<true>(client, system, dbms_handler, auth); SystemRestore<true>(client, dbms_handler, main_uuid, system, auth);
#endif #endif
// Check if any database has been left behind // Check if any database has been left behind
dbms_handler.ForEach([&name = client.name_, reconnect](dbms::DatabaseAccess db_acc) { dbms_handler.ForEach([&name = client.name_, reconnect](dbms::DatabaseAccess db_acc) {
@ -174,14 +187,15 @@ bool ReplicationHandler::SetReplicationRoleMain() {
}; };
auto const replica_handler = [this](memgraph::replication::RoleReplicaData const &) { auto const replica_handler = [this](memgraph::replication::RoleReplicaData const &) {
return DoReplicaToMainPromotion(); return DoReplicaToMainPromotion(utils::UUID{});
}; };
// TODO: under lock // TODO: under lock
return std::visit(memgraph::utils::Overloaded{main_handler, replica_handler}, repl_state_.ReplicationData()); return std::visit(memgraph::utils::Overloaded{main_handler, replica_handler}, repl_state_.ReplicationData());
} }
bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) { bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) {
// We don't want to restart the server if we're already a REPLICA // We don't want to restart the server if we're already a REPLICA
if (repl_state_.IsReplica()) { if (repl_state_.IsReplica()) {
return false; return false;
@ -198,15 +212,14 @@ bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::
std::get<memgraph::replication::RoleMainData>(repl_state_.ReplicationData()).registered_replicas_.clear(); std::get<memgraph::replication::RoleMainData>(repl_state_.ReplicationData()).registered_replicas_.clear();
// Creates the server // Creates the server
repl_state_.SetReplicationRoleReplica(config); repl_state_.SetReplicationRoleReplica(config, main_uuid);
// Start // Start
const auto success = const auto success = std::visit(memgraph::utils::Overloaded{[](memgraph::replication::RoleMainData &) {
std::visit(memgraph::utils::Overloaded{[](memgraph::replication::RoleMainData const &) {
// ASSERT // ASSERT
return false; return false;
}, },
[this](memgraph::replication::RoleReplicaData const &data) { [this](memgraph::replication::RoleReplicaData &data) {
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
return StartRpcServer(dbms_handler_, data, auth_); return StartRpcServer(dbms_handler_, data, auth_);
#else #else
@ -218,7 +231,7 @@ bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::
return success; return success;
} }
bool ReplicationHandler::DoReplicaToMainPromotion() { bool ReplicationHandler::DoReplicaToMainPromotion(const utils::UUID &main_uuid) {
// STEP 1) bring down all REPLICA servers // STEP 1) bring down all REPLICA servers
dbms_handler_.ForEach([](dbms::DatabaseAccess db_acc) { dbms_handler_.ForEach([](dbms::DatabaseAccess db_acc) {
auto *storage = db_acc->storage(); auto *storage = db_acc->storage();
@ -228,7 +241,7 @@ bool ReplicationHandler::DoReplicaToMainPromotion() {
// STEP 2) Change to MAIN // STEP 2) Change to MAIN
// TODO: restore replication servers if false? // TODO: restore replication servers if false?
if (!repl_state_.SetReplicationRoleMain()) { if (!repl_state_.SetReplicationRoleMain(main_uuid)) {
// TODO: Handle recovery on failure??? // TODO: Handle recovery on failure???
return false; return false;
} }
@ -244,14 +257,16 @@ bool ReplicationHandler::DoReplicaToMainPromotion() {
}; };
// as MAIN, define and connect to REPLICAs // as MAIN, define and connect to REPLICAs
auto ReplicationHandler::TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config) auto ReplicationHandler::TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config,
bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> { -> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
return RegisterReplica_<false>(config); return RegisterReplica_<false>(config, send_swap_uuid);
} }
auto ReplicationHandler::RegisterReplica(const memgraph::replication::ReplicationClientConfig &config) auto ReplicationHandler::RegisterReplica(const memgraph::replication::ReplicationClientConfig &config,
bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> { -> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
return RegisterReplica_<true>(config); return RegisterReplica_<true>(config, send_swap_uuid);
} }
auto ReplicationHandler::UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult { auto ReplicationHandler::UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult {
@ -284,6 +299,10 @@ auto ReplicationHandler::GetRole() const -> memgraph::replication_coordination_g
return repl_state_.GetRole(); return repl_state_.GetRole();
} }
auto ReplicationHandler::GetReplState() const -> const memgraph::replication::ReplicationState & { return repl_state_; }
auto ReplicationHandler::GetReplState() -> memgraph::replication::ReplicationState & { return repl_state_; }
bool ReplicationHandler::IsMain() const { return repl_state_.IsMain(); } bool ReplicationHandler::IsMain() const { return repl_state_.IsMain(); }
bool ReplicationHandler::IsReplica() const { return repl_state_.IsReplica(); } bool ReplicationHandler::IsReplica() const { return repl_state_.IsReplica(); }

View File

@ -21,7 +21,8 @@
namespace memgraph::replication { namespace memgraph::replication {
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
void SystemHeartbeatHandler(const uint64_t ts, slk::Reader *req_reader, slk::Builder *res_builder) { void SystemHeartbeatHandler(const uint64_t ts, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
replication::SystemHeartbeatRes res{0}; replication::SystemHeartbeatRes res{0};
// Ignore if no license // Ignore if no license
@ -30,17 +31,23 @@ void SystemHeartbeatHandler(const uint64_t ts, slk::Reader *req_reader, slk::Bui
memgraph::slk::Save(res, res_builder); memgraph::slk::Save(res, res_builder);
return; return;
} }
replication::SystemHeartbeatReq req; replication::SystemHeartbeatReq req;
replication::SystemHeartbeatReq::Load(&req, req_reader); replication::SystemHeartbeatReq::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, replication::SystemHeartbeatRes::kType.name);
replication::SystemHeartbeatRes res(-1);
memgraph::slk::Save(res, res_builder);
return;
}
res = replication::SystemHeartbeatRes{ts}; res = replication::SystemHeartbeatRes{ts};
memgraph::slk::Save(res, res_builder); memgraph::slk::Save(res, res_builder);
} }
void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth, slk::Reader *req_reader, const std::optional<utils::UUID> &current_main_uuid, dbms::DbmsHandler &dbms_handler,
slk::Builder *res_builder) { auth::SynchedAuth &auth, slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::replication::SystemRecoveryRes; using memgraph::replication::SystemRecoveryRes;
SystemRecoveryRes res(SystemRecoveryRes::Result::FAILURE); SystemRecoveryRes res(SystemRecoveryRes::Result::FAILURE);
@ -49,6 +56,11 @@ void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::replication::SystemRecoveryReq req; memgraph::replication::SystemRecoveryReq req;
memgraph::slk::Load(&req, req_reader); memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, SystemRecoveryReq::kType.name);
return;
}
/* /*
* DBMS * DBMS
*/ */
@ -74,15 +86,16 @@ void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_
auto system_state_access = dbms_handler.system_->CreateSystemStateAccess(); auto system_state_access = dbms_handler.system_->CreateSystemStateAccess();
// System // System
// TODO: remove, as this is not used
data.server->rpc_server_.Register<replication::SystemHeartbeatRpc>( data.server->rpc_server_.Register<replication::SystemHeartbeatRpc>(
[system_state_access](auto *req_reader, auto *res_builder) { [&data, system_state_access](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SystemHeartbeatRpc"); spdlog::debug("Received SystemHeartbeatRpc");
SystemHeartbeatHandler(system_state_access.LastCommitedTS(), req_reader, res_builder); SystemHeartbeatHandler(system_state_access.LastCommitedTS(), data.uuid_, req_reader, res_builder);
}); });
data.server->rpc_server_.Register<replication::SystemRecoveryRpc>( data.server->rpc_server_.Register<replication::SystemRecoveryRpc>(
[system_state_access, &dbms_handler, &auth](auto *req_reader, auto *res_builder) mutable { [&data, system_state_access, &dbms_handler, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received SystemRecoveryRpc"); spdlog::debug("Received SystemRecoveryRpc");
SystemRecoveryHandler(system_state_access, dbms_handler, auth, req_reader, res_builder); SystemRecoveryHandler(system_state_access, data.uuid_, dbms_handler, auth, req_reader, res_builder);
}); });
// DBMS // DBMS
@ -94,13 +107,12 @@ void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_
#endif #endif
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data, bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data, auth::SynchedAuth &auth) {
auth::SynchedAuth &auth) {
#else #else
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data) { bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data) {
#endif #endif
// Register storage handlers // Register storage handlers
dbms::InMemoryReplicationHandlers::Register(&dbms_handler, *data.server); dbms::InMemoryReplicationHandlers::Register(&dbms_handler, data);
#ifdef MG_ENTERPRISE #ifdef MG_ENTERPRISE
// Register system handlers // Register system handlers
Register(data, dbms_handler, auth); Register(data, dbms_handler, auth);
@ -112,4 +124,5 @@ bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleRepl
} }
return true; return true;
} }
} // namespace memgraph::replication } // namespace memgraph::replication

View File

@ -29,15 +29,16 @@ void Load(memgraph::replication::SystemHeartbeatRes *self, memgraph::slk::Reader
} }
// Serialize code for SystemHeartbeatReq // Serialize code for SystemHeartbeatReq
void Save(const memgraph::replication::SystemHeartbeatReq & /*self*/, memgraph::slk::Builder * /*builder*/) { void Save(const memgraph::replication::SystemHeartbeatReq &self, memgraph::slk::Builder *builder) {
/* Nothing to serialize */ memgraph::slk::Save(self.main_uuid, builder);
} }
void Load(memgraph::replication::SystemHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/) { void Load(memgraph::replication::SystemHeartbeatReq *self, memgraph::slk::Reader *reader) {
/* Nothing to serialize */ memgraph::slk::Load(&self->main_uuid, reader);
} }
// Serialize code for SystemRecoveryReq // Serialize code for SystemRecoveryReq
void Save(const memgraph::replication::SystemRecoveryReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::replication::SystemRecoveryReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.forced_group_timestamp, builder); memgraph::slk::Save(self.forced_group_timestamp, builder);
memgraph::slk::Save(self.database_configs, builder); memgraph::slk::Save(self.database_configs, builder);
memgraph::slk::Save(self.auth_config, builder); memgraph::slk::Save(self.auth_config, builder);
@ -46,6 +47,7 @@ void Save(const memgraph::replication::SystemRecoveryReq &self, memgraph::slk::B
} }
void Load(memgraph::replication::SystemRecoveryReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::replication::SystemRecoveryReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->forced_group_timestamp, reader); memgraph::slk::Load(&self->forced_group_timestamp, reader);
memgraph::slk::Load(&self->database_configs, reader); memgraph::slk::Load(&self->database_configs, reader);
memgraph::slk::Load(&self->auth_config, reader); memgraph::slk::Load(&self->auth_config, reader);

View File

@ -214,7 +214,6 @@ class Client {
// Build and send the request. // Build and send the request.
slk::Save(req_type.id, handler.GetBuilder()); slk::Save(req_type.id, handler.GetBuilder());
slk::Save(rpc::current_version, handler.GetBuilder()); slk::Save(rpc::current_version, handler.GetBuilder());
TRequestResponse::Request::Save(request, handler.GetBuilder()); TRequestResponse::Request::Save(request, handler.GetBuilder());
// Return the handler to the user. // Return the handler to the user.

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd. // Copyright 2024 Memgraph Ltd.
// //
// Use of this software is governed by the Business Source License // Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -28,6 +28,9 @@ constexpr auto v1 = Version{2023'10'30'0'2'13};
// for any TypeIds that get added. // for any TypeIds that get added.
constexpr auto v2 = Version{2023'12'07'0'2'14}; constexpr auto v2 = Version{2023'12'07'0'2'14};
constexpr auto current_version = v2; // To each RPC main uuid was added
constexpr auto v3 = Version{2024'02'02'0'2'14};
constexpr auto current_version = v3;
} // namespace memgraph::rpc } // namespace memgraph::rpc

View File

@ -18,6 +18,7 @@
#include "storage/v2/inmemory/storage.hpp" #include "storage/v2/inmemory/storage.hpp"
#include "storage/v2/replication/recovery.hpp" #include "storage/v2/replication/recovery.hpp"
#include "utils/on_scope_exit.hpp" #include "utils/on_scope_exit.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp" #include "utils/variant_helpers.hpp"
namespace memgraph::storage { namespace memgraph::storage {
@ -26,7 +27,8 @@ namespace memgraph::storage {
// contained in the internal buffer and the file. // contained in the internal buffer and the file.
class InMemoryCurrentWalHandler { class InMemoryCurrentWalHandler {
public: public:
explicit InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client); explicit InMemoryCurrentWalHandler(const utils::UUID &main_uuid, InMemoryStorage const *storage,
rpc::Client &rpc_client);
void AppendFilename(const std::string &filename); void AppendFilename(const std::string &filename);
void AppendSize(size_t size); void AppendSize(size_t size);
@ -43,8 +45,9 @@ class InMemoryCurrentWalHandler {
}; };
////// CurrentWalHandler ////// ////// CurrentWalHandler //////
InMemoryCurrentWalHandler::InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client) InMemoryCurrentWalHandler::InMemoryCurrentWalHandler(const utils::UUID &main_uuid, InMemoryStorage const *storage,
: stream_(rpc_client.Stream<replication::CurrentWalRpc>(storage->uuid())) {} rpc::Client &rpc_client)
: stream_(rpc_client.Stream<replication::CurrentWalRpc>(main_uuid, storage->uuid())) {}
void InMemoryCurrentWalHandler::AppendFilename(const std::string &filename) { void InMemoryCurrentWalHandler::AppendFilename(const std::string &filename) {
replication::Encoder encoder(stream_.GetBuilder()); replication::Encoder encoder(stream_.GetBuilder());
@ -69,10 +72,10 @@ void InMemoryCurrentWalHandler::AppendBufferData(const uint8_t *buffer, const si
replication::CurrentWalRes InMemoryCurrentWalHandler::Finalize() { return stream_.AwaitResponse(); } replication::CurrentWalRes InMemoryCurrentWalHandler::Finalize() { return stream_.AwaitResponse(); }
////// ReplicationClient Helpers ////// ////// ReplicationClient Helpers //////
replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &client, replication::WalFilesRes TransferWalFiles(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::vector<std::filesystem::path> &wal_files) { const std::vector<std::filesystem::path> &wal_files) {
MG_ASSERT(!wal_files.empty(), "Wal files list is empty!"); MG_ASSERT(!wal_files.empty(), "Wal files list is empty!");
auto stream = client.Stream<replication::WalFilesRpc>(uuid, wal_files.size()); auto stream = client.Stream<replication::WalFilesRpc>(main_uuid, uuid, wal_files.size());
replication::Encoder encoder(stream.GetBuilder()); replication::Encoder encoder(stream.GetBuilder());
for (const auto &wal : wal_files) { for (const auto &wal : wal_files) {
spdlog::debug("Sending wal file: {}", wal); spdlog::debug("Sending wal file: {}", wal);
@ -81,16 +84,17 @@ replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &
return stream.AwaitResponse(); return stream.AwaitResponse();
} }
replication::SnapshotRes TransferSnapshot(const utils::UUID &uuid, rpc::Client &client, replication::SnapshotRes TransferSnapshot(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::filesystem::path &path) { const std::filesystem::path &path) {
auto stream = client.Stream<replication::SnapshotRpc>(uuid); auto stream = client.Stream<replication::SnapshotRpc>(main_uuid, uuid);
replication::Encoder encoder(stream.GetBuilder()); replication::Encoder encoder(stream.GetBuilder());
encoder.WriteFile(path); encoder.WriteFile(path);
return stream.AwaitResponse(); return stream.AwaitResponse();
} }
uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file) { uint64_t ReplicateCurrentWal(const utils::UUID &main_uuid, const InMemoryStorage *storage, rpc::Client &client,
InMemoryCurrentWalHandler stream{storage, client}; durability::WalFile const &wal_file) {
InMemoryCurrentWalHandler stream{main_uuid, storage, client};
stream.AppendFilename(wal_file.Path().filename()); stream.AppendFilename(wal_file.Path().filename());
utils::InputFile file; utils::InputFile file;
MG_ASSERT(file.Open(wal_file.Path()), "Failed to open current WAL file at {}!", wal_file.Path()); MG_ASSERT(file.Open(wal_file.Path()), "Failed to open current WAL file at {}!", wal_file.Path());

View File

@ -19,13 +19,14 @@ class InMemoryStorage;
////// ReplicationClient Helpers ////// ////// ReplicationClient Helpers //////
replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &client, replication::WalFilesRes TransferWalFiles(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::vector<std::filesystem::path> &wal_files); const std::vector<std::filesystem::path> &wal_files);
replication::SnapshotRes TransferSnapshot(const utils::UUID &uuid, rpc::Client &client, replication::SnapshotRes TransferSnapshot(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::filesystem::path &path); const std::filesystem::path &path);
uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file); uint64_t ReplicateCurrentWal(const utils::UUID &main_uuid, const InMemoryStorage *storage, rpc::Client &client,
durability::WalFile const &wal_file);
auto GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker, auto GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker,
const InMemoryStorage *storage) -> std::vector<RecoveryStep>; const InMemoryStorage *storage) -> std::vector<RecoveryStep>;

View File

@ -1847,6 +1847,7 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
// A single transaction will always be contained in a single WAL file. // A single transaction will always be contained in a single WAL file.
auto current_commit_timestamp = transaction.commit_timestamp->load(std::memory_order_acquire); auto current_commit_timestamp = transaction.commit_timestamp->load(std::memory_order_acquire);
//////// AF only this calls initialize transaction
repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber(), this, db_acc); repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber(), this, db_acc);
auto append_deltas = [&](auto callback) { auto append_deltas = [&](auto callback) {

View File

@ -14,6 +14,7 @@
#include "storage/v2/storage.hpp" #include "storage/v2/storage.hpp"
#include "utils/exceptions.hpp" #include "utils/exceptions.hpp"
#include "utils/on_scope_exit.hpp" #include "utils/on_scope_exit.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp" #include "utils/variant_helpers.hpp"
#include <algorithm> #include <algorithm>
@ -25,8 +26,9 @@ template <typename>
namespace memgraph::storage { namespace memgraph::storage {
ReplicationStorageClient::ReplicationStorageClient(::memgraph::replication::ReplicationClient &client) ReplicationStorageClient::ReplicationStorageClient(::memgraph::replication::ReplicationClient &client,
: client_{client} {} utils::UUID main_uuid)
: client_{client}, main_uuid_(main_uuid) {}
void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAccessProtector db_acc) { void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAccessProtector db_acc) {
uint64_t current_commit_timestamp{kTimestampInitialId}; uint64_t current_commit_timestamp{kTimestampInitialId};
@ -34,14 +36,13 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce
auto &replStorageState = storage->repl_storage_state_; auto &replStorageState = storage->repl_storage_state_;
auto hb_stream{client_.rpc_client_.Stream<replication::HeartbeatRpc>( auto hb_stream{client_.rpc_client_.Stream<replication::HeartbeatRpc>(
storage->uuid(), replStorageState.last_commit_timestamp_, std::string{replStorageState.epoch_.id()})}; main_uuid_, storage->uuid(), replStorageState.last_commit_timestamp_, std::string{replStorageState.epoch_.id()})};
const auto replica = hb_stream.AwaitResponse(); const auto replica = hb_stream.AwaitResponse();
#ifdef MG_ENTERPRISE // Multi-tenancy is only supported in enterprise #ifdef MG_ENTERPRISE // Multi-tenancy is only supported in enterprise
if (!replica.success) { // Replica is missing the current database if (!replica.success) { // Replica is missing the current database
client_.state_.WithLock([&](auto &state) { client_.state_.WithLock([&](auto &state) {
spdlog::debug("Replica '{}' missing database '{}' - '{}'", client_.name_, storage->name(), spdlog::debug("Replica '{}' can't respond or missing database '{}' - '{}'", client_.name_, storage->name(),
std::string{storage->uuid()}); std::string{storage->uuid()});
state = memgraph::replication::ReplicationClient::State::BEHIND; state = memgraph::replication::ReplicationClient::State::BEHIND;
}); });
@ -95,7 +96,7 @@ TimestampInfo ReplicationStorageClient::GetTimestampInfo(Storage const *storage)
info.current_number_of_timestamp_behind_master = 0; info.current_number_of_timestamp_behind_master = 0;
try { try {
auto stream{client_.rpc_client_.Stream<replication::TimestampRpc>(storage->uuid())}; auto stream{client_.rpc_client_.Stream<replication::TimestampRpc>(main_uuid_, storage->uuid())};
const auto response = stream.AwaitResponse(); const auto response = stream.AwaitResponse();
const auto is_success = response.success; const auto is_success = response.success;
@ -173,7 +174,7 @@ void ReplicationStorageClient::StartTransactionReplication(const uint64_t curren
case READY: case READY:
MG_ASSERT(!replica_stream_); MG_ASSERT(!replica_stream_);
try { try {
replica_stream_.emplace(storage, client_.rpc_client_, current_wal_seq_num); replica_stream_.emplace(storage, client_.rpc_client_, current_wal_seq_num, main_uuid_);
*locked_state = REPLICATING; *locked_state = REPLICATING;
} catch (const rpc::RpcFailedException &) { } catch (const rpc::RpcFailedException &) {
*locked_state = MAYBE_BEHIND; *locked_state = MAYBE_BEHIND;
@ -183,6 +184,9 @@ void ReplicationStorageClient::StartTransactionReplication(const uint64_t curren
} }
} }
//////// AF: you can't finalize transaction replication if you are not replicating
/////// AF: if there is no stream or it is Defunct, then we need to set the replica to MAYBE_BEHIND -> is that even used?
/////// AF:
bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, DatabaseAccessProtector db_acc) { bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, DatabaseAccessProtector db_acc) {
// We can only check the state because it guarantees to be only // We can only check the state because it guarantees to be only
// valid during a single transaction replication (if the assumption // valid during a single transaction replication (if the assumption
@ -256,19 +260,21 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph:
spdlog::trace("Recovering in step: {}", i++); spdlog::trace("Recovering in step: {}", i++);
try { try {
rpc::Client &rpcClient = client_.rpc_client_; rpc::Client &rpcClient = client_.rpc_client_;
std::visit(utils::Overloaded{ std::visit(
[&replica_commit, mem_storage, &rpcClient](RecoverySnapshot const &snapshot) { utils::Overloaded{
[&replica_commit, mem_storage, &rpcClient, main_uuid = main_uuid_](RecoverySnapshot const &snapshot) {
spdlog::debug("Sending the latest snapshot file: {}", snapshot); spdlog::debug("Sending the latest snapshot file: {}", snapshot);
auto response = TransferSnapshot(mem_storage->uuid(), rpcClient, snapshot); auto response = TransferSnapshot(main_uuid, mem_storage->uuid(), rpcClient, snapshot);
replica_commit = response.current_commit_timestamp; replica_commit = response.current_commit_timestamp;
}, },
[&replica_commit, mem_storage, &rpcClient](RecoveryWals const &wals) { [&replica_commit, mem_storage, &rpcClient, main_uuid = main_uuid_](RecoveryWals const &wals) {
spdlog::debug("Sending the latest wal files"); spdlog::debug("Sending the latest wal files");
auto response = TransferWalFiles(mem_storage->uuid(), rpcClient, wals); auto response = TransferWalFiles(main_uuid, mem_storage->uuid(), rpcClient, wals);
replica_commit = response.current_commit_timestamp; replica_commit = response.current_commit_timestamp;
spdlog::debug("Wal files successfully transferred."); spdlog::debug("Wal files successfully transferred.");
}, },
[&replica_commit, mem_storage, &rpcClient](RecoveryCurrentWal const &current_wal) { [&replica_commit, mem_storage, &rpcClient,
main_uuid = main_uuid_](RecoveryCurrentWal const &current_wal) {
std::unique_lock transaction_guard(mem_storage->engine_lock_); std::unique_lock transaction_guard(mem_storage->engine_lock_);
if (mem_storage->wal_file_ && if (mem_storage->wal_file_ &&
mem_storage->wal_file_->SequenceNumber() == current_wal.current_wal_seq_num) { mem_storage->wal_file_->SequenceNumber() == current_wal.current_wal_seq_num) {
@ -276,7 +282,7 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph:
mem_storage->wal_file_->DisableFlushing(); mem_storage->wal_file_->DisableFlushing();
transaction_guard.unlock(); transaction_guard.unlock();
spdlog::debug("Sending current wal file"); spdlog::debug("Sending current wal file");
replica_commit = ReplicateCurrentWal(mem_storage, rpcClient, *mem_storage->wal_file_); replica_commit = ReplicateCurrentWal(main_uuid, mem_storage, rpcClient, *mem_storage->wal_file_);
} else { } else {
spdlog::debug("Cannot recover using current wal file"); spdlog::debug("Cannot recover using current wal file");
} }
@ -314,10 +320,12 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph:
} }
////// ReplicaStream ////// ////// ReplicaStream //////
ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num) ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num,
utils::UUID main_uuid)
: storage_{storage}, : storage_{storage},
stream_(rpc_client.Stream<replication::AppendDeltasRpc>( stream_(rpc_client.Stream<replication::AppendDeltasRpc>(
storage->uuid(), storage->repl_storage_state_.last_commit_timestamp_.load(), current_seq_num)) { main_uuid, storage->uuid(), storage->repl_storage_state_.last_commit_timestamp_.load(), current_seq_num)),
main_uuid_(main_uuid) {
replication::Encoder encoder{stream_.GetBuilder()}; replication::Encoder encoder{stream_.GetBuilder()};
encoder.WriteString(storage->repl_storage_state_.epoch_.id()); encoder.WriteString(storage->repl_storage_state_.epoch_.id());
} }

View File

@ -28,6 +28,7 @@
#include "utils/scheduler.hpp" #include "utils/scheduler.hpp"
#include "utils/synchronized.hpp" #include "utils/synchronized.hpp"
#include "utils/thread_pool.hpp" #include "utils/thread_pool.hpp"
#include "utils/uuid.hpp"
#include <atomic> #include <atomic>
#include <concepts> #include <concepts>
@ -48,7 +49,7 @@ class ReplicationStorageClient;
// Handler used for transferring the current transaction. // Handler used for transferring the current transaction.
class ReplicaStream { class ReplicaStream {
public: public:
explicit ReplicaStream(Storage *storage, rpc::Client &rpc_client, uint64_t current_seq_num); explicit ReplicaStream(Storage *storage, rpc::Client &rpc_client, uint64_t current_seq_num, utils::UUID main_uuid);
/// @throw rpc::RpcFailedException /// @throw rpc::RpcFailedException
void AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t final_commit_timestamp); void AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t final_commit_timestamp);
@ -72,6 +73,7 @@ class ReplicaStream {
private: private:
Storage *storage_; Storage *storage_;
rpc::Client::StreamHandler<replication::AppendDeltasRpc> stream_; rpc::Client::StreamHandler<replication::AppendDeltasRpc> stream_;
utils::UUID main_uuid_;
}; };
template <typename F> template <typename F>
@ -84,7 +86,7 @@ class ReplicationStorageClient {
friend struct ::memgraph::replication::ReplicationClient; friend struct ::memgraph::replication::ReplicationClient;
public: public:
explicit ReplicationStorageClient(::memgraph::replication::ReplicationClient &client); explicit ReplicationStorageClient(::memgraph::replication::ReplicationClient &client, utils::UUID main_uuid);
ReplicationStorageClient(ReplicationStorageClient const &) = delete; ReplicationStorageClient(ReplicationStorageClient const &) = delete;
ReplicationStorageClient &operator=(ReplicationStorageClient const &) = delete; ReplicationStorageClient &operator=(ReplicationStorageClient const &) = delete;
@ -202,6 +204,8 @@ class ReplicationStorageClient {
replica_stream_; // Currently active stream (nullopt if not in use), note: a single stream per rpc client replica_stream_; // Currently active stream (nullopt if not in use), note: a single stream per rpc client
mutable utils::Synchronized<replication::ReplicaState, utils::SpinLock> replica_state_{ mutable utils::Synchronized<replication::ReplicaState, utils::SpinLock> replica_state_{
replication::ReplicaState::MAYBE_BEHIND}; replication::ReplicaState::MAYBE_BEHIND};
const utils::UUID main_uuid_;
}; };
} // namespace memgraph::storage } // namespace memgraph::storage

View File

@ -114,10 +114,12 @@ void Load(memgraph::storage::replication::TimestampRes *self, memgraph::slk::Rea
// Serialize code for TimestampReq // Serialize code for TimestampReq
void Save(const memgraph::storage::replication::TimestampReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::TimestampReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
} }
void Load(memgraph::storage::replication::TimestampReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::TimestampReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
} }
@ -136,10 +138,12 @@ void Load(memgraph::storage::replication::CurrentWalRes *self, memgraph::slk::Re
// Serialize code for CurrentWalReq // Serialize code for CurrentWalReq
void Save(const memgraph::storage::replication::CurrentWalReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::CurrentWalReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
} }
void Load(memgraph::storage::replication::CurrentWalReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::CurrentWalReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
} }
@ -158,11 +162,13 @@ void Load(memgraph::storage::replication::WalFilesRes *self, memgraph::slk::Read
// Serialize code for WalFilesReq // Serialize code for WalFilesReq
void Save(const memgraph::storage::replication::WalFilesReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::WalFilesReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.file_number, builder); memgraph::slk::Save(self.file_number, builder);
} }
void Load(memgraph::storage::replication::WalFilesReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::WalFilesReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->file_number, reader); memgraph::slk::Load(&self->file_number, reader);
} }
@ -182,10 +188,12 @@ void Load(memgraph::storage::replication::SnapshotRes *self, memgraph::slk::Read
// Serialize code for SnapshotReq // Serialize code for SnapshotReq
void Save(const memgraph::storage::replication::SnapshotReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::SnapshotReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
} }
void Load(memgraph::storage::replication::SnapshotReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::SnapshotReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
} }
@ -206,12 +214,14 @@ void Load(memgraph::storage::replication::HeartbeatRes *self, memgraph::slk::Rea
// Serialize code for HeartbeatReq // Serialize code for HeartbeatReq
void Save(const memgraph::storage::replication::HeartbeatReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::HeartbeatReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.main_commit_timestamp, builder); memgraph::slk::Save(self.main_commit_timestamp, builder);
memgraph::slk::Save(self.epoch_id, builder); memgraph::slk::Save(self.epoch_id, builder);
} }
void Load(memgraph::storage::replication::HeartbeatReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::HeartbeatReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->main_commit_timestamp, reader); memgraph::slk::Load(&self->main_commit_timestamp, reader);
memgraph::slk::Load(&self->epoch_id, reader); memgraph::slk::Load(&self->epoch_id, reader);
@ -232,12 +242,14 @@ void Load(memgraph::storage::replication::AppendDeltasRes *self, memgraph::slk::
// Serialize code for AppendDeltasReq // Serialize code for AppendDeltasReq
void Save(const memgraph::storage::replication::AppendDeltasReq &self, memgraph::slk::Builder *builder) { void Save(const memgraph::storage::replication::AppendDeltasReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder); memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.previous_commit_timestamp, builder); memgraph::slk::Save(self.previous_commit_timestamp, builder);
memgraph::slk::Save(self.seq_num, builder); memgraph::slk::Save(self.seq_num, builder);
} }
void Load(memgraph::storage::replication::AppendDeltasReq *self, memgraph::slk::Reader *reader) { void Load(memgraph::storage::replication::AppendDeltasReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader); memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->previous_commit_timestamp, reader); memgraph::slk::Load(&self->previous_commit_timestamp, reader);
memgraph::slk::Load(&self->seq_num, reader); memgraph::slk::Load(&self->seq_num, reader);

View File

@ -32,9 +32,11 @@ struct AppendDeltasReq {
static void Load(AppendDeltasReq *self, memgraph::slk::Reader *reader); static void Load(AppendDeltasReq *self, memgraph::slk::Reader *reader);
static void Save(const AppendDeltasReq &self, memgraph::slk::Builder *builder); static void Save(const AppendDeltasReq &self, memgraph::slk::Builder *builder);
AppendDeltasReq() = default; AppendDeltasReq() = default;
AppendDeltasReq(const utils::UUID &uuid, uint64_t previous_commit_timestamp, uint64_t seq_num) AppendDeltasReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t previous_commit_timestamp,
: uuid{uuid}, previous_commit_timestamp(previous_commit_timestamp), seq_num(seq_num) {} uint64_t seq_num)
: main_uuid{main_uuid}, uuid{uuid}, previous_commit_timestamp(previous_commit_timestamp), seq_num(seq_num) {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
uint64_t previous_commit_timestamp; uint64_t previous_commit_timestamp;
uint64_t seq_num; uint64_t seq_num;
@ -63,9 +65,11 @@ struct HeartbeatReq {
static void Load(HeartbeatReq *self, memgraph::slk::Reader *reader); static void Load(HeartbeatReq *self, memgraph::slk::Reader *reader);
static void Save(const HeartbeatReq &self, memgraph::slk::Builder *builder); static void Save(const HeartbeatReq &self, memgraph::slk::Builder *builder);
HeartbeatReq() = default; HeartbeatReq() = default;
HeartbeatReq(const utils::UUID &uuid, uint64_t main_commit_timestamp, std::string epoch_id) HeartbeatReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t main_commit_timestamp,
: uuid{uuid}, main_commit_timestamp(main_commit_timestamp), epoch_id(std::move(epoch_id)) {} std::string epoch_id)
: main_uuid(main_uuid), uuid{uuid}, main_commit_timestamp(main_commit_timestamp), epoch_id(std::move(epoch_id)) {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
uint64_t main_commit_timestamp; uint64_t main_commit_timestamp;
std::string epoch_id; std::string epoch_id;
@ -95,8 +99,9 @@ struct SnapshotReq {
static void Load(SnapshotReq *self, memgraph::slk::Reader *reader); static void Load(SnapshotReq *self, memgraph::slk::Reader *reader);
static void Save(const SnapshotReq &self, memgraph::slk::Builder *builder); static void Save(const SnapshotReq &self, memgraph::slk::Builder *builder);
SnapshotReq() = default; SnapshotReq() = default;
explicit SnapshotReq(const utils::UUID &uuid) : uuid{uuid} {} explicit SnapshotReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid{main_uuid}, uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
}; };
@ -123,8 +128,10 @@ struct WalFilesReq {
static void Load(WalFilesReq *self, memgraph::slk::Reader *reader); static void Load(WalFilesReq *self, memgraph::slk::Reader *reader);
static void Save(const WalFilesReq &self, memgraph::slk::Builder *builder); static void Save(const WalFilesReq &self, memgraph::slk::Builder *builder);
WalFilesReq() = default; WalFilesReq() = default;
explicit WalFilesReq(const utils::UUID &uuid, uint64_t file_number) : uuid{uuid}, file_number(file_number) {} explicit WalFilesReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t file_number)
: main_uuid{main_uuid}, uuid{uuid}, file_number(file_number) {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
uint64_t file_number; uint64_t file_number;
}; };
@ -152,8 +159,9 @@ struct CurrentWalReq {
static void Load(CurrentWalReq *self, memgraph::slk::Reader *reader); static void Load(CurrentWalReq *self, memgraph::slk::Reader *reader);
static void Save(const CurrentWalReq &self, memgraph::slk::Builder *builder); static void Save(const CurrentWalReq &self, memgraph::slk::Builder *builder);
CurrentWalReq() = default; CurrentWalReq() = default;
explicit CurrentWalReq(const utils::UUID &uuid) : uuid{uuid} {} explicit CurrentWalReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid(main_uuid), uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
}; };
@ -180,8 +188,9 @@ struct TimestampReq {
static void Load(TimestampReq *self, memgraph::slk::Reader *reader); static void Load(TimestampReq *self, memgraph::slk::Reader *reader);
static void Save(const TimestampReq &self, memgraph::slk::Builder *builder); static void Save(const TimestampReq &self, memgraph::slk::Builder *builder);
TimestampReq() = default; TimestampReq() = default;
explicit TimestampReq(const utils::UUID &uuid) : uuid{uuid} {} explicit TimestampReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid(main_uuid), uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid; utils::UUID uuid;
}; };

View File

@ -1,4 +1,4 @@
// Copyright 2024 Memgraph Ltd. // Copyright 2023 Memgraph Ltd.
// //
// Use of this software is governed by the Business Source License // Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -27,7 +27,7 @@ struct ISystemAction {
virtual void DoDurability() = 0; virtual void DoDurability() = 0;
/// Prepare the RPC payload that will be sent to all replicas clients /// Prepare the RPC payload that will be sent to all replicas clients
virtual bool DoReplication(memgraph::replication::ReplicationClient &client, virtual bool DoReplication(memgraph::replication::ReplicationClient &client, const utils::UUID &main_uuid,
memgraph::replication::ReplicationEpoch const &epoch, memgraph::replication::ReplicationEpoch const &epoch,
Transaction const &system_tx) const = 0; Transaction const &system_tx) const = 0;

View File

@ -99,7 +99,7 @@ struct DoReplication {
auto sync_status = AllSyncReplicaStatus::AllCommitsConfirmed; auto sync_status = AllSyncReplicaStatus::AllCommitsConfirmed;
for (auto &client : main_data_.registered_replicas_) { for (auto &client : main_data_.registered_replicas_) {
bool completed = action.DoReplication(client, main_data_.epoch_, system_tx); bool completed = action.DoReplication(client, main_data_.uuid_, main_data_.epoch_, system_tx);
if (!completed && client.mode_ == replication_coordination_glue::ReplicationMode::SYNC) { if (!completed && client.mode_ == replication_coordination_glue::ReplicationMode::SYNC) {
sync_status = AllSyncReplicaStatus::SomeCommitsUnconfirmed; sync_status = AllSyncReplicaStatus::SomeCommitsUnconfirmed;
} }

View File

@ -97,12 +97,16 @@ enum class TypeId : uint64_t {
REP_UPDATE_AUTH_DATA_RES, REP_UPDATE_AUTH_DATA_RES,
REP_DROP_AUTH_DATA_REQ, REP_DROP_AUTH_DATA_REQ,
REP_DROP_AUTH_DATA_RES, REP_DROP_AUTH_DATA_RES,
REP_TRY_SET_MAIN_UUID_REQ,
REP_TRY_SET_MAIN_UUID_RES,
// Coordinator // Coordinator
COORD_FAILOVER_REQ, COORD_FAILOVER_REQ,
COORD_FAILOVER_RES, COORD_FAILOVER_RES,
COORD_SET_REPL_MAIN_REQ, COORD_SET_REPL_MAIN_REQ,
COORD_SET_REPL_MAIN_RES, COORD_SET_REPL_MAIN_RES,
COORD_SWAP_UUID_REQ,
COORD_SWAP_UUID_RES,
// AST // AST
AST_LABELIX = 3000, AST_LABELIX = 3000,

View File

@ -3,6 +3,7 @@ find_package(gflags REQUIRED)
copy_e2e_python_files(ha_experimental coordinator.py) copy_e2e_python_files(ha_experimental coordinator.py)
copy_e2e_python_files(ha_experimental automatic_failover.py) copy_e2e_python_files(ha_experimental automatic_failover.py)
copy_e2e_python_files(ha_experimental manual_setting_replicas.py) copy_e2e_python_files(ha_experimental manual_setting_replicas.py)
copy_e2e_python_files(ha_experimental not_replicate_from_old_main.py)
copy_e2e_python_files(ha_experimental common.py) copy_e2e_python_files(ha_experimental common.py)
copy_e2e_python_files(ha_experimental workloads.yaml) copy_e2e_python_files(ha_experimental workloads.yaml)

View File

@ -13,6 +13,7 @@ import os
import shutil import shutil
import sys import sys
import tempfile import tempfile
import time
import interactive_mg_runner import interactive_mg_runner
import pytest import pytest
@ -131,6 +132,7 @@ def test_replication_works_on_failover():
mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas) mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
expected_data_on_new_main = [ expected_data_on_new_main = [
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
("instance_3", "127.0.0.1:10003", "sync", 0, 0, "ready"), ("instance_3", "127.0.0.1:10003", "sync", 0, 0, "ready"),
@ -141,8 +143,8 @@ def test_replication_works_on_failover():
execute_and_fetch_all(new_main_cursor, "CREATE ();") execute_and_fetch_all(new_main_cursor, "CREATE ();")
# 6 # 6
alive_replica_cursror = connect(host="localhost", port=7689).cursor() alive_replica_cursor = connect(host="localhost", port=7689).cursor()
res = execute_and_fetch_all(alive_replica_cursror, "MATCH (n) RETURN count(n) as count;")[0][0] res = execute_and_fetch_all(alive_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be replicated" assert res == 1, "Vertex should be replicated"
interactive_mg_runner.stop_all(MEMGRAPH_INSTANCES_DESCRIPTION) interactive_mg_runner.stop_all(MEMGRAPH_INSTANCES_DESCRIPTION)
@ -344,65 +346,60 @@ def test_automatic_failover_main_back_as_replica():
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance3) mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance3)
def test_automatic_failover_main_back_as_main(): def test_replica_instance_restarts_replication_works():
safe_execute(shutil.rmtree, TEMP_DIR) safe_execute(shutil.rmtree, TEMP_DIR)
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") cursor = connect(host="localhost", port=7690).cursor()
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
coord_cursor = connect(host="localhost", port=7690).cursor() def show_repl_cluster():
return sorted(list(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")))
def retrieve_data_show_repl_cluster(): expected_data_up = [
return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;")))
expected_data_all_down = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", False, "unknown"),
("instance_3", "127.0.0.1:10013", False, "unknown"),
]
mg_sleep_and_assert(expected_data_all_down, retrieve_data_show_repl_cluster)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
expected_data_main_back = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", False, "unknown"),
("instance_3", "127.0.0.1:10013", True, "main"),
]
mg_sleep_and_assert(expected_data_main_back, retrieve_data_show_repl_cluster)
instance3_cursor = connect(host="localhost", port=7687).cursor()
def retrieve_data_show_repl_role_instance3():
return sorted(list(execute_and_fetch_all(instance3_cursor, "SHOW REPLICATION ROLE;")))
mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
expected_data_replicas_back = [
("instance_1", "127.0.0.1:10011", True, "replica"), ("instance_1", "127.0.0.1:10011", True, "replica"),
("instance_2", "127.0.0.1:10012", True, "replica"), ("instance_2", "127.0.0.1:10012", True, "replica"),
("instance_3", "127.0.0.1:10013", True, "main"), ("instance_3", "127.0.0.1:10013", True, "main"),
] ]
mg_sleep_and_assert(expected_data_up, show_repl_cluster)
mg_sleep_and_assert(expected_data_replicas_back, retrieve_data_show_repl_cluster) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
expected_data_down = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", True, "replica"),
("instance_3", "127.0.0.1:10013", True, "main"),
]
mg_sleep_and_assert(expected_data_down, show_repl_cluster)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
mg_sleep_and_assert(expected_data_up, show_repl_cluster)
expected_data_on_main_show_replicas = [
("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"),
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
]
instance3_cursor = connect(host="localhost", port=7687).cursor()
instance1_cursor = connect(host="localhost", port=7688).cursor() instance1_cursor = connect(host="localhost", port=7688).cursor()
instance2_cursor = connect(host="localhost", port=7689).cursor()
def retrieve_data_show_repl_role_instance1():
return sorted(list(execute_and_fetch_all(instance3_cursor, "SHOW REPLICAS;")))
mg_sleep_and_assert(expected_data_on_main_show_replicas, retrieve_data_show_repl_role_instance1)
def retrieve_data_show_repl_role_instance1(): def retrieve_data_show_repl_role_instance1():
return sorted(list(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;"))) return sorted(list(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;")))
def retrieve_data_show_repl_role_instance2(): expected_data_replica = [("replica",)]
return sorted(list(execute_and_fetch_all(instance2_cursor, "SHOW REPLICATION ROLE;"))) mg_sleep_and_assert(expected_data_replica, retrieve_data_show_repl_role_instance1)
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance1) execute_and_fetch_all(instance3_cursor, "CREATE ();")
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance2)
mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3) def retrieve_data_replica():
return execute_and_fetch_all(instance1_cursor, "MATCH (n) RETURN count(n);")[0][0]
expected_data_replica = 1
mg_sleep_and_assert(expected_data_replica, retrieve_data_replica)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -0,0 +1,117 @@
# Copyright 2024 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import os
import sys
import interactive_mg_runner
import pytest
from common import execute_and_fetch_all
from mg_utils import mg_sleep_and_assert
# Configure the shared runner module with paths derived from this script's location:
# the script directory, the project root four directory levels above it, the `build`
# directory inside the project root, and the `memgraph` binary inside that build directory.
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
interactive_mg_runner.PROJECT_DIR = os.path.normpath(
os.path.join(interactive_mg_runner.SCRIPT_DIR, "..", "..", "..", "..")
)
interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build"))
interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph"))
# First (old) cluster: `main1` (bolt port 7687) registers `shared_replica`
# (bolt port 7688, replication port 10001) as a SYNC replica.
# Each entry holds the instance's command-line args, its log file name and the
# queries run as setup for that instance.
MEMGRAPH_FIRST_CLUSTER_DESCRIPTION = {
"shared_replica": {
"args": ["--bolt-port", "7688", "--log-level", "TRACE"],
"log_file": "replica2.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
},
"main1": {
"args": ["--bolt-port", "7687", "--log-level", "TRACE"],
"log_file": "main.log",
"setup_queries": ["REGISTER REPLICA shared_replica SYNC TO '127.0.0.1:10001' ;"],
},
}
# Second (new) cluster: `main_2` (bolt port 7690) registers two SYNC replicas:
# `shared_replica` at 127.0.0.1:10001 (the same address the first cluster's main
# registers) and `replica` (bolt port 7689, replication port 10002), which is
# only described here.
MEMGRAPH_INSTANCES_DESCRIPTION = {
"replica": {
"args": ["--bolt-port", "7689", "--log-level", "TRACE"],
"log_file": "replica.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
},
"main_2": {
"args": ["--bolt-port", "7690", "--log-level", "TRACE"],
"log_file": "main_2.log",
"setup_queries": [
"REGISTER REPLICA shared_replica SYNC TO '127.0.0.1:10001' ;",
"REGISTER REPLICA replica SYNC TO '127.0.0.1:10002' ; ",
],
},
}
def test_replication_works_on_failover(connection):
    """Check that a replica taken over by a new main rejects writes from its old main.

    `shared_replica` is first registered by `main1` (old cluster) and then registered
    again by `main_2` (new cluster). Afterwards a write on `main1` must not reach
    `shared_replica`, while a write on `main_2` must.

    `connection` is a fixture defined outside this file; it is called here as
    `connection(bolt_port, instance_name)` and the result exposes `.cursor()`.
    """
    # Instances are stopped in `finally` so that a failing assertion does not leave
    # Memgraph processes running on the fixed ports used by this test.
    try:
        # 1: start the old cluster (shared_replica + main1).
        interactive_mg_runner.start_all_keep_others(MEMGRAPH_FIRST_CLUSTER_DESCRIPTION)

        # 2: main1 must report shared_replica as a ready SYNC replica.
        main_cursor = connection(7687, "main1").cursor()
        expected_data_on_main = [
            ("shared_replica", "127.0.0.1:10001", "sync", 0, 0, "ready"),
        ]
        actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
        assert actual_data_on_main == expected_data_on_main

        # 3: start the new cluster; its main also registers shared_replica.
        interactive_mg_runner.start_all_keep_others(MEMGRAPH_INSTANCES_DESCRIPTION)

        # 4: wait until main_2 reports both replicas as ready.
        new_main_cursor = connection(7690, "main_2").cursor()

        def retrieve_data_show_replicas():
            return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;")))

        expected_data_on_new_main = [
            ("replica", "127.0.0.1:10002", "sync", 0, 0, "ready"),
            ("shared_replica", "127.0.0.1:10001", "sync", 0, 0, "ready"),
        ]
        mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)

        # 5: a write on the old main must raise because its SYNC replica does not confirm it.
        shared_replica_cursor = connection(7688, "shared_replica").cursor()

        with pytest.raises(Exception) as e:
            execute_and_fetch_all(main_cursor, "CREATE ();")
        # `e.value` is inspected after the `with` block; code placed after the raising
        # call inside the block would never run.
        assert (
            str(e.value)
            == "Replication Exception: At least one SYNC replica has not confirmed committing last transaction. Check the status of the replicas using 'SHOW REPLICAS' query."
        )

        # The vertex exists on the old main but must not have reached shared_replica.
        res = execute_and_fetch_all(main_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
        assert res == 1, "Vertex should be created"

        res = execute_and_fetch_all(shared_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
        assert res == 0, "Vertex shouldn't be replicated"

        # 6: a write on the new main must be replicated to shared_replica.
        execute_and_fetch_all(new_main_cursor, "CREATE ();")

        res = execute_and_fetch_all(new_main_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
        assert res == 1, "Vertex should be created"

        res = execute_and_fetch_all(shared_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
        assert res == 1, "Vertex should be replicated"
    finally:
        interactive_mg_runner.stop_all()
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -35,3 +35,7 @@ workloads:
- name: "Disabled manual setting of replication cluster" - name: "Disabled manual setting of replication cluster"
binary: "tests/e2e/pytest_runner.sh" binary: "tests/e2e/pytest_runner.sh"
args: ["high_availability_experimental/manual_setting_replicas.py"] args: ["high_availability_experimental/manual_setting_replicas.py"]
- name: "Not replicate from old main"
binary: "tests/e2e/pytest_runner.sh"
args: ["high_availability_experimental/not_replicate_from_old_main.py"]

View File

@ -208,6 +208,11 @@ def start_all(context, procdir="", keep_directories=True):
start_instance(context, key, procdir) start_instance(context, key, procdir)
def start_all_keep_others(context, procdir="", keep_directories=True):
    """Start every instance described in `context`, in the mapping's iteration order.

    Nothing else is done before the instances are started.

    Args:
        context: mapping of instance name to its description; only the names are
            used here, the mapping itself is forwarded to `start_instance`.
        procdir: forwarded unchanged to `start_instance`.
        keep_directories: accepted but not used by this function.
    """
    for instance_name in context.keys():
        start_instance(context, instance_name, procdir)
def start(context, name, procdir=""): def start(context, name, procdir=""):
if name != "all": if name != "all":
start_instance(context, name, procdir) start_instance(context, name, procdir)

View File

@ -13,6 +13,7 @@
#include "replication/state.hpp" #include "replication/state.hpp"
#include "replication/status.hpp" #include "replication/status.hpp"
#include "utils/logging.hpp" #include "utils/logging.hpp"
#include "utils/uuid.hpp"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <fstream> #include <fstream>
@ -48,6 +49,17 @@ TEST(ReplicationDurability, V2Main) {
ASSERT_EQ(role_entry, deser); ASSERT_EQ(role_entry, deser);
} }
// Round-trips a V3 MAIN role entry (epoch plus a default-constructed main UUID)
// through to_json/from_json and expects the result to compare equal to the input.
TEST(ReplicationDurability, V3Main) {
  auto const original = ReplicationRoleEntry{
      .version = DurabilityVersion::V3,
      .role = MainRole{.epoch = ReplicationEpoch{"TEST_STRING"}, .main_uuid = memgraph::utils::UUID{}}};

  nlohmann::json serialized;
  to_json(serialized, original);

  ReplicationRoleEntry restored;
  from_json(serialized, restored);

  ASSERT_EQ(original, restored);
}
TEST(ReplicationDurability, V1Replica) { TEST(ReplicationDurability, V1Replica) {
auto const role_entry = auto const role_entry =
ReplicationRoleEntry{.version = DurabilityVersion::V1, ReplicationRoleEntry{.version = DurabilityVersion::V1,
@ -74,6 +86,33 @@ TEST(ReplicationDurability, V2Replica) {
ASSERT_EQ(role_entry, deser); ASSERT_EQ(role_entry, deser);
} }
// Round-trips a V3 REPLICA role entry whose initializer sets only the server config
// (no main_uuid is given) through to_json/from_json and expects equality with the input.
TEST(ReplicationDurability, V3ReplicaNoMain) {
  auto const original =
      ReplicationRoleEntry{.version = DurabilityVersion::V3,
                           .role = ReplicaRole{
                               .config = ReplicationServerConfig{.ip_address = "000.123.456.789", .port = 2023},
                           }};

  nlohmann::json serialized;
  to_json(serialized, original);

  ReplicationRoleEntry restored;
  from_json(serialized, restored);

  ASSERT_EQ(original, restored);
}
// Round-trips a REPLICA role entry that carries a main UUID through
// to_json/from_json and expects the result to compare equal to the input.
// The entry is tagged V3, like the sibling V3* tests, so that the case named by
// this test is the one actually exercised.
TEST(ReplicationDurability, V3ReplicaMain) {
  auto const role_entry =
      ReplicationRoleEntry{.version = DurabilityVersion::V3,
                           .role = ReplicaRole{
                               .config = ReplicationServerConfig{.ip_address = "000.123.456.789", .port = 2023},
                               .main_uuid = memgraph::utils::UUID{},
                           }};
  nlohmann::json j;
  to_json(j, role_entry);
  ReplicationRoleEntry deser;
  from_json(j, deser);
  ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, ReplicaEntrySync) { TEST(ReplicationDurability, ReplicaEntrySync) {
using namespace std::chrono_literals; using namespace std::chrono_literals;
using namespace std::string_literals; using namespace std::string_literals;

View File

@ -142,17 +142,21 @@ TEST_F(ReplicationTest, BasicSynchronousReplicationTest) {
MinMemgraph replica(repl_conf); MinMemgraph replica(repl_conf);
auto replica_store_handler = replica.repl_handler; auto replica_store_handler = replica.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
const auto &reg = main.repl_handler.TryRegisterReplica(ReplicationClientConfig{ const auto &reg = main.repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA", .name = "REPLICA",
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
true);
ASSERT_FALSE(reg.HasError()) << (int)reg.GetError(); ASSERT_FALSE(reg.HasError()) << (int)reg.GetError();
// vertex create // vertex create
@ -435,30 +439,38 @@ TEST_F(ReplicationTest, MultipleSynchronousReplicationTest) {
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ std::nullopt);
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}) },
true)
.HasError()); .HasError());
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}) },
true)
.HasError()); .HasError());
const auto *vertex_label = "label"; const auto *vertex_label = "label";
@ -585,17 +597,21 @@ TEST_F(ReplicationTest, RecoveryProcess) {
MinMemgraph replica(repl_conf); MinMemgraph replica(repl_conf);
auto replica_store_handler = replica.repl_handler; auto replica_store_handler = replica.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}) },
true)
.HasError()); .HasError());
ASSERT_EQ(main.db.storage()->GetReplicaState(replicas[0]), ReplicaState::RECOVERY); ASSERT_EQ(main.db.storage()->GetReplicaState(replicas[0]), ReplicaState::RECOVERY);
@ -660,18 +676,22 @@ TEST_F(ReplicationTest, BasicAsynchronousReplicationTest) {
MinMemgraph replica_async(repl_conf); MinMemgraph replica_async(repl_conf);
auto replica_store_handler = replica_async.repl_handler; auto replica_store_handler = replica_async.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA_ASYNC", .name = "REPLICA_ASYNC",
.mode = ReplicationMode::ASYNC, .mode = ReplicationMode::ASYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}) },
true)
.HasError()); .HasError());
static constexpr size_t vertices_create_num = 10; static constexpr size_t vertices_create_num = 10;
@ -706,33 +726,41 @@ TEST_F(ReplicationTest, EpochTest) {
MinMemgraph main(main_conf); MinMemgraph main(main_conf);
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = 10001, .port = 10001,
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}) },
true)
.HasError()); .HasError());
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = 10001, .port = 10001,
}) },
true)
.HasError()); .HasError());
std::optional<Gid> vertex_gid; std::optional<Gid> vertex_gid;
@ -761,12 +789,14 @@ TEST_F(ReplicationTest, EpochTest) {
ASSERT_TRUE(replica1.repl_handler.SetReplicationRoleMain()); ASSERT_TRUE(replica1.repl_handler.SetReplicationRoleMain());
ASSERT_FALSE(replica1.repl_handler ASSERT_FALSE(replica1.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = 10001, .port = 10001,
}) },
true)
.HasError()); .HasError());
@ -789,17 +819,21 @@ TEST_F(ReplicationTest, EpochTest) {
ASSERT_FALSE(acc->Commit().HasError()); ASSERT_FALSE(acc->Commit().HasError());
} }
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
ASSERT_TRUE(main.repl_handler ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}) },
true)
.HasError()); .HasError());
@ -824,35 +858,43 @@ TEST_F(ReplicationTest, ReplicationInformation) {
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
uint16_t replica1_port = 10001; uint16_t replica1_port = 10001;
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = replica1_port, .port = replica1_port,
}); },
std::nullopt);
uint16_t replica2_port = 10002; uint16_t replica2_port = 10002;
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = replica2_port, .port = replica2_port,
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = replica1_port, .port = replica1_port,
}) },
true)
.HasError()); .HasError());
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::ASYNC, .mode = ReplicationMode::ASYNC,
.ip_address = local_host, .ip_address = local_host,
.port = replica2_port, .port = replica2_port,
}) },
true)
.HasError()); .HasError());
@ -881,33 +923,41 @@ TEST_F(ReplicationTest, ReplicationReplicaWithExistingName) {
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
uint16_t replica1_port = 10001; uint16_t replica1_port = 10001;
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = replica1_port, .port = replica1_port,
}); },
std::nullopt);
uint16_t replica2_port = 10002; uint16_t replica2_port = 10002;
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = replica2_port, .port = replica2_port,
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = replica1_port, .port = replica1_port,
}) },
true)
.HasError()); .HasError());
ASSERT_TRUE(main.repl_handler ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::ASYNC, .mode = ReplicationMode::ASYNC,
.ip_address = local_host, .ip_address = local_host,
.port = replica2_port, .port = replica2_port,
}) },
true)
.GetError() == RegisterReplicaError::NAME_EXISTS); .GetError() == RegisterReplicaError::NAME_EXISTS);
} }
@ -916,33 +966,41 @@ TEST_F(ReplicationTest, ReplicationReplicaWithExistingEndPoint) {
MinMemgraph main(main_conf); MinMemgraph main(main_conf);
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = common_port, .port = common_port,
}); },
std::nullopt);
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = common_port, .port = common_port,
}); },
std::nullopt);
ASSERT_FALSE(main.repl_handler ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = common_port, .port = common_port,
}) },
true)
.HasError()); .HasError());
ASSERT_TRUE(main.repl_handler ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::ASYNC, .mode = ReplicationMode::ASYNC,
.ip_address = local_host, .ip_address = local_host,
.port = common_port, .port = common_port,
}) },
true)
.GetError() == RegisterReplicaError::ENDPOINT_EXISTS); .GetError() == RegisterReplicaError::ENDPOINT_EXISTS);
} }
@ -965,30 +1023,38 @@ TEST_F(ReplicationTest, RestoringReplicationAtStartupAfterDroppingReplica) {
std::optional<MinMemgraph> main(main_config); std::optional<MinMemgraph> main(main_config);
MinMemgraph replica1(replica1_config); MinMemgraph replica1(replica1_config);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
MinMemgraph replica2(replica2_config); MinMemgraph replica2(replica2_config);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
std::nullopt);
auto res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{ auto res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
true);
ASSERT_FALSE(res.HasError()) << (int)res.GetError(); ASSERT_FALSE(res.HasError()) << (int)res.GetError();
res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{ res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
true);
ASSERT_FALSE(res.HasError()) << (int)res.GetError(); ASSERT_FALSE(res.HasError()) << (int)res.GetError();
auto replica_infos = main->db.storage()->ReplicasInfo(); auto replica_infos = main->db.storage()->ReplicasInfo();
@ -1022,30 +1088,38 @@ TEST_F(ReplicationTest, RestoringReplicationAtStartup) {
std::optional<MinMemgraph> main(main_config); std::optional<MinMemgraph> main(main_config);
MinMemgraph replica1(repl_conf); MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
std::nullopt);
MinMemgraph replica2(repl2_conf); MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{ replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
auto res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{ std::nullopt);
auto res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0], .name = replicas[0],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}); },
true);
ASSERT_FALSE(res.HasError()); ASSERT_FALSE(res.HasError());
res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{ res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1], .name = replicas[1],
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[1], .port = ports[1],
}); },
true);
ASSERT_FALSE(res.HasError()); ASSERT_FALSE(res.HasError());
auto replica_infos = main->db.storage()->ReplicasInfo(); auto replica_infos = main->db.storage()->ReplicasInfo();
@ -1083,11 +1157,13 @@ TEST_F(ReplicationTest, AddingInvalidReplica) {
MinMemgraph main(main_conf); MinMemgraph main(main_conf);
ASSERT_TRUE(main.repl_handler ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{ .TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA", .name = "REPLICA",
.mode = ReplicationMode::SYNC, .mode = ReplicationMode::SYNC,
.ip_address = local_host, .ip_address = local_host,
.port = ports[0], .port = ports[0],
}) },
.GetError() == RegisterReplicaError::CONNECTION_FAILED); true)
.GetError() == RegisterReplicaError::ERROR_ACCEPTING_MAIN);
} }