Merge branch 'master' into add-logs-to-ci

commit 28f49a3b90
Marko Barišić, 2024-02-07 11:33:27 +01:00 (committed by GitHub)
70 changed files with 1419 additions and 573 deletions

View File

@ -57,16 +57,19 @@ struct UpdateAuthData : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during Auth execution */
}
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch,
bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override {
auto check_response = [](const replication::UpdateAuthDataRes &response) { return response.success; };
if (user_) {
return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), *user_);
check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
*user_);
}
if (role_) {
return client.SteamAndFinalizeDelta<replication::UpdateAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), *role_);
check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
*role_);
}
// Should never get here
MG_ASSERT(false, "Trying to update auth data that is not a user nor a role");
@ -88,7 +91,8 @@ struct DropAuthData : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during Auth execution */
}
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch,
bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override {
auto check_response = [](const replication::DropAuthDataRes &response) { return response.success; };
@ -102,7 +106,8 @@ struct DropAuthData : memgraph::system::ISystemAction {
break;
}
return client.SteamAndFinalizeDelta<replication::DropAuthDataRpc>(
check_response, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(), type, name_);
check_response, main_uuid, std::string{epoch.id()}, txn.last_committed_system_timestamp(), txn.timestamp(),
type, name_);
}
void PostReplication(replication::RoleMainData &mainData) const override {}
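
For orientation, a minimal standalone sketch (not part of this diff; every type below is an invented stand-in, not Memgraph's API) of the pattern this hunk introduces on the MAIN side: each system delta streamed to a replica now carries the MAIN's UUID in addition to the epoch, so the replica can tell which MAIN produced it.

// Simplified, hypothetical sketch of the "delta carries the main UUID" pattern.
#include <cstdint>
#include <iostream>
#include <string>

struct Uuid { std::string value; };

struct Delta {
  Uuid main_uuid;        // NEW: identifies which MAIN produced this delta
  std::string epoch_id;
  uint64_t expected_ts;
  uint64_t new_ts;
  std::string payload;
};

bool ReplicateDelta(const Delta &delta) {
  // A real client would stream the delta over RPC and await the response;
  // here we only log what would be sent.
  std::cout << "replicating delta from main " << delta.main_uuid.value
            << " (epoch " << delta.epoch_id << ")\n";
  return true;
}

int main() {
  Delta d{{"3f2a"}, "epoch-1", 41, 42, "CREATE USER alice"};
  return ReplicateDelta(d) ? 0 : 1;
}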

View File

@ -17,8 +17,15 @@
namespace memgraph::auth {
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req) {
spdlog::error(fmt::format("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
current_main_uuid.has_value() ? std::string(current_main_uuid.value()) : ""));
}
#ifdef MG_ENTERPRISE
void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth,
void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder) {
replication::UpdateAuthDataReq req;
memgraph::slk::Load(&req, req_reader);
@ -26,6 +33,12 @@ void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system
using memgraph::replication::UpdateAuthDataRes;
UpdateAuthDataRes res(false);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, replication::UpdateAuthDataReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check the epoch; the recovery mechanism uses a full, up-to-date snapshot
// of the set of databases, so no per-epoch history needs to be maintained.
// If MAIN has changed we need to check this new group_timestamp is consistent with
@ -53,7 +66,8 @@ void UpdateAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::slk::Save(res, res_builder);
}
void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth,
void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder) {
replication::DropAuthDataReq req;
memgraph::slk::Load(&req, req_reader);
@ -61,6 +75,12 @@ void DropAuthDataHandler(memgraph::system::ReplicaHandlerAccessToState &system_s
using memgraph::replication::DropAuthDataRes;
DropAuthDataRes res(false);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, replication::DropAuthDataRes::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check the epoch; the recovery mechanism uses a full, up-to-date snapshot
// of the set of databases, so no per-epoch history needs to be maintained.
// If MAIN has changed we need to check this new group_timestamp is consistent with
@ -155,14 +175,14 @@ void Register(replication::RoleReplicaData const &data, system::ReplicaHandlerAc
auth::SynchedAuth &auth) {
// NOTE: Register even without license as the user could add a license at run-time
data.server->rpc_server_.Register<replication::UpdateAuthDataRpc>(
[system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
[&data, system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received UpdateAuthDataRpc");
UpdateAuthDataHandler(system_state_access, auth, req_reader, res_builder);
UpdateAuthDataHandler(system_state_access, data.uuid_, auth, req_reader, res_builder);
});
data.server->rpc_server_.Register<replication::DropAuthDataRpc>(
[system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
[&data, system_state_access, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received DropAuthDataRpc");
DropAuthDataHandler(system_state_access, auth, req_reader, res_builder);
DropAuthDataHandler(system_state_access, data.uuid_, auth, req_reader, res_builder);
});
}
#endif
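
A minimal self-contained sketch (not part of this diff; simplified stand-in types, not the real handlers) of the guard these handlers now apply: if the request's main UUID does not match the MAIN this replica currently follows, log the mismatch and reply with failure.

// Hypothetical illustration of the stale-MAIN guard.
#include <iostream>
#include <optional>
#include <string>

using Uuid = std::string;

struct Request { Uuid main_uuid; std::string body; };
struct Response { bool success; };

void LogWrongMain(const std::optional<Uuid> &current, const Uuid &received, const std::string &rpc) {
  std::cerr << "Received " << rpc << " with main_id: " << received
            << " != current_main_uuid: " << current.value_or("") << '\n';
}

Response HandleRpc(const std::optional<Uuid> &current_main_uuid, const Request &req) {
  // Reject anything that does not come from the MAIN this replica currently accepts.
  if (!current_main_uuid.has_value() || req.main_uuid != *current_main_uuid) {
    LogWrongMain(current_main_uuid, req.main_uuid, "UpdateAuthDataRpc");
    return {false};
  }
  // ... apply the update ...
  return {true};
}

int main() {
  std::optional<Uuid> current{"main-A"};
  std::cout << HandleRpc(current, {"main-B", "DROP USER bob"}).success << '\n';  // 0: rejected
  std::cout << HandleRpc(current, {"main-A", "DROP USER bob"}).success << '\n';  // 1: accepted
}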

View File

@ -17,10 +17,16 @@
#include "system/state.hpp"
namespace memgraph::auth {
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req);
#ifdef MG_ENTERPRISE
void UpdateAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth,
void UpdateAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder);
void DropAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access, auth::SynchedAuth &auth,
void DropAuthDataHandler(system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, auth::SynchedAuth &auth,
slk::Reader *req_reader, slk::Builder *res_builder);
bool SystemRecoveryHandler(auth::SynchedAuth &auth, auth::Auth::Config auth_config,

View File

@ -89,6 +89,7 @@ void Load(auth::Auth::Config *self, memgraph::slk::Reader *reader) {
// Serialize code for UpdateAuthDataReq
void Save(const memgraph::replication::UpdateAuthDataReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.epoch_id, builder);
memgraph::slk::Save(self.expected_group_timestamp, builder);
memgraph::slk::Save(self.new_group_timestamp, builder);
@ -96,6 +97,7 @@ void Save(const memgraph::replication::UpdateAuthDataReq &self, memgraph::slk::B
memgraph::slk::Save(self.role, builder);
}
void Load(memgraph::replication::UpdateAuthDataReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->epoch_id, reader);
memgraph::slk::Load(&self->expected_group_timestamp, reader);
memgraph::slk::Load(&self->new_group_timestamp, reader);
@ -113,6 +115,7 @@ void Load(memgraph::replication::UpdateAuthDataRes *self, memgraph::slk::Reader
// Serialize code for DropAuthDataReq
void Save(const memgraph::replication::DropAuthDataReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.epoch_id, builder);
memgraph::slk::Save(self.expected_group_timestamp, builder);
memgraph::slk::Save(self.new_group_timestamp, builder);
@ -120,6 +123,7 @@ void Save(const memgraph::replication::DropAuthDataReq &self, memgraph::slk::Bui
memgraph::slk::Save(self.name, builder);
}
void Load(memgraph::replication::DropAuthDataReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->epoch_id, reader);
memgraph::slk::Load(&self->expected_group_timestamp, reader);
memgraph::slk::Load(&self->new_group_timestamp, reader);
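
A small sketch (not part of this diff; Builder/Reader are toy stand-ins for the slk types) of the symmetry rule these hunks rely on: whatever order Save writes fields in, Load must read them back in the same order, which is why main_uuid is added first on both sides.

#include <cassert>
#include <deque>
#include <string>

struct Builder { std::deque<std::string> fields; void Save(const std::string &f) { fields.push_back(f); } };
struct Reader  { std::deque<std::string> fields; std::string Load() { auto f = fields.front(); fields.pop_front(); return f; } };

struct Req { std::string main_uuid, epoch_id; };

void Save(const Req &r, Builder &b) { b.Save(r.main_uuid); b.Save(r.epoch_id); }        // main_uuid written first
void Load(Req &r, Reader &rd)       { r.main_uuid = rd.Load(); r.epoch_id = rd.Load(); } // read back in the same order

int main() {
  Builder b; Save(Req{"uuid-1", "epoch-1"}, b);
  Reader rd{b.fields}; Req out; Load(out, rd);
  assert(out.main_uuid == "uuid-1" && out.epoch_id == "epoch-1");
}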

View File

@ -27,17 +27,22 @@ struct UpdateAuthDataReq {
static void Load(UpdateAuthDataReq *self, memgraph::slk::Reader *reader);
static void Save(const UpdateAuthDataReq &self, memgraph::slk::Builder *builder);
UpdateAuthDataReq() = default;
UpdateAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, auth::User user)
: epoch_id{std::move(epoch_id)},
UpdateAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
auth::User user)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts},
user{std::move(user)} {}
UpdateAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, auth::Role role)
: epoch_id{std::move(epoch_id)},
UpdateAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
auth::Role role)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts},
role{std::move(role)} {}
utils::UUID main_uuid;
std::string epoch_id;
uint64_t expected_group_timestamp;
uint64_t new_group_timestamp;
@ -69,13 +74,16 @@ struct DropAuthDataReq {
enum class DataType { USER, ROLE };
DropAuthDataReq(std::string epoch_id, uint64_t expected_ts, uint64_t new_ts, DataType type, std::string_view name)
: epoch_id{std::move(epoch_id)},
DropAuthDataReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_ts, uint64_t new_ts,
DataType type, std::string_view name)
: main_uuid(main_uuid),
epoch_id{std::move(epoch_id)},
expected_group_timestamp{expected_ts},
new_group_timestamp{new_ts},
type{type},
name{name} {}
utils::UUID main_uuid;
std::string epoch_id;
uint64_t expected_group_timestamp;
uint64_t new_group_timestamp;

View File

@ -9,6 +9,7 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_client.hpp"
@ -71,16 +72,17 @@ auto CoordinatorClient::SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCal
auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; }
auto CoordinatorClient::SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool {
auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid,
ReplicationClientsInfo replication_clients_info) const -> bool {
try {
auto stream{rpc_client_.Stream<PromoteReplicaToMainRpc>(std::move(replication_clients_info))};
auto stream{rpc_client_.Stream<PromoteReplicaToMainRpc>(uuid, std::move(replication_clients_info))};
if (!stream.AwaitResponse().success) {
spdlog::error("Failed to receive successful RPC failover response!");
spdlog::error("Failed to receive successful PromoteReplicaToMainRpc response!");
return false;
}
return true;
} catch (rpc::RpcFailedException const &) {
spdlog::error("RPC error occurred while sending failover RPC!");
spdlog::error("RPC error occurred while sending PromoteReplicaToMainRpc!");
}
return false;
}
@ -101,5 +103,19 @@ auto CoordinatorClient::DemoteToReplica() const -> bool {
return false;
}
auto CoordinatorClient::SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool {
try {
auto stream{rpc_client_.Stream<replication_coordination_glue::SwapMainUUIDRpc>(uuid)};
if (!stream.AwaitResponse().success) {
spdlog::error("Failed to receive successful RPC swapping of uuid response!");
return false;
}
return true;
} catch (const rpc::RpcFailedException &) {
spdlog::error("RPC error occurred while sending swapping uuid RPC!");
}
return false;
}
} // namespace memgraph::coordination
#endif
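
A hypothetical sketch (not part of this diff; RpcClient and the exception type are invented stand-ins) of the send-and-check wrapper pattern used by SendSwapMainUUIDRpc above: stream the request, await the response, report failure on a negative reply, and catch transport errors.

#include <iostream>
#include <stdexcept>
#include <string>

struct RpcFailed : std::runtime_error { using std::runtime_error::runtime_error; };

struct FakeRpcClient {
  // Pretend to stream a SwapMainUUID request and return whether the replica accepted it.
  bool SwapMainUuid(const std::string &uuid) {
    if (uuid.empty()) throw RpcFailed("connection dropped");
    return true;
  }
};

bool SendSwapMainUuid(FakeRpcClient &client, const std::string &uuid) {
  try {
    if (!client.SwapMainUuid(uuid)) {
      std::cerr << "Failed to receive a successful SwapMainUUID response!\n";
      return false;
    }
    return true;
  } catch (const RpcFailed &e) {
    std::cerr << "RPC error occurred while sending SwapMainUUID: " << e.what() << '\n';
  }
  return false;
}

int main() {
  FakeRpcClient client;
  std::cout << SendSwapMainUuid(client, "uuid-42") << '\n';  // 1
  std::cout << SendSwapMainUuid(client, "") << '\n';         // 0, after logging the RPC error
}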

View File

@ -11,6 +11,7 @@
#include "coordination/coordinator_instance.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp"
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_data.hpp"
@ -32,60 +33,94 @@ CoordinatorData::CoordinatorData() {
return *instance;
};
replica_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
replica_succ_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing replica successful callback", instance_name);
find_instance(coord_data, instance_name).OnSuccessPing();
auto &instance = find_instance(coord_data, instance_name);
if (!instance.GetMainUUID().has_value() || main_uuid_ != instance.GetMainUUID().value()) {
if (!instance.SendSwapAndUpdateUUID(main_uuid_)) {
spdlog::error(
fmt::format("Failed to swap uuid for replica instance {} which is alive", instance.InstanceName()));
return;
}
}
instance.OnSuccessPing();
};
replica_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing replica failure callback", instance_name);
find_instance(coord_data, instance_name).OnFailPing();
auto &instance = find_instance(coord_data, instance_name);
instance.OnFailPing();
// Reset the instance's main UUID since it has been "down" for at least a second.
// There is a slight delay: if we relied on IsAlive instead, the instance could go down and come back up
// within the IsAlive window, and it would then reset its UUID to nullopt and stop accepting
// RPCs from the valid main.
// TODO(antoniofilipovic): this needs more complex logic here.
// On a successful ping we need to fetch the main UUID the replica is listening to
// and swap it to the correct UUID if it does not match.
instance.SetNewMainUUID();
};
main_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
main_succ_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing main successful callback", instance_name);
auto &instance = find_instance(coord_data, instance_name);
if (instance.IsAlive() || !coord_data->ClusterHasAliveMain_()) {
const auto &instance_uuid = instance.GetMainUUID();
MG_ASSERT(instance_uuid.has_value(), "Instance must have uuid set");
if (main_uuid_ == instance_uuid.value()) {
instance.OnSuccessPing();
return;
}
// TODO(antoniof): make DemoteToReplica idempotent, since main can be demoted to replica but
// the UUID swap can still fail
bool const demoted = instance.DemoteToReplica(coord_data->replica_succ_cb_, coord_data->replica_fail_cb_);
if (demoted) {
instance.OnSuccessPing();
spdlog::info("Instance {} demoted to replica", instance_name);
} else {
spdlog::error("Instance {} failed to become replica", instance_name);
return;
}
if (!instance.SendSwapAndUpdateUUID(main_uuid_)) {
spdlog::error(fmt::format("Failed to swap uuid for demoted main instance {}", instance.InstanceName()));
return;
}
};
main_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
main_fail_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
auto lock = std::lock_guard{coord_data->coord_data_lock_};
spdlog::trace("Instance {} performing main failure callback", instance_name);
find_instance(coord_data, instance_name).OnFailPing();
auto &instance = find_instance(coord_data, instance_name);
instance.OnFailPing();
const auto &instance_uuid = instance.GetMainUUID();
MG_ASSERT(instance_uuid.has_value(), "Instance must have uuid set");
if (!coord_data->ClusterHasAliveMain_()) {
if (!instance.IsAlive() && main_uuid_ == instance_uuid.value()) {
spdlog::info("Cluster without main instance, trying automatic failover");
coord_data->TryFailover();
}
};
}
auto CoordinatorData::ClusterHasAliveMain_() const -> bool {
auto const alive_main = [](CoordinatorInstance const &instance) { return instance.IsMain() && instance.IsAlive(); };
return std::ranges::any_of(registered_instances_, alive_main);
}
auto CoordinatorData::TryFailover() -> void {
auto replica_instances = registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica);
std::vector<CoordinatorInstance *> alive_registered_replica_instances{};
std::ranges::transform(registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica) |
ranges::views::filter(&CoordinatorInstance::IsAlive),
std::back_inserter(alive_registered_replica_instances),
[](CoordinatorInstance &instance) { return &instance; });
auto chosen_replica_instance = std::ranges::find_if(replica_instances, &CoordinatorInstance::IsAlive);
if (chosen_replica_instance == replica_instances.end()) {
// TODO(antoniof) more complex logic of choosing replica instance
CoordinatorInstance *chosen_replica_instance =
!alive_registered_replica_instances.empty() ? alive_registered_replica_instances[0] : nullptr;
if (nullptr == chosen_replica_instance) {
spdlog::warn("Failover failed since all replicas are down!");
return;
}
@ -93,21 +128,39 @@ auto CoordinatorData::TryFailover() -> void {
chosen_replica_instance->PauseFrequentCheck();
utils::OnScopeExit scope_exit{[&chosen_replica_instance] { chosen_replica_instance->ResumeFrequentCheck(); }};
std::vector<ReplClientInfo> repl_clients_info;
repl_clients_info.reserve(std::ranges::distance(replica_instances));
utils::UUID potential_new_main_uuid = utils::UUID{};
spdlog::trace("Generated potential new main uuid");
auto const not_chosen_replica_instance = [&chosen_replica_instance](CoordinatorInstance const &instance) {
return instance != *chosen_replica_instance;
auto not_chosen_instance = [chosen_replica_instance](auto *instance) {
return *instance != *chosen_replica_instance;
};
// If the swap fails for some replicas, the ones that succeeded will be reverted on a later successful ping,
// or another failover will run first, after which the cluster is consistent again.
for (auto *other_replica_instance : alive_registered_replica_instances | ranges::views::filter(not_chosen_instance)) {
if (!other_replica_instance->SendSwapAndUpdateUUID(potential_new_main_uuid)) {
spdlog::error(fmt::format("Failed to swap uuid for instance {} which is alive, aborting failover",
other_replica_instance->InstanceName()));
return;
}
}
std::ranges::transform(registered_instances_ | ranges::views::filter(not_chosen_replica_instance),
std::vector<ReplClientInfo> repl_clients_info;
repl_clients_info.reserve(registered_instances_.size() - 1);
std::ranges::transform(registered_instances_ | ranges::views::filter([chosen_replica_instance](const auto &instance) {
return *chosen_replica_instance != instance;
}),
std::back_inserter(repl_clients_info),
[](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); });
if (!chosen_replica_instance->PromoteToMain(std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) {
if (!chosen_replica_instance->PromoteToMain(potential_new_main_uuid, std::move(repl_clients_info), main_succ_cb_,
main_fail_cb_)) {
spdlog::warn("Failover failed since promoting replica to main failed!");
return;
}
chosen_replica_instance->SetNewMainUUID(potential_new_main_uuid);
main_uuid_ = potential_new_main_uuid;
spdlog::info("Failover successful! Instance {} promoted to main.", chosen_replica_instance->InstanceName());
}
@ -160,14 +213,28 @@ auto CoordinatorData::SetInstanceToMain(std::string instance_name) -> SetInstanc
auto const is_not_new_main = [&instance_name](CoordinatorInstance const &instance) {
return instance.InstanceName() != instance_name;
};
auto potential_new_main_uuid = utils::UUID{};
spdlog::trace("Generated potential new main uuid");
for (auto &other_instance : registered_instances_ | ranges::views::filter(is_not_new_main)) {
if (!other_instance.SendSwapAndUpdateUUID(potential_new_main_uuid)) {
spdlog::error(
fmt::format("Failed to swap uuid for instance {}, aborting setting instance to main", other_instance.InstanceName()));
return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED;
}
}
std::ranges::transform(registered_instances_ | ranges::views::filter(is_not_new_main),
std::back_inserter(repl_clients_info),
[](const CoordinatorInstance &instance) { return instance.ReplicationClientInfo(); });
if (!new_main->PromoteToMain(std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) {
if (!new_main->PromoteToMain(potential_new_main_uuid, std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) {
return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
}
new_main->SetNewMainUUID(potential_new_main_uuid);
main_uuid_ = potential_new_main_uuid;
spdlog::info("Instance {} promoted to main", instance_name);
return SetInstanceToMainCoordinatorStatus::SUCCESS;
}
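
A minimal sketch (not part of this diff; instances and RPCs are simplified stand-ins) of the promotion order the coordinator follows in TryFailover and SetInstanceToMain above: generate a fresh UUID, swap it on every other instance first, then promote the chosen instance, and only then record the new cluster-wide main UUID.

#include <iostream>
#include <string>
#include <vector>

struct Instance {
  std::string name;
  bool SwapMainUuid(const std::string &) { return true; }   // stand-in for the swap RPC
  bool PromoteToMain(const std::string &) { return true; }  // stand-in for the promote RPC
};

bool SetInstanceToMain(std::vector<Instance> &instances, const std::string &new_main,
                       std::string &cluster_main_uuid) {
  const std::string new_uuid = "uuid-" + new_main;  // stand-in for utils::UUID{}
  // 1) Every instance except the new main must accept the new UUID first ...
  for (auto &inst : instances) {
    if (inst.name == new_main) continue;
    if (!inst.SwapMainUuid(new_uuid)) return false;  // corresponds to SWAP_UUID_FAILED
  }
  // 2) ... then the chosen instance is promoted ...
  for (auto &inst : instances) {
    if (inst.name == new_main && !inst.PromoteToMain(new_uuid)) return false;
  }
  // 3) ... and only then does the coordinator remember the new main UUID.
  cluster_main_uuid = new_uuid;
  return true;
}

int main() {
  std::vector<Instance> cluster{{"i1"}, {"i2"}, {"i3"}};
  std::string main_uuid;
  std::cout << SetInstanceToMain(cluster, "i2", main_uuid) << ' ' << main_uuid << '\n';
}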

View File

@ -16,6 +16,7 @@
#include "coordination/coordinator_rpc.hpp"
#include "coordination/include/coordination/coordinator_server.hpp"
#include "replication/state.hpp"
namespace memgraph::dbms {
@ -32,6 +33,29 @@ void CoordinatorHandlers::Register(memgraph::coordination::CoordinatorServer &se
spdlog::info("Received DemoteMainToReplicaRpc from coordinator server");
CoordinatorHandlers::DemoteMainToReplicaHandler(replication_handler, req_reader, res_builder);
});
server.Register<replication_coordination_glue::SwapMainUUIDRpc>(
[&replication_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void {
spdlog::info("Received SwapMainUUIDRPC on coordinator server");
CoordinatorHandlers::SwapMainUUIDHandler(replication_handler, req_reader, res_builder);
});
}
void CoordinatorHandlers::SwapMainUUIDHandler(replication::ReplicationHandler &replication_handler,
slk::Reader *req_reader, slk::Builder *res_builder) {
if (!replication_handler.IsReplica()) {
spdlog::error("Setting main uuid must be performed on replica.");
slk::Save(replication_coordination_glue::SwapMainUUIDRes{false}, res_builder);
return;
}
replication_coordination_glue::SwapMainUUIDReq req;
slk::Load(&req, req_reader);
spdlog::info(fmt::format("Set replica data UUID to main uuid {}", std::string(req.uuid)));
std::get<memgraph::replication::RoleReplicaData>(replication_handler.GetReplState().ReplicationData()).uuid_ =
req.uuid;
slk::Save(replication_coordination_glue::SwapMainUUIDRes{true}, res_builder);
}
void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler,
@ -51,7 +75,7 @@ void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHan
.ip_address = req.replication_client_info.replication_ip_address,
.port = req.replication_client_info.replication_port};
if (!replication_handler.SetReplicationRoleReplica(clients_config)) {
if (!replication_handler.SetReplicationRoleReplica(clients_config, std::nullopt)) {
spdlog::error("Demoting main to replica failed!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
@ -67,18 +91,17 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
}
coordination::PromoteReplicaToMainReq req;
slk::Load(&req, req_reader);
// This can fail because of disk. If it does, the cluster state could get inconsistent.
// We don't handle disk issues.
if (!replication_handler.DoReplicaToMainPromotion()) {
if (const bool success = replication_handler.DoReplicaToMainPromotion(req.main_uuid_); !success) {
spdlog::error("Promoting replica to main failed!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
}
coordination::PromoteReplicaToMainReq req;
slk::Load(&req, req_reader);
auto const converter = [](const auto &repl_info_config) {
return replication::ReplicationClientConfig{
.name = repl_info_config.instance_name,
@ -90,7 +113,7 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
// registering replicas
for (auto const &config : req.replication_clients_info | ranges::views::transform(converter)) {
auto instance_client = replication_handler.RegisterReplica(config);
auto instance_client = replication_handler.RegisterReplica(config, false);
if (instance_client.HasError()) {
using enum memgraph::replication::RegisterReplicaError;
switch (instance_client.GetError()) {
@ -109,13 +132,17 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
spdlog::error("Registered replica could not be persisted!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
case memgraph::query::RegisterReplicaError::ERROR_ACCEPTING_MAIN:
spdlog::error("Replica didn't accept change of main!");
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
return;
case memgraph::query::RegisterReplicaError::CONNECTION_FAILED:
// Connection failure is not a fatal error
break;
}
}
}
spdlog::error(fmt::format("FICO : Promote replica to main was success {}", std::string(req.main_uuid_)));
slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder);
}

View File

@ -49,9 +49,9 @@ auto CoordinatorInstance::IsMain() const -> bool {
return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN;
}
auto CoordinatorInstance::PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
HealthCheckCallback main_fail_cb) -> bool {
if (!client_.SendPromoteReplicaToMainRpc(std::move(repl_clients_info))) {
auto CoordinatorInstance::PromoteToMain(utils::UUID uuid, ReplicationClientsInfo repl_clients_info,
HealthCheckCallback main_succ_cb, HealthCheckCallback main_fail_cb) -> bool {
if (!client_.SendPromoteReplicaToMainRpc(uuid, std::move(repl_clients_info))) {
return false;
}
@ -80,5 +80,17 @@ auto CoordinatorInstance::ReplicationClientInfo() const -> CoordinatorClientConf
return client_.ReplicationClientInfo();
}
auto CoordinatorInstance::GetClient() -> CoordinatorClient & { return client_; }
void CoordinatorInstance::SetNewMainUUID(const std::optional<utils::UUID> &main_uuid) { main_uuid_ = main_uuid; }
auto CoordinatorInstance::GetMainUUID() -> const std::optional<utils::UUID> & { return main_uuid_; }
auto CoordinatorInstance::SendSwapAndUpdateUUID(const utils::UUID &main_uuid) -> bool {
if (!replication_coordination_glue::SendSwapMainUUIDRpc(client_.RpcClient(), main_uuid)) {
return false;
}
SetNewMainUUID(main_uuid);
return true;
}
} // namespace memgraph::coordination
#endif

View File

@ -77,10 +77,12 @@ void Load(memgraph::coordination::PromoteReplicaToMainRes *self, memgraph::slk::
}
void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid_, builder);
memgraph::slk::Save(self.replication_clients_info, builder);
}
void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid_, reader);
memgraph::slk::Load(&self->replication_clients_info, reader);
}

View File

@ -12,7 +12,7 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_server.hpp"
#include "replication_coordination_glue/messages.hpp"
#include "replication_coordination_glue/handler.hpp"
namespace memgraph::coordination {

View File

@ -11,6 +11,7 @@
#pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
@ -44,13 +45,20 @@ class CoordinatorClient {
auto InstanceName() const -> std::string;
auto SocketAddress() const -> std::string;
[[nodiscard]] auto SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool;
[[nodiscard]] auto DemoteToReplica() const -> bool;
auto SendPromoteReplicaToMainRpc(const utils::UUID &uuid, ReplicationClientsInfo replication_clients_info) const
-> bool;
auto SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool;
auto ReplicationClientInfo() const -> ReplClientInfo;
auto SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void;
auto RpcClient() -> rpc::Client & { return rpc_client_; }
friend bool operator==(CoordinatorClient const &first, CoordinatorClient const &second) {
return first.config_ == second.config_;
}

View File

@ -11,17 +11,18 @@
#pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include <list>
#include "coordination/coordinator_instance.hpp"
#include "coordination/coordinator_instance_status.hpp"
#include "coordination/coordinator_server.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "utils/rw_lock.hpp"
#include "utils/thread_pool.hpp"
#include <list>
namespace memgraph::coordination {
class CoordinatorData {
public:
@ -36,12 +37,11 @@ class CoordinatorData {
auto ShowInstances() const -> std::vector<CoordinatorInstanceStatus>;
private:
auto ClusterHasAliveMain_() const -> bool;
mutable utils::RWLock coord_data_lock_{utils::RWLock::Priority::READ};
HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_;
// NOTE: Must be std::list because we rely on pointer stability
std::list<CoordinatorInstance> registered_instances_;
utils::UUID main_uuid_;
};
struct CoordinatorMainReplicaData {

View File

@ -31,6 +31,8 @@ class CoordinatorHandlers {
slk::Builder *res_builder);
static void DemoteMainToReplicaHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader,
slk::Builder *res_builder);
static void SwapMainUUIDHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader,
slk::Builder *res_builder);
};
} // namespace memgraph::dbms

View File

@ -16,6 +16,7 @@
#include "coordination/coordinator_client.hpp"
#include "coordination/coordinator_cluster_config.hpp"
#include "coordination/coordinator_exceptions.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "replication_coordination_glue/role.hpp"
namespace memgraph::coordination {
@ -44,7 +45,7 @@ class CoordinatorInstance {
auto IsReplica() const -> bool;
auto IsMain() const -> bool;
auto PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
auto PromoteToMain(utils::UUID main_uuid, ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
HealthCheckCallback main_fail_cb) -> bool;
auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool;
@ -53,11 +54,25 @@ class CoordinatorInstance {
auto ReplicationClientInfo() const -> ReplClientInfo;
auto GetClient() -> CoordinatorClient &;
void SetNewMainUUID(const std::optional<utils::UUID> &main_uuid = std::nullopt);
auto GetMainUUID() -> const std::optional<utils::UUID> &;
auto SendSwapAndUpdateUUID(const utils::UUID &main_uuid) -> bool;
private:
CoordinatorClient client_;
replication_coordination_glue::ReplicationRole replication_role_;
std::chrono::system_clock::time_point last_response_time_{};
// TODO: Does this need to be atomic? The instance could be alive when we read it and change right after.
bool is_alive_{false};
// For a replica this is the UUID of the current main.
// For a "main" instance it is the same value as in CoordinatorData.
// It is set to nullopt when the replica is down.
// TL;DR: when a replica goes down and comes back up we reset the main UUID it listens to,
// so the swap UUID RPC has to be sent again.
std::optional<utils::UUID> main_uuid_;
friend bool operator==(CoordinatorInstance const &first, CoordinatorInstance const &second) {
return first.client_ == second.client_ && first.replication_role_ == second.replication_role_;

View File

@ -11,6 +11,7 @@
#pragma once
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
@ -26,10 +27,13 @@ struct PromoteReplicaToMainReq {
static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder);
explicit PromoteReplicaToMainReq(std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info)
: replication_clients_info(std::move(replication_clients_info)) {}
explicit PromoteReplicaToMainReq(const utils::UUID &uuid,
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info)
: main_uuid_(uuid), replication_clients_info(std::move(replication_clients_info)) {}
PromoteReplicaToMainReq() = default;
// UUID that the promoted instance will use as the new MAIN's identity.
utils::UUID main_uuid_;
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info;
};
@ -83,22 +87,19 @@ using DemoteMainToReplicaRpc = rpc::RequestResponse<DemoteMainToReplicaReq, Demo
// SLK serialization declarations
namespace memgraph::slk {
// PromoteReplicaToMainRpc
void Save(const memgraph::coordination::PromoteReplicaToMainRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::PromoteReplicaToMainRes *self, memgraph::slk::Reader *reader);
void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
// DemoteMainToReplicaRpc
void Save(const memgraph::coordination::DemoteMainToReplicaRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::DemoteMainToReplicaRes *self, memgraph::slk::Reader *reader);
void Save(const memgraph::coordination::DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::coordination::DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader);
} // namespace memgraph::slk
#endif

View File

@ -30,6 +30,7 @@ enum class SetInstanceToMainCoordinatorStatus : uint8_t {
NOT_COORDINATOR,
SUCCESS,
COULD_NOT_PROMOTE_TO_MAIN,
SWAP_UUID_FAILED
};
} // namespace memgraph::coordination

View File

@ -38,6 +38,8 @@ std::string RegisterReplicaErrorToString(query::RegisterReplicaError error) {
return "CONNECTION_FAILED";
case COULD_NOT_BE_PERSISTED:
return "COULD_NOT_BE_PERSISTED";
case ERROR_ACCEPTING_MAIN:
return "ERROR_ACCEPTING_MAIN";
}
}
@ -52,7 +54,7 @@ void RestoreReplication(replication::RoleMainData &mainData, DatabaseAccess db_a
spdlog::info("Replica {} restoration started for {}.", instance_client.name_, db_acc->name());
const auto &ret = db_acc->storage()->repl_storage_state_.replication_clients_.WithLock(
[&, db_acc](auto &storage_clients) mutable -> utils::BasicResult<query::RegisterReplicaError> {
auto client = std::make_unique<storage::ReplicationStorageClient>(instance_client);
auto client = std::make_unique<storage::ReplicationStorageClient>(instance_client, mainData.uuid_);
auto *storage = db_acc->storage();
client->Start(storage, std::move(db_acc));
// After start the storage <-> replica state should be READY or RECOVERING (if correctly started)
@ -239,14 +241,16 @@ struct DropDatabase : memgraph::system::ISystemAction {
void DoDurability() override { /* Done during DBMS execution */
}
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch,
bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override {
auto check_response = [](const storage::replication::DropDatabaseRes &response) {
return response.result != storage::replication::DropDatabaseRes::Result::FAILURE;
};
return client.SteamAndFinalizeDelta<storage::replication::DropDatabaseRpc>(
check_response, epoch.id(), txn.last_committed_system_timestamp(), txn.timestamp(), uuid_);
check_response, main_uuid, std::string(epoch.id()), txn.last_committed_system_timestamp(), txn.timestamp(),
uuid_);
}
void PostReplication(replication::RoleMainData &mainData) const override {}
@ -323,14 +327,16 @@ struct CreateDatabase : memgraph::system::ISystemAction {
// Done during dbms execution
}
bool DoReplication(replication::ReplicationClient &client, replication::ReplicationEpoch const &epoch,
bool DoReplication(replication::ReplicationClient &client, const utils::UUID &main_uuid,
replication::ReplicationEpoch const &epoch,
memgraph::system::Transaction const &txn) const override {
auto check_response = [](const storage::replication::CreateDatabaseRes &response) {
return response.result != storage::replication::CreateDatabaseRes::Result::FAILURE;
};
return client.SteamAndFinalizeDelta<storage::replication::CreateDatabaseRpc>(
check_response, epoch.id(), txn.last_committed_system_timestamp(), txn.timestamp(), config_);
check_response, main_uuid, std::string(epoch.id()), txn.last_committed_system_timestamp(), txn.timestamp(),
config_);
}
void PostReplication(replication::RoleMainData &mainData) const override {

View File

@ -29,6 +29,7 @@
#include "kvstore/kvstore.hpp"
#include "license/license.hpp"
#include "replication/replication_client.hpp"
#include "replication_coordination_glue/handler.hpp"
#include "storage/v2/config.hpp"
#include "storage/v2/transaction.hpp"
#include "system/system.hpp"
@ -261,6 +262,16 @@ class DbmsHandler {
#endif
}
replication::ReplicationState &ReplicationState() { return repl_state_; }
replication::ReplicationState const &ReplicationState() const { return repl_state_; }
bool IsMain() const { return repl_state_.IsMain(); }
bool IsReplica() const { return repl_state_.IsReplica(); }
#ifdef MG_ENTERPRISE
// coordination::CoordinatorState &CoordinatorState() { return coordinator_state_; }
#endif
/**
* @brief Return the statistics all databases.
*

View File

@ -76,47 +76,84 @@ std::optional<DatabaseAccess> GetDatabaseAccessor(dbms::DbmsHandler *dbms_handle
return std::nullopt;
}
}
void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req) {
spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
current_main_uuid.has_value() ? std::string(current_main_uuid.value()) : "");
}
} // namespace
void InMemoryReplicationHandlers::Register(dbms::DbmsHandler *dbms_handler, replication::ReplicationServer &server) {
server.rpc_server_.Register<storage::replication::HeartbeatRpc>([dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received HeartbeatRpc");
InMemoryReplicationHandlers::HeartbeatHandler(dbms_handler, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::AppendDeltasRpc>(
[dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received AppendDeltasRpc");
InMemoryReplicationHandlers::AppendDeltasHandler(dbms_handler, req_reader, res_builder);
void InMemoryReplicationHandlers::Register(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &data) {
auto &server = *data.server;
server.rpc_server_.Register<storage::replication::HeartbeatRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received HeartbeatRpc");
InMemoryReplicationHandlers::HeartbeatHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::AppendDeltasRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received AppendDeltasRpc");
InMemoryReplicationHandlers::AppendDeltasHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::SnapshotRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SnapshotRpc");
InMemoryReplicationHandlers::SnapshotHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::WalFilesRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received WalFilesRpc");
InMemoryReplicationHandlers::WalFilesHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::CurrentWalRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received CurrentWalRpc");
InMemoryReplicationHandlers::CurrentWalHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::TimestampRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received TimestampRpc");
InMemoryReplicationHandlers::TimestampHandler(dbms_handler, data.uuid_, req_reader, res_builder);
});
server.rpc_server_.Register<replication_coordination_glue::SwapMainUUIDRpc>(
[&data, dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SwapMainUUIDHandler");
InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms_handler, data, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::SnapshotRpc>([dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SnapshotRpc");
InMemoryReplicationHandlers::SnapshotHandler(dbms_handler, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::WalFilesRpc>([dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received WalFilesRpc");
InMemoryReplicationHandlers::WalFilesHandler(dbms_handler, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::CurrentWalRpc>([dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received CurrentWalRpc");
InMemoryReplicationHandlers::CurrentWalHandler(dbms_handler, req_reader, res_builder);
});
server.rpc_server_.Register<storage::replication::TimestampRpc>([dbms_handler](auto *req_reader, auto *res_builder) {
spdlog::debug("Received TimestampRpc");
InMemoryReplicationHandlers::TimestampHandler(dbms_handler, req_reader, res_builder);
});
}
void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler,
replication::RoleReplicaData &role_replica_data,
slk::Reader *req_reader, slk::Builder *res_builder) {
if (!dbms_handler->IsReplica()) {
spdlog::error("Setting main uuid must be performed on replica.");
slk::Save(replication_coordination_glue::SwapMainUUIDRes{false}, res_builder);
return;
}
replication_coordination_glue::SwapMainUUIDReq req;
slk::Load(&req, req_reader);
spdlog::info(fmt::format("Set replica data UUID to main uuid {}", std::string(req.uuid)));
dbms_handler->ReplicationState().TryPersistRoleReplica(role_replica_data.config, req.uuid);
role_replica_data.uuid_ = req.uuid;
slk::Save(replication_coordination_glue::SwapMainUUIDRes{true}, res_builder);
}
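
A simplified sketch (not part of this diff; all types are invented stand-ins) of the replica-side accept path shown above: refuse the swap unless acting as a replica, persist the new MAIN UUID, then update the in-memory copy used for subsequent RPC checks.

#include <iostream>
#include <optional>
#include <string>

struct ReplicaState {
  bool is_replica = true;
  std::optional<std::string> persisted_main_uuid;
  std::optional<std::string> in_memory_main_uuid;
};

bool HandleSwapMainUuid(ReplicaState &state, const std::string &new_uuid) {
  if (!state.is_replica) {
    std::cerr << "Setting main uuid must be performed on a replica.\n";
    return false;
  }
  state.persisted_main_uuid = new_uuid;  // durably remember which MAIN to accept
  state.in_memory_main_uuid = new_uuid;  // and use it for later stale-MAIN checks
  return true;
}

int main() {
  ReplicaState state;
  std::cout << HandleSwapMainUuid(state, "uuid-7") << '\n';  // 1
  state.is_replica = false;
  std::cout << HandleSwapMainUuid(state, "uuid-8") << '\n';  // 0
}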
void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::HeartbeatReq req;
slk::Load(&req, req_reader);
auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
if (!db_acc) {
if (!current_main_uuid.has_value() || req.main_uuid != *current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::HeartbeatReq::kType.name);
storage::replication::HeartbeatRes res{false, 0, ""};
slk::Save(res, res_builder);
return;
}
// TODO: this handler is agnostic of InMemory, move to be reused by on-disk
auto const *storage = db_acc->get()->storage();
storage::replication::HeartbeatRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load(),
@ -124,10 +161,19 @@ void InMemoryReplicationHandlers::HeartbeatHandler(dbms::DbmsHandler *dbms_handl
slk::Save(res, res_builder);
}
void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::AppendDeltasReq req;
slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::AppendDeltasReq::kType.name);
storage::replication::AppendDeltasRes res{false, 0};
slk::Save(res, res_builder);
return;
}
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
if (!db_acc) {
storage::replication::AppendDeltasRes res{false, 0};
@ -187,8 +233,9 @@ void InMemoryReplicationHandlers::AppendDeltasHandler(dbms::DbmsHandler *dbms_ha
spdlog::debug("Replication recovery from append deltas finished, replica is now up to date!");
}
void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::SnapshotReq req;
slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -197,6 +244,12 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle
slk::Save(res, res_builder);
return;
}
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::SnapshotReq::kType.name);
storage::replication::SnapshotRes res{false, 0};
slk::Save(res, res_builder);
return;
}
storage::replication::Decoder decoder(req_reader);
@ -270,8 +323,9 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle
spdlog::debug("Replication recovery from snapshot finished!");
}
void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::WalFilesReq req;
slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -280,6 +334,12 @@ void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handle
slk::Save(res, res_builder);
return;
}
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::WalFilesReq::kType.name);
storage::replication::WalFilesRes res{false, 0};
slk::Save(res, res_builder);
return;
}
const auto wal_file_number = req.file_number;
spdlog::debug("Received WAL files: {}", wal_file_number);
@ -298,8 +358,9 @@ void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handle
spdlog::debug("Replication recovery from WAL files ended successfully, replica is now up to date!");
}
void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::CurrentWalReq req;
slk::Load(&req, req_reader);
auto db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -309,6 +370,13 @@ void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_hand
return;
}
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::CurrentWalReq::kType.name);
storage::replication::CurrentWalRes res{false, 0};
slk::Save(res, res_builder);
return;
}
storage::replication::Decoder decoder(req_reader);
auto *storage = static_cast<storage::InMemoryStorage *>(db_acc->get()->storage());
@ -370,8 +438,9 @@ void InMemoryReplicationHandlers::LoadWal(storage::InMemoryStorage *storage, sto
}
}
void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader,
slk::Builder *res_builder) {
void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handler,
const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
storage::replication::TimestampReq req;
slk::Load(&req, req_reader);
auto const db_acc = GetDatabaseAccessor(dbms_handler, req.uuid);
@ -381,12 +450,20 @@ void InMemoryReplicationHandlers::TimestampHandler(dbms::DbmsHandler *dbms_handl
return;
}
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::TimestampReq::kType.name);
storage::replication::TimestampRes res{false, 0};
slk::Save(res, res_builder);
return;
}
// TODO: this handler is agnostic of InMemory, move to be reused by on-disk
auto const *storage = db_acc->get()->storage();
storage::replication::TimestampRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load()};
slk::Save(res, res_builder);
}
// TODO: Clarify whether this reads and applies all deltas at once or streams them incrementally.
uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage *storage,
storage::durability::BaseDecoder *decoder,
const uint64_t version) {

View File

@ -12,6 +12,7 @@
#pragma once
#include "replication/replication_server.hpp"
#include "replication/state.hpp"
#include "storage/v2/replication/serialization.hpp"
namespace memgraph::storage {
@ -23,21 +24,30 @@ class DbmsHandler;
class InMemoryReplicationHandlers {
public:
static void Register(dbms::DbmsHandler *dbms_handler, replication::ReplicationServer &server);
static void Register(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &data);
private:
// RPC handlers
static void HeartbeatHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void HeartbeatHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void AppendDeltasHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void SnapshotHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void SnapshotHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void WalFilesHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void WalFilesHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void CurrentWalHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void CurrentWalHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void TimestampHandler(dbms::DbmsHandler *dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
static void TimestampHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder);
static void SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &role_replica_data,
slk::Reader *req_reader, slk::Builder *res_builder);
static void LoadWal(storage::InMemoryStorage *storage, storage::replication::Decoder *decoder);

View File

@ -21,7 +21,8 @@ namespace memgraph::dbms {
#ifdef MG_ENTERPRISE
void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder) {
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::storage::replication::CreateDatabaseRes;
CreateDatabaseRes res(CreateDatabaseRes::Result::FAILURE);
@ -35,6 +36,12 @@ void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::storage::replication::CreateDatabaseReq req;
memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, memgraph::storage::replication::CreateDatabaseReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check the epoch; the recovery mechanism uses a full, up-to-date snapshot
// of the set of databases, so no per-epoch history needs to be maintained.
// If MAIN has changed we need to check this new group_timestamp is consistent with
@ -63,7 +70,8 @@ void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::slk::Save(res, res_builder);
}
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, DbmsHandler &dbms_handler,
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::storage::replication::DropDatabaseRes;
DropDatabaseRes res(DropDatabaseRes::Result::FAILURE);
@ -78,6 +86,12 @@ void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_s
memgraph::storage::replication::DropDatabaseReq req;
memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, memgraph::storage::replication::DropDatabaseReq::kType.name);
memgraph::slk::Save(res, res_builder);
return;
}
// Note: No need to check the epoch; the recovery mechanism uses a full, up-to-date snapshot
// of the set of databases, so no per-epoch history needs to be maintained.
// If MAIN has changed we need to check this new group_timestamp is consistent with
@ -177,14 +191,14 @@ void Register(replication::RoleReplicaData const &data, system::ReplicaHandlerAc
dbms::DbmsHandler &dbms_handler) {
// NOTE: Register even without license as the user could add a license at run-time
data.server->rpc_server_.Register<storage::replication::CreateDatabaseRpc>(
[system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
[&data, system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received CreateDatabaseRpc");
CreateDatabaseHandler(system_state_access, dbms_handler, req_reader, res_builder);
CreateDatabaseHandler(system_state_access, data.uuid_, dbms_handler, req_reader, res_builder);
});
data.server->rpc_server_.Register<storage::replication::DropDatabaseRpc>(
[system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
[&data, system_state_access, &dbms_handler](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received DropDatabaseRpc");
DropDatabaseHandler(system_state_access, dbms_handler, req_reader, res_builder);
DropDatabaseHandler(system_state_access, data.uuid_, dbms_handler, req_reader, res_builder);
});
}
#endif

View File

@ -17,11 +17,21 @@
#include "system/state.hpp"
namespace memgraph::dbms {
#ifdef MG_ENTERPRISE
inline void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req) {
spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
current_main_uuid.has_value() ? std::string(current_main_uuid.value()) : "");
}
// RPC handlers
void CreateDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access, DbmsHandler &dbms_handler,
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder);
void DropDatabaseHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
const std::optional<utils::UUID> &current_main_uuid, DbmsHandler &dbms_handler,
slk::Reader *req_reader, slk::Builder *res_builder);
bool SystemRecoveryHandler(DbmsHandler &dbms_handler, const std::vector<storage::SalientConfig> &database_configs);

View File

@ -29,13 +29,15 @@ struct CreateDatabaseReq {
static void Load(CreateDatabaseReq *self, memgraph::slk::Reader *reader);
static void Save(const CreateDatabaseReq &self, memgraph::slk::Builder *builder);
CreateDatabaseReq() = default;
CreateDatabaseReq(std::string_view epoch_id, uint64_t expected_group_timestamp, uint64_t new_group_timestamp,
storage::SalientConfig config)
: epoch_id(std::string(epoch_id)),
CreateDatabaseReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_group_timestamp,
uint64_t new_group_timestamp, storage::SalientConfig config)
: main_uuid(main_uuid),
epoch_id(std::move(epoch_id)),
expected_group_timestamp{expected_group_timestamp},
new_group_timestamp(new_group_timestamp),
config(std::move(config)) {}
utils::UUID main_uuid;
std::string epoch_id;
uint64_t expected_group_timestamp;
uint64_t new_group_timestamp;
@ -65,13 +67,15 @@ struct DropDatabaseReq {
static void Load(DropDatabaseReq *self, memgraph::slk::Reader *reader);
static void Save(const DropDatabaseReq &self, memgraph::slk::Builder *builder);
DropDatabaseReq() = default;
DropDatabaseReq(std::string_view epoch_id, uint64_t expected_group_timestamp, uint64_t new_group_timestamp,
const utils::UUID &uuid)
: epoch_id(std::string(epoch_id)),
DropDatabaseReq(const utils::UUID &main_uuid, std::string epoch_id, uint64_t expected_group_timestamp,
uint64_t new_group_timestamp, const utils::UUID &uuid)
: main_uuid(main_uuid),
epoch_id(std::move(epoch_id)),
expected_group_timestamp{expected_group_timestamp},
new_group_timestamp(new_group_timestamp),
uuid(uuid) {}
utils::UUID main_uuid;
std::string epoch_id;
uint64_t expected_group_timestamp;
uint64_t new_group_timestamp;

src/dbms/utils.hpp Normal file
View File

View File

@ -327,7 +327,7 @@ class ReplQueryHandler {
.port = static_cast<uint16_t>(*port),
};
if (!handler_->SetReplicationRoleReplica(config)) {
if (!handler_->SetReplicationRoleReplica(config, std::nullopt)) {
throw QueryRuntimeException("Couldn't set role to replica!");
}
}
@ -368,7 +368,7 @@ class ReplQueryHandler {
.replica_check_frequency = replica_check_frequency,
.ssl = std::nullopt};
const auto error = handler_->TryRegisterReplica(replication_config).HasError();
const auto error = handler_->TryRegisterReplica(replication_config, true).HasError();
if (error) {
throw QueryRuntimeException(fmt::format("Couldn't register replica '{}'!", name));
@ -518,7 +518,9 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
throw QueryRuntimeException("SET INSTANCE TO MAIN query can only be run on a coordinator!");
case COULD_NOT_PROMOTE_TO_MAIN:
throw QueryRuntimeException(
"Couldn't set replica instance to main!. Check coordinator and replica for more logs");
"Couldn't set replica instance to main! Check coordinator and replica for more logs");
case SWAP_UUID_FAILED:
throw QueryRuntimeException("Couldn't set replica instance to main. Replicas didn't swap uuid of new main.");
case SUCCESS:
break;
}

View File

@ -13,6 +13,7 @@
#include "replication_coordination_glue/role.hpp"
#include "utils/result.hpp"
#include "utils/uuid.hpp"
// BEGIN fwd declares
namespace memgraph::replication {
@ -23,7 +24,13 @@ struct ReplicationClientConfig;
namespace memgraph::query {
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED };
enum class RegisterReplicaError : uint8_t {
NAME_EXISTS,
ENDPOINT_EXISTS,
CONNECTION_FAILED,
COULD_NOT_BE_PERSISTED,
ERROR_ACCEPTING_MAIN
};
enum class UnregisterReplicaResult : uint8_t {
NOT_MAIN,
COULD_NOT_BE_PERSISTED,
@ -39,13 +46,14 @@ struct ReplicationQueryHandler {
virtual bool SetReplicationRoleMain() = 0;
// as MAIN, become REPLICA
virtual bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) = 0;
virtual bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) = 0;
// as MAIN, define and connect to REPLICAs
virtual auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
virtual auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> utils::BasicResult<RegisterReplicaError> = 0;
virtual auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
virtual auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> utils::BasicResult<RegisterReplicaError> = 0;
// as MAIN, remove a REPLICA connection
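
A minimal caller-side sketch of the widened interface above, assuming a concrete ReplicationQueryHandler instance (`handler`) and configs filled in elsewhere; names of the local variables are placeholders:

// Sketch only; error handling reduced to the essentials.
memgraph::replication::ReplicationClientConfig replica_cfg{/* name, ip_address, port, ... */};
if (handler.TryRegisterReplica(replica_cfg, /*send_swap_uuid=*/true).HasError()) {
  // e.g. RegisterReplicaError::ERROR_ACCEPTING_MAIN if the replica refused the uuid swap
}

// Demoting to REPLICA can now record which MAIN this instance should accept:
memgraph::replication::ReplicationServerConfig server_cfg{/* ip_address, port */};
handler.SetReplicationRoleReplica(server_cfg, /*main_uuid=*/std::nullopt);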

View File

@ -21,6 +21,6 @@ target_include_directories(mg-replication PUBLIC include)
find_package(fmt REQUIRED)
target_link_libraries(mg-replication
PUBLIC mg::utils mg::kvstore lib::json mg::rpc mg::slk mg::io mg::repl_coord_glue
PUBLIC mg::utils mg::kvstore lib::json mg::rpc mg::slk mg::io mg::repl_coord_glue mg-flags
PRIVATE fmt::fmt
)

View File

@ -54,7 +54,7 @@ struct ReplicationClient {
} catch (const rpc::RpcFailedException &) {
// Nothing to do...wait for a reconnect
// NOTE: Here we are communicating with the instance connection.
// We don't have access to the undelying client; so the only thing we can do it
// We don't have access to the underlying client, so the only thing we can do is
// tell the callback that this is a reconnection and to check the state
reconnect = true;
}
@ -106,6 +106,9 @@ struct ReplicationClient {
communication::ClientContext rpc_context_;
rpc::Client rpc_client_;
std::chrono::seconds replica_check_frequency_;
// True only when we are migrating from V1 or V2 to V3 in replication durability
// and we want to make the replica listen to the current MAIN
bool try_set_uuid{false};
// TODO: Better, this was the easiest place to put this
enum class State {

View File

@ -21,10 +21,12 @@
#include "status.hpp"
#include "utils/result.hpp"
#include "utils/synchronized.hpp"
#include "utils/uuid.hpp"
#include <atomic>
#include <cstdint>
#include <list>
#include <optional>
#include <variant>
#include <vector>
@ -37,7 +39,11 @@ enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, COULD_
struct RoleMainData {
RoleMainData() = default;
explicit RoleMainData(ReplicationEpoch e) : epoch_(std::move(e)) {}
explicit RoleMainData(ReplicationEpoch e, std::optional<utils::UUID> uuid = std::nullopt) : epoch_(std::move(e)) {
if (uuid) {
uuid_ = *uuid;
}
}
~RoleMainData() = default;
RoleMainData(RoleMainData const &) = delete;
@ -47,11 +53,14 @@ struct RoleMainData {
ReplicationEpoch epoch_;
std::list<ReplicationClient> registered_replicas_{}; // TODO: data race issues
utils::UUID uuid_;
};
struct RoleReplicaData {
ReplicationServerConfig config;
std::unique_ptr<ReplicationServer> server;
// uuid of the MAIN this replica is listening to
std::optional<utils::UUID> uuid_;
};
// Global (instance) level object
@ -83,18 +92,19 @@ struct ReplicationState {
bool HasDurability() const { return nullptr != durability_; }
bool TryPersistRoleMain(std::string new_epoch);
bool TryPersistRoleReplica(const ReplicationServerConfig &config);
bool TryPersistRoleMain(std::string new_epoch, utils::UUID main_uuid);
bool TryPersistRoleReplica(const ReplicationServerConfig &config, const std::optional<utils::UUID> &main_uuid);
bool TryPersistUnregisterReplica(std::string_view name);
bool TryPersistRegisteredReplica(const ReplicationClientConfig &config);
bool TryPersistRegisteredReplica(const ReplicationClientConfig &config, utils::UUID main_uuid);
// TODO: locked access
auto ReplicationData() -> ReplicationData_t & { return replication_data_; }
auto ReplicationData() const -> ReplicationData_t const & { return replication_data_; }
utils::BasicResult<RegisterReplicaError, ReplicationClient *> RegisterReplica(const ReplicationClientConfig &config);
bool SetReplicationRoleMain();
bool SetReplicationRoleReplica(const ReplicationServerConfig &config);
bool SetReplicationRoleMain(const utils::UUID &main_uuid);
bool SetReplicationRoleReplica(const ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid = std::nullopt);
private:
bool HandleVersionMigration(durability::ReplicationRoleEntry &data) const;

View File

@ -31,25 +31,28 @@ constexpr auto *kReplicationReplicaPrefix{"__replication_replica:"}; // introdu
enum class DurabilityVersion : uint8_t {
V1, // no distinct key for replicas
V2, // this version, epoch, replica prefix introduced
V2, // epoch, replica prefix introduced
V3, // this version, main uuid introduced
};
// fragment of key: "__replication_role"
struct MainRole {
ReplicationEpoch epoch{};
std::optional<utils::UUID> main_uuid{};
friend bool operator==(MainRole const &, MainRole const &) = default;
};
// fragment of key: "__replication_role"
struct ReplicaRole {
ReplicationServerConfig config{};
std::optional<utils::UUID> main_uuid{};
friend bool operator==(ReplicaRole const &, ReplicaRole const &) = default;
};
// from key: "__replication_role"
struct ReplicationRoleEntry {
DurabilityVersion version =
DurabilityVersion::V2; // if not latest then migration required for kReplicationReplicaPrefix
DurabilityVersion::V3; // if not latest then migration required for kReplicationReplicaPrefix
std::variant<MainRole, ReplicaRole> role;
friend bool operator==(ReplicationRoleEntry const &, ReplicationRoleEntry const &) = default;

View File

View File

@ -10,7 +10,7 @@
// licenses/APL.txt.
#include "replication/replication_server.hpp"
#include "replication_coordination_glue/messages.hpp"
#include "replication_coordination_glue/handler.hpp"
namespace memgraph::replication {
namespace {

View File

@ -10,12 +10,15 @@
// licenses/APL.txt.
#include "replication/state.hpp"
#include <optional>
#include "flags/replication.hpp"
#include "replication/replication_client.hpp"
#include "replication/replication_server.hpp"
#include "replication/status.hpp"
#include "utils/file.hpp"
#include "utils/result.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp"
constexpr auto kReplicationDirectory = std::string_view{"replication"};
@ -36,9 +39,9 @@ ReplicationState::ReplicationState(std::optional<std::filesystem::path> durabili
durability_ = std::make_unique<kvstore::KVStore>(std::move(repl_dir));
spdlog::info("Replication configuration will be stored and will be automatically restored in case of a crash.");
auto replicationData = FetchReplicationData();
if (replicationData.HasError()) {
switch (replicationData.GetError()) {
auto fetched_replication_data = FetchReplicationData();
if (fetched_replication_data.HasError()) {
switch (fetched_replication_data.GetError()) {
using enum ReplicationState::FetchReplicationError;
case NOTHING_FETCHED: {
spdlog::debug("Cannot find data needed for restore replication role in persisted metadata.");
@ -51,15 +54,21 @@ ReplicationState::ReplicationState(std::optional<std::filesystem::path> durabili
}
}
}
replication_data_ = std::move(replicationData).GetValue();
auto replication_data = std::move(fetched_replication_data).GetValue();
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port && std::holds_alternative<RoleReplicaData>(replication_data)) {
std::get<RoleReplicaData>(replication_data).uuid_.reset();
}
#endif
replication_data_ = std::move(replication_data);
}
bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &config) {
bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) {
if (!HasDurability()) return true;
auto data = durability::ReplicationRoleEntry{.role = durability::ReplicaRole{
.config = config,
}};
auto data =
durability::ReplicationRoleEntry{.role = durability::ReplicaRole{.config = config, .main_uuid = main_uuid}};
if (!durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) {
spdlog::error("Error when saving REPLICA replication role in settings.");
@ -78,11 +87,11 @@ bool ReplicationState::TryPersistRoleReplica(const ReplicationServerConfig &conf
return true;
}
bool ReplicationState::TryPersistRoleMain(std::string new_epoch) {
bool ReplicationState::TryPersistRoleMain(std::string new_epoch, utils::UUID main_uuid) {
if (!HasDurability()) return true;
auto data =
durability::ReplicationRoleEntry{.role = durability::MainRole{.epoch = ReplicationEpoch{std::move(new_epoch)}}};
auto data = durability::ReplicationRoleEntry{
.role = durability::MainRole{.epoch = ReplicationEpoch{std::move(new_epoch)}, .main_uuid = main_uuid}};
if (durability_->Put(durability::kReplicationRoleName, nlohmann::json(data).dump())) {
role_persisted = RolePersisted::YES;
@ -128,7 +137,8 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
return std::visit(
utils::Overloaded{
[&](durability::MainRole &&r) -> FetchReplicationResult_t {
auto res = RoleMainData{std::move(r.epoch)};
auto res =
RoleMainData{std::move(r.epoch), r.main_uuid.has_value() ? r.main_uuid.value() : utils::UUID{}};
auto b = durability_->begin(durability::kReplicationReplicaPrefix);
auto e = durability_->end(durability::kReplicationReplicaPrefix);
for (; b != e; ++b) {
@ -143,6 +153,8 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
}
// Instance clients
res.registered_replicas_.emplace_back(data.config);
// No main_uuid persisted (pre-V3 durability): mark the client to send a uuid swap on reconnect
res.registered_replicas_.back().try_set_uuid = !r.main_uuid.has_value();
} catch (...) {
return FetchReplicationError::PARSE_ERROR;
}
@ -150,7 +162,9 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t {
return {std::move(res)};
},
[&](durability::ReplicaRole &&r) -> FetchReplicationResult_t {
return {RoleReplicaData{r.config, std::make_unique<ReplicationServer>(r.config)}};
// False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
return {RoleReplicaData{r.config, std::make_unique<ReplicationServer>(r.config), r.main_uuid}};
},
},
std::move(data.role));
@ -192,21 +206,29 @@ bool ReplicationState::HandleVersionMigration(durability::ReplicationRoleEntry &
[[fallthrough]];
}
case durability::DurabilityVersion::V2: {
// do nothing - add code if V3 ever happens
if (std::holds_alternative<durability::MainRole>(data.role)) {
auto &main = std::get<durability::MainRole>(data.role);
main.main_uuid = utils::UUID{};
}
data.version = durability::DurabilityVersion::V3;
break;
}
case durability::DurabilityVersion::V3: {
// do nothing - add code if V4 ever happens
break;
}
}
return true;
}
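
In effect, the V2 branch above stamps a fresh uuid onto a persisted MAIN role, while replica-side entries keep main_uuid empty; that missing value is what later sets try_set_uuid when the registered replicas are restored. A rough before/after illustration using only types from this diff:

// Illustration only, not code from the commit.
durability::ReplicationRoleEntry entry{.version = durability::DurabilityVersion::V2,
                                       .role = durability::MainRole{.epoch = ReplicationEpoch{"old-epoch"}}};
// After the V2 case runs:
//   entry.version == durability::DurabilityVersion::V3
//   std::get<durability::MainRole>(entry.role).main_uuid.has_value() == true  // freshly generated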
bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig &config) {
bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig &config, utils::UUID main_uuid) {
if (!HasDurability()) return true;
// If any replicas are persisted then Role must be persisted
if (role_persisted != RolePersisted::YES) {
DMG_ASSERT(IsMain(), "MAIN is expected");
auto epoch_str = std::string(std::get<RoleMainData>(replication_data_).epoch_.id());
if (!TryPersistRoleMain(std::move(epoch_str))) return false;
if (!TryPersistRoleMain(std::move(epoch_str), main_uuid)) return false;
}
auto data = durability::ReplicationReplicaEntry{.config = config};
@ -217,22 +239,28 @@ bool ReplicationState::TryPersistRegisteredReplica(const ReplicationClientConfig
return false;
}
bool ReplicationState::SetReplicationRoleMain() {
bool ReplicationState::SetReplicationRoleMain(const utils::UUID &main_uuid) {
auto new_epoch = utils::GenerateUUID();
if (!TryPersistRoleMain(new_epoch)) {
if (!TryPersistRoleMain(new_epoch, main_uuid)) {
return false;
}
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}};
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}, main_uuid};
return true;
}
bool ReplicationState::SetReplicationRoleReplica(const ReplicationServerConfig &config) {
if (!TryPersistRoleReplica(config)) {
bool ReplicationState::SetReplicationRoleReplica(const ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) {
// False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
if (!TryPersistRoleReplica(config, main_uuid)) {
return false;
}
replication_data_ = RoleReplicaData{config, std::make_unique<ReplicationServer>(config)};
// False positive report for the std::make_unique
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
replication_data_ = RoleReplicaData{config, std::make_unique<ReplicationServer>(config), std::nullopt};
return true;
}
@ -264,7 +292,7 @@ utils::BasicResult<RegisterReplicaError, ReplicationClient *> ReplicationState::
}
// Durability
if (!TryPersistRegisteredReplica(config)) {
if (!TryPersistRegisteredReplica(config, mainData.uuid_)) {
return RegisterReplicaError::COULD_NOT_BE_PERSISTED;
}

View File

@ -26,21 +26,28 @@ constexpr auto *kSSLCertFile = "replica_ssl_cert_file";
constexpr auto *kReplicationRole = "replication_role";
constexpr auto *kEpoch = "epoch";
constexpr auto *kVersion = "durability_version";
constexpr auto *kMainUUID = "main_uuid";
void to_json(nlohmann::json &j, const ReplicationRoleEntry &p) {
auto processMAIN = [&](MainRole const &main) {
j = nlohmann::json{{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN},
{kEpoch, main.epoch.id()}};
auto common = nlohmann::json{{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN},
{kEpoch, main.epoch.id()}};
if (p.version != DurabilityVersion::V1 && p.version != DurabilityVersion::V2) {
MG_ASSERT(main.main_uuid.has_value(), "MAIN should have a main_uuid set for version >= V3");
common[kMainUUID] = main.main_uuid.value();
}
j = std::move(common);
};
auto processREPLICA = [&](ReplicaRole const &replica) {
j = nlohmann::json{
{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA},
{kIpAddress, replica.config.ip_address},
{kPort, replica.config.port}
// TODO: SSL
};
auto common = nlohmann::json{{kVersion, p.version},
{kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA},
{kIpAddress, replica.config.ip_address},
{kPort, replica.config.port}};
if (replica.main_uuid.has_value()) {
common[kMainUUID] = replica.main_uuid.value();
}
j = std::move(common);
};
std::visit(utils::Overloaded{processMAIN, processREPLICA}, p.role);
}
@ -56,7 +63,12 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) {
auto json_epoch = j.value(kEpoch, std::string{});
auto epoch = ReplicationEpoch{};
if (!json_epoch.empty()) epoch.SetEpoch(json_epoch);
p = ReplicationRoleEntry{.version = version, .role = MainRole{.epoch = std::move(epoch)}};
auto main_role = MainRole{.epoch = std::move(epoch)};
if (j.contains(kMainUUID)) {
main_role.main_uuid = j.at(kMainUUID);
}
p = ReplicationRoleEntry{.version = version, .role = std::move(main_role)};
break;
}
case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: {
@ -66,7 +78,13 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) {
j.at(kIpAddress).get_to(ip_address);
j.at(kPort).get_to(port);
auto config = ReplicationServerConfig{.ip_address = std::move(ip_address), .port = port};
p = ReplicationRoleEntry{.version = version, .role = ReplicaRole{.config = std::move(config)}};
auto replica_role = ReplicaRole{.config = std::move(config)};
if (j.contains(kMainUUID)) {
replica_role.main_uuid = j.at(kMainUUID);
}
p = ReplicationRoleEntry{.version = version, .role = std::move(replica_role)};
break;
}
}
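
For orientation, a hedged round-trip sketch of what a V3 MAIN entry persists to after the to_json/from_json changes above; the exact encodings of the enums and utils::UUID depend on their own serializers, and the local names here are placeholders:

// Illustration only, not code from the commit.
durability::ReplicationRoleEntry main_entry{
    .version = durability::DurabilityVersion::V3,
    .role = durability::MainRole{.epoch = ReplicationEpoch{"epoch-id"}, .main_uuid = utils::UUID{}}};
auto dumped = nlohmann::json(main_entry).dump();
// dumped now carries the keys "durability_version", "replication_role", "epoch" and "main_uuid".
durability::ReplicationRoleEntry restored = nlohmann::json::parse(dumped);
// restored should compare equal to main_entry via the defaulted operator==.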

View File

@ -6,6 +6,7 @@ target_sources(mg-repl_coord_glue
messages.hpp
mode.hpp
role.hpp
handler.hpp
PRIVATE
messages.cpp

View File

@ -0,0 +1,41 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include "rpc/client.hpp"
#include "utils/uuid.hpp"
#include "messages.hpp"
#include "rpc/messages.hpp"
namespace memgraph::replication_coordination_glue {
inline bool SendSwapMainUUIDRpc(memgraph::rpc::Client &rpc_client_, const memgraph::utils::UUID &uuid) {
try {
auto stream{rpc_client_.Stream<SwapMainUUIDRpc>(uuid)};
if (!stream.AwaitResponse().success) {
spdlog::error("Failed to receive successful RPC swapping of uuid response!");
return false;
}
return true;
} catch (const memgraph::rpc::RpcFailedException &) {
spdlog::error("RPC error occurred while sending swapping uuid RPC!");
}
return false;
}
inline void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) {
FrequentHeartbeatReq req;
FrequentHeartbeatReq::Load(&req, req_reader);
memgraph::slk::Load(&req, req_reader);
FrequentHeartbeatRes res{};
memgraph::slk::Save(res, res_builder);
}
} // namespace memgraph::replication_coordination_glue
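
The replica-side counterpart of SendSwapMainUUIDRpc is not part of this hunk (the real registration lives in the storage/dbms replication handlers); a minimal sketch of what such a handler has to do, assuming access to the replica's RoleReplicaData:

// Hypothetical sketch, not from this commit.
inline void SwapMainUUIDHandler(memgraph::replication::RoleReplicaData &role_replica_data,
                                memgraph::slk::Reader *req_reader, memgraph::slk::Builder *res_builder) {
  memgraph::replication_coordination_glue::SwapMainUUIDReq req;
  memgraph::slk::Load(&req, req_reader);
  // Remember which MAIN this replica now accepts; every later RPC carries the same uuid
  // and is rejected via LogWrongMain if it does not match.
  role_replica_data.uuid_ = req.uuid;
  memgraph::slk::Save(memgraph::replication_coordination_glue::SwapMainUUIDRes{true}, res_builder);
}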

View File

@ -29,6 +29,25 @@ void Load(memgraph::replication_coordination_glue::FrequentHeartbeatReq * /*self
/* Nothing to serialize */
}
// Serialize code for SwapMainUUIDRes
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDRes &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.success, builder);
}
void Load(memgraph::replication_coordination_glue::SwapMainUUIDRes *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->success, reader);
}
// Serialize code for SwapMainUUIDReq
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.uuid, builder);
}
void Load(memgraph::replication_coordination_glue::SwapMainUUIDReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->uuid, reader);
}
} // namespace memgraph::slk
namespace memgraph::replication_coordination_glue {
@ -39,6 +58,10 @@ constexpr utils::TypeInfo FrequentHeartbeatReq::kType{utils::TypeId::REP_FREQUEN
constexpr utils::TypeInfo FrequentHeartbeatRes::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_RES, "FrequentHeartbeatRes",
nullptr};
constexpr utils::TypeInfo SwapMainUUIDReq::kType{utils::TypeId::COORD_SWAP_UUID_REQ, "SwapUUIDReq", nullptr};
constexpr utils::TypeInfo SwapMainUUIDRes::kType{utils::TypeId::COORD_SWAP_UUID_RES, "SwapUUIDRes", nullptr};
void FrequentHeartbeatReq::Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self, builder);
}
@ -52,12 +75,16 @@ void FrequentHeartbeatRes::Load(FrequentHeartbeatRes *self, memgraph::slk::Reade
memgraph::slk::Load(self, reader);
}
void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) {
FrequentHeartbeatReq req;
FrequentHeartbeatReq::Load(&req, req_reader);
memgraph::slk::Load(&req, req_reader);
FrequentHeartbeatRes res{};
memgraph::slk::Save(res, res_builder);
void SwapMainUUIDReq::Save(const SwapMainUUIDReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self, builder);
}
void SwapMainUUIDReq::Load(SwapMainUUIDReq *self, memgraph::slk::Reader *reader) { memgraph::slk::Load(self, reader); }
void SwapMainUUIDRes::Save(const SwapMainUUIDRes &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self, builder);
}
void SwapMainUUIDRes::Load(SwapMainUUIDRes *self, memgraph::slk::Reader *reader) { memgraph::slk::Load(self, reader); }
} // namespace memgraph::replication_coordination_glue

View File

@ -13,6 +13,7 @@
#include "rpc/messages.hpp"
#include "slk/serialization.hpp"
#include "utils/uuid.hpp"
namespace memgraph::replication_coordination_glue {
@ -36,7 +37,34 @@ struct FrequentHeartbeatRes {
using FrequentHeartbeatRpc = rpc::RequestResponse<FrequentHeartbeatReq, FrequentHeartbeatRes>;
void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder);
struct SwapMainUUIDReq {
static const utils::TypeInfo kType;
static const utils::TypeInfo &GetTypeInfo() { return kType; }
static void Load(SwapMainUUIDReq *self, memgraph::slk::Reader *reader);
static void Save(const SwapMainUUIDReq &self, memgraph::slk::Builder *builder);
explicit SwapMainUUIDReq(const utils::UUID &uuid) : uuid(uuid) {}
SwapMainUUIDReq() = default;
utils::UUID uuid;
};
struct SwapMainUUIDRes {
static const utils::TypeInfo kType;
static const utils::TypeInfo &GetTypeInfo() { return kType; }
static void Load(SwapMainUUIDRes *self, memgraph::slk::Reader *reader);
static void Save(const SwapMainUUIDRes &self, memgraph::slk::Builder *builder);
explicit SwapMainUUIDRes(bool success) : success(success) {}
SwapMainUUIDRes() = default;
bool success;
};
using SwapMainUUIDRpc = rpc::RequestResponse<SwapMainUUIDReq, SwapMainUUIDRes>;
} // namespace memgraph::replication_coordination_glue
@ -46,4 +74,10 @@ void Load(memgraph::replication_coordination_glue::FrequentHeartbeatRes *self, m
void Save(const memgraph::replication_coordination_glue::FrequentHeartbeatReq & /*self*/,
memgraph::slk::Builder * /*builder*/);
void Load(memgraph::replication_coordination_glue::FrequentHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/);
// SwapMainUUIDRpc
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDReq &self, memgraph::slk::Builder *builder);
void Load(memgraph::replication_coordination_glue::SwapMainUUIDReq *self, memgraph::slk::Reader *reader);
void Save(const memgraph::replication_coordination_glue::SwapMainUUIDRes &self, memgraph::slk::Builder *builder);
void Load(memgraph::replication_coordination_glue::SwapMainUUIDRes *self, memgraph::slk::Reader *reader);
} // namespace memgraph::slk

View File

@ -7,8 +7,8 @@ target_sources(mg-replication_handler
include/replication_handler/system_rpc.hpp
PRIVATE
replication_handler.cpp
system_replication.cpp
replication_handler.cpp
system_rpc.cpp
)
target_include_directories(mg-replication_handler PUBLIC include)

View File

@ -22,10 +22,10 @@ inline std::optional<query::RegisterReplicaError> HandleRegisterReplicaStatus(
utils::BasicResult<replication::RegisterReplicaError, replication::ReplicationClient *> &instance_client);
#ifdef MG_ENTERPRISE
void StartReplicaClient(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth);
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid,
system::System *system, auth::SynchedAuth &auth);
#else
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler);
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid);
#endif
#ifdef MG_ENTERPRISE
@ -33,8 +33,8 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
// When being called by interpreter no need to gain lock, it should already be under a system transaction
// But concurrently the FrequentCheck is running and will need to lock before reading last_committed_system_timestamp_
template <bool REQUIRE_LOCK = false>
void SystemRestore(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth) {
void SystemRestore(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler,
const utils::UUID &main_uuid, system::System *system, auth::SynchedAuth &auth) {
// Check if system is up to date
if (client.state_.WithLock(
[](auto &state) { return state == memgraph::replication::ReplicationClient::State::READY; }))
@ -69,12 +69,12 @@ void SystemRestore(replication::ReplicationClient &client, system::System *syste
// Handle only the default database if there is no license
if (!license::global_license_checker.IsEnterpriseValidFast()) {
return client.rpc_client_.Stream<replication::SystemRecoveryRpc>(
db_info.last_committed_timestamp, std::move(db_info.configs), auth::Auth::Config{},
main_uuid, db_info.last_committed_timestamp, std::move(db_info.configs), auth::Auth::Config{},
std::vector<auth::User>{}, std::vector<auth::Role>{});
}
return auth.WithLock([&](auto &locked_auth) {
return client.rpc_client_.Stream<replication::SystemRecoveryRpc>(
db_info.last_committed_timestamp, std::move(db_info.configs), locked_auth.GetConfig(),
main_uuid, db_info.last_committed_timestamp, std::move(db_info.configs), locked_auth.GetConfig(),
locked_auth.AllUsers(), locked_auth.AllRoles());
});
});
@ -109,28 +109,32 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
bool SetReplicationRoleMain() override;
// as MAIN, become REPLICA
bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) override;
bool SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) override;
// as MAIN, define and connect to REPLICAs
auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
auto TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override;
auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
auto RegisterReplica(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> override;
// as MAIN, remove a REPLICA connection
auto UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult override;
bool DoReplicaToMainPromotion();
bool DoReplicaToMainPromotion(const utils::UUID &main_uuid);
// Helper pass-through (TODO: remove)
auto GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole override;
bool IsMain() const override;
bool IsReplica() const override;
auto GetReplState() const -> const memgraph::replication::ReplicationState &;
auto GetReplState() -> memgraph::replication::ReplicationState &;
private:
template <bool HandleFailure>
auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config)
auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
MG_ASSERT(repl_state_.IsMain(), "Only main instance can register a replica!");
@ -154,10 +158,19 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
if (!memgraph::dbms::allow_mt_repl && dbms_handler_.All().size() > 1) {
spdlog::warn("Multi-tenant replication is currently not supported!");
}
const auto main_uuid =
std::get<memgraph::replication::RoleMainData>(dbms_handler_.ReplicationState().ReplicationData()).uuid_;
if (send_swap_uuid) {
if (!memgraph::replication_coordination_glue::SendSwapMainUUIDRpc(maybe_client.GetValue()->rpc_client_,
main_uuid)) {
return memgraph::query::RegisterReplicaError::ERROR_ACCEPTING_MAIN;
}
}
#ifdef MG_ENTERPRISE
// Update system before enabling individual storage <-> replica clients
SystemRestore(*maybe_client.GetValue(), system_, dbms_handler_, auth_);
SystemRestore(*maybe_client.GetValue(), dbms_handler_, main_uuid, system_, auth_);
#endif
const auto dbms_error = HandleRegisterReplicaStatus(maybe_client);
@ -177,8 +190,9 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
if (storage->storage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return;
all_clients_good &= storage->repl_storage_state_.replication_clients_.WithLock(
[storage, &instance_client_ptr, db_acc = std::move(db_acc)](auto &storage_clients) mutable { // NOLINT
auto client = std::make_unique<storage::ReplicationStorageClient>(*instance_client_ptr);
[storage, &instance_client_ptr, db_acc = std::move(db_acc),
main_uuid](auto &storage_clients) mutable { // NOLINT
auto client = std::make_unique<storage::ReplicationStorageClient>(*instance_client_ptr, main_uuid);
// All good, start replica client
client->Start(storage, std::move(db_acc));
// After start the storage <-> replica state should be READY or RECOVERING (if correctly started)
@ -201,9 +215,9 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
// No client error, start instance level client
#ifdef MG_ENTERPRISE
StartReplicaClient(*instance_client_ptr, system_, dbms_handler_, auth_);
StartReplicaClient(*instance_client_ptr, dbms_handler_, main_uuid, system_, auth_);
#else
StartReplicaClient(*instance_client_ptr, dbms_handler_);
StartReplicaClient(*instance_client_ptr, dbms_handler_, main_uuid);
#endif
return {};
}

View File

@ -17,15 +17,23 @@
#include "system/state.hpp"
namespace memgraph::replication {
inline void LogWrongMain(const std::optional<utils::UUID> &current_main_uuid, const utils::UUID &main_req_id,
std::string_view rpc_req) {
spdlog::error("Received {} with main_id: {} != current_main_uuid: {}", rpc_req, std::string(main_req_id),
current_main_uuid.has_value() ? std::string(current_main_uuid.value()) : "");
}
#ifdef MG_ENTERPRISE
void SystemHeartbeatHandler(uint64_t ts, slk::Reader *req_reader, slk::Builder *res_builder);
void SystemHeartbeatHandler(uint64_t ts, const std::optional<utils::UUID> &current_main_uuid, slk::Reader *req_reader,
slk::Builder *res_builder);
void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth, slk::Reader *req_reader,
slk::Builder *res_builder);
std::optional<utils::UUID> &current_main_uuid, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth, slk::Reader *req_reader, slk::Builder *res_builder);
void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth);
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data, auth::SynchedAuth &auth);
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data, auth::SynchedAuth &auth);
#else
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data);
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data);
#endif
} // namespace memgraph::replication

View File

@ -27,6 +27,8 @@ struct SystemHeartbeatReq {
static void Load(SystemHeartbeatReq *self, memgraph::slk::Reader *reader);
static void Save(const SystemHeartbeatReq &self, memgraph::slk::Builder *builder);
SystemHeartbeatReq() = default;
explicit SystemHeartbeatReq(const utils::UUID &main_uuid) : main_uuid(main_uuid) {}
utils::UUID main_uuid;
};
struct SystemHeartbeatRes {
@ -50,14 +52,17 @@ struct SystemRecoveryReq {
static void Load(SystemRecoveryReq *self, memgraph::slk::Reader *reader);
static void Save(const SystemRecoveryReq &self, memgraph::slk::Builder *builder);
SystemRecoveryReq() = default;
SystemRecoveryReq(uint64_t forced_group_timestamp, std::vector<storage::SalientConfig> database_configs,
auth::Auth::Config auth_config, std::vector<auth::User> users, std::vector<auth::Role> roles)
: forced_group_timestamp{forced_group_timestamp},
SystemRecoveryReq(const utils::UUID &main_uuid, uint64_t forced_group_timestamp,
std::vector<storage::SalientConfig> database_configs, auth::Auth::Config auth_config,
std::vector<auth::User> users, std::vector<auth::Role> roles)
: main_uuid(main_uuid),
forced_group_timestamp{forced_group_timestamp},
database_configs(std::move(database_configs)),
auth_config(std::move(auth_config)),
users{std::move(users)},
roles{std::move(roles)} {}
utils::UUID main_uuid;
uint64_t forced_group_timestamp;
std::vector<storage::SalientConfig> database_configs;
auth::Auth::Config auth_config;

View File

@ -24,14 +24,18 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
*/
// Startup replication state (if recovered at startup)
auto replica = [&dbms_handler, &auth](memgraph::replication::RoleReplicaData const &data) {
return memgraph::replication::StartRpcServer(dbms_handler, data, auth);
auto replica = [&dbms_handler, &auth](memgraph::replication::RoleReplicaData &data) {
return StartRpcServer(dbms_handler, data, auth);
};
// Replication recovery and frequent check start
auto main = [system, &dbms_handler, &auth](memgraph::replication::RoleMainData &mainData) {
for (auto &client : mainData.registered_replicas_) {
memgraph::replication::SystemRestore(client, system, dbms_handler, auth);
if (client.try_set_uuid &&
replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, mainData.uuid_)) {
client.try_set_uuid = false;
}
SystemRestore(client, dbms_handler, mainData.uuid_, system, auth);
}
// DBMS here
dbms_handler.ForEach([&mainData](memgraph::dbms::DatabaseAccess db_acc) {
@ -39,7 +43,7 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
});
for (auto &client : mainData.registered_replicas_) {
memgraph::replication::StartReplicaClient(client, system, dbms_handler, auth);
StartReplicaClient(client, dbms_handler, mainData.uuid_, system, auth);
}
// Warning
@ -62,7 +66,7 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state, mem
void RecoverReplication(memgraph::replication::ReplicationState &repl_state,
memgraph::dbms::DbmsHandler &dbms_handler) {
// Startup replication state (if recovered at startup)
auto replica = [&dbms_handler](memgraph::replication::RoleReplicaData const &data) {
auto replica = [&dbms_handler](memgraph::replication::RoleReplicaData &data) {
return memgraph::replication::StartRpcServer(dbms_handler, data);
};
@ -71,7 +75,11 @@ void RecoverReplication(memgraph::replication::ReplicationState &repl_state,
dbms::DbmsHandler::RecoverStorageReplication(dbms_handler.Get(), mainData);
for (auto &client : mainData.registered_replicas_) {
memgraph::replication::StartReplicaClient(client, dbms_handler);
if (client.try_set_uuid &&
replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, mainData.uuid_)) {
client.try_set_uuid = false;
}
memgraph::replication::StartReplicaClient(client, dbms_handler, mainData.uuid_);
}
// Warning
@ -112,10 +120,11 @@ inline std::optional<query::RegisterReplicaError> HandleRegisterReplicaStatus(
}
#ifdef MG_ENTERPRISE
void StartReplicaClient(replication::ReplicationClient &client, system::System *system, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth) {
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler, utils::UUID main_uuid,
system::System *system, auth::SynchedAuth &auth) {
#else
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler) {
void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandler &dbms_handler,
utils::UUID main_uuid) {
#endif
// No client error, start instance level client
auto const &endpoint = client.rpc_client_.Endpoint();
@ -124,8 +133,12 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
#ifdef MG_ENTERPRISE
system = system,
#endif
license = license::global_license_checker.IsEnterpriseValidFast()](
bool reconnect, replication::ReplicationClient &client) mutable {
license = license::global_license_checker.IsEnterpriseValidFast(),
main_uuid](bool reconnect, replication::ReplicationClient &client) mutable {
if (client.try_set_uuid &&
memgraph::replication_coordination_glue::SendSwapMainUUIDRpc(client.rpc_client_, main_uuid)) {
client.try_set_uuid = false;
}
// Working connection
// Check if system needs restoration
if (reconnect) {
@ -138,7 +151,7 @@ void StartReplicaClient(replication::ReplicationClient &client, dbms::DbmsHandle
client.state_.WithLock([](auto &state) { state = memgraph::replication::ReplicationClient::State::BEHIND; });
}
#ifdef MG_ENTERPRISE
SystemRestore<true>(client, system, dbms_handler, auth);
SystemRestore<true>(client, dbms_handler, main_uuid, system, auth);
#endif
// Check if any database has been left behind
dbms_handler.ForEach([&name = client.name_, reconnect](dbms::DatabaseAccess db_acc) {
@ -174,14 +187,15 @@ bool ReplicationHandler::SetReplicationRoleMain() {
};
auto const replica_handler = [this](memgraph::replication::RoleReplicaData const &) {
return DoReplicaToMainPromotion();
return DoReplicaToMainPromotion(utils::UUID{});
};
// TODO: under lock
return std::visit(memgraph::utils::Overloaded{main_handler, replica_handler}, repl_state_.ReplicationData());
}
bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) {
bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config,
const std::optional<utils::UUID> &main_uuid) {
// We don't want to restart the server if we're already a REPLICA
if (repl_state_.IsReplica()) {
return false;
@ -198,27 +212,26 @@ bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::
std::get<memgraph::replication::RoleMainData>(repl_state_.ReplicationData()).registered_replicas_.clear();
// Creates the server
repl_state_.SetReplicationRoleReplica(config);
repl_state_.SetReplicationRoleReplica(config, main_uuid);
// Start
const auto success =
std::visit(memgraph::utils::Overloaded{[](memgraph::replication::RoleMainData const &) {
// ASSERT
return false;
},
[this](memgraph::replication::RoleReplicaData const &data) {
const auto success = std::visit(memgraph::utils::Overloaded{[](memgraph::replication::RoleMainData &) {
// ASSERT
return false;
},
[this](memgraph::replication::RoleReplicaData &data) {
#ifdef MG_ENTERPRISE
return StartRpcServer(dbms_handler_, data, auth_);
return StartRpcServer(dbms_handler_, data, auth_);
#else
return StartRpcServer(dbms_handler_, data);
return StartRpcServer(dbms_handler_, data);
#endif
}},
repl_state_.ReplicationData());
}},
repl_state_.ReplicationData());
// TODO Handle error (restore to main?)
return success;
}
bool ReplicationHandler::DoReplicaToMainPromotion() {
bool ReplicationHandler::DoReplicaToMainPromotion(const utils::UUID &main_uuid) {
// STEP 1) bring down all REPLICA servers
dbms_handler_.ForEach([](dbms::DatabaseAccess db_acc) {
auto *storage = db_acc->storage();
@ -228,7 +241,7 @@ bool ReplicationHandler::DoReplicaToMainPromotion() {
// STEP 2) Change to MAIN
// TODO: restore replication servers if false?
if (!repl_state_.SetReplicationRoleMain()) {
if (!repl_state_.SetReplicationRoleMain(main_uuid)) {
// TODO: Handle recovery on failure???
return false;
}
@ -244,14 +257,16 @@ bool ReplicationHandler::DoReplicaToMainPromotion() {
};
// as MAIN, define and connect to REPLICAs
auto ReplicationHandler::TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
auto ReplicationHandler::TryRegisterReplica(const memgraph::replication::ReplicationClientConfig &config,
bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
return RegisterReplica_<false>(config);
return RegisterReplica_<false>(config, send_swap_uuid);
}
auto ReplicationHandler::RegisterReplica(const memgraph::replication::ReplicationClientConfig &config)
auto ReplicationHandler::RegisterReplica(const memgraph::replication::ReplicationClientConfig &config,
bool send_swap_uuid)
-> memgraph::utils::BasicResult<memgraph::query::RegisterReplicaError> {
return RegisterReplica_<true>(config);
return RegisterReplica_<true>(config, send_swap_uuid);
}
auto ReplicationHandler::UnregisterReplica(std::string_view name) -> memgraph::query::UnregisterReplicaResult {
@ -284,6 +299,10 @@ auto ReplicationHandler::GetRole() const -> memgraph::replication_coordination_g
return repl_state_.GetRole();
}
auto ReplicationHandler::GetReplState() const -> const memgraph::replication::ReplicationState & { return repl_state_; }
auto ReplicationHandler::GetReplState() -> memgraph::replication::ReplicationState & { return repl_state_; }
bool ReplicationHandler::IsMain() const { return repl_state_.IsMain(); }
bool ReplicationHandler::IsReplica() const { return repl_state_.IsReplica(); }

View File

@ -21,7 +21,8 @@
namespace memgraph::replication {
#ifdef MG_ENTERPRISE
void SystemHeartbeatHandler(const uint64_t ts, slk::Reader *req_reader, slk::Builder *res_builder) {
void SystemHeartbeatHandler(const uint64_t ts, const std::optional<utils::UUID> &current_main_uuid,
slk::Reader *req_reader, slk::Builder *res_builder) {
replication::SystemHeartbeatRes res{0};
// Ignore if no license
@ -30,17 +31,23 @@ void SystemHeartbeatHandler(const uint64_t ts, slk::Reader *req_reader, slk::Bui
memgraph::slk::Save(res, res_builder);
return;
}
replication::SystemHeartbeatReq req;
replication::SystemHeartbeatReq::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, replication::SystemHeartbeatRes::kType.name);
replication::SystemHeartbeatRes res(-1);
memgraph::slk::Save(res, res_builder);
return;
}
res = replication::SystemHeartbeatRes{ts};
memgraph::slk::Save(res, res_builder);
}
void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system_state_access,
dbms::DbmsHandler &dbms_handler, auth::SynchedAuth &auth, slk::Reader *req_reader,
slk::Builder *res_builder) {
const std::optional<utils::UUID> &current_main_uuid, dbms::DbmsHandler &dbms_handler,
auth::SynchedAuth &auth, slk::Reader *req_reader, slk::Builder *res_builder) {
using memgraph::replication::SystemRecoveryRes;
SystemRecoveryRes res(SystemRecoveryRes::Result::FAILURE);
@ -49,6 +56,11 @@ void SystemRecoveryHandler(memgraph::system::ReplicaHandlerAccessToState &system
memgraph::replication::SystemRecoveryReq req;
memgraph::slk::Load(&req, req_reader);
if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] {
LogWrongMain(current_main_uuid, req.main_uuid, SystemRecoveryReq::kType.name);
return;
}
/*
* DBMS
*/
@ -74,15 +86,16 @@ void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_
auto system_state_access = dbms_handler.system_->CreateSystemStateAccess();
// System
// TODO: remove, as this is not used
data.server->rpc_server_.Register<replication::SystemHeartbeatRpc>(
[system_state_access](auto *req_reader, auto *res_builder) {
[&data, system_state_access](auto *req_reader, auto *res_builder) {
spdlog::debug("Received SystemHeartbeatRpc");
SystemHeartbeatHandler(system_state_access.LastCommitedTS(), req_reader, res_builder);
SystemHeartbeatHandler(system_state_access.LastCommitedTS(), data.uuid_, req_reader, res_builder);
});
data.server->rpc_server_.Register<replication::SystemRecoveryRpc>(
[system_state_access, &dbms_handler, &auth](auto *req_reader, auto *res_builder) mutable {
[&data, system_state_access, &dbms_handler, &auth](auto *req_reader, auto *res_builder) mutable {
spdlog::debug("Received SystemRecoveryRpc");
SystemRecoveryHandler(system_state_access, dbms_handler, auth, req_reader, res_builder);
SystemRecoveryHandler(system_state_access, data.uuid_, dbms_handler, auth, req_reader, res_builder);
});
// DBMS
@ -94,13 +107,12 @@ void Register(replication::RoleReplicaData const &data, dbms::DbmsHandler &dbms_
#endif
#ifdef MG_ENTERPRISE
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data,
auth::SynchedAuth &auth) {
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data, auth::SynchedAuth &auth) {
#else
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleReplicaData &data) {
bool StartRpcServer(dbms::DbmsHandler &dbms_handler, replication::RoleReplicaData &data) {
#endif
// Register storage handlers
dbms::InMemoryReplicationHandlers::Register(&dbms_handler, *data.server);
dbms::InMemoryReplicationHandlers::Register(&dbms_handler, data);
#ifdef MG_ENTERPRISE
// Register system handlers
Register(data, dbms_handler, auth);
@ -112,4 +124,5 @@ bool StartRpcServer(dbms::DbmsHandler &dbms_handler, const replication::RoleRepl
}
return true;
}
} // namespace memgraph::replication

View File

@ -29,15 +29,16 @@ void Load(memgraph::replication::SystemHeartbeatRes *self, memgraph::slk::Reader
}
// Serialize code for SystemHeartbeatReq
void Save(const memgraph::replication::SystemHeartbeatReq & /*self*/, memgraph::slk::Builder * /*builder*/) {
/* Nothing to serialize */
void Save(const memgraph::replication::SystemHeartbeatReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
}
void Load(memgraph::replication::SystemHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/) {
/* Nothing to serialize */
void Load(memgraph::replication::SystemHeartbeatReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
}
// Serialize code for SystemRecoveryReq
void Save(const memgraph::replication::SystemRecoveryReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.forced_group_timestamp, builder);
memgraph::slk::Save(self.database_configs, builder);
memgraph::slk::Save(self.auth_config, builder);
@ -46,6 +47,7 @@ void Save(const memgraph::replication::SystemRecoveryReq &self, memgraph::slk::B
}
void Load(memgraph::replication::SystemRecoveryReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->forced_group_timestamp, reader);
memgraph::slk::Load(&self->database_configs, reader);
memgraph::slk::Load(&self->auth_config, reader);

View File

@ -214,7 +214,6 @@ class Client {
// Build and send the request.
slk::Save(req_type.id, handler.GetBuilder());
slk::Save(rpc::current_version, handler.GetBuilder());
TRequestResponse::Request::Save(request, handler.GetBuilder());
// Return the handler to the user.

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -28,6 +28,9 @@ constexpr auto v1 = Version{2023'10'30'0'2'13};
// for any TypeIds that get added.
constexpr auto v2 = Version{2023'12'07'0'2'14};
constexpr auto current_version = v2;
// main_uuid was added to each RPC
constexpr auto v3 = Version{2024'02'02'0'2'14};
constexpr auto current_version = v3;
} // namespace memgraph::rpc

View File

@ -18,6 +18,7 @@
#include "storage/v2/inmemory/storage.hpp"
#include "storage/v2/replication/recovery.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp"
namespace memgraph::storage {
@ -26,7 +27,8 @@ namespace memgraph::storage {
// contained in the internal buffer and the file.
class InMemoryCurrentWalHandler {
public:
explicit InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client);
explicit InMemoryCurrentWalHandler(const utils::UUID &main_uuid, InMemoryStorage const *storage,
rpc::Client &rpc_client);
void AppendFilename(const std::string &filename);
void AppendSize(size_t size);
@ -43,8 +45,9 @@ class InMemoryCurrentWalHandler {
};
////// CurrentWalHandler //////
InMemoryCurrentWalHandler::InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client)
: stream_(rpc_client.Stream<replication::CurrentWalRpc>(storage->uuid())) {}
InMemoryCurrentWalHandler::InMemoryCurrentWalHandler(const utils::UUID &main_uuid, InMemoryStorage const *storage,
rpc::Client &rpc_client)
: stream_(rpc_client.Stream<replication::CurrentWalRpc>(main_uuid, storage->uuid())) {}
void InMemoryCurrentWalHandler::AppendFilename(const std::string &filename) {
replication::Encoder encoder(stream_.GetBuilder());
@ -69,10 +72,10 @@ void InMemoryCurrentWalHandler::AppendBufferData(const uint8_t *buffer, const si
replication::CurrentWalRes InMemoryCurrentWalHandler::Finalize() { return stream_.AwaitResponse(); }
////// ReplicationClient Helpers //////
replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &client,
replication::WalFilesRes TransferWalFiles(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::vector<std::filesystem::path> &wal_files) {
MG_ASSERT(!wal_files.empty(), "Wal files list is empty!");
auto stream = client.Stream<replication::WalFilesRpc>(uuid, wal_files.size());
auto stream = client.Stream<replication::WalFilesRpc>(main_uuid, uuid, wal_files.size());
replication::Encoder encoder(stream.GetBuilder());
for (const auto &wal : wal_files) {
spdlog::debug("Sending wal file: {}", wal);
@ -81,16 +84,17 @@ replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &
return stream.AwaitResponse();
}
replication::SnapshotRes TransferSnapshot(const utils::UUID &uuid, rpc::Client &client,
replication::SnapshotRes TransferSnapshot(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::filesystem::path &path) {
auto stream = client.Stream<replication::SnapshotRpc>(uuid);
auto stream = client.Stream<replication::SnapshotRpc>(main_uuid, uuid);
replication::Encoder encoder(stream.GetBuilder());
encoder.WriteFile(path);
return stream.AwaitResponse();
}
uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file) {
InMemoryCurrentWalHandler stream{storage, client};
uint64_t ReplicateCurrentWal(const utils::UUID &main_uuid, const InMemoryStorage *storage, rpc::Client &client,
durability::WalFile const &wal_file) {
InMemoryCurrentWalHandler stream{main_uuid, storage, client};
stream.AppendFilename(wal_file.Path().filename());
utils::InputFile file;
MG_ASSERT(file.Open(wal_file.Path()), "Failed to open current WAL file at {}!", wal_file.Path());

View File

@ -19,13 +19,14 @@ class InMemoryStorage;
////// ReplicationClient Helpers //////
replication::WalFilesRes TransferWalFiles(const utils::UUID &uuid, rpc::Client &client,
replication::WalFilesRes TransferWalFiles(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::vector<std::filesystem::path> &wal_files);
replication::SnapshotRes TransferSnapshot(const utils::UUID &uuid, rpc::Client &client,
replication::SnapshotRes TransferSnapshot(const utils::UUID &main_uuid, const utils::UUID &uuid, rpc::Client &client,
const std::filesystem::path &path);
uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file);
uint64_t ReplicateCurrentWal(const utils::UUID &main_uuid, const InMemoryStorage *storage, rpc::Client &client,
durability::WalFile const &wal_file);
auto GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker,
const InMemoryStorage *storage) -> std::vector<RecoveryStep>;

View File

@ -1847,6 +1847,7 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
// A single transaction will always be contained in a single WAL file.
auto current_commit_timestamp = transaction.commit_timestamp->load(std::memory_order_acquire);
//////// AF: only this calls InitializeTransaction
repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber(), this, db_acc);
auto append_deltas = [&](auto callback) {

View File

@ -14,6 +14,7 @@
#include "storage/v2/storage.hpp"
#include "utils/exceptions.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/uuid.hpp"
#include "utils/variant_helpers.hpp"
#include <algorithm>
@ -25,8 +26,9 @@ template <typename>
namespace memgraph::storage {
ReplicationStorageClient::ReplicationStorageClient(::memgraph::replication::ReplicationClient &client)
: client_{client} {}
ReplicationStorageClient::ReplicationStorageClient(::memgraph::replication::ReplicationClient &client,
utils::UUID main_uuid)
: client_{client}, main_uuid_(main_uuid) {}
void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAccessProtector db_acc) {
uint64_t current_commit_timestamp{kTimestampInitialId};
@ -34,14 +36,13 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce
auto &replStorageState = storage->repl_storage_state_;
auto hb_stream{client_.rpc_client_.Stream<replication::HeartbeatRpc>(
storage->uuid(), replStorageState.last_commit_timestamp_, std::string{replStorageState.epoch_.id()})};
main_uuid_, storage->uuid(), replStorageState.last_commit_timestamp_, std::string{replStorageState.epoch_.id()})};
const auto replica = hb_stream.AwaitResponse();
#ifdef MG_ENTERPRISE // Multi-tenancy is only supported in enterprise
if (!replica.success) { // Replica is missing the current database
client_.state_.WithLock([&](auto &state) {
spdlog::debug("Replica '{}' missing database '{}' - '{}'", client_.name_, storage->name(),
spdlog::debug("Replica '{}' can't respond or missing database '{}' - '{}'", client_.name_, storage->name(),
std::string{storage->uuid()});
state = memgraph::replication::ReplicationClient::State::BEHIND;
});
@ -95,7 +96,7 @@ TimestampInfo ReplicationStorageClient::GetTimestampInfo(Storage const *storage)
info.current_number_of_timestamp_behind_master = 0;
try {
auto stream{client_.rpc_client_.Stream<replication::TimestampRpc>(storage->uuid())};
auto stream{client_.rpc_client_.Stream<replication::TimestampRpc>(main_uuid_, storage->uuid())};
const auto response = stream.AwaitResponse();
const auto is_success = response.success;
@ -173,7 +174,7 @@ void ReplicationStorageClient::StartTransactionReplication(const uint64_t curren
case READY:
MG_ASSERT(!replica_stream_);
try {
replica_stream_.emplace(storage, client_.rpc_client_, current_wal_seq_num);
replica_stream_.emplace(storage, client_.rpc_client_, current_wal_seq_num, main_uuid_);
*locked_state = REPLICATING;
} catch (const rpc::RpcFailedException &) {
*locked_state = MAYBE_BEHIND;
@ -183,6 +184,9 @@ void ReplicationStorageClient::StartTransactionReplication(const uint64_t curren
}
}
//////// AF: you can't finalize transaction replication if you are not replicating
/////// AF: if there is no stream or it is Defunct, then we need to set the replica to MAYBE_BEHIND -> is that even used?
/////// AF:
bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, DatabaseAccessProtector db_acc) {
// We can only check the state because it guarantees to be only
// valid during a single transaction replication (if the assumption
@ -256,36 +260,38 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph:
spdlog::trace("Recovering in step: {}", i++);
try {
rpc::Client &rpcClient = client_.rpc_client_;
std::visit(utils::Overloaded{
[&replica_commit, mem_storage, &rpcClient](RecoverySnapshot const &snapshot) {
spdlog::debug("Sending the latest snapshot file: {}", snapshot);
auto response = TransferSnapshot(mem_storage->uuid(), rpcClient, snapshot);
replica_commit = response.current_commit_timestamp;
},
[&replica_commit, mem_storage, &rpcClient](RecoveryWals const &wals) {
spdlog::debug("Sending the latest wal files");
auto response = TransferWalFiles(mem_storage->uuid(), rpcClient, wals);
replica_commit = response.current_commit_timestamp;
spdlog::debug("Wal files successfully transferred.");
},
[&replica_commit, mem_storage, &rpcClient](RecoveryCurrentWal const &current_wal) {
std::unique_lock transaction_guard(mem_storage->engine_lock_);
if (mem_storage->wal_file_ &&
mem_storage->wal_file_->SequenceNumber() == current_wal.current_wal_seq_num) {
utils::OnScopeExit on_exit([mem_storage]() { mem_storage->wal_file_->EnableFlushing(); });
mem_storage->wal_file_->DisableFlushing();
transaction_guard.unlock();
spdlog::debug("Sending current wal file");
replica_commit = ReplicateCurrentWal(mem_storage, rpcClient, *mem_storage->wal_file_);
} else {
spdlog::debug("Cannot recover using current wal file");
}
},
[](auto const &in) {
static_assert(always_false_v<decltype(in)>, "Missing type from variant visitor");
},
},
recovery_step);
std::visit(
utils::Overloaded{
[&replica_commit, mem_storage, &rpcClient, main_uuid = main_uuid_](RecoverySnapshot const &snapshot) {
spdlog::debug("Sending the latest snapshot file: {}", snapshot);
auto response = TransferSnapshot(main_uuid, mem_storage->uuid(), rpcClient, snapshot);
replica_commit = response.current_commit_timestamp;
},
[&replica_commit, mem_storage, &rpcClient, main_uuid = main_uuid_](RecoveryWals const &wals) {
spdlog::debug("Sending the latest wal files");
auto response = TransferWalFiles(main_uuid, mem_storage->uuid(), rpcClient, wals);
replica_commit = response.current_commit_timestamp;
spdlog::debug("Wal files successfully transferred.");
},
[&replica_commit, mem_storage, &rpcClient,
main_uuid = main_uuid_](RecoveryCurrentWal const &current_wal) {
std::unique_lock transaction_guard(mem_storage->engine_lock_);
if (mem_storage->wal_file_ &&
mem_storage->wal_file_->SequenceNumber() == current_wal.current_wal_seq_num) {
utils::OnScopeExit on_exit([mem_storage]() { mem_storage->wal_file_->EnableFlushing(); });
mem_storage->wal_file_->DisableFlushing();
transaction_guard.unlock();
spdlog::debug("Sending current wal file");
replica_commit = ReplicateCurrentWal(main_uuid, mem_storage, rpcClient, *mem_storage->wal_file_);
} else {
spdlog::debug("Cannot recover using current wal file");
}
},
[](auto const &in) {
static_assert(always_false_v<decltype(in)>, "Missing type from variant visitor");
},
},
recovery_step);
} catch (const rpc::RpcFailedException &) {
replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; });
LogRpcFailure();
@ -314,10 +320,12 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph:
}
////// ReplicaStream //////
ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num)
ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num,
utils::UUID main_uuid)
: storage_{storage},
stream_(rpc_client.Stream<replication::AppendDeltasRpc>(
storage->uuid(), storage->repl_storage_state_.last_commit_timestamp_.load(), current_seq_num)) {
main_uuid, storage->uuid(), storage->repl_storage_state_.last_commit_timestamp_.load(), current_seq_num)),
main_uuid_(main_uuid) {
replication::Encoder encoder{stream_.GetBuilder()};
encoder.WriteString(storage->repl_storage_state_.epoch_.id());
}
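RecoverReplica above dispatches each recovery step with std::visit over a variant, using the overloaded-lambdas idiom (utils::Overloaded) plus a catch-all lambda whose static_assert breaks the build if a new step type is ever added without a handler. A self-contained sketch of the idiom with stand-in step types (SendSnapshot/SendWalFiles/SendCurrentWal are made up, not Memgraph's RecoveryStep alternatives):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <type_traits>
#include <variant>
#include <vector>

// Classic "overloaded" helper: inherit call operators from all lambdas.
template <class... Ts> struct Overloaded : Ts... { using Ts::operator()...; };
template <class... Ts> Overloaded(Ts...) -> Overloaded<Ts...>;  // redundant in C++20, harmless

template <class> inline constexpr bool always_false_v = false;

// Stand-ins for the real recovery steps.
struct SendSnapshot { std::string path; };
struct SendWalFiles { std::vector<std::string> paths; };
struct SendCurrentWal { uint64_t seq_num; };
using RecoveryStep = std::variant<SendSnapshot, SendWalFiles, SendCurrentWal>;

void Apply(const RecoveryStep &step) {
  std::visit(Overloaded{
                 [](const SendSnapshot &s) { std::cout << "snapshot: " << s.path << '\n'; },
                 [](const SendWalFiles &w) { std::cout << w.paths.size() << " wal files\n"; },
                 [](const SendCurrentWal &c) { std::cout << "current wal seq " << c.seq_num << '\n'; },
                 [](const auto &other) {
                   // Only instantiated for an alternative without a dedicated handler.
                   static_assert(always_false_v<std::decay_t<decltype(other)>>, "unhandled recovery step");
                 },
             },
             step);
}

int main() { Apply(SendCurrentWal{.seq_num = 42}); }
```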

View File

@ -28,6 +28,7 @@
#include "utils/scheduler.hpp"
#include "utils/synchronized.hpp"
#include "utils/thread_pool.hpp"
#include "utils/uuid.hpp"
#include <atomic>
#include <concepts>
@ -48,7 +49,7 @@ class ReplicationStorageClient;
// Handler used for transferring the current transaction.
class ReplicaStream {
public:
explicit ReplicaStream(Storage *storage, rpc::Client &rpc_client, uint64_t current_seq_num);
explicit ReplicaStream(Storage *storage, rpc::Client &rpc_client, uint64_t current_seq_num, utils::UUID main_uuid);
/// @throw rpc::RpcFailedException
void AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t final_commit_timestamp);
@ -72,6 +73,7 @@ class ReplicaStream {
private:
Storage *storage_;
rpc::Client::StreamHandler<replication::AppendDeltasRpc> stream_;
utils::UUID main_uuid_;
};
template <typename F>
@ -84,7 +86,7 @@ class ReplicationStorageClient {
friend struct ::memgraph::replication::ReplicationClient;
public:
explicit ReplicationStorageClient(::memgraph::replication::ReplicationClient &client);
explicit ReplicationStorageClient(::memgraph::replication::ReplicationClient &client, utils::UUID main_uuid);
ReplicationStorageClient(ReplicationStorageClient const &) = delete;
ReplicationStorageClient &operator=(ReplicationStorageClient const &) = delete;
@ -202,6 +204,8 @@ class ReplicationStorageClient {
replica_stream_; // Currently active stream (nullopt if not in use), note: a single stream per rpc client
mutable utils::Synchronized<replication::ReplicaState, utils::SpinLock> replica_state_{
replication::ReplicaState::MAYBE_BEHIND};
const utils::UUID main_uuid_;
};
} // namespace memgraph::storage
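A small detail in the ReplicaStream constructor shown earlier: data members are initialized in declaration order (storage_, stream_, main_uuid_), and stream_ is built from the main_uuid constructor parameter rather than the main_uuid_ member, which would still be uninitialized at that point. A tiny sketch of the pitfall this sidesteps, using hypothetical types:

```cpp
#include <cstdint>

struct UuidLike { std::uint64_t hi{0}, lo{0}; };

struct StreamLike {
  explicit StreamLike(UuidLike id) : id_(id) {}
  UuidLike id_;
};

// BAD (kept as a comment): stream_ is declared, and therefore initialized,
// before main_uuid_, so reading main_uuid_ here would observe an uninitialized member.
//
// struct BadStream {
//   explicit BadStream(UuidLike main_uuid) : stream_(main_uuid_), main_uuid_(main_uuid) {}
//   StreamLike stream_;
//   UuidLike main_uuid_;
// };

// OK: initialize the stream from the constructor parameter, as the real code does.
struct GoodStream {
  explicit GoodStream(UuidLike main_uuid) : stream_(main_uuid), main_uuid_(main_uuid) {}
  StreamLike stream_;
  UuidLike main_uuid_;
};

int main() {
  GoodStream s(UuidLike{1, 2});
  (void)s;
}
```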

View File

@ -114,10 +114,12 @@ void Load(memgraph::storage::replication::TimestampRes *self, memgraph::slk::Rea
// Serialize code for TimestampReq
void Save(const memgraph::storage::replication::TimestampReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
}
void Load(memgraph::storage::replication::TimestampReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
}
@ -136,10 +138,12 @@ void Load(memgraph::storage::replication::CurrentWalRes *self, memgraph::slk::Re
// Serialize code for CurrentWalReq
void Save(const memgraph::storage::replication::CurrentWalReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
}
void Load(memgraph::storage::replication::CurrentWalReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
}
@ -158,11 +162,13 @@ void Load(memgraph::storage::replication::WalFilesRes *self, memgraph::slk::Read
// Serialize code for WalFilesReq
void Save(const memgraph::storage::replication::WalFilesReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.file_number, builder);
}
void Load(memgraph::storage::replication::WalFilesReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->file_number, reader);
}
@ -182,10 +188,12 @@ void Load(memgraph::storage::replication::SnapshotRes *self, memgraph::slk::Read
// Serialize code for SnapshotReq
void Save(const memgraph::storage::replication::SnapshotReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
}
void Load(memgraph::storage::replication::SnapshotReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
}
@ -206,12 +214,14 @@ void Load(memgraph::storage::replication::HeartbeatRes *self, memgraph::slk::Rea
// Serialize code for HeartbeatReq
void Save(const memgraph::storage::replication::HeartbeatReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.main_commit_timestamp, builder);
memgraph::slk::Save(self.epoch_id, builder);
}
void Load(memgraph::storage::replication::HeartbeatReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->main_commit_timestamp, reader);
memgraph::slk::Load(&self->epoch_id, reader);
@ -232,12 +242,14 @@ void Load(memgraph::storage::replication::AppendDeltasRes *self, memgraph::slk::
// Serialize code for AppendDeltasReq
void Save(const memgraph::storage::replication::AppendDeltasReq &self, memgraph::slk::Builder *builder) {
memgraph::slk::Save(self.main_uuid, builder);
memgraph::slk::Save(self.uuid, builder);
memgraph::slk::Save(self.previous_commit_timestamp, builder);
memgraph::slk::Save(self.seq_num, builder);
}
void Load(memgraph::storage::replication::AppendDeltasReq *self, memgraph::slk::Reader *reader) {
memgraph::slk::Load(&self->main_uuid, reader);
memgraph::slk::Load(&self->uuid, reader);
memgraph::slk::Load(&self->previous_commit_timestamp, reader);
memgraph::slk::Load(&self->seq_num, reader);
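Each Save/Load pair above must serialize the fields in exactly the same order, with main_uuid now written first; if one side is updated without the other, every subsequent field is silently shifted. A minimal standalone illustration of that order-sensitive round trip, using a plain byte buffer instead of SLK and made-up field names:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Toy append-only buffer standing in for slk::Builder / slk::Reader.
struct Buffer {
  std::vector<std::uint8_t> bytes;
  std::size_t read_pos = 0;
  void Save(std::uint64_t v) {
    const auto old = bytes.size();
    bytes.resize(old + sizeof v);
    std::memcpy(bytes.data() + old, &v, sizeof v);
  }
  std::uint64_t Load() {
    std::uint64_t v;
    std::memcpy(&v, bytes.data() + read_pos, sizeof v);
    read_pos += sizeof v;
    return v;
  }
};

struct ToyReq {
  std::uint64_t main_uuid;  // serialized first, mirroring the new layout
  std::uint64_t uuid;
  std::uint64_t seq_num;
};

void Save(const ToyReq &r, Buffer &b) { b.Save(r.main_uuid); b.Save(r.uuid); b.Save(r.seq_num); }
void Load(ToyReq &r, Buffer &b) { r.main_uuid = b.Load(); r.uuid = b.Load(); r.seq_num = b.Load(); }

int main() {
  Buffer b;
  Save(ToyReq{.main_uuid = 7, .uuid = 11, .seq_num = 42}, b);
  ToyReq out{};
  Load(out, b);
  assert(out.main_uuid == 7 && out.uuid == 11 && out.seq_num == 42);
}
```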

View File

@ -32,9 +32,11 @@ struct AppendDeltasReq {
static void Load(AppendDeltasReq *self, memgraph::slk::Reader *reader);
static void Save(const AppendDeltasReq &self, memgraph::slk::Builder *builder);
AppendDeltasReq() = default;
AppendDeltasReq(const utils::UUID &uuid, uint64_t previous_commit_timestamp, uint64_t seq_num)
: uuid{uuid}, previous_commit_timestamp(previous_commit_timestamp), seq_num(seq_num) {}
AppendDeltasReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t previous_commit_timestamp,
uint64_t seq_num)
: main_uuid{main_uuid}, uuid{uuid}, previous_commit_timestamp(previous_commit_timestamp), seq_num(seq_num) {}
utils::UUID main_uuid;
utils::UUID uuid;
uint64_t previous_commit_timestamp;
uint64_t seq_num;
@ -63,9 +65,11 @@ struct HeartbeatReq {
static void Load(HeartbeatReq *self, memgraph::slk::Reader *reader);
static void Save(const HeartbeatReq &self, memgraph::slk::Builder *builder);
HeartbeatReq() = default;
HeartbeatReq(const utils::UUID &uuid, uint64_t main_commit_timestamp, std::string epoch_id)
: uuid{uuid}, main_commit_timestamp(main_commit_timestamp), epoch_id(std::move(epoch_id)) {}
HeartbeatReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t main_commit_timestamp,
std::string epoch_id)
: main_uuid(main_uuid), uuid{uuid}, main_commit_timestamp(main_commit_timestamp), epoch_id(std::move(epoch_id)) {}
utils::UUID main_uuid;
utils::UUID uuid;
uint64_t main_commit_timestamp;
std::string epoch_id;
@ -95,8 +99,9 @@ struct SnapshotReq {
static void Load(SnapshotReq *self, memgraph::slk::Reader *reader);
static void Save(const SnapshotReq &self, memgraph::slk::Builder *builder);
SnapshotReq() = default;
explicit SnapshotReq(const utils::UUID &uuid) : uuid{uuid} {}
explicit SnapshotReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid{main_uuid}, uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid;
};
@ -123,8 +128,10 @@ struct WalFilesReq {
static void Load(WalFilesReq *self, memgraph::slk::Reader *reader);
static void Save(const WalFilesReq &self, memgraph::slk::Builder *builder);
WalFilesReq() = default;
explicit WalFilesReq(const utils::UUID &uuid, uint64_t file_number) : uuid{uuid}, file_number(file_number) {}
explicit WalFilesReq(const utils::UUID &main_uuid, const utils::UUID &uuid, uint64_t file_number)
: main_uuid{main_uuid}, uuid{uuid}, file_number(file_number) {}
utils::UUID main_uuid;
utils::UUID uuid;
uint64_t file_number;
};
@ -152,8 +159,9 @@ struct CurrentWalReq {
static void Load(CurrentWalReq *self, memgraph::slk::Reader *reader);
static void Save(const CurrentWalReq &self, memgraph::slk::Builder *builder);
CurrentWalReq() = default;
explicit CurrentWalReq(const utils::UUID &uuid) : uuid{uuid} {}
explicit CurrentWalReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid(main_uuid), uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid;
};
@ -180,8 +188,9 @@ struct TimestampReq {
static void Load(TimestampReq *self, memgraph::slk::Reader *reader);
static void Save(const TimestampReq &self, memgraph::slk::Builder *builder);
TimestampReq() = default;
explicit TimestampReq(const utils::UUID &uuid) : uuid{uuid} {}
explicit TimestampReq(const utils::UUID &main_uuid, const utils::UUID &uuid) : main_uuid(main_uuid), uuid{uuid} {}
utils::UUID main_uuid;
utils::UUID uuid;
};
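Every request struct above now carries main_uuid as its first field, so a replica can tell whether a request comes from the MAIN it currently follows and reject traffic from a stale main. A hedged sketch of that check with stand-in types (the real validation lives in the RPC handlers and uses Memgraph's utils::UUID):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Stand-in for utils::UUID.
struct Uuid {
  std::uint64_t hi{0}, lo{0};
  friend bool operator==(const Uuid &a, const Uuid &b) { return a.hi == b.hi && a.lo == b.lo; }
};

struct ToyHeartbeatReq {
  Uuid main_uuid;  // identity of the MAIN that sent the request
  Uuid storage_uuid;
};

// A replica only accepts requests whose main_uuid matches the MAIN it was last
// told to follow; anything else is treated as coming from an unknown or stale main.
bool AcceptRequest(const std::optional<Uuid> &current_main_uuid, const ToyHeartbeatReq &req) {
  if (!current_main_uuid || !(*current_main_uuid == req.main_uuid)) {
    std::cerr << "rejecting request from unknown or stale main\n";
    return false;
  }
  return true;
}

int main() {
  const Uuid known{1, 2};
  std::cout << AcceptRequest(known, ToyHeartbeatReq{.main_uuid = known}) << '\n';         // 1
  std::cout << AcceptRequest(std::nullopt, ToyHeartbeatReq{.main_uuid = known}) << '\n';  // 0
}
```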

View File

@ -1,4 +1,4 @@
// Copyright 2024 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -27,7 +27,7 @@ struct ISystemAction {
virtual void DoDurability() = 0;
/// Prepare the RPC payload that will be sent to all replicas clients
virtual bool DoReplication(memgraph::replication::ReplicationClient &client,
virtual bool DoReplication(memgraph::replication::ReplicationClient &client, const utils::UUID &main_uuid,
memgraph::replication::ReplicationEpoch const &epoch,
Transaction const &system_tx) const = 0;

View File

@ -99,7 +99,7 @@ struct DoReplication {
auto sync_status = AllSyncReplicaStatus::AllCommitsConfirmed;
for (auto &client : main_data_.registered_replicas_) {
bool completed = action.DoReplication(client, main_data_.epoch_, system_tx);
bool completed = action.DoReplication(client, main_data_.uuid_, main_data_.epoch_, system_tx);
if (!completed && client.mode_ == replication_coordination_glue::ReplicationMode::SYNC) {
sync_status = AllSyncReplicaStatus::SomeCommitsUnconfirmed;
}
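The loop above fans each system action out to every registered replica, now passing main_data_.uuid_ along, and only downgrades the overall status when a SYNC replica fails to confirm; an ASYNC failure does not block the commit. A compact sketch of that aggregation rule with stand-in types:

```cpp
#include <iostream>
#include <vector>

enum class Mode { SYNC, ASYNC };
enum class AllSyncStatus { AllConfirmed, SomeUnconfirmed };

struct ToyClient { Mode mode; bool will_confirm; };

AllSyncStatus Replicate(const std::vector<ToyClient> &clients) {
  auto status = AllSyncStatus::AllConfirmed;
  for (const auto &c : clients) {
    const bool completed = c.will_confirm;  // stand-in for action.DoReplication(...)
    if (!completed && c.mode == Mode::SYNC) status = AllSyncStatus::SomeUnconfirmed;
  }
  return status;
}

int main() {
  // A failing ASYNC replica does not change the outcome...
  std::cout << (Replicate({{Mode::ASYNC, false}, {Mode::SYNC, true}}) == AllSyncStatus::AllConfirmed) << '\n';  // 1
  // ...but a failing SYNC replica does.
  std::cout << (Replicate({{Mode::SYNC, false}}) == AllSyncStatus::AllConfirmed) << '\n';  // 0
}
```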

View File

@ -97,12 +97,16 @@ enum class TypeId : uint64_t {
REP_UPDATE_AUTH_DATA_RES,
REP_DROP_AUTH_DATA_REQ,
REP_DROP_AUTH_DATA_RES,
REP_TRY_SET_MAIN_UUID_REQ,
REP_TRY_SET_MAIN_UUID_RES,
// Coordinator
COORD_FAILOVER_REQ,
COORD_FAILOVER_RES,
COORD_SET_REPL_MAIN_REQ,
COORD_SET_REPL_MAIN_RES,
COORD_SWAP_UUID_REQ,
COORD_SWAP_UUID_RES,
// AST
AST_LABELIX = 3000,

View File

@ -3,6 +3,7 @@ find_package(gflags REQUIRED)
copy_e2e_python_files(ha_experimental coordinator.py)
copy_e2e_python_files(ha_experimental automatic_failover.py)
copy_e2e_python_files(ha_experimental manual_setting_replicas.py)
copy_e2e_python_files(ha_experimental not_replicate_from_old_main.py)
copy_e2e_python_files(ha_experimental common.py)
copy_e2e_python_files(ha_experimental workloads.yaml)

View File

@ -13,6 +13,7 @@ import os
import shutil
import sys
import tempfile
import time
import interactive_mg_runner
import pytest
@ -131,6 +132,7 @@ def test_replication_works_on_failover():
mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
expected_data_on_new_main = [
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
("instance_3", "127.0.0.1:10003", "sync", 0, 0, "ready"),
@ -141,8 +143,8 @@ def test_replication_works_on_failover():
execute_and_fetch_all(new_main_cursor, "CREATE ();")
# 6
alive_replica_cursror = connect(host="localhost", port=7689).cursor()
res = execute_and_fetch_all(alive_replica_cursror, "MATCH (n) RETURN count(n) as count;")[0][0]
alive_replica_cursor = connect(host="localhost", port=7689).cursor()
res = execute_and_fetch_all(alive_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be replicated"
interactive_mg_runner.stop_all(MEMGRAPH_INSTANCES_DESCRIPTION)
@ -344,65 +346,60 @@ def test_automatic_failover_main_back_as_replica():
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance3)
def test_automatic_failover_main_back_as_main():
def test_replica_instance_restarts_replication_works():
safe_execute(shutil.rmtree, TEMP_DIR)
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
cursor = connect(host="localhost", port=7690).cursor()
coord_cursor = connect(host="localhost", port=7690).cursor()
def show_repl_cluster():
return sorted(list(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")))
def retrieve_data_show_repl_cluster():
return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;")))
expected_data_all_down = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", False, "unknown"),
("instance_3", "127.0.0.1:10013", False, "unknown"),
]
mg_sleep_and_assert(expected_data_all_down, retrieve_data_show_repl_cluster)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
expected_data_main_back = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", False, "unknown"),
("instance_3", "127.0.0.1:10013", True, "main"),
]
mg_sleep_and_assert(expected_data_main_back, retrieve_data_show_repl_cluster)
instance3_cursor = connect(host="localhost", port=7687).cursor()
def retrieve_data_show_repl_role_instance3():
return sorted(list(execute_and_fetch_all(instance3_cursor, "SHOW REPLICATION ROLE;")))
mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
expected_data_replicas_back = [
expected_data_up = [
("instance_1", "127.0.0.1:10011", True, "replica"),
("instance_2", "127.0.0.1:10012", True, "replica"),
("instance_3", "127.0.0.1:10013", True, "main"),
]
mg_sleep_and_assert(expected_data_up, show_repl_cluster)
mg_sleep_and_assert(expected_data_replicas_back, retrieve_data_show_repl_cluster)
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
expected_data_down = [
("instance_1", "127.0.0.1:10011", False, "unknown"),
("instance_2", "127.0.0.1:10012", True, "replica"),
("instance_3", "127.0.0.1:10013", True, "main"),
]
mg_sleep_and_assert(expected_data_down, show_repl_cluster)
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
mg_sleep_and_assert(expected_data_up, show_repl_cluster)
expected_data_on_main_show_replicas = [
("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"),
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
]
instance3_cursor = connect(host="localhost", port=7687).cursor()
instance1_cursor = connect(host="localhost", port=7688).cursor()
instance2_cursor = connect(host="localhost", port=7689).cursor()
def retrieve_data_show_replicas_on_main():
return sorted(list(execute_and_fetch_all(instance3_cursor, "SHOW REPLICAS;")))
mg_sleep_and_assert(expected_data_on_main_show_replicas, retrieve_data_show_replicas_on_main)
def retrieve_data_show_repl_role_instance1():
return sorted(list(execute_and_fetch_all(instance1_cursor, "SHOW REPLICATION ROLE;")))
def retrieve_data_show_repl_role_instance2():
return sorted(list(execute_and_fetch_all(instance2_cursor, "SHOW REPLICATION ROLE;")))
expected_data_replica = [("replica",)]
mg_sleep_and_assert(expected_data_replica, retrieve_data_show_repl_role_instance1)
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance1)
mg_sleep_and_assert([("replica",)], retrieve_data_show_repl_role_instance2)
mg_sleep_and_assert([("main",)], retrieve_data_show_repl_role_instance3)
execute_and_fetch_all(instance3_cursor, "CREATE ();")
def retrieve_data_replica():
return execute_and_fetch_all(instance1_cursor, "MATCH (n) RETURN count(n);")[0][0]
expected_data_replica = 1
mg_sleep_and_assert(expected_data_replica, retrieve_data_replica)
if __name__ == "__main__":

View File

@ -0,0 +1,117 @@
# Copyright 2024 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import os
import sys
import interactive_mg_runner
import pytest
from common import execute_and_fetch_all
from mg_utils import mg_sleep_and_assert
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
interactive_mg_runner.PROJECT_DIR = os.path.normpath(
os.path.join(interactive_mg_runner.SCRIPT_DIR, "..", "..", "..", "..")
)
interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build"))
interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph"))
MEMGRAPH_FIRST_CLUSTER_DESCRIPTION = {
"shared_replica": {
"args": ["--bolt-port", "7688", "--log-level", "TRACE"],
"log_file": "replica2.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
},
"main1": {
"args": ["--bolt-port", "7687", "--log-level", "TRACE"],
"log_file": "main.log",
"setup_queries": ["REGISTER REPLICA shared_replica SYNC TO '127.0.0.1:10001' ;"],
},
}
MEMGRAPH_INSTANCES_DESCRIPTION = {
"replica": {
"args": ["--bolt-port", "7689", "--log-level", "TRACE"],
"log_file": "replica.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
},
"main_2": {
"args": ["--bolt-port", "7690", "--log-level", "TRACE"],
"log_file": "main_2.log",
"setup_queries": [
"REGISTER REPLICA shared_replica SYNC TO '127.0.0.1:10001' ;",
"REGISTER REPLICA replica SYNC TO '127.0.0.1:10002' ; ",
],
},
}
def test_replication_works_on_failover(connection):
# The goal of this test is to check that, after `shared_replica` becomes part of the new cluster,
# `main` (the old cluster) can no longer write to it
# 1
interactive_mg_runner.start_all_keep_others(MEMGRAPH_FIRST_CLUSTER_DESCRIPTION)
# 2
main_cursor = connection(7687, "main1").cursor()
expected_data_on_main = [
("shared_replica", "127.0.0.1:10001", "sync", 0, 0, "ready"),
]
actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")))
assert actual_data_on_main == expected_data_on_main
# 3
interactive_mg_runner.start_all_keep_others(MEMGRAPH_INSTANCES_DESCRIPTION)
# 4
new_main_cursor = connection(7690, "main_2").cursor()
def retrieve_data_show_replicas():
return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;")))
expected_data_on_new_main = [
("replica", "127.0.0.1:10002", "sync", 0, 0, "ready"),
("shared_replica", "127.0.0.1:10001", "sync", 0, 0, "ready"),
]
mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)
# 5
shared_replica_cursor = connection(7688, "shared_replica").cursor()
with pytest.raises(Exception) as e:
execute_and_fetch_all(main_cursor, "CREATE ();")
assert (
str(e.value)
== "Replication Exception: At least one SYNC replica has not confirmed committing last transaction. Check the status of the replicas using 'SHOW REPLICAS' query."
)
res = execute_and_fetch_all(main_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be created"
res = execute_and_fetch_all(shared_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 0, "Vertex shouldn't be replicated"
# 7
execute_and_fetch_all(new_main_cursor, "CREATE ();")
res = execute_and_fetch_all(new_main_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be created"
res = execute_and_fetch_all(shared_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0]
assert res == 1, "Vertex should be replicated"
interactive_mg_runner.stop_all()
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -35,3 +35,7 @@ workloads:
- name: "Disabled manual setting of replication cluster"
binary: "tests/e2e/pytest_runner.sh"
args: ["high_availability_experimental/manual_setting_replicas.py"]
- name: "Not replicate from old main"
binary: "tests/e2e/pytest_runner.sh"
args: ["high_availability_experimental/not_replicate_from_old_main.py"]

View File

@ -208,6 +208,11 @@ def start_all(context, procdir="", keep_directories=True):
start_instance(context, key, procdir)
def start_all_keep_others(context, procdir="", keep_directories=True):
for key, _ in context.items():
start_instance(context, key, procdir)
def start(context, name, procdir=""):
if name != "all":
start_instance(context, name, procdir)

View File

@ -13,6 +13,7 @@
#include "replication/state.hpp"
#include "replication/status.hpp"
#include "utils/logging.hpp"
#include "utils/uuid.hpp"
#include <gtest/gtest.h>
#include <fstream>
@ -48,6 +49,17 @@ TEST(ReplicationDurability, V2Main) {
ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, V3Main) {
auto const role_entry = ReplicationRoleEntry{
.version = DurabilityVersion::V3,
.role = MainRole{.epoch = ReplicationEpoch{"TEST_STRING"}, .main_uuid = memgraph::utils::UUID{}}};
nlohmann::json j;
to_json(j, role_entry);
ReplicationRoleEntry deser;
from_json(j, deser);
ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, V1Replica) {
auto const role_entry =
ReplicationRoleEntry{.version = DurabilityVersion::V1,
@ -74,6 +86,33 @@ TEST(ReplicationDurability, V2Replica) {
ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, V3ReplicaNoMain) {
auto const role_entry =
ReplicationRoleEntry{.version = DurabilityVersion::V3,
.role = ReplicaRole{
.config = ReplicationServerConfig{.ip_address = "000.123.456.789", .port = 2023},
}};
nlohmann::json j;
to_json(j, role_entry);
ReplicationRoleEntry deser;
from_json(j, deser);
ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, V3ReplicaMain) {
auto const role_entry =
ReplicationRoleEntry{.version = DurabilityVersion::V3,
.role = ReplicaRole{
.config = ReplicationServerConfig{.ip_address = "000.123.456.789", .port = 2023},
.main_uuid = memgraph::utils::UUID{},
}};
nlohmann::json j;
to_json(j, role_entry);
ReplicationRoleEntry deser;
from_json(j, deser);
ASSERT_EQ(role_entry, deser);
}
TEST(ReplicationDurability, ReplicaEntrySync) {
using namespace std::chrono_literals;
using namespace std::string_literals;
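The new durability tests round-trip ReplicationRoleEntry through nlohmann::json via to_json/from_json and compare the result with ASSERT_EQ, so the V3 entries only pass if main_uuid survives (de)serialization. A self-contained sketch of that ADL-based round-trip pattern with a made-up struct and an optional field standing in for main_uuid:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>
#include <string>

#include <nlohmann/json.hpp>

struct ToyRoleEntry {
  std::string epoch;
  std::optional<std::uint64_t> main_id;  // optional, like main_uuid on a replica
  friend bool operator==(const ToyRoleEntry &, const ToyRoleEntry &) = default;
};

// ADL hooks picked up by nlohmann::json, mirroring the to_json/from_json used above.
void to_json(nlohmann::json &j, const ToyRoleEntry &e) {
  j = nlohmann::json{{"epoch", e.epoch}};
  if (e.main_id) j["main_id"] = *e.main_id;
}
void from_json(const nlohmann::json &j, ToyRoleEntry &e) {
  j.at("epoch").get_to(e.epoch);
  if (j.contains("main_id")) e.main_id = j.at("main_id").get<std::uint64_t>();
}

int main() {
  const ToyRoleEntry original{.epoch = "TEST_STRING", .main_id = 42};
  nlohmann::json j;
  to_json(j, original);
  ToyRoleEntry restored;
  from_json(j, restored);
  assert(original == restored);
}
```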

View File

@ -142,17 +142,21 @@ TEST_F(ReplicationTest, BasicSynchronousReplicationTest) {
MinMemgraph replica(repl_conf);
auto replica_store_handler = replica.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
const auto &reg = main.repl_handler.TryRegisterReplica(ReplicationClientConfig{
.name = "REPLICA",
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
});
const auto &reg = main.repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA",
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true);
ASSERT_FALSE(reg.HasError()) << (int)reg.GetError();
// vertex create
@ -435,30 +439,38 @@ TEST_F(ReplicationTest, MultipleSynchronousReplicationTest) {
MinMemgraph replica1(repl_conf);
MinMemgraph replica2(repl2_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true)
.HasError());
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
},
true)
.HasError());
const auto *vertex_label = "label";
@ -585,17 +597,21 @@ TEST_F(ReplicationTest, RecoveryProcess) {
MinMemgraph replica(repl_conf);
auto replica_store_handler = replica.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true)
.HasError());
ASSERT_EQ(main.db.storage()->GetReplicaState(replicas[0]), ReplicaState::RECOVERY);
@ -660,18 +676,22 @@ TEST_F(ReplicationTest, BasicAsynchronousReplicationTest) {
MinMemgraph replica_async(repl_conf);
auto replica_store_handler = replica_async.repl_handler;
replica_store_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
});
replica_store_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = "REPLICA_ASYNC",
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = ports[1],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA_ASYNC",
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = ports[1],
},
true)
.HasError());
static constexpr size_t vertices_create_num = 10;
@ -706,33 +726,41 @@ TEST_F(ReplicationTest, EpochTest) {
MinMemgraph main(main_conf);
MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = 10001,
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = 10001,
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true)
.HasError());
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = 10001,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = 10001,
},
true)
.HasError());
std::optional<Gid> vertex_gid;
@ -761,12 +789,14 @@ TEST_F(ReplicationTest, EpochTest) {
ASSERT_TRUE(replica1.repl_handler.SetReplicationRoleMain());
ASSERT_FALSE(replica1.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = 10001,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = 10001,
},
true)
.HasError());
@ -789,17 +819,21 @@ TEST_F(ReplicationTest, EpochTest) {
ASSERT_FALSE(acc->Commit().HasError());
}
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true)
.HasError());
@ -824,35 +858,43 @@ TEST_F(ReplicationTest, ReplicationInformation) {
MinMemgraph replica1(repl_conf);
uint16_t replica1_port = 10001;
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = replica1_port,
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = replica1_port,
},
std::nullopt);
uint16_t replica2_port = 10002;
MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = replica2_port,
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = replica2_port,
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = replica1_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = replica1_port,
},
true)
.HasError());
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = replica2_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = replica2_port,
},
true)
.HasError());
@ -881,33 +923,41 @@ TEST_F(ReplicationTest, ReplicationReplicaWithExistingName) {
MinMemgraph replica1(repl_conf);
uint16_t replica1_port = 10001;
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = replica1_port,
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = replica1_port,
},
std::nullopt);
uint16_t replica2_port = 10002;
MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = replica2_port,
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = replica2_port,
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = replica1_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = replica1_port,
},
true)
.HasError());
ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = replica2_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = replica2_port,
},
true)
.GetError() == RegisterReplicaError::NAME_EXISTS);
}
@ -916,33 +966,41 @@ TEST_F(ReplicationTest, ReplicationReplicaWithExistingEndPoint) {
MinMemgraph main(main_conf);
MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = common_port,
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = common_port,
},
std::nullopt);
MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = common_port,
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = common_port,
},
std::nullopt);
ASSERT_FALSE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = common_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = common_port,
},
true)
.HasError());
ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = common_port,
})
.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::ASYNC,
.ip_address = local_host,
.port = common_port,
},
true)
.GetError() == RegisterReplicaError::ENDPOINT_EXISTS);
}
@ -965,30 +1023,38 @@ TEST_F(ReplicationTest, RestoringReplicationAtStartupAfterDroppingReplica) {
std::optional<MinMemgraph> main(main_config);
MinMemgraph replica1(replica1_config);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
MinMemgraph replica2(replica2_config);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
},
std::nullopt);
auto res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
});
auto res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true);
ASSERT_FALSE(res.HasError()) << (int)res.GetError();
res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
});
res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
},
true);
ASSERT_FALSE(res.HasError()) << (int)res.GetError();
auto replica_infos = main->db.storage()->ReplicasInfo();
@ -1022,30 +1088,38 @@ TEST_F(ReplicationTest, RestoringReplicationAtStartup) {
std::optional<MinMemgraph> main(main_config);
MinMemgraph replica1(repl_conf);
replica1.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
});
replica1.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[0],
},
std::nullopt);
MinMemgraph replica2(repl2_conf);
replica2.repl_handler.SetReplicationRoleReplica(ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
});
auto res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
});
replica2.repl_handler.SetReplicationRoleReplica(
ReplicationServerConfig{
.ip_address = local_host,
.port = ports[1],
},
std::nullopt);
auto res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[0],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true);
ASSERT_FALSE(res.HasError());
res = main->repl_handler.TryRegisterReplica(ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
});
res = main->repl_handler.TryRegisterReplica(
ReplicationClientConfig{
.name = replicas[1],
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[1],
},
true);
ASSERT_FALSE(res.HasError());
auto replica_infos = main->db.storage()->ReplicasInfo();
@ -1083,11 +1157,13 @@ TEST_F(ReplicationTest, AddingInvalidReplica) {
MinMemgraph main(main_conf);
ASSERT_TRUE(main.repl_handler
.TryRegisterReplica(ReplicationClientConfig{
.name = "REPLICA",
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
})
.GetError() == RegisterReplicaError::CONNECTION_FAILED);
.TryRegisterReplica(
ReplicationClientConfig{
.name = "REPLICA",
.mode = ReplicationMode::SYNC,
.ip_address = local_host,
.port = ports[0],
},
true)
.GetError() == RegisterReplicaError::ERROR_ACCEPTING_MAIN);
}