Shared (Un)Registration networking part with raft
This commit is contained in:
parent
56da8dbb7d
commit
2069cfefbd
@ -35,30 +35,30 @@ auto CoordinatorClusterState::InsertInstance(std::string const &instance_name, R
|
|||||||
instance_roles[instance_name] = role;
|
instance_roles[instance_name] = role;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto CoordinatorClusterState::DoAction(std::string const &instance_name, RaftLogAction log_action) -> void {
|
auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void {
|
||||||
switch (log_action) {
|
switch (log_action) {
|
||||||
case RaftLogAction::REGISTER_REPLICATION_INSTANCE: {
|
case RaftLogAction::REGISTER_REPLICATION_INSTANCE: {
|
||||||
|
auto const instance_name = std::get<CoordinatorClientConfig>(log_entry).instance_name;
|
||||||
instance_roles[instance_name] = ReplicationRole::REPLICA;
|
instance_roles[instance_name] = ReplicationRole::REPLICA;
|
||||||
spdlog::info("Instance {} registered", instance_name);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: {
|
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: {
|
||||||
|
auto const instance_name = std::get<std::string>(log_entry);
|
||||||
instance_roles.erase(instance_name);
|
instance_roles.erase(instance_name);
|
||||||
spdlog::info("Instance {} unregistered", instance_name);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case RaftLogAction::SET_INSTANCE_AS_MAIN: {
|
case RaftLogAction::SET_INSTANCE_AS_MAIN: {
|
||||||
|
auto const instance_name = std::get<std::string>(log_entry);
|
||||||
auto it = instance_roles.find(instance_name);
|
auto it = instance_roles.find(instance_name);
|
||||||
MG_ASSERT(it != instance_roles.end(), "Instance does not exist as part of raft state!");
|
MG_ASSERT(it != instance_roles.end(), "Instance does not exist as part of raft state!");
|
||||||
it->second = ReplicationRole::MAIN;
|
it->second = ReplicationRole::MAIN;
|
||||||
spdlog::info("Instance {} set as main", instance_name);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case RaftLogAction::SET_INSTANCE_AS_REPLICA: {
|
case RaftLogAction::SET_INSTANCE_AS_REPLICA: {
|
||||||
|
auto const instance_name = std::get<std::string>(log_entry);
|
||||||
auto it = instance_roles.find(instance_name);
|
auto it = instance_roles.find(instance_name);
|
||||||
MG_ASSERT(it != instance_roles.end(), "Instance does not exist as part of raft state!");
|
MG_ASSERT(it != instance_roles.end(), "Instance does not exist as part of raft state!");
|
||||||
it->second = ReplicationRole::REPLICA;
|
it->second = ReplicationRole::REPLICA;
|
||||||
spdlog::info("Instance {} set as replica", instance_name);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,6 +34,7 @@ using nuraft::srv_config;
|
|||||||
CoordinatorInstance::CoordinatorInstance()
|
CoordinatorInstance::CoordinatorInstance()
|
||||||
: raft_state_(RaftState::MakeRaftState(
|
: raft_state_(RaftState::MakeRaftState(
|
||||||
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StartFrequentCheck); },
|
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StartFrequentCheck); },
|
||||||
|
<<<<<<< HEAD
|
||||||
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StopFrequentCheck); })) {
|
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StopFrequentCheck); })) {
|
||||||
<<<<<<< HEAD
|
<<<<<<< HEAD
|
||||||
<<<<<<< HEAD
|
<<<<<<< HEAD
|
||||||
@ -307,10 +308,17 @@ CoordinatorInstance::CoordinatorInstance()
|
|||||||
>>>>>>> 99c53148c (registration backed-up by raft)
|
>>>>>>> 99c53148c (registration backed-up by raft)
|
||||||
||||||| parent of 9081c5c24 (Optional main on unregistering)
|
||||||| parent of 9081c5c24 (Optional main on unregistering)
|
||||||
=======
|
=======
|
||||||
|
||||||| parent of fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
|
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StopFrequentCheck); })) {
|
||||||
|
=======
|
||||||
|
[this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StopFrequentCheck); },
|
||||||
|
[this](TRaftLog const &log_entry, RaftLogAction log_action) {
|
||||||
|
OnRaftCommitCallback(log_entry, log_action);
|
||||||
|
})) {
|
||||||
|
>>>>>>> fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
client_succ_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void {
|
client_succ_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void {
|
||||||
auto lock = std::unique_lock{self->coord_instance_lock_};
|
auto lock = std::unique_lock{self->coord_instance_lock_};
|
||||||
auto &repl_instance = self->FindReplicationInstance(repl_instance_name);
|
auto &repl_instance = self->FindReplicationInstance(repl_instance_name);
|
||||||
|
|
||||||
std::invoke(repl_instance.GetSuccessCallback(), self, repl_instance_name, std::move(lock));
|
std::invoke(repl_instance.GetSuccessCallback(), self, repl_instance_name, std::move(lock));
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -563,6 +571,7 @@ auto CoordinatorInstance::TryFailover() -> void {
|
|||||||
spdlog::info("Failover successful! Instance {} promoted to main.", new_main->InstanceName());
|
spdlog::info("Failover successful! Instance {} promoted to main.", new_main->InstanceName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
<<<<<<< HEAD
|
||||||
// TODO: (andi) Make sure you cannot put coordinator instance to the main
|
// TODO: (andi) Make sure you cannot put coordinator instance to the main
|
||||||
auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name)
|
auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name)
|
||||||
-> SetInstanceToMainCoordinatorStatus {
|
-> SetInstanceToMainCoordinatorStatus {
|
||||||
@ -768,6 +777,132 @@ void CoordinatorInstance::ReplicaFailCallback(std::string_view repl_instance_nam
|
|||||||
}
|
}
|
||||||
|
|
||||||
||||||| parent of b1af5ceeb (Move ReplRole to ClusterState)
|
||||||| parent of b1af5ceeb (Move ReplRole to ClusterState)
|
||||||
|
||||||| parent of fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
|
// TODO: (andi) Make sure you cannot put coordinator instance to the main
|
||||||
|
auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name)
|
||||||
|
-> SetInstanceToMainCoordinatorStatus {
|
||||||
|
auto lock = std::lock_guard{coord_instance_lock_};
|
||||||
|
|
||||||
|
if (raft_state_.MainExists()) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::MAIN_ALREADY_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const is_new_main = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() == instance_name;
|
||||||
|
};
|
||||||
|
auto new_main = std::ranges::find_if(repl_instances_, is_new_main);
|
||||||
|
|
||||||
|
if (new_main == repl_instances_.end()) {
|
||||||
|
spdlog::error("Instance {} not registered. Please register it using REGISTER INSTANCE {}", instance_name,
|
||||||
|
instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::NO_INSTANCE_WITH_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!raft_state_.RequestLeadership()) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::NOT_LEADER;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const res = raft_state_.AppendSetInstanceAsMain(instance_name);
|
||||||
|
if (!res->get_accepted()) {
|
||||||
|
spdlog::error(
|
||||||
|
"Failed to accept request for promoting instance {}. Most likely the reason is that the instance is not "
|
||||||
|
"the leader.",
|
||||||
|
instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::RAFT_COULD_NOT_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
|
||||||
|
spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code());
|
||||||
|
return SetInstanceToMainCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_main->PauseFrequentCheck();
|
||||||
|
utils::OnScopeExit scope_exit{[&new_main] { new_main->ResumeFrequentCheck(); }};
|
||||||
|
|
||||||
|
auto const is_not_new_main = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() != instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto const new_main_uuid = utils::UUID{};
|
||||||
|
|
||||||
|
for (auto &other_instance : repl_instances_ | ranges::views::filter(is_not_new_main)) {
|
||||||
|
if (!other_instance.SendSwapAndUpdateUUID(new_main_uuid)) {
|
||||||
|
spdlog::error(
|
||||||
|
fmt::format("Failed to swap uuid for instance {}, aborting failover", other_instance.InstanceName()));
|
||||||
|
return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto repl_clients_info = repl_instances_ | ranges::views::filter(is_not_new_main) |
|
||||||
|
ranges::views::transform(&ReplicationInstance::ReplicationClientInfo) |
|
||||||
|
ranges::to<ReplicationClientsInfo>();
|
||||||
|
|
||||||
|
if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), &CoordinatorInstance::MainSuccessCallback,
|
||||||
|
&CoordinatorInstance::MainFailCallback)) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
main_uuid_ = new_main_uuid;
|
||||||
|
spdlog::info("Instance {} promoted to main", instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig config)
|
||||||
|
-> RegisterInstanceCoordinatorStatus {
|
||||||
|
auto lock = std::lock_guard{coord_instance_lock_};
|
||||||
|
|
||||||
|
auto instance_name = config.instance_name;
|
||||||
|
|
||||||
|
auto const name_matches = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() == instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, name_matches)) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::NAME_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const socket_address_matches = [&config](ReplicationInstance const &instance) {
|
||||||
|
return instance.SocketAddress() == config.SocketAddress();
|
||||||
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, socket_address_matches)) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::ENDPOINT_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!raft_state_.RequestLeadership()) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::NOT_LEADER;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const res = raft_state_.AppendRegisterReplicationInstance(instance_name);
|
||||||
|
if (!res->get_accepted()) {
|
||||||
|
spdlog::error(
|
||||||
|
"Failed to accept request for registering instance {}. Most likely the reason is that the instance is not "
|
||||||
|
"the "
|
||||||
|
"leader.",
|
||||||
|
config.instance_name);
|
||||||
|
return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
spdlog::info("Request for registering instance {} accepted", instance_name);
|
||||||
|
try {
|
||||||
|
repl_instances_.emplace_back(this, std::move(config), client_succ_cb_, client_fail_cb_,
|
||||||
|
&CoordinatorInstance::ReplicaSuccessCallback,
|
||||||
|
&CoordinatorInstance::ReplicaFailCallback);
|
||||||
|
} catch (CoordinatorRegisterInstanceException const &) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::RPC_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
|
||||||
|
spdlog::error("Failed to register instance {} with error code {}", instance_name, res->get_result_code());
|
||||||
|
return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
||||||
|
}
|
||||||
|
|
||||||
|
spdlog::info("Instance {} registered", instance_name);
|
||||||
|
return RegisterInstanceCoordinatorStatus::SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
=======
|
||||||
|
>>>>>>> fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name,
|
void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name,
|
||||||
std::unique_lock<utils::ResourceLock> lock) {
|
std::unique_lock<utils::ResourceLock> lock) {
|
||||||
MG_ASSERT(lock.owns_lock(), "Callback doesn't own lock");
|
MG_ASSERT(lock.owns_lock(), "Callback doesn't own lock");
|
||||||
@ -890,6 +1025,7 @@ void CoordinatorInstance::ReplicaFailCallback(std::string_view repl_instance_nam
|
|||||||
repl_instance.OnFailPing();
|
repl_instance.OnFailPing();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
<<<<<<< HEAD
|
||||||
=======
|
=======
|
||||||
>>>>>>> b1af5ceeb (Move ReplRole to ClusterState)
|
>>>>>>> b1af5ceeb (Move ReplRole to ClusterState)
|
||||||
||||||| parent of 9081c5c24 (Optional main on unregistering)
|
||||||| parent of 9081c5c24 (Optional main on unregistering)
|
||||||
@ -984,6 +1120,124 @@ void CoordinatorInstance::ReplicaFailCallback(std::string_view repl_instance_nam
|
|||||||
}
|
}
|
||||||
|
|
||||||
>>>>>>> 9081c5c24 (Optional main on unregistering)
|
>>>>>>> 9081c5c24 (Optional main on unregistering)
|
||||||
|
||||||| parent of fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
|
=======
|
||||||
|
// TODO: (andi) Make sure you cannot put coordinator instance to the main
|
||||||
|
auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name)
|
||||||
|
-> SetInstanceToMainCoordinatorStatus {
|
||||||
|
auto lock = std::lock_guard{coord_instance_lock_};
|
||||||
|
|
||||||
|
if (raft_state_.MainExists()) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::MAIN_ALREADY_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const is_new_main = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() == instance_name;
|
||||||
|
};
|
||||||
|
auto new_main = std::ranges::find_if(repl_instances_, is_new_main);
|
||||||
|
|
||||||
|
if (new_main == repl_instances_.end()) {
|
||||||
|
spdlog::error("Instance {} not registered. Please register it using REGISTER INSTANCE {}", instance_name,
|
||||||
|
instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::NO_INSTANCE_WITH_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!raft_state_.RequestLeadership()) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::NOT_LEADER;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const res = raft_state_.AppendSetInstanceAsMain(instance_name);
|
||||||
|
if (!res->get_accepted()) {
|
||||||
|
spdlog::error(
|
||||||
|
"Failed to accept request for promoting instance {}. Most likely the reason is that the instance is not "
|
||||||
|
"the leader.",
|
||||||
|
instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::RAFT_COULD_NOT_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
|
||||||
|
spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code());
|
||||||
|
return SetInstanceToMainCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_main->PauseFrequentCheck();
|
||||||
|
utils::OnScopeExit scope_exit{[&new_main] { new_main->ResumeFrequentCheck(); }};
|
||||||
|
|
||||||
|
auto const is_not_new_main = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() != instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto const new_main_uuid = utils::UUID{};
|
||||||
|
|
||||||
|
for (auto &other_instance : repl_instances_ | ranges::views::filter(is_not_new_main)) {
|
||||||
|
if (!other_instance.SendSwapAndUpdateUUID(new_main_uuid)) {
|
||||||
|
spdlog::error(
|
||||||
|
fmt::format("Failed to swap uuid for instance {}, aborting failover", other_instance.InstanceName()));
|
||||||
|
return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto repl_clients_info = repl_instances_ | ranges::views::filter(is_not_new_main) |
|
||||||
|
ranges::views::transform(&ReplicationInstance::ReplicationClientInfo) |
|
||||||
|
ranges::to<ReplicationClientsInfo>();
|
||||||
|
|
||||||
|
if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), &CoordinatorInstance::MainSuccessCallback,
|
||||||
|
&CoordinatorInstance::MainFailCallback)) {
|
||||||
|
return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
main_uuid_ = new_main_uuid;
|
||||||
|
spdlog::info("Instance {} promoted to main", instance_name);
|
||||||
|
return SetInstanceToMainCoordinatorStatus::SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: (andi) Status of registration, maybe not all needed.
|
||||||
|
// Incorporate checking of replication socket address.
|
||||||
|
auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig config)
|
||||||
|
-> RegisterInstanceCoordinatorStatus {
|
||||||
|
auto lock = std::lock_guard{coord_instance_lock_};
|
||||||
|
|
||||||
|
auto const name_matches = [&config](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() == config.instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, name_matches)) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::NAME_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const socket_address_matches = [&config](ReplicationInstance const &instance) {
|
||||||
|
return instance.SocketAddress() == config.SocketAddress();
|
||||||
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, socket_address_matches)) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::ENDPOINT_EXISTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!raft_state_.RequestLeadership()) {
|
||||||
|
return RegisterInstanceCoordinatorStatus::NOT_LEADER;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const res = raft_state_.AppendRegisterReplicationInstance(config);
|
||||||
|
if (!res->get_accepted()) {
|
||||||
|
spdlog::error(
|
||||||
|
"Failed to accept request for registering instance {}. Most likely the reason is that the instance is not "
|
||||||
|
"the "
|
||||||
|
"leader.",
|
||||||
|
config.instance_name);
|
||||||
|
return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
spdlog::info("Request for registering instance {} accepted", config.instance_name);
|
||||||
|
|
||||||
|
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
|
||||||
|
spdlog::error("Failed to register instance {} with error code {}", config.instance_name, res->get_result_code());
|
||||||
|
return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
||||||
|
}
|
||||||
|
|
||||||
|
return RegisterInstanceCoordinatorStatus::SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
>>>>>>> fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
auto CoordinatorInstance::UnregisterReplicationInstance(std::string instance_name)
|
auto CoordinatorInstance::UnregisterReplicationInstance(std::string instance_name)
|
||||||
-> UnregisterInstanceCoordinatorStatus {
|
-> UnregisterInstanceCoordinatorStatus {
|
||||||
auto lock = std::lock_guard{coord_instance_lock_};
|
auto lock = std::lock_guard{coord_instance_lock_};
|
||||||
@ -1024,16 +1278,6 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string instance_nam
|
|||||||
return UnregisterInstanceCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
return UnregisterInstanceCoordinatorStatus::RAFT_COULD_NOT_APPEND;
|
||||||
}
|
}
|
||||||
|
|
||||||
inst_to_remove->StopFrequentCheck();
|
|
||||||
if (auto curr_main = std::ranges::find_if(repl_instances_, is_main); curr_main != repl_instances_.end()) {
|
|
||||||
if (!curr_main->SendUnregisterReplicaRpc(instance_name)) {
|
|
||||||
// TODO: (andi) Restore state in the RAFT log if needed.
|
|
||||||
inst_to_remove->StartFrequentCheck();
|
|
||||||
return UnregisterInstanceCoordinatorStatus::RPC_FAILED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::erase_if(repl_instances_, name_matches);
|
|
||||||
|
|
||||||
return UnregisterInstanceCoordinatorStatus::SUCCESS;
|
return UnregisterInstanceCoordinatorStatus::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1045,6 +1289,7 @@ auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32
|
|||||||
<<<<<<< HEAD
|
<<<<<<< HEAD
|
||||||
<<<<<<< HEAD
|
<<<<<<< HEAD
|
||||||
<<<<<<< HEAD
|
<<<<<<< HEAD
|
||||||
|
<<<<<<< HEAD
|
||||||
auto CoordinatorInstance::GetMainUUID() const -> utils::UUID { return main_uuid_; }
|
auto CoordinatorInstance::GetMainUUID() const -> utils::UUID { return main_uuid_; }
|
||||||
|
|
||||||
// TODO: (andi) Add to the RAFT log.
|
// TODO: (andi) Add to the RAFT log.
|
||||||
@ -1142,5 +1387,67 @@ auto CoordinatorInstance::SetMainUUID(utils::UUID new_uuid) -> void { main_uuid_
|
|||||||
|
|
||||||
=======
|
=======
|
||||||
>>>>>>> 1b150ee92 (Address PR comments)
|
>>>>>>> 1b150ee92 (Address PR comments)
|
||||||
|
||||||| parent of fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
|
=======
|
||||||
|
auto CoordinatorInstance::OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void {
|
||||||
|
// TODO: (andi) Solve it locking scheme and write comment.
|
||||||
|
switch (log_action) {
|
||||||
|
case RaftLogAction::REGISTER_REPLICATION_INSTANCE: {
|
||||||
|
auto config = std::get<CoordinatorClientConfig>(log_entry);
|
||||||
|
auto *new_instance = &repl_instances_.emplace_back(this, std::move(config), client_succ_cb_, client_fail_cb_,
|
||||||
|
&CoordinatorInstance::ReplicaSuccessCallback,
|
||||||
|
&CoordinatorInstance::ReplicaFailCallback);
|
||||||
|
|
||||||
|
if (raft_state_.IsLeader()) {
|
||||||
|
if (!new_instance->SendDemoteToReplicaRpc()) {
|
||||||
|
throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica on registration.",
|
||||||
|
new_instance->InstanceName());
|
||||||
|
}
|
||||||
|
|
||||||
|
new_instance->StartFrequentCheck();
|
||||||
|
// TODO: (andi) Pinging for InstanceName() raft_state?
|
||||||
|
spdlog::info("Leader instance {} started frequent check on ", raft_state_.InstanceName(),
|
||||||
|
new_instance->InstanceName());
|
||||||
|
}
|
||||||
|
|
||||||
|
spdlog::info("Instance {} registered", new_instance->InstanceName());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: {
|
||||||
|
auto const instance_name = std::get<std::string>(log_entry);
|
||||||
|
|
||||||
|
auto &inst_to_remove = FindReplicationInstance(instance_name);
|
||||||
|
inst_to_remove.StopFrequentCheck();
|
||||||
|
|
||||||
|
auto const is_main = [this](ReplicationInstance const &instance) {
|
||||||
|
return raft_state_.IsMain(instance.InstanceName());
|
||||||
|
};
|
||||||
|
|
||||||
|
if (auto curr_main = std::ranges::find_if(repl_instances_, is_main); curr_main != repl_instances_.end()) {
|
||||||
|
if (!curr_main->SendUnregisterReplicaRpc(instance_name)) {
|
||||||
|
// TODO: (andi) Restore state in the RAFT log if needed.
|
||||||
|
inst_to_remove.StartFrequentCheck();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto const name_matches = [&instance_name](ReplicationInstance const &instance) {
|
||||||
|
return instance.InstanceName() == instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::erase_if(repl_instances_, name_matches);
|
||||||
|
|
||||||
|
spdlog::info("Instance {} unregistered", instance_name);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case RaftLogAction::SET_INSTANCE_AS_MAIN: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case RaftLogAction::SET_INSTANCE_AS_REPLICA: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
>>>>>>> fab8d3d76 (Shared (Un)Registration networking part with raft)
|
||||||
} // namespace memgraph::coordination
|
} // namespace memgraph::coordination
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,6 +15,8 @@
|
|||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
|
CoordinatorStateMachine::CoordinatorStateMachine(OnRaftCommitCb raft_commit_cb) : raft_commit_cb_(raft_commit_cb) {}
|
||||||
|
|
||||||
auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); }
|
auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); }
|
||||||
|
|
||||||
auto CoordinatorStateMachine::IsMain(std::string const &instance_name) const -> bool {
|
auto CoordinatorStateMachine::IsMain(std::string const &instance_name) const -> bool {
|
||||||
@ -25,37 +27,54 @@ auto CoordinatorStateMachine::IsReplica(std::string const &instance_name) const
|
|||||||
return cluster_state_.IsReplica(instance_name);
|
return cluster_state_.IsReplica(instance_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto CoordinatorStateMachine::EncodeLogAction(std::string const &name, RaftLogAction log_action) -> ptr<buffer> {
|
auto CoordinatorStateMachine::CreateLog(std::string const &log) -> ptr<buffer> {
|
||||||
auto const str_log = [&name, log_action] {
|
ptr<buffer> log_buf = buffer::alloc(sizeof(uint32_t) + log.size());
|
||||||
switch (log_action) {
|
buffer_serializer bs(log_buf);
|
||||||
case RaftLogAction::REGISTER_REPLICATION_INSTANCE:
|
bs.put_str(log);
|
||||||
return "register_" + name;
|
return log_buf;
|
||||||
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE:
|
|
||||||
return "unregister_" + name;
|
|
||||||
case RaftLogAction::SET_INSTANCE_AS_MAIN:
|
|
||||||
return "promote_" + name;
|
|
||||||
case RaftLogAction::SET_INSTANCE_AS_REPLICA:
|
|
||||||
return "demote_" + name;
|
|
||||||
}
|
|
||||||
}();
|
|
||||||
|
|
||||||
ptr<buffer> log = buffer::alloc(sizeof(uint32_t) + str_log.size());
|
|
||||||
buffer_serializer bs(log);
|
|
||||||
bs.put_str(str_log);
|
|
||||||
return log;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair<std::string, RaftLogAction> {
|
auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer> {
|
||||||
|
auto const str_log = fmt::format("{}*register", config.ToString());
|
||||||
|
return CreateLog(str_log);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto CoordinatorStateMachine::SerializeUnregisterInstance(std::string_view instance_name) -> ptr<buffer> {
|
||||||
|
auto const str_log = fmt::format("{}*unregister", instance_name);
|
||||||
|
return CreateLog(str_log);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto CoordinatorStateMachine::SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer> {
|
||||||
|
auto const str_log = fmt::format("{}*promote", instance_name);
|
||||||
|
return CreateLog(str_log);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto CoordinatorStateMachine::SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer> {
|
||||||
|
auto const str_log = fmt::format("{}*demote", instance_name);
|
||||||
|
return CreateLog(str_log);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction> {
|
||||||
buffer_serializer bs(data);
|
buffer_serializer bs(data);
|
||||||
|
|
||||||
auto const log_str = bs.get_str();
|
auto const log_str = bs.get_str();
|
||||||
auto const sep = log_str.find('_');
|
auto const sep = log_str.find('*');
|
||||||
auto const action = log_str.substr(0, sep);
|
auto const action = log_str.substr(sep + 1);
|
||||||
auto const name = log_str.substr(sep + 1);
|
auto const info = log_str.substr(0, sep);
|
||||||
|
|
||||||
spdlog::info("Decoding log: {} {}", name, action);
|
if (action == "register") {
|
||||||
|
return {CoordinatorClientConfig::FromString(info), RaftLogAction::REGISTER_REPLICATION_INSTANCE};
|
||||||
return {name, ParseRaftLogAction(action)};
|
}
|
||||||
|
if (action == "unregister") {
|
||||||
|
return {info, RaftLogAction::UNREGISTER_REPLICATION_INSTANCE};
|
||||||
|
}
|
||||||
|
if (action == "promote") {
|
||||||
|
return {info, RaftLogAction::SET_INSTANCE_AS_MAIN};
|
||||||
|
}
|
||||||
|
if (action == "demote") {
|
||||||
|
return {info, RaftLogAction::SET_INSTANCE_AS_REPLICA};
|
||||||
|
}
|
||||||
|
throw std::runtime_error("Unknown action");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto CoordinatorStateMachine::pre_commit(ulong const /*log_idx*/, buffer & /*data*/) -> ptr<buffer> { return nullptr; }
|
auto CoordinatorStateMachine::pre_commit(ulong const /*log_idx*/, buffer & /*data*/) -> ptr<buffer> { return nullptr; }
|
||||||
@ -64,12 +83,12 @@ auto CoordinatorStateMachine::commit(ulong const log_idx, buffer &data) -> ptr<b
|
|||||||
// TODO: (andi) think about locking scheme
|
// TODO: (andi) think about locking scheme
|
||||||
buffer_serializer bs(data);
|
buffer_serializer bs(data);
|
||||||
|
|
||||||
auto const [instance_name, log_action] = DecodeLog(data);
|
auto const [parsed_data, log_action] = DecodeLog(data);
|
||||||
spdlog::info("commit {} : {} {}", log_idx, instance_name, log_action);
|
cluster_state_.DoAction(parsed_data, log_action);
|
||||||
cluster_state_.DoAction(instance_name, log_action);
|
std::invoke(raft_commit_cb_, parsed_data, log_action);
|
||||||
// push_back(ReplicationInstance)
|
|
||||||
|
|
||||||
last_committed_idx_ = log_idx;
|
last_committed_idx_ = log_idx;
|
||||||
|
// TODO: (andi) Don't return nullptr
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,12 +14,15 @@
|
|||||||
#ifdef MG_ENTERPRISE
|
#ifdef MG_ENTERPRISE
|
||||||
|
|
||||||
#include "replication_coordination_glue/mode.hpp"
|
#include "replication_coordination_glue/mode.hpp"
|
||||||
|
#include "utils/string.hpp"
|
||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
|
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
|
||||||
@ -32,14 +35,31 @@ struct CoordinatorClientConfig {
|
|||||||
std::chrono::seconds instance_down_timeout_sec{5};
|
std::chrono::seconds instance_down_timeout_sec{5};
|
||||||
std::chrono::seconds instance_get_uuid_frequency_sec{10};
|
std::chrono::seconds instance_get_uuid_frequency_sec{10};
|
||||||
|
|
||||||
auto SocketAddress() const -> std::string { return ip_address + ":" + std::to_string(port); }
|
auto SocketAddress() const -> std::string { return fmt::format("{}:{}", ip_address, port); }
|
||||||
|
|
||||||
struct ReplicationClientInfo {
|
struct ReplicationClientInfo {
|
||||||
|
// TODO: (andi) Do we even need here instance_name for this struct?
|
||||||
std::string instance_name;
|
std::string instance_name;
|
||||||
replication_coordination_glue::ReplicationMode replication_mode{};
|
replication_coordination_glue::ReplicationMode replication_mode{};
|
||||||
std::string replication_ip_address;
|
std::string replication_ip_address;
|
||||||
uint16_t replication_port{};
|
uint16_t replication_port{};
|
||||||
|
|
||||||
|
auto ToString() const -> std::string {
|
||||||
|
return fmt::format("{}#{}#{}#{}", instance_name, replication_ip_address, replication_port,
|
||||||
|
replication_coordination_glue::ReplicationModeToString(replication_mode));
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: (andi) How can I make use of monadic parsers here?
|
||||||
|
static auto FromString(std::string_view log) -> ReplicationClientInfo {
|
||||||
|
ReplicationClientInfo replication_client_info;
|
||||||
|
auto splitted = utils::Split(log, "#");
|
||||||
|
replication_client_info.instance_name = splitted[0];
|
||||||
|
replication_client_info.replication_ip_address = splitted[1];
|
||||||
|
replication_client_info.replication_port = std::stoi(splitted[2]);
|
||||||
|
replication_client_info.replication_mode = replication_coordination_glue::ReplicationModeFromString(splitted[3]);
|
||||||
|
return replication_client_info;
|
||||||
|
}
|
||||||
|
|
||||||
friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default;
|
friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -54,6 +74,25 @@ struct CoordinatorClientConfig {
|
|||||||
|
|
||||||
std::optional<SSL> ssl;
|
std::optional<SSL> ssl;
|
||||||
|
|
||||||
|
auto ToString() const -> std::string {
|
||||||
|
return fmt::format("{}|{}|{}|{}|{}|{}|{}", instance_name, ip_address, port,
|
||||||
|
instance_health_check_frequency_sec.count(), instance_down_timeout_sec.count(),
|
||||||
|
instance_get_uuid_frequency_sec.count(), replication_client_info.ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
static auto FromString(std::string_view log) -> CoordinatorClientConfig {
|
||||||
|
CoordinatorClientConfig config;
|
||||||
|
auto splitted = utils::Split(log, "|");
|
||||||
|
config.instance_name = splitted[0];
|
||||||
|
config.ip_address = splitted[1];
|
||||||
|
config.port = std::stoi(splitted[2]);
|
||||||
|
config.instance_health_check_frequency_sec = std::chrono::seconds(std::stoi(splitted[3]));
|
||||||
|
config.instance_down_timeout_sec = std::chrono::seconds(std::stoi(splitted[4]));
|
||||||
|
config.instance_get_uuid_frequency_sec = std::chrono::seconds(std::stoi(splitted[5]));
|
||||||
|
config.replication_client_info = ReplicationClientInfo::FromString(splitted[6]);
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default;
|
friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -48,6 +48,11 @@ class CoordinatorInstance {
|
|||||||
|
|
||||||
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void;
|
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void;
|
||||||
|
|
||||||
|
private:
|
||||||
|
HealthCheckClientCallback client_succ_cb_, client_fail_cb_;
|
||||||
|
|
||||||
|
auto OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void;
|
||||||
|
|
||||||
auto FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &;
|
auto FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &;
|
||||||
|
|
||||||
void MainFailCallback(std::string_view);
|
void MainFailCallback(std::string_view);
|
||||||
@ -60,9 +65,6 @@ class CoordinatorInstance {
|
|||||||
|
|
||||||
static auto ChooseMostUpToDateInstance(const std::vector<InstanceNameDbHistories> &) -> NewMainRes;
|
static auto ChooseMostUpToDateInstance(const std::vector<InstanceNameDbHistories> &) -> NewMainRes;
|
||||||
|
|
||||||
private:
|
|
||||||
HealthCheckClientCallback client_succ_cb_, client_fail_cb_;
|
|
||||||
|
|
||||||
// NOTE: Only leader should have repl_instances_, not followers.
|
// NOTE: Only leader should have repl_instances_, not followers.
|
||||||
// NOTE: Must be std::list because we rely on pointer stability.
|
// NOTE: Must be std::list because we rely on pointer stability.
|
||||||
std::list<ReplicationInstance> repl_instances_;
|
std::list<ReplicationInstance> repl_instances_;
|
||||||
|
@ -21,6 +21,9 @@
|
|||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
|
class CoordinatorInstance;
|
||||||
|
struct CoordinatorClientConfig;
|
||||||
|
|
||||||
using BecomeLeaderCb = std::function<void()>;
|
using BecomeLeaderCb = std::function<void()>;
|
||||||
using BecomeFollowerCb = std::function<void()>;
|
using BecomeFollowerCb = std::function<void()>;
|
||||||
|
|
||||||
@ -36,8 +39,9 @@ using raft_result = nuraft::cmd_result<ptr<buffer>>;
|
|||||||
|
|
||||||
class RaftState {
|
class RaftState {
|
||||||
private:
|
private:
|
||||||
explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
|
explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb,
|
||||||
uint32_t raft_port, std::string raft_address);
|
OnRaftCommitCb raft_commit_cb, uint32_t raft_server_id, uint32_t raft_port,
|
||||||
|
std::string raft_address);
|
||||||
|
|
||||||
auto InitRaftServer() -> void;
|
auto InitRaftServer() -> void;
|
||||||
|
|
||||||
@ -49,7 +53,8 @@ class RaftState {
|
|||||||
RaftState &operator=(RaftState &&other) noexcept = default;
|
RaftState &operator=(RaftState &&other) noexcept = default;
|
||||||
~RaftState();
|
~RaftState();
|
||||||
|
|
||||||
static auto MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb) -> RaftState;
|
static auto MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb,
|
||||||
|
OnRaftCommitCb raft_commit_cb) -> RaftState;
|
||||||
|
|
||||||
auto InstanceName() const -> std::string;
|
auto InstanceName() const -> std::string;
|
||||||
auto RaftSocketAddress() const -> std::string;
|
auto RaftSocketAddress() const -> std::string;
|
||||||
@ -65,10 +70,10 @@ class RaftState {
|
|||||||
auto IsReplica(std::string const &instance_name) const -> bool;
|
auto IsReplica(std::string const &instance_name) const -> bool;
|
||||||
|
|
||||||
/// TODO: (andi) Add log in the name of methods
|
/// TODO: (andi) Add log in the name of methods
|
||||||
auto AppendRegisterReplicationInstance(std::string const &instance_name) -> ptr<raft_result>;
|
auto AppendRegisterReplicationInstance(CoordinatorClientConfig const &config) -> ptr<raft_result>;
|
||||||
auto AppendUnregisterReplicationInstance(std::string const &instance_name) -> ptr<raft_result>;
|
auto AppendUnregisterReplicationInstance(std::string_view instance_name) -> ptr<raft_result>;
|
||||||
auto AppendSetInstanceAsMain(std::string const &instance_name) -> ptr<raft_result>;
|
auto AppendSetInstanceAsMain(std::string_view instance_name) -> ptr<raft_result>;
|
||||||
auto AppendSetInstanceAsReplica(std::string const &instance_name) -> ptr<raft_result>;
|
auto AppendSetInstanceAsReplica(std::string_view instance_name) -> ptr<raft_result>;
|
||||||
|
|
||||||
auto GetInstances() const -> std::vector<std::pair<std::string, std::string>>;
|
auto GetInstances() const -> std::vector<std::pair<std::string, std::string>>;
|
||||||
|
|
||||||
|
@ -17,11 +17,12 @@
|
|||||||
#include "coordination/coordinator_exceptions.hpp"
|
#include "coordination/coordinator_exceptions.hpp"
|
||||||
#include "replication_coordination_glue/role.hpp"
|
#include "replication_coordination_glue/role.hpp"
|
||||||
|
|
||||||
#include <libnuraft/nuraft.hxx>
|
|
||||||
#include "utils/resource_lock.hpp"
|
#include "utils/resource_lock.hpp"
|
||||||
#include "utils/result.hpp"
|
#include "utils/result.hpp"
|
||||||
#include "utils/uuid.hpp"
|
#include "utils/uuid.hpp"
|
||||||
|
|
||||||
|
#include <libnuraft/nuraft.hxx>
|
||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
class CoordinatorInstance;
|
class CoordinatorInstance;
|
||||||
@ -54,6 +55,8 @@ class ReplicationInstance {
|
|||||||
|
|
||||||
auto PromoteToMain(utils::UUID uuid, ReplicationClientsInfo repl_clients_info,
|
auto PromoteToMain(utils::UUID uuid, ReplicationClientsInfo repl_clients_info,
|
||||||
HealthCheckInstanceCallback main_succ_cb, HealthCheckInstanceCallback main_fail_cb) -> bool;
|
HealthCheckInstanceCallback main_succ_cb, HealthCheckInstanceCallback main_fail_cb) -> bool;
|
||||||
|
|
||||||
|
auto SendDemoteToReplicaRpc() -> bool;
|
||||||
auto DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb, HealthCheckInstanceCallback replica_fail_cb)
|
auto DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb, HealthCheckInstanceCallback replica_fail_cb)
|
||||||
-> bool;
|
-> bool;
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
#ifdef MG_ENTERPRISE
|
#ifdef MG_ENTERPRISE
|
||||||
|
|
||||||
|
#include "coordination/coordinator_config.hpp"
|
||||||
#include "nuraft/raft_log_action.hpp"
|
#include "nuraft/raft_log_action.hpp"
|
||||||
#include "replication_coordination_glue/role.hpp"
|
#include "replication_coordination_glue/role.hpp"
|
||||||
#include "utils/rw_lock.hpp"
|
#include "utils/rw_lock.hpp"
|
||||||
@ -23,9 +24,12 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
|
using TRaftLog = std::variant<CoordinatorClientConfig, std::string>;
|
||||||
|
|
||||||
using nuraft::buffer;
|
using nuraft::buffer;
|
||||||
using nuraft::buffer_serializer;
|
using nuraft::buffer_serializer;
|
||||||
using nuraft::ptr;
|
using nuraft::ptr;
|
||||||
@ -40,7 +44,7 @@ class CoordinatorClusterState {
|
|||||||
|
|
||||||
auto InsertInstance(std::string const &instance_name, replication_coordination_glue::ReplicationRole role) -> void;
|
auto InsertInstance(std::string const &instance_name, replication_coordination_glue::ReplicationRole role) -> void;
|
||||||
|
|
||||||
auto DoAction(std::string const &instance_name, RaftLogAction log_action) -> void;
|
auto DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void;
|
||||||
|
|
||||||
auto Serialize(ptr<buffer> &data) -> void;
|
auto Serialize(ptr<buffer> &data) -> void;
|
||||||
|
|
||||||
|
@ -13,14 +13,19 @@
|
|||||||
|
|
||||||
#ifdef MG_ENTERPRISE
|
#ifdef MG_ENTERPRISE
|
||||||
|
|
||||||
|
#include "coordination/coordinator_config.hpp"
|
||||||
#include "nuraft/coordinator_cluster_state.hpp"
|
#include "nuraft/coordinator_cluster_state.hpp"
|
||||||
#include "nuraft/raft_log_action.hpp"
|
#include "nuraft/raft_log_action.hpp"
|
||||||
|
|
||||||
#include <spdlog/spdlog.h>
|
#include <spdlog/spdlog.h>
|
||||||
#include <libnuraft/nuraft.hxx>
|
#include <libnuraft/nuraft.hxx>
|
||||||
|
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
namespace memgraph::coordination {
|
namespace memgraph::coordination {
|
||||||
|
|
||||||
|
using OnRaftCommitCb = std::function<void(TRaftLog, RaftLogAction)>;
|
||||||
|
|
||||||
using nuraft::async_result;
|
using nuraft::async_result;
|
||||||
using nuraft::buffer;
|
using nuraft::buffer;
|
||||||
using nuraft::buffer_serializer;
|
using nuraft::buffer_serializer;
|
||||||
@ -32,7 +37,7 @@ using nuraft::state_machine;
|
|||||||
|
|
||||||
class CoordinatorStateMachine : public state_machine {
|
class CoordinatorStateMachine : public state_machine {
|
||||||
public:
|
public:
|
||||||
CoordinatorStateMachine() = default;
|
explicit CoordinatorStateMachine(OnRaftCommitCb raft_commit_cb);
|
||||||
CoordinatorStateMachine(CoordinatorStateMachine const &) = delete;
|
CoordinatorStateMachine(CoordinatorStateMachine const &) = delete;
|
||||||
CoordinatorStateMachine &operator=(CoordinatorStateMachine const &) = delete;
|
CoordinatorStateMachine &operator=(CoordinatorStateMachine const &) = delete;
|
||||||
CoordinatorStateMachine(CoordinatorStateMachine &&) = delete;
|
CoordinatorStateMachine(CoordinatorStateMachine &&) = delete;
|
||||||
@ -43,9 +48,13 @@ class CoordinatorStateMachine : public state_machine {
|
|||||||
auto IsMain(std::string const &instance_name) const -> bool;
|
auto IsMain(std::string const &instance_name) const -> bool;
|
||||||
auto IsReplica(std::string const &instance_name) const -> bool;
|
auto IsReplica(std::string const &instance_name) const -> bool;
|
||||||
|
|
||||||
static auto EncodeLogAction(std::string const &instance_name, RaftLogAction log_action) -> ptr<buffer>;
|
static auto CreateLog(std::string const &log) -> ptr<buffer>;
|
||||||
|
static auto SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer>;
|
||||||
|
static auto SerializeUnregisterInstance(std::string_view instance_name) -> ptr<buffer>;
|
||||||
|
static auto SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer>;
|
||||||
|
static auto SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer>;
|
||||||
|
|
||||||
static auto DecodeLog(buffer &data) -> std::pair<std::string, RaftLogAction>;
|
static auto DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction>;
|
||||||
|
|
||||||
auto pre_commit(ulong log_idx, buffer &data) -> ptr<buffer> override;
|
auto pre_commit(ulong log_idx, buffer &data) -> ptr<buffer> override;
|
||||||
|
|
||||||
@ -86,8 +95,7 @@ class CoordinatorStateMachine : public state_machine {
|
|||||||
|
|
||||||
CoordinatorClusterState cluster_state_;
|
CoordinatorClusterState cluster_state_;
|
||||||
|
|
||||||
|
// mutable utils::RWLock lock{utils::RWLock::Priority::READ};
|
||||||
//mutable utils::RWLock lock{utils::RWLock::Priority::READ};
|
|
||||||
|
|
||||||
std::atomic<uint64_t> last_committed_idx_{0};
|
std::atomic<uint64_t> last_committed_idx_{0};
|
||||||
|
|
||||||
@ -95,6 +103,8 @@ class CoordinatorStateMachine : public state_machine {
|
|||||||
std::map<uint64_t, ptr<SnapshotCtx>> snapshots_;
|
std::map<uint64_t, ptr<SnapshotCtx>> snapshots_;
|
||||||
std::mutex snapshots_lock_;
|
std::mutex snapshots_lock_;
|
||||||
|
|
||||||
|
OnRaftCommitCb raft_commit_cb_;
|
||||||
|
|
||||||
ptr<snapshot> last_snapshot_;
|
ptr<snapshot> last_snapshot_;
|
||||||
std::mutex last_snapshot_lock_;
|
std::mutex last_snapshot_lock_;
|
||||||
};
|
};
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
#include "coordination/raft_state.hpp"
|
#include "coordination/raft_state.hpp"
|
||||||
|
|
||||||
|
#include "coordination/coordinator_config.hpp"
|
||||||
#include "coordination/coordinator_exceptions.hpp"
|
#include "coordination/coordinator_exceptions.hpp"
|
||||||
#include "utils/counter.hpp"
|
#include "utils/counter.hpp"
|
||||||
|
|
||||||
@ -29,12 +30,13 @@ using nuraft::raft_server;
|
|||||||
using nuraft::srv_config;
|
using nuraft::srv_config;
|
||||||
using raft_result = cmd_result<ptr<buffer>>;
|
using raft_result = cmd_result<ptr<buffer>>;
|
||||||
|
|
||||||
RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
|
RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb,
|
||||||
uint32_t raft_port, std::string raft_address)
|
OnRaftCommitCb raft_commit_cb, uint32_t raft_server_id, uint32_t raft_port,
|
||||||
|
std::string raft_address)
|
||||||
: raft_server_id_(raft_server_id),
|
: raft_server_id_(raft_server_id),
|
||||||
raft_port_(raft_port),
|
raft_port_(raft_port),
|
||||||
raft_address_(std::move(raft_address)),
|
raft_address_(std::move(raft_address)),
|
||||||
state_machine_(cs_new<CoordinatorStateMachine>()),
|
state_machine_(cs_new<CoordinatorStateMachine>(raft_commit_cb)),
|
||||||
state_manager_(
|
state_manager_(
|
||||||
cs_new<CoordinatorStateManager>(raft_server_id_, raft_address_ + ":" + std::to_string(raft_port_))),
|
cs_new<CoordinatorStateManager>(raft_server_id_, raft_address_ + ":" + std::to_string(raft_port_))),
|
||||||
logger_(nullptr),
|
logger_(nullptr),
|
||||||
@ -88,7 +90,8 @@ auto RaftState::InitRaftServer() -> void {
|
|||||||
throw RaftServerStartException("Failed to initialize raft server on {}:{}", raft_address_, raft_port_);
|
throw RaftServerStartException("Failed to initialize raft server on {}:{}", raft_address_, raft_port_);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto RaftState::MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb) -> RaftState {
|
auto RaftState::MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb,
|
||||||
|
OnRaftCommitCb raft_commit_cb) -> RaftState {
|
||||||
uint32_t raft_server_id{0};
|
uint32_t raft_server_id{0};
|
||||||
uint32_t raft_port{0};
|
uint32_t raft_port{0};
|
||||||
try {
|
try {
|
||||||
@ -98,8 +101,8 @@ auto RaftState::MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb
|
|||||||
throw RaftCouldNotParseFlagsException("Failed to parse flags: {}", e.what());
|
throw RaftCouldNotParseFlagsException("Failed to parse flags: {}", e.what());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto raft_state =
|
auto raft_state = RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_commit_cb,
|
||||||
RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_server_id, raft_port, "127.0.0.1");
|
raft_server_id, raft_port, "127.0.0.1");
|
||||||
raft_state.InitRaftServer();
|
raft_state.InitRaftServer();
|
||||||
return raft_state;
|
return raft_state;
|
||||||
}
|
}
|
||||||
@ -129,24 +132,23 @@ auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); }
|
|||||||
|
|
||||||
auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); }
|
auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); }
|
||||||
|
|
||||||
auto RaftState::AppendRegisterReplicationInstance(std::string const &instance_name) -> ptr<raft_result> {
|
auto RaftState::AppendRegisterReplicationInstance(CoordinatorClientConfig const &config) -> ptr<raft_result> {
|
||||||
auto new_log = CoordinatorStateMachine::EncodeLogAction(instance_name, RaftLogAction::REGISTER_REPLICATION_INSTANCE);
|
auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config);
|
||||||
return raft_server_->append_entries({new_log});
|
return raft_server_->append_entries({new_log});
|
||||||
}
|
}
|
||||||
|
|
||||||
auto RaftState::AppendUnregisterReplicationInstance(std::string const &instance_name) -> ptr<raft_result> {
|
auto RaftState::AppendUnregisterReplicationInstance(std::string_view instance_name) -> ptr<raft_result> {
|
||||||
auto new_log =
|
auto new_log = CoordinatorStateMachine::SerializeUnregisterInstance(instance_name);
|
||||||
CoordinatorStateMachine::EncodeLogAction(instance_name, RaftLogAction::UNREGISTER_REPLICATION_INSTANCE);
|
|
||||||
return raft_server_->append_entries({new_log});
|
return raft_server_->append_entries({new_log});
|
||||||
}
|
}
|
||||||
|
|
||||||
auto RaftState::AppendSetInstanceAsMain(std::string const &instance_name) -> ptr<raft_result> {
|
auto RaftState::AppendSetInstanceAsMain(std::string_view instance_name) -> ptr<raft_result> {
|
||||||
auto new_log = CoordinatorStateMachine::EncodeLogAction(instance_name, RaftLogAction::SET_INSTANCE_AS_MAIN);
|
auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsMain(instance_name);
|
||||||
return raft_server_->append_entries({new_log});
|
return raft_server_->append_entries({new_log});
|
||||||
}
|
}
|
||||||
|
|
||||||
auto RaftState::AppendSetInstanceAsReplica(std::string const &instance_name) -> ptr<raft_result> {
|
auto RaftState::AppendSetInstanceAsReplica(std::string_view instance_name) -> ptr<raft_result> {
|
||||||
auto new_log = CoordinatorStateMachine::EncodeLogAction(instance_name, RaftLogAction::SET_INSTANCE_AS_REPLICA);
|
auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsReplica(instance_name);
|
||||||
return raft_server_->append_entries({new_log});
|
return raft_server_->append_entries({new_log});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,13 +26,7 @@ ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorC
|
|||||||
HealthCheckInstanceCallback fail_instance_cb)
|
HealthCheckInstanceCallback fail_instance_cb)
|
||||||
: client_(peer, std::move(config), std::move(succ_cb), std::move(fail_cb)),
|
: client_(peer, std::move(config), std::move(succ_cb), std::move(fail_cb)),
|
||||||
succ_cb_(succ_instance_cb),
|
succ_cb_(succ_instance_cb),
|
||||||
fail_cb_(fail_instance_cb) {
|
fail_cb_(fail_instance_cb) {}
|
||||||
if (!client_.DemoteToReplica()) {
|
|
||||||
throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica", client_.InstanceName());
|
|
||||||
}
|
|
||||||
|
|
||||||
client_.StartFrequentCheck();
|
|
||||||
}
|
|
||||||
|
|
||||||
auto ReplicationInstance::OnSuccessPing() -> void {
|
auto ReplicationInstance::OnSuccessPing() -> void {
|
||||||
last_response_time_ = std::chrono::system_clock::now();
|
last_response_time_ = std::chrono::system_clock::now();
|
||||||
@ -68,6 +62,8 @@ auto ReplicationInstance::PromoteToMain(utils::UUID new_uuid, ReplicationClients
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto ReplicationInstance::SendDemoteToReplicaRpc() -> bool { return client_.DemoteToReplica(); }
|
||||||
|
|
||||||
auto ReplicationInstance::DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb,
|
auto ReplicationInstance::DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb,
|
||||||
HealthCheckInstanceCallback replica_fail_cb) -> bool {
|
HealthCheckInstanceCallback replica_fail_cb) -> bool {
|
||||||
if (!client_.DemoteToReplica()) {
|
if (!client_.DemoteToReplica()) {
|
||||||
|
@ -473,7 +473,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
|
|||||||
.replication_client_info = repl_config,
|
.replication_client_info = repl_config,
|
||||||
.ssl = std::nullopt};
|
.ssl = std::nullopt};
|
||||||
|
|
||||||
auto status = coordinator_handler_.RegisterReplicationInstance(coordinator_client_config);
|
auto status = coordinator_handler_.RegisterReplicationInstance(std::move(coordinator_client_config));
|
||||||
switch (status) {
|
switch (status) {
|
||||||
using enum memgraph::coordination::RegisterInstanceCoordinatorStatus;
|
using enum memgraph::coordination::RegisterInstanceCoordinatorStatus;
|
||||||
case NAME_EXISTS:
|
case NAME_EXISTS:
|
||||||
|
@ -12,7 +12,32 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <map>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace memgraph::replication_coordination_glue {
|
namespace memgraph::replication_coordination_glue {
|
||||||
|
|
||||||
enum class ReplicationMode : std::uint8_t { SYNC, ASYNC };
|
enum class ReplicationMode : std::uint8_t { SYNC, ASYNC };
|
||||||
|
|
||||||
|
inline auto ReplicationModeToString(ReplicationMode mode) -> std::string {
|
||||||
|
switch (mode) {
|
||||||
|
case ReplicationMode::SYNC:
|
||||||
|
return "SYNC";
|
||||||
|
case ReplicationMode::ASYNC:
|
||||||
|
return "ASYNC";
|
||||||
|
}
|
||||||
|
throw std::invalid_argument("Invalid replication mode");
|
||||||
|
}
|
||||||
|
|
||||||
|
inline auto ReplicationModeFromString(std::string_view mode) -> ReplicationMode {
|
||||||
|
if (mode == "SYNC") {
|
||||||
|
return ReplicationMode::SYNC;
|
||||||
|
}
|
||||||
|
if (mode == "ASYNC") {
|
||||||
|
return ReplicationMode::ASYNC;
|
||||||
|
}
|
||||||
|
throw std::invalid_argument("Invalid replication mode");
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace memgraph::replication_coordination_glue
|
} // namespace memgraph::replication_coordination_glue
|
||||||
|
Loading…
Reference in New Issue
Block a user