// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

#ifdef MG_ENTERPRISE
// NOTE(review): the two original angle-bracket include targets were lost when this file was mangled; <chrono> and
// <spdlog/spdlog.h> are reconstructed from usage below -- confirm. <thread> is added for std::this_thread::sleep_for.
#include <chrono>
#include <thread>

#include <spdlog/spdlog.h>

#include "coordination/coordinator_communication_config.hpp"
#include "coordination/coordinator_exceptions.hpp"
#include "coordination/raft_state.hpp"
#include "utils/counter.hpp"

namespace memgraph::coordination {

using nuraft::asio_service;
using nuraft::cb_func;
using nuraft::CbReturnCode;
using nuraft::cmd_result;
using nuraft::cs_new;
using nuraft::ptr;
using nuraft::raft_params;
using nuraft::raft_server;
using nuraft::srv_config;
using raft_result = cmd_result<ptr<nuraft::buffer>>;

// Stores the endpoint, the coordinator id and both role-change callbacks, and creates the state machine and state
// manager. The raft server itself is not started here; that happens in InitRaftServer().
// NOTE(review): the template arguments of both cs_new calls were lost in the mangled source. CoordinatorStateMachine
// matches the static Serialize* calls further down; CoordinatorStateManager is an assumption -- confirm against
// raft_state.hpp.
RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id,
                     uint32_t raft_port, std::string raft_address)
    : raft_endpoint_(raft_address, raft_port),
      coordinator_id_(coordinator_id),
      state_machine_(cs_new<CoordinatorStateMachine>()),
      state_manager_(cs_new<CoordinatorStateManager>(coordinator_id_, raft_endpoint_.SocketAddress())),
      logger_(nullptr),
      become_leader_cb_(std::move(become_leader_cb)),
      become_follower_cb_(std::move(become_follower_cb)) {}

// Configures and launches the NuRaft server, then polls until it reports itself initialized.
// Throws RaftServerStartException if the launcher returns no server or if the server is still not initialized when
// the polling counter fires.
auto RaftState::InitRaftServer() -> void {
  asio_service::options asio_opts;
  asio_opts.thread_pool_size_ = 1;

  raft_params params;
  params.heart_beat_interval_ = 100;
  params.election_timeout_lower_bound_ = 200;
  params.election_timeout_upper_bound_ = 400;
  params.reserved_log_items_ = 5;
  params.snapshot_distance_ = 5;
  params.client_req_timeout_ = 3000;
  params.return_method_ = raft_params::blocking;

  // If the leader doesn't receive any response from quorum nodes
  // in 200ms, it will step down.
  // This allows us to achieve strong consistency even if network partition
  // happens between the current leader and followers.
  // The value must be <= election_timeout_lower_bound_ so that cluster can never
  // have multiple leaders.
  params.leadership_expiry_ = 200;

  raft_server::init_options init_opts;

  // NOTE(review): the callback captures `this`, while MakeRaftState() returns the RaftState by value. If that return
  // is ever performed as a move instead of being elided, the captured pointer dangles -- confirm.
  init_opts.raft_callback_ = [this](cb_func::Type event_type, cb_func::Param *param) -> nuraft::CbReturnCode {
    if (event_type == cb_func::BecomeLeader) {
      spdlog::info("Node {} became leader", param->leaderId);
      become_leader_cb_();
    } else if (event_type == cb_func::BecomeFollower) {
      spdlog::info("Node {} became follower", param->myId);
      become_follower_cb_();
    }
    return CbReturnCode::Ok;
  };

  // Launch through the member launcher: the destructor calls launcher_.shutdown(), which can only stop the server
  // if this is the launcher that started it. (Previously a function-local launcher was used here.)
  raft_server_ =
      launcher_.init(state_machine_, state_manager_, logger_, raft_endpoint_.port, asio_opts, params, init_opts);

  if (!raft_server_) {
    throw RaftServerStartException("Failed to launch raft server on {}", raft_endpoint_.SocketAddress());
  }

  // Poll every 250ms; the number of attempts is bounded by the resettable counter.
  auto maybe_stop = utils::ResettableCounter<20>();
  do {
    if (raft_server_->is_initialized()) {
      return;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(250));
  } while (!maybe_stop());

  throw RaftServerStartException("Failed to initialize raft server on {}", raft_endpoint_.SocketAddress());
}

// Builds a RaftState from FLAGS_coordinator_id / FLAGS_coordinator_port on the hard-coded address 127.0.0.1 and
// starts its raft server. Exceptions thrown by InitRaftServer() propagate to the caller.
auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState {
  uint32_t coordinator_id = FLAGS_coordinator_id;
  uint32_t raft_port = FLAGS_coordinator_port;

  auto raft_state =
      RaftState(std::move(become_leader_cb), std::move(become_follower_cb), coordinator_id, raft_port, "127.0.0.1");

  raft_state.InitRaftServer();
  return raft_state;
}

// Shuts down whatever launcher_ started in InitRaftServer().
RaftState::~RaftState() { launcher_.shutdown(); }

// Returns "coordinator_<id>".
auto RaftState::InstanceName() const -> std::string { return fmt::format("coordinator_{}", coordinator_id_); }

// Returns the socket address of this coordinator's raft endpoint.
auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); }

// Asks the raft server to add the given coordinator and then polls until its configuration becomes visible.
// Throws RaftAddServerException if the request is rejected or the server does not appear before the polling
// counter fires.
auto RaftState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void {
  auto const endpoint = config.coordinator_server.SocketAddress();
  srv_config const srv_config_to_add(static_cast<int>(config.coordinator_server_id), endpoint);

  auto const add_result = raft_server_->add_srv(srv_config_to_add);
  if (add_result->get_result_code() != nuraft::cmd_result_code::OK) {
    throw RaftAddServerException("Failed to accept request to add server {} to the cluster with error code {}",
                                 endpoint, static_cast<int>(add_result->get_result_code()));
  }
  spdlog::info("Request to add server {} to the cluster accepted", endpoint);

  // Waiting for server to join
  constexpr int max_tries{10};
  auto maybe_stop = utils::ResettableCounter<max_tries>();
  constexpr int waiting_period{200};
  bool added{false};
  while (!maybe_stop()) {
    std::this_thread::sleep_for(std::chrono::milliseconds(waiting_period));
    const auto server_config = raft_server_->get_srv_config(static_cast<int>(config.coordinator_server_id));
    if (server_config) {
      spdlog::trace("Server with id {} added to cluster", config.coordinator_server_id);
      added = true;
      break;
    }
  }

  if (!added) {
    throw RaftAddServerException("Failed to add server {} to the cluster in {}ms", endpoint,
                                 max_tries * waiting_period);
  }
}

// Returns the server configurations the raft server currently reports.
auto RaftState::GetAllCoordinators() const -> std::vector<ptr<srv_config>> {
  std::vector<ptr<srv_config>> all_srv_configs;
  raft_server_->get_srv_config_all(all_srv_configs);
  return all_srv_configs;
}

auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); }

// True if this node is already the leader, otherwise the result of requesting leadership from the raft server.
auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); }

// Appends a "register replication instance" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for registering instance {}. Most likely the reason is that the instance is not "
        "the leader.",
        config.instance_name);
    return false;
  }

  spdlog::info("Request for registering instance {} accepted", config.instance_name);

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    spdlog::error("Failed to register instance {} with error code {}", config.instance_name,
                  static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// Appends an "unregister replication instance" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeUnregisterInstance(instance_name);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for unregistering instance {}. Most likely the reason is that the instance is not "
        "the leader.",
        instance_name);
    return false;
  }

  spdlog::info("Request for unregistering instance {} accepted", instance_name);

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    spdlog::error("Failed to unregister instance {} with error code {}", instance_name,
                  static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// Appends a "set instance as main" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsMain(instance_name);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for promoting instance {}. Most likely the reason is that the instance is not "
        "the leader.",
        instance_name);
    return false;
  }

  spdlog::info("Request for promoting instance {} accepted", instance_name);

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    spdlog::error("Failed to promote instance {} with error code {}", instance_name,
                  static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// Appends a "set instance as replica" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsReplica(instance_name);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for demoting instance {}. Most likely the reason is that the instance is not "
        "the leader.",
        instance_name);
    return false;
  }

  spdlog::info("Request for demoting instance {} accepted", instance_name);

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    // This path is a demotion; the message previously said "promote" (copy-paste from the method above).
    spdlog::error("Failed to demote instance {} with error code {}", instance_name,
                  static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// Appends an "update UUID" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeUpdateUUID(uuid);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for updating UUID. Most likely the reason is that the instance is not "
        "the leader.");
    return false;
  }

  spdlog::info("Request for updating UUID accepted");

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    spdlog::error("Failed to update UUID with error code {}", static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// Appends an "add coordinator instance" entry to the raft log.
// Returns false (and logs an error) if the request is not accepted or its result code is not OK.
auto RaftState::AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool {
  auto new_log = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config);
  auto const res = raft_server_->append_entries({new_log});

  if (!res->get_accepted()) {
    spdlog::error(
        "Failed to accept request for adding coordinator instance {}. Most likely the reason is that the instance is "
        "not the leader.",
        config.coordinator_server_id);
    return false;
  }

  spdlog::info("Request for adding coordinator instance {} accepted", config.coordinator_server_id);

  if (res->get_result_code() != nuraft::cmd_result_code::OK) {
    spdlog::error("Failed to add coordinator instance {} with error code {}", config.coordinator_server_id,
                  static_cast<int>(res->get_result_code()));
    return false;
  }

  return true;
}

// The accessors below forward directly to the coordinator state machine.

auto RaftState::MainExists() const -> bool { return state_machine_->MainExists(); }

auto RaftState::IsMain(std::string_view instance_name) const -> bool { return state_machine_->IsMain(instance_name); }

auto RaftState::IsReplica(std::string_view instance_name) const -> bool {
  return state_machine_->IsReplica(instance_name);
}

// NOTE(review): the vector element type was lost in the mangled source; ReplicationInstanceState is an assumption
// and must match the declaration in raft_state.hpp -- confirm.
auto RaftState::GetReplicationInstances() const -> std::vector<ReplicationInstanceState> {
  return state_machine_->GetReplicationInstances();
}

// NOTE(review): the vector element type was lost in the mangled source; CoordinatorInstanceState is an assumption
// and must match the declaration in raft_state.hpp -- confirm.
auto RaftState::GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState> {
  return state_machine_->GetCoordinatorInstances();
}

auto RaftState::GetUUID() const -> utils::UUID { return state_machine_->GetUUID(); }

}  // namespace memgraph::coordination
#endif