Refactor replication client/server (#1311)

This commit is contained in:
Gareth Andrew Lloyd 2023-09-29 11:21:42 +01:00 committed by GitHub
parent 61ac7e1b11
commit d71b6a5007
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 514 additions and 472 deletions

View File

@ -238,9 +238,10 @@ class ReplQueryHandler final : public query::ReplicationQueryHandler {
if (!port || *port < 0 || *port > std::numeric_limits<uint16_t>::max()) {
throw QueryRuntimeException("Port number invalid!");
}
if (!db_->SetReplicaRole(
io::network::Endpoint(storage::replication::kDefaultReplicationServerIp, static_cast<uint16_t>(*port)),
storage::replication::ReplicationServerConfig{})) {
if (!db_->SetReplicaRole(storage::replication::ReplicationServerConfig{
.ip_address = storage::replication::kDefaultReplicationServerIp,
.port = static_cast<uint16_t>(*port),
})) {
throw QueryRuntimeException("Couldn't set role to replica!");
}
}
@ -286,9 +287,14 @@ class ReplQueryHandler final : public query::ReplicationQueryHandler {
io::network::Endpoint::ParseSocketOrIpAddress(socket_address, storage::replication::kDefaultReplicationPort);
if (maybe_ip_and_port) {
auto [ip, port] = *maybe_ip_and_port;
auto ret = db_->RegisterReplica(name, {std::move(ip), port}, repl_mode,
auto ret = db_->RegisterReplica(
storage::replication::RegistrationMode::MUST_BE_INSTANTLY_VALID,
{.replica_check_frequency = replica_check_frequency, .ssl = std::nullopt});
storage::replication::ReplicationClientConfig{.name = name,
.mode = repl_mode,
.ip_address = ip,
.port = port,
.replica_check_frequency = replica_check_frequency,
.ssl = std::nullopt});
if (ret.HasError()) {
throw QueryRuntimeException(fmt::format("Couldn't register replica '{}'!", name));
}

View File

@ -375,13 +375,12 @@ class DiskStorage final : public Storage {
EdgeImportMode edge_import_status_{EdgeImportMode::INACTIVE};
std::unique_ptr<EdgeImportModeCache> edge_import_mode_cache_{nullptr};
auto CreateReplicationClient(std::string name, io::network::Endpoint endpoint, replication::ReplicationMode mode,
const replication::ReplicationClientConfig &config)
/// Replication client factory override for disk storage.
/// Disk storage does not support replication, so this never returns a client.
/// @param config replication client configuration (not used here)
/// @throw utils::BasicException always
auto CreateReplicationClient(replication::ReplicationClientConfig const &config)
-> std::unique_ptr<ReplicationClient> override {
throw utils::BasicException("Disk storage mode does not support replication.");
}
auto CreateReplicationServer(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config)
/// Replication server factory override for disk storage.
/// Disk storage does not support replication, so this never returns a server.
/// @param config replication server configuration (not used here)
/// @throw utils::BasicException always
auto CreateReplicationServer(const replication::ReplicationServerConfig &config)
-> std::unique_ptr<ReplicationServer> override {
throw utils::BasicException("Disk storage mode does not support replication.");
}

View File

@ -102,10 +102,9 @@ uint64_t ReplicateCurrentWal(CurrentWalHandler &stream, durability::WalFile cons
////// ReplicationClient //////
InMemoryReplicationClient::InMemoryReplicationClient(InMemoryStorage *storage, std::string name,
io::network::Endpoint endpoint, replication::ReplicationMode mode,
InMemoryReplicationClient::InMemoryReplicationClient(InMemoryStorage *storage,
const replication::ReplicationClientConfig &config)
: ReplicationClient{storage, std::move(name), std::move(endpoint), mode, config} {}
: ReplicationClient{storage, config} {}
void InMemoryReplicationClient::RecoverReplica(uint64_t replica_commit) {
spdlog::debug("Starting replica recover");

View File

@ -18,8 +18,7 @@ class InMemoryStorage;
class InMemoryReplicationClient : public ReplicationClient {
public:
InMemoryReplicationClient(InMemoryStorage *storage, std::string name, io::network::Endpoint endpoint,
replication::ReplicationMode mode, const replication::ReplicationClientConfig &config = {});
InMemoryReplicationClient(InMemoryStorage *storage, const replication::ReplicationClientConfig &config);
protected:
void RecoverReplica(uint64_t replica_commit) override;

View File

@ -32,9 +32,9 @@ std::pair<uint64_t, durability::WalDeltaData> ReadDelta(durability::BaseDecoder
};
} // namespace
InMemoryReplicationServer::InMemoryReplicationServer(InMemoryStorage *storage, memgraph::io::network::Endpoint endpoint,
InMemoryReplicationServer::InMemoryReplicationServer(InMemoryStorage *storage,
const replication::ReplicationServerConfig &config)
: ReplicationServer{std::move(endpoint), config}, storage_(storage) {
: ReplicationServer{config}, storage_(storage) {
rpc_server_.Register<replication::HeartbeatRpc>([this](auto *req_reader, auto *res_builder) {
spdlog::debug("Received HeartbeatRpc");
this->HeartbeatHandler(req_reader, res_builder);

View File

@ -20,8 +20,7 @@ class InMemoryStorage;
class InMemoryReplicationServer : public ReplicationServer {
public:
explicit InMemoryReplicationServer(InMemoryStorage *storage, io::network::Endpoint endpoint,
const replication::ReplicationServerConfig &config);
explicit InMemoryReplicationServer(InMemoryStorage *storage, const replication::ReplicationServerConfig &config);
private:
// RPC handlers

View File

@ -1832,16 +1832,14 @@ utils::FileRetainer::FileLockerAccessor::ret_type InMemoryStorage::UnlockPath()
return true;
}
auto InMemoryStorage::CreateReplicationClient(std::string name, io::network::Endpoint endpoint,
replication::ReplicationMode mode,
replication::ReplicationClientConfig const &config)
auto InMemoryStorage::CreateReplicationClient(replication::ReplicationClientConfig const &config)
-> std::unique_ptr<ReplicationClient> {
return std::make_unique<InMemoryReplicationClient>(this, std::move(name), std::move(endpoint), mode, config);
return std::make_unique<InMemoryReplicationClient>(this, config);
}
std::unique_ptr<ReplicationServer> InMemoryStorage::CreateReplicationServer(
io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config) {
return std::make_unique<InMemoryReplicationServer>(this, std::move(endpoint), config);
const replication::ReplicationServerConfig &config) {
return std::make_unique<InMemoryReplicationServer>(this, config);
}
} // namespace memgraph::storage

View File

@ -371,11 +371,10 @@ class InMemoryStorage final : public Storage {
Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode) override;
auto CreateReplicationClient(std::string name, io::network::Endpoint endpoint, replication::ReplicationMode mode,
replication::ReplicationClientConfig const &config)
auto CreateReplicationClient(replication::ReplicationClientConfig const &config)
-> std::unique_ptr<ReplicationClient> override;
auto CreateReplicationServer(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config)
auto CreateReplicationServer(const replication::ReplicationServerConfig &config)
-> std::unique_ptr<ReplicationServer> override;
private:

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -15,8 +15,15 @@
#include <optional>
#include <string>
#include "storage/v2/replication/enums.hpp"
namespace memgraph::storage::replication {
struct ReplicationClientConfig {
std::string name;
ReplicationMode mode;
std::string ip_address;
uint16_t port;
// The default delay between main checking/pinging replicas is 1s because
// that seems like a reasonable timeframe in which main should notice a
// replica is down.
@ -33,6 +40,8 @@ struct ReplicationClientConfig {
};
struct ReplicationServerConfig {
std::string ip_address;
uint16_t port;
struct SSL {
std::string key_file;
std::string cert_file;

View File

@ -23,8 +23,8 @@ using OOMExceptionEnabler = utils::MemoryTracker::OutOfMemoryExceptionEnabler;
namespace {
std::string RegisterReplicaErrorToString(ReplicationState::RegisterReplicaError error) {
using enum ReplicationState::RegisterReplicaError;
std::string RegisterReplicaErrorToString(RegisterReplicaError error) {
using enum RegisterReplicaError;
switch (error) {
case NAME_EXISTS:
return "NAME_EXISTS";
@ -147,45 +147,36 @@ bool storage::ReplicationState::FinalizeTransaction(uint64_t timestamp) {
return finalized_on_all_replicas;
}
utils::BasicResult<ReplicationState::RegisterReplicaError> ReplicationState::RegisterReplica(
std::string name, io::network::Endpoint endpoint, const replication::ReplicationMode replication_mode,
utils::BasicResult<RegisterReplicaError> ReplicationState::RegisterReplica(
const replication::RegistrationMode registration_mode, const replication::ReplicationClientConfig &config,
Storage *storage) {
MG_ASSERT(GetRole() == replication::ReplicationRole::MAIN, "Only main instance can register a replica!");
const bool name_exists = replication_clients_.WithLock([&](auto &clients) {
return std::any_of(clients.begin(), clients.end(), [&name](const auto &client) { return client->Name() == name; });
});
auto name_check = [&config](auto &clients) {
auto name_matches = [&name = config.name](const auto &client) { return client->Name() == name; };
return std::any_of(clients.begin(), clients.end(), name_matches);
};
if (name_exists) {
auto desired_endpoint = io::network::Endpoint{config.ip_address, config.port};
auto endpoint_check = [&](auto &clients) {
auto endpoint_matches = [&](const auto &client) { return client->Endpoint() == desired_endpoint; };
return std::any_of(clients.begin(), clients.end(), endpoint_matches);
};
auto task = [&](auto &clients) -> utils::BasicResult<RegisterReplicaError> {
if (name_check(clients)) {
return RegisterReplicaError::NAME_EXISTS;
}
const auto end_point_exists = replication_clients_.WithLock([&endpoint](auto &clients) {
return std::any_of(clients.begin(), clients.end(),
[&endpoint](const auto &client) { return client->Endpoint() == endpoint; });
});
if (end_point_exists) {
if (endpoint_check(clients)) {
return RegisterReplicaError::END_POINT_EXISTS;
}
if (ShouldStoreAndRestoreReplicationState()) {
auto data = replication::ReplicationStatusToJSON(
replication::ReplicationStatus{.name = name,
.ip_address = endpoint.address,
.port = endpoint.port,
.sync_mode = replication_mode,
.replica_check_frequency = config.replica_check_frequency,
.ssl = config.ssl,
.role = replication::ReplicationRole::REPLICA});
if (!durability_->Put(name, data.dump())) {
spdlog::error("Error when saving replica {} in settings.", name);
if (!TryPersistReplicaClient(config)) {
return RegisterReplicaError::COULD_NOT_BE_PERSISTED;
}
}
auto client = storage->CreateReplicationClient(std::move(name), std::move(endpoint), replication_mode, config);
auto client = storage->CreateReplicationClient(config);
client->Start();
if (client->State() == replication::ReplicaState::INVALID) {
@ -196,35 +187,35 @@ utils::BasicResult<ReplicationState::RegisterReplicaError> ReplicationState::Reg
spdlog::warn("Connection failed when registering replica {}. Replica will still be registered.", client->Name());
}
return replication_clients_.WithLock(
[&](auto &clients) -> utils::BasicResult<ReplicationState::RegisterReplicaError> {
// Another thread could have added a client with same name while
// we were connecting to this client.
if (std::any_of(clients.begin(), clients.end(),
[&](const auto &other_client) { return client->Name() == other_client->Name(); })) {
return RegisterReplicaError::NAME_EXISTS;
}
if (std::any_of(clients.begin(), clients.end(), [&client](const auto &other_client) {
return client->Endpoint() == other_client->Endpoint();
})) {
return RegisterReplicaError::END_POINT_EXISTS;
}
clients.push_back(std::move(client));
return {};
});
};
return replication_clients_.WithLock(task);
}
bool ReplicationState::SetReplicaRole(io::network::Endpoint endpoint,
const replication::ReplicationServerConfig &config, Storage *storage) {
/// Writes the registration data of a replica client to the durability store.
///
/// @param config client configuration whose name, address, port, sync mode,
///               check frequency and SSL settings are serialized
/// @return true when there is nothing to store (no durability store is set)
///         or when the store accepted the entry; false, after logging an
///         error, when the store rejected it
bool ReplicationState::TryPersistReplicaClient(const replication::ReplicationClientConfig &config) {
  if (ShouldStoreAndRestoreReplicationState()) {
    // The entry is keyed by the replica name and always records the REPLICA role.
    const auto serialized =
        replication::ReplicationStatusToJSON(
            replication::ReplicationStatus{.name = config.name,
                                           .ip_address = config.ip_address,
                                           .port = config.port,
                                           .sync_mode = config.mode,
                                           .replica_check_frequency = config.replica_check_frequency,
                                           .ssl = config.ssl,
                                           .role = replication::ReplicationRole::REPLICA})
            .dump();
    if (!durability_->Put(config.name, serialized)) {
      spdlog::error("Error when saving replica {} in settings.", config.name);
      return false;
    }
  }
  return true;
}
bool ReplicationState::SetReplicaRole(const replication::ReplicationServerConfig &config, Storage *storage) {
// We don't want to restart the server if we're already a REPLICA
if (GetRole() == replication::ReplicationRole::REPLICA) {
return false;
}
auto port = endpoint.port; // assigning because we will move the endpoint
replication_server_ = storage->CreateReplicationServer(std::move(endpoint), config);
replication_server_ = storage->CreateReplicationServer(config);
bool res = replication_server_->Start();
if (!res) {
spdlog::error("Unable to start the replication server.");
@ -235,8 +226,8 @@ bool ReplicationState::SetReplicaRole(io::network::Endpoint endpoint,
// Only thing that matters here is the role saved as REPLICA and the listening port
auto data = replication::ReplicationStatusToJSON(
replication::ReplicationStatus{.name = replication::kReservedReplicationRoleName,
.ip_address = "",
.port = port,
.ip_address = config.ip_address,
.port = config.port,
.sync_mode = replication::ReplicationMode::SYNC,
.replica_check_frequency = std::chrono::seconds(0),
.ssl = std::nullopt,
@ -318,8 +309,10 @@ void ReplicationState::RestoreReplicationRole(Storage *storage) {
}
if (GetRole() == replication::ReplicationRole::REPLICA) {
io::network::Endpoint endpoint(replication::kDefaultReplicationServerIp, port);
replication_server_ = storage->CreateReplicationServer(std::move(endpoint), {});
replication_server_ = storage->CreateReplicationServer(replication::ReplicationServerConfig{
.ip_address = replication::kDefaultReplicationServerIp,
.port = port,
});
bool res = replication_server_->Start();
if (!res) {
LOG_FATAL("Unable to start the replication server.");
@ -352,10 +345,12 @@ void ReplicationState::RestoreReplicas(Storage *storage) {
continue;
}
auto ret =
RegisterReplica(std::move(replica_status.name), {std::move(replica_status.ip_address), replica_status.port},
replica_status.sync_mode, replication::RegistrationMode::CAN_BE_INVALID,
{
auto ret = RegisterReplica(replication::RegistrationMode::CAN_BE_INVALID,
replication::ReplicationClientConfig{
.name = replica_status.name,
.mode = replica_status.sync_mode,
.ip_address = replica_status.ip_address,
.port = replica_status.port,
.replica_check_frequency = replica_status.replica_check_frequency,
.ssl = replica_status.ssl,
},

View File

@ -32,14 +32,9 @@ class Storage;
class ReplicationServer;
class ReplicationClient;
struct ReplicationState {
enum class RegisterReplicaError : uint8_t {
NAME_EXISTS,
END_POINT_EXISTS,
CONNECTION_FAILED,
COULD_NOT_BE_PERSISTED
};
/// Error codes reported by ReplicationState::RegisterReplica.
/// NAME_EXISTS: a client with the requested name is already registered.
/// END_POINT_EXISTS: a client with the requested endpoint is already registered.
/// COULD_NOT_BE_PERSISTED: saving the replica to the durability store failed.
/// CONNECTION_FAILED: presumably the replica could not be reached — the code
/// returning it is not visible here; confirm against RegisterReplica.
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED };
struct ReplicationState {
// TODO: This mirrors the logic in InMemoryConstructor; make it independent
ReplicationState(bool restore, std::filesystem::path durability_dir);
@ -50,7 +45,7 @@ struct ReplicationState {
bool SetMainReplicationRole(Storage *storage); // Set the instance to MAIN
// TODO: ReplicationServer/Client uses Storage* for RPC callbacks
bool SetReplicaRole(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config,
bool SetReplicaRole(const replication::ReplicationServerConfig &config,
Storage *storage); // Sets the instance to REPLICA
// Generic restoration
void RestoreReplicationRole(Storage *storage);
@ -64,9 +59,7 @@ struct ReplicationState {
bool FinalizeTransaction(uint64_t timestamp);
// MAIN connecting to replicas
utils::BasicResult<RegisterReplicaError> RegisterReplica(std::string name, io::network::Endpoint endpoint,
const replication::ReplicationMode replication_mode,
const replication::RegistrationMode registration_mode,
utils::BasicResult<RegisterReplicaError> RegisterReplica(const replication::RegistrationMode registration_mode,
const replication::ReplicationClientConfig &config,
Storage *storage);
bool UnregisterReplica(std::string_view name);
@ -97,8 +90,8 @@ struct ReplicationState {
void AppendEpoch(std::string new_epoch);
private:
// Saves the replica client described by `config` to the durability store.
// Returns true on success or when no durability store is set, false otherwise.
bool TryPersistReplicaClient(const replication::ReplicationClientConfig &config);
// True when a durability store is present, i.e. durability_ is not null.
bool ShouldStoreAndRestoreReplicationState() const { return nullptr != durability_; }
// Stores `role` into replication_role_ through its store() call
// (presumably a std::atomic — confirm against the member declaration).
void SetRole(replication::ReplicationRole role) { return replication_role_.store(role); }
// NOTE: Server is not in MAIN; it is in REPLICA

View File

@ -30,14 +30,12 @@ static auto CreateClientContext(const replication::ReplicationClientConfig &conf
: communication::ClientContext{};
}
ReplicationClient::ReplicationClient(Storage *storage, std::string name, memgraph::io::network::Endpoint endpoint,
replication::ReplicationMode mode,
replication::ReplicationClientConfig const &config)
: name_{std::move(name)},
ReplicationClient::ReplicationClient(Storage *storage, replication::ReplicationClientConfig const &config)
: name_{config.name},
rpc_context_{CreateClientContext(config)},
rpc_client_{std::move(endpoint), &rpc_context_},
rpc_client_{io::network::Endpoint(config.ip_address, config.port), &rpc_context_},
replica_check_frequency_{config.replica_check_frequency},
mode_{mode},
mode_{config.mode},
storage_{storage} {}
ReplicationClient::~ReplicationClient() {

View File

@ -66,8 +66,7 @@ class ReplicationClient {
friend class ReplicaStream;
public:
ReplicationClient(Storage *storage, std::string name, memgraph::io::network::Endpoint endpoint,
replication::ReplicationMode mode, const replication::ReplicationClientConfig &config);
ReplicationClient(Storage *storage, replication::ReplicationClientConfig const &config);
ReplicationClient(ReplicationClient const &) = delete;
ReplicationClient &operator=(ReplicationClient const &) = delete;

View File

@ -30,9 +30,10 @@ auto CreateServerContext(const replication::ReplicationServerConfig &config) ->
constexpr auto kReplictionServerThreads = 1;
} // namespace
ReplicationServer::ReplicationServer(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config)
ReplicationServer::ReplicationServer(const replication::ReplicationServerConfig &config)
: rpc_server_context_{CreateServerContext(config)},
rpc_server_{std::move(endpoint), &rpc_server_context_, kReplictionServerThreads} {
rpc_server_{io::network::Endpoint{config.ip_address, config.port}, &rpc_server_context_,
kReplictionServerThreads} {
rpc_server_.Register<replication::FrequentHeartbeatRpc>([](auto *req_reader, auto *res_builder) {
spdlog::debug("Received FrequentHeartbeatRpc");
FrequentHeartbeatHandler(req_reader, res_builder);

View File

@ -20,7 +20,7 @@ namespace memgraph::storage {
class ReplicationServer {
public:
explicit ReplicationServer(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config);
explicit ReplicationServer(const replication::ReplicationServerConfig &config);
ReplicationServer(const ReplicationServer &) = delete;
ReplicationServer(ReplicationServer &&) = delete;
ReplicationServer &operator=(const ReplicationServer &) = delete;

View File

@ -315,29 +315,23 @@ class Storage {
virtual void EstablishNewEpoch() = 0;
virtual auto CreateReplicationClient(std::string name, io::network::Endpoint endpoint,
replication::ReplicationMode mode,
replication::ReplicationClientConfig const &config)
virtual auto CreateReplicationClient(replication::ReplicationClientConfig const &config)
-> std::unique_ptr<ReplicationClient> = 0;
virtual auto CreateReplicationServer(io::network::Endpoint endpoint,
replication::ReplicationServerConfig const &config)
virtual auto CreateReplicationServer(const replication::ReplicationServerConfig &config)
-> std::unique_ptr<ReplicationServer> = 0;
/// REPLICATION
bool SetReplicaRole(io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config) {
return replication_state_.SetReplicaRole(std::move(endpoint), config, this);
/// Sets this instance to the REPLICA role by forwarding to the replication
/// state, passing this storage along.
/// @param config replication server configuration (carries the IP address and port)
/// @return the result of ReplicationState::SetReplicaRole; false when the
///         instance is already a REPLICA or the replication server fails to start
bool SetReplicaRole(const replication::ReplicationServerConfig &config) {
return replication_state_.SetReplicaRole(config, this);
}
/// Sets this instance to the MAIN role by forwarding to the replication state,
/// passing this storage along.
/// @return the result of ReplicationState::SetMainReplicationRole
bool SetMainReplicationRole() { return replication_state_.SetMainReplicationRole(this); }
/// @pre The instance should have a MAIN role
/// @pre Timeout can only be set for SYNC replication
auto RegisterReplica(std::string name, io::network::Endpoint endpoint,
const replication::ReplicationMode replication_mode,
const replication::RegistrationMode registration_mode,
auto RegisterReplica(const replication::RegistrationMode registration_mode,
const replication::ReplicationClientConfig &config) {
return replication_state_.RegisterReplica(std::move(name), std::move(endpoint), replication_mode, registration_mode,
config, this);
return replication_state_.RegisterReplica(registration_mode, config, this);
}
/// Forwards the removal of the replica named `name` to the replication state.
/// @pre The instance should have a MAIN role
/// @param name name of the replica to unregister
/// @return the result of ReplicationState::UnregisterReplica
bool UnregisterReplica(const std::string &name) { return replication_state_.UnregisterReplica(name); }

File diff suppressed because it is too large Load Diff