From 8f9e044fcdf2b74e34806414d6aca8177802e885 Mon Sep 17 00:00:00 2001 From: Andi Skrgat Date: Fri, 9 Feb 2024 07:53:02 +0100 Subject: [PATCH] Add unreachable replica state --- src/query/frontend/ast/ast.hpp | 2 +- src/query/interpreter.cpp | 6 +++ .../replication_handler.hpp | 53 +++++++++---------- src/storage/v2/replication/enums.hpp | 4 +- .../v2/replication/replication_client.cpp | 6 +++ 5 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index 6fe6b8c9e..ff798fbd1 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -3034,7 +3034,7 @@ class ReplicationQuery : public memgraph::query::Query { enum class SyncMode { SYNC, ASYNC }; - enum class ReplicaState { READY, REPLICATING, RECOVERY, MAYBE_BEHIND }; + enum class ReplicaState { READY, REPLICATING, RECOVERY, MAYBE_BEHIND, UNREACHABLE }; ReplicationQuery() = default; diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index e9c3ec3f9..cd47e4525 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -437,6 +437,9 @@ class ReplQueryHandler { case storage::replication::ReplicaState::MAYBE_BEHIND: replica.state = ReplicationQuery::ReplicaState::MAYBE_BEHIND; break; + case storage::replication::ReplicaState::UNREACHABLE: + replica.state = ReplicationQuery::ReplicaState::UNREACHABLE; + break; } return replica; @@ -1082,6 +1085,9 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & case ReplicationQuery::ReplicaState::MAYBE_BEHIND: typed_replica.emplace_back("invalid"); break; + case ReplicationQuery::ReplicaState::UNREACHABLE: + typed_replica.emplace_back("unreachable"); + break; } typed_replicas.emplace_back(std::move(typed_replica)); diff --git a/src/replication_handler/include/replication_handler/replication_handler.hpp b/src/replication_handler/include/replication_handler/replication_handler.hpp index 663b30f54..c456d2880 100644 --- 
a/src/replication_handler/include/replication_handler/replication_handler.hpp +++ b/src/replication_handler/include/replication_handler/replication_handler.hpp @@ -133,39 +133,38 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { auto GetReplState() -> memgraph::replication::ReplicationState &; private: - template - auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config, bool send_swap_uuid) - -> memgraph::utils::BasicResult { + template + auto RegisterReplica_(const replication::ReplicationClientConfig &config, bool send_swap_uuid) + -> utils::BasicResult { MG_ASSERT(repl_state_.IsMain(), "Only main instance can register a replica!"); auto maybe_client = repl_state_.RegisterReplica(config); if (maybe_client.HasError()) { switch (maybe_client.GetError()) { - case memgraph::replication::RegisterReplicaError::NOT_MAIN: + case replication::RegisterReplicaError::NOT_MAIN: MG_ASSERT(false, "Only main instance can register a replica!"); return {}; - case memgraph::replication::RegisterReplicaError::NAME_EXISTS: - return memgraph::query::RegisterReplicaError::NAME_EXISTS; - case memgraph::replication::RegisterReplicaError::ENDPOINT_EXISTS: - return memgraph::query::RegisterReplicaError::ENDPOINT_EXISTS; - case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: - return memgraph::query::RegisterReplicaError::COULD_NOT_BE_PERSISTED; - case memgraph::replication::RegisterReplicaError::SUCCESS: + case replication::RegisterReplicaError::NAME_EXISTS: + return query::RegisterReplicaError::NAME_EXISTS; + case replication::RegisterReplicaError::ENDPOINT_EXISTS: + return query::RegisterReplicaError::ENDPOINT_EXISTS; + case replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: + return query::RegisterReplicaError::COULD_NOT_BE_PERSISTED; + case replication::RegisterReplicaError::SUCCESS: break; } } - if (!memgraph::dbms::allow_mt_repl && dbms_handler_.All().size() > 1) { + if (!dbms::allow_mt_repl && 
dbms_handler_.All().size() > 1) { spdlog::warn("Multi-tenant replication is currently not supported!"); } - const auto main_uuid = - std::get(dbms_handler_.ReplicationState().ReplicationData()).uuid_; - if (send_swap_uuid) { - if (!memgraph::replication_coordination_glue::SendSwapMainUUIDRpc(maybe_client.GetValue()->rpc_client_, - main_uuid)) { - return memgraph::query::RegisterReplicaError::ERROR_ACCEPTING_MAIN; - } + auto const main_uuid = + std::get(dbms_handler_.ReplicationState().ReplicationData()).uuid_; + + if (send_swap_uuid && + !replication_coordination_glue::SendSwapMainUUIDRpc(maybe_client.GetValue()->rpc_client_, main_uuid)) { + return query::RegisterReplicaError::ERROR_ACCEPTING_MAIN; } #ifdef MG_ENTERPRISE @@ -193,21 +192,21 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { [storage, &instance_client_ptr, db_acc = std::move(db_acc), main_uuid](auto &storage_clients) mutable { // NOLINT auto client = std::make_unique(*instance_client_ptr, main_uuid); - // All good, start replica client client->Start(storage, std::move(db_acc)); - // After start the storage <-> replica state should be READY or RECOVERING (if correctly started) - // MAYBE_BEHIND isn't a statement of the current state, this is the default value - // Failed to start due an error like branching of MAIN and REPLICA - const bool success = client->State() != storage::replication::ReplicaState::MAYBE_BEHIND; - if (HandleFailure || success) { + // After start the storage <-> replica state shouldn't be MAYBE_BEHIND. + // When part of coordinator cluster we allow replica to be UNREACHABLE. 
+ auto state = client->State(); + bool const success = + (state != storage::replication::ReplicaState::MAYBE_BEHIND) && + (state != storage::replication::ReplicaState::UNREACHABLE || AllowReplicaToBeUnreachable); + if (success) { storage_clients.push_back(std::move(client)); } return success; }); }); - // NOTE Currently if any databases fails, we revert back - if (!HandleFailure && !all_clients_good) { + if (!all_clients_good) { spdlog::error("Failed to register all databases on the REPLICA \"{}\"", config.name); UnregisterReplica(config.name); return memgraph::query::RegisterReplicaError::CONNECTION_FAILED; diff --git a/src/storage/v2/replication/enums.hpp b/src/storage/v2/replication/enums.hpp index be16ca192..facc122c7 100644 --- a/src/storage/v2/replication/enums.hpp +++ b/src/storage/v2/replication/enums.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -14,6 +14,6 @@ namespace memgraph::storage::replication { -enum class ReplicaState : std::uint8_t { READY, REPLICATING, RECOVERY, MAYBE_BEHIND }; +enum class ReplicaState : std::uint8_t { READY, REPLICATING, RECOVERY, MAYBE_BEHIND, UNREACHABLE }; } // namespace memgraph::storage::replication diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 0c5ef8125..e0e0dfe8c 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -46,6 +46,9 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce std::string{storage->uuid()}); state = memgraph::replication::ReplicationClient::State::BEHIND; }); + + replica_state_.WithLock([](auto &state) { state = replication::ReplicaState::UNREACHABLE; }); + return; } #endif @@ -149,6 +152,9 @@ void
ReplicationStorageClient::StartTransactionReplication(const uint64_t curren auto locked_state = replica_state_.Lock(); switch (*locked_state) { using enum replication::ReplicaState; + case UNREACHABLE: + spdlog::debug("Replica {} is unreachable", client_.name_); + return; case RECOVERY: spdlog::debug("Replica {} is behind MAIN instance", client_.name_); return;