From 78a88737f88eec19b0ef2f7f3032c92cacaf9545 Mon Sep 17 00:00:00 2001 From: Andi Date: Mon, 29 Jan 2024 15:34:00 +0100 Subject: [PATCH] HA: Add automatic failover (#1646) Co-authored-by: antoniofilipovic --- .github/workflows/diff.yaml | 2 +- src/coordination/CMakeLists.txt | 8 +- src/coordination/coordinator_client.cpp | 96 ++++---- src/coordination/coordinator_data.cpp | 220 ++++++++++++++++++ src/coordination/coordinator_rpc.cpp | 38 +++ src/coordination/coordinator_state.cpp | 192 +++------------ .../coordination/coordinator_client.hpp | 52 +++-- .../coordinator_cluster_config.hpp | 22 ++ .../coordination/coordinator_config.hpp | 21 +- .../include/coordination/coordinator_data.hpp | 49 ++++ .../coordination/coordinator_instance.hpp | 77 ++++++ ...fo.hpp => coordinator_instance_status.hpp} | 15 +- .../include/coordination/coordinator_rpc.hpp | 38 +++ .../coordination/coordinator_state.hpp | 61 +---- .../include/coordination/failover_status.hpp | 21 ++ ...gister_main_replica_coordinator_status.hpp | 37 +++ src/dbms/coordinator_handler.cpp | 79 +------ src/dbms/coordinator_handler.hpp | 38 +-- src/dbms/coordinator_handlers.cpp | 93 ++++++-- src/dbms/coordinator_handlers.hpp | 1 + src/dbms/database.hpp | 2 +- src/dbms/inmemory/replication_handlers.cpp | 2 +- src/dbms/replication_handler.cpp | 24 +- src/dbms/replication_handler.hpp | 6 +- src/dbms/utils.hpp | 43 +++- src/mg_import_csv.cpp | 2 +- src/query/frontend/ast/ast.hpp | 14 +- .../frontend/ast/cypher_main_visitor.cpp | 73 ++---- .../frontend/ast/cypher_main_visitor.hpp | 15 +- .../opencypher/grammar/CypherLexer.g4 | 2 + .../opencypher/grammar/MemgraphCypher.g4 | 14 +- .../opencypher/grammar/MemgraphCypherLexer.g4 | 1 + .../frontend/stripped_lexer_constants.hpp | 5 +- src/query/interpreter.cpp | 214 ++++++----------- src/query/interpreter.hpp | 43 ++-- src/query/metadata.cpp | 2 - src/query/metadata.hpp | 1 - src/replication/CMakeLists.txt | 1 - .../include/replication/config.hpp | 2 +- 
src/replication/include/replication/state.hpp | 17 +- .../include/replication/status.hpp | 2 +- src/replication/state.cpp | 3 +- src/replication/status.cpp | 12 +- .../CMakeLists.txt | 1 + .../role.hpp | 4 +- src/storage/v2/disk/storage.cpp | 15 +- src/storage/v2/disk/storage.hpp | 9 +- src/storage/v2/inmemory/storage.cpp | 26 ++- src/storage/v2/inmemory/storage.hpp | 14 +- .../v2/replication/replication_client.cpp | 1 + src/storage/v2/storage.cpp | 6 +- src/storage/v2/storage.hpp | 20 +- src/utils/scheduler.hpp | 26 ++- src/utils/typeinfo.hpp | 2 + tests/benchmark/query/eval.cpp | 2 +- tests/benchmark/query/execution.cpp | 2 +- tests/benchmark/query/planner.cpp | 2 +- tests/benchmark/storage_v2_gc.cpp | 2 +- tests/benchmark/storage_v2_gc2.cpp | 2 +- tests/concurrent/storage_indices.cpp | 2 +- .../concurrent/storage_unique_constraints.cpp | 2 +- .../CMakeLists.txt | 4 +- ...ated_failover.py => automatic_failover.py} | 209 +++++++++-------- .../coordinator.py | 27 +-- .../manual_setting_replicas.py | 57 +++++ .../uninitialized_cluster.py | 26 --- .../workloads.yaml | 68 ++---- tests/manual/query_planner.cpp | 2 +- tests/property_based/random_graph.cpp | 2 +- tests/unit/auth_checker.cpp | 2 +- tests/unit/bfs_fine_grained.cpp | 2 +- tests/unit/bfs_single_node.cpp | 2 +- tests/unit/bolt_encoder.cpp | 2 +- tests/unit/clearing_old_disk_data.cpp | 2 +- tests/unit/cpp_api.cpp | 3 +- tests/unit/cypher_main_visitor.cpp | 71 ------ tests/unit/database_get_info.cpp | 4 +- tests/unit/plan_pretty_print.cpp | 2 +- tests/unit/query_cost_estimator.cpp | 2 +- tests/unit/query_dump.cpp | 4 +- tests/unit/query_expression_evaluator.cpp | 2 +- tests/unit/query_hint_provider.cpp | 2 +- .../unit/query_plan_accumulate_aggregate.cpp | 2 +- tests/unit/query_plan_bag_semantics.cpp | 2 +- .../query_plan_create_set_remove_delete.cpp | 2 +- tests/unit/query_plan_match_filter_return.cpp | 2 +- tests/unit/query_plan_operator_to_string.cpp | 2 +- .../unit/query_plan_read_write_typecheck.cpp | 2 +- 
...query_plan_v2_create_set_remove_delete.cpp | 2 +- tests/unit/query_pretty_print.cpp | 2 +- tests/unit/query_procedure_mgp_type.cpp | 2 +- tests/unit/query_procedure_py_module.cpp | 2 +- tests/unit/query_procedures_mgp_graph.cpp | 2 +- tests/unit/query_semantic.cpp | 2 +- tests/unit/query_trigger.cpp | 2 +- tests/unit/query_variable_start_planner.cpp | 2 +- tests/unit/storage_rocks.cpp | 2 +- tests/unit/storage_v2.cpp | 2 +- tests/unit/storage_v2_constraints.cpp | 2 +- tests/unit/storage_v2_durability_inmemory.cpp | 2 +- tests/unit/storage_v2_edge_inmemory.cpp | 2 +- tests/unit/storage_v2_edge_ondisk.cpp | 2 +- tests/unit/storage_v2_gc.cpp | 2 +- tests/unit/storage_v2_get_info.cpp | 2 +- tests/unit/storage_v2_indices.cpp | 2 +- tests/unit/storage_v2_isolation_level.cpp | 2 +- tests/unit/storage_v2_replication.cpp | 4 +- tests/unit/storage_v2_show_storage_info.cpp | 2 +- tests/unit/storage_v2_storage_mode.cpp | 4 +- tests/unit/typed_value.cpp | 2 +- 110 files changed, 1262 insertions(+), 1091 deletions(-) create mode 100644 src/coordination/coordinator_data.cpp create mode 100644 src/coordination/include/coordination/coordinator_cluster_config.hpp create mode 100644 src/coordination/include/coordination/coordinator_data.hpp create mode 100644 src/coordination/include/coordination/coordinator_instance.hpp rename src/coordination/include/coordination/{coordinator_entity_info.hpp => coordinator_instance_status.hpp} (78%) create mode 100644 src/coordination/include/coordination/failover_status.hpp create mode 100644 src/coordination/include/coordination/register_main_replica_coordinator_status.hpp rename src/{replication/include/replication => replication_coordination_glue}/role.hpp (87%) rename tests/e2e/high_availability_experimental/{client_initiated_failover.py => automatic_failover.py} (55%) create mode 100644 tests/e2e/high_availability_experimental/manual_setting_replicas.py delete mode 100644 tests/e2e/high_availability_experimental/uninitialized_cluster.py 
diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index f6c7c1bef..4bdfc1bf2 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -96,7 +96,7 @@ jobs: - name: Python code analysis run: | - CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... --name-only) + CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... --name-only --diff-filter=d) for file in ${CHANGED_FILES}; do echo ${file} if [[ ${file} == *.py ]]; then diff --git a/src/coordination/CMakeLists.txt b/src/coordination/CMakeLists.txt index 20510e681..e8c4b3735 100644 --- a/src/coordination/CMakeLists.txt +++ b/src/coordination/CMakeLists.txt @@ -7,19 +7,23 @@ target_sources(mg-coordination include/coordination/coordinator_rpc.hpp include/coordination/coordinator_server.hpp include/coordination/coordinator_config.hpp - include/coordination/coordinator_entity_info.hpp include/coordination/coordinator_exceptions.hpp + include/coordination/coordinator_instance.hpp include/coordination/coordinator_slk.hpp + include/coordination/coordinator_data.hpp include/coordination/constants.hpp + include/coordination/failover_status.hpp + include/coordination/coordinator_cluster_config.hpp PRIVATE coordinator_client.cpp coordinator_state.cpp coordinator_rpc.cpp coordinator_server.cpp + coordinator_data.cpp ) target_include_directories(mg-coordination PUBLIC include) target_link_libraries(mg-coordination - PUBLIC mg::utils mg::rpc mg::slk mg::io mg::repl_coord_glue + PUBLIC mg::utils mg::rpc mg::slk mg::io mg::repl_coord_glue lib::rangev3 ) diff --git a/src/coordination/coordinator_client.cpp b/src/coordination/coordinator_client.cpp index e80368e7c..93ef3e3af 100644 --- a/src/coordination/coordinator_client.cpp +++ b/src/coordination/coordinator_client.cpp @@ -27,82 +27,78 @@ auto CreateClientContext(const memgraph::coordination::CoordinatorClientConfig & } } // namespace -CoordinatorClient::CoordinatorClient(const CoordinatorClientConfig &config) 
+CoordinatorClient::CoordinatorClient(CoordinatorData *coord_data, CoordinatorClientConfig config, + HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) : rpc_context_{CreateClientContext(config)}, rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), &rpc_context_}, - config_{config} {} + config_{std::move(config)}, + coord_data_{coord_data}, + succ_cb_{std::move(succ_cb)}, + fail_cb_{std::move(fail_cb)} {} -CoordinatorClient::~CoordinatorClient() { - auto exit_job = utils::OnScopeExit([&] { - StopFrequentCheck(); - thread_pool_.Shutdown(); - }); - const auto endpoint = rpc_client_.Endpoint(); - // Logging can throw - spdlog::trace("Closing replication client on {}:{}", endpoint.address, endpoint.port); -} +auto CoordinatorClient::InstanceName() const -> std::string { return config_.instance_name; } +auto CoordinatorClient::SocketAddress() const -> std::string { return rpc_client_.Endpoint().SocketAddress(); } void CoordinatorClient::StartFrequentCheck() { MG_ASSERT(config_.health_check_frequency_sec > std::chrono::seconds(0), "Health check frequency must be greater than 0"); - replica_checker_.Run( - "Coord checker", config_.health_check_frequency_sec, - [last_response_time = &last_response_time_, rpc_client = &rpc_client_] { + + instance_checker_.Run( + "Coord checker", config_.health_check_frequency_sec, [this, instance_name = config_.instance_name] { try { - { - auto stream{rpc_client->Stream()}; - stream.AwaitResponse(); - last_response_time->store(std::chrono::system_clock::now(), std::memory_order_acq_rel); - } + spdlog::trace("Sending frequent heartbeat to machine {} on {}", instance_name, + rpc_client_.Endpoint().SocketAddress()); + auto stream{rpc_client_.Stream()}; + stream.AwaitResponse(); + succ_cb_(coord_data_, instance_name); } catch (const rpc::RpcFailedException &) { - // Nothing to do...wait for a reconnect + fail_cb_(coord_data_, instance_name); } }); } -void 
CoordinatorClient::StopFrequentCheck() { replica_checker_.Stop(); } +void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); } -bool CoordinatorClient::DoHealthCheck() const { - auto current_time = std::chrono::system_clock::now(); - auto duration = std::chrono::duration_cast(current_time - - last_response_time_.load(std::memory_order_acquire)); - return duration.count() <= alive_response_time_difference_sec_; -} +void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); } +void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); } -auto CoordinatorClient::InstanceName() const -> std::string_view { return config_.instance_name; } -auto CoordinatorClient::Endpoint() const -> io::network::Endpoint const & { return rpc_client_.Endpoint(); } -auto CoordinatorClient::Config() const -> CoordinatorClientConfig const & { return config_; } +auto CoordinatorClient::SetSuccCallback(HealthCheckCallback succ_cb) -> void { succ_cb_ = std::move(succ_cb); } +auto CoordinatorClient::SetFailCallback(HealthCheckCallback fail_cb) -> void { fail_cb_ = std::move(fail_cb); } -auto CoordinatorClient::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo const & { - MG_ASSERT(config_.replication_client_info.has_value(), "No ReplicationClientInfo for MAIN instance!"); - return *config_.replication_client_info; -} - -////// AF design choice -auto CoordinatorClient::ReplicationClientInfo() -> std::optional & { - MG_ASSERT(config_.replication_client_info.has_value(), "No ReplicationClientInfo for MAIN instance!"); +auto CoordinatorClient::ReplicationClientInfo() const -> const CoordinatorClientConfig::ReplicationClientInfo & { return config_.replication_client_info; } -void CoordinatorClient::UpdateTimeCheck(const std::chrono::system_clock::time_point &last_checked_time) { - last_response_time_.store(last_checked_time, std::memory_order_acq_rel); +auto CoordinatorClient::ResetReplicationClientInfo() -> void { + // 
TODO (antoniofilipovic) Sync with Andi on this one + // config_.replication_client_info.reset(); } -auto CoordinatorClient::GetLastTimeResponse() -> std::chrono::system_clock::time_point { return last_response_time_; } - auto CoordinatorClient::SendPromoteReplicaToMainRpc( std::vector replication_clients_info) const -> bool { try { - { - auto stream{rpc_client_.Stream(std::move(replication_clients_info))}; - if (!stream.AwaitResponse().success) { - spdlog::error("Failed to perform failover!"); - return false; - } - spdlog::info("Sent failover RPC from coordinator to new main!"); - return true; + auto stream{rpc_client_.Stream(std::move(replication_clients_info))}; + if (!stream.AwaitResponse().success) { + spdlog::error("Failed to receive successful RPC failover response!"); + return false; } + return true; + } catch (const rpc::RpcFailedException &) { + spdlog::error("RPC error occurred while sending failover RPC!"); + } + return false; +} + +auto CoordinatorClient::SendSetToReplicaRpc(CoordinatorClient::ReplClientInfo replication_client_info) const -> bool { + try { + auto stream{rpc_client_.Stream(std::move(replication_client_info))}; + if (!stream.AwaitResponse().success) { + spdlog::error("Failed to set main to replica!"); + return false; + } + spdlog::info("Sent request RPC from coordinator to instance to set it as replica!"); + return true; } catch (const rpc::RpcFailedException &) { spdlog::error("Failed to send failover RPC from coordinator to new main!"); } diff --git a/src/coordination/coordinator_data.cpp b/src/coordination/coordinator_data.cpp new file mode 100644 index 000000000..c236cf753 --- /dev/null +++ b/src/coordination/coordinator_data.cpp @@ -0,0 +1,220 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "coordination/coordinator_instance.hpp" +#include "coordination/register_main_replica_coordinator_status.hpp" +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_data.hpp" + +#include +#include + +namespace memgraph::coordination { + +CoordinatorData::CoordinatorData() { + auto find_instance = [](CoordinatorData *coord_data, std::string_view instance_name) -> CoordinatorInstance & { + auto instance = std::ranges::find_if( + coord_data->registered_instances_, + [instance_name](const CoordinatorInstance &instance) { return instance.InstanceName() == instance_name; }); + + MG_ASSERT(instance != coord_data->registered_instances_.end(), "Instance {} not found during callback!", + instance_name); + return *instance; + }; + + replica_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { + auto lock = std::lock_guard{coord_data->coord_data_lock_}; + spdlog::trace("Instance {} performing replica successful callback", instance_name); + auto &instance = find_instance(coord_data, instance_name); + MG_ASSERT(instance.IsReplica(), "Instance {} is not a replica!", instance_name); + instance.UpdateLastResponseTime(); + }; + + replica_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { + auto lock = std::lock_guard{coord_data->coord_data_lock_}; + spdlog::trace("Instance {} performing replica failure callback", instance_name); + auto &instance = 
find_instance(coord_data, instance_name); + MG_ASSERT(instance.IsReplica(), "Instance {} is not a replica!", instance_name); + instance.UpdateInstanceStatus(); + }; + + main_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { + auto lock = std::lock_guard{coord_data->coord_data_lock_}; + spdlog::trace("Instance {} performing main successful callback", instance_name); + auto &instance = find_instance(coord_data, instance_name); + MG_ASSERT(instance.IsMain(), "Instance {} is not a main!", instance_name); + instance.UpdateLastResponseTime(); + }; + + main_fail_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void { + auto lock = std::lock_guard{coord_data->coord_data_lock_}; + spdlog::trace("Instance {} performing main failure callback", instance_name); + auto &instance = find_instance(coord_data, instance_name); + MG_ASSERT(instance.IsMain(), "Instance {} is not a main!", instance_name); + if (bool main_alive = instance.UpdateInstanceStatus(); !main_alive) { + spdlog::info("Main instance {} is not alive, starting automatic failover", instance_name); + switch (auto failover_status = DoFailover(); failover_status) { + using enum DoFailoverStatus; + case ALL_REPLICAS_DOWN: + spdlog::warn("Failover aborted since all replicas are down!"); + break; + case MAIN_ALIVE: + spdlog::warn("Failover aborted since main is alive!"); + break; + case RPC_FAILED: + spdlog::warn("Failover aborted since promoting replica to main failed!"); + break; + case SUCCESS: + break; + } + } + }; +} + +auto CoordinatorData::DoFailover() -> DoFailoverStatus { + using ReplicationClientInfo = CoordinatorClientConfig::ReplicationClientInfo; + + auto replica_instances = registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica); + + auto chosen_replica_instance = std::ranges::find_if(replica_instances, &CoordinatorInstance::IsAlive); + if (chosen_replica_instance == replica_instances.end()) { + 
return DoFailoverStatus::ALL_REPLICAS_DOWN; + } + + chosen_replica_instance->PrepareForFailover(); + + std::vector repl_clients_info; + repl_clients_info.reserve(std::ranges::distance(replica_instances)); + + auto const not_chosen_replica_instance = [&chosen_replica_instance](const CoordinatorInstance &instance) { + return instance != *chosen_replica_instance; + }; + auto const not_main = [](const CoordinatorInstance &instance) { return !instance.IsMain(); }; + + // TODO (antoniofilipovic): Should we send also data on old MAIN??? + // TODO: (andi) Don't send replicas which aren't alive + for (const auto &unchosen_replica_instance : + replica_instances | ranges::views::filter(not_chosen_replica_instance) | ranges::views::filter(not_main)) { + repl_clients_info.emplace_back(unchosen_replica_instance.client_.ReplicationClientInfo()); + } + + if (!chosen_replica_instance->client_.SendPromoteReplicaToMainRpc(std::move(repl_clients_info))) { + chosen_replica_instance->RestoreAfterFailedFailover(); + return DoFailoverStatus::RPC_FAILED; + } + + auto old_main = std::ranges::find_if(registered_instances_, &CoordinatorInstance::IsMain); + // TODO: (andi) For performing restoration we will have to improve this + old_main->client_.PauseFrequentCheck(); + + chosen_replica_instance->PostFailover(main_succ_cb_, main_fail_cb_); + + return DoFailoverStatus::SUCCESS; +} + +auto CoordinatorData::ShowInstances() const -> std::vector { + std::vector instances_status; + instances_status.reserve(registered_instances_.size()); + + auto const stringify_repl_role = [](const CoordinatorInstance &instance) -> std::string { + if (!instance.IsAlive()) return ""; + if (instance.IsMain()) return "main"; + return "replica"; + }; + + auto const instance_to_status = + [&stringify_repl_role](const CoordinatorInstance &instance) -> CoordinatorInstanceStatus { + return {.instance_name = instance.InstanceName(), + .socket_address = instance.SocketAddress(), + .replication_role = 
stringify_repl_role(instance), + .is_alive = instance.IsAlive()}; + }; + + { + auto lock = std::shared_lock{coord_data_lock_}; + std::ranges::transform(registered_instances_, std::back_inserter(instances_status), instance_to_status); + } + + return instances_status; +} + +auto CoordinatorData::SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus { + auto lock = std::lock_guard{coord_data_lock_}; + + // Find replica we already registered + auto registered_replica = std::find_if( + registered_instances_.begin(), registered_instances_.end(), + [instance_name](const CoordinatorInstance &instance) { return instance.InstanceName() == instance_name; }); + + // if replica not found... + if (registered_replica == registered_instances_.end()) { + spdlog::error("You didn't register instance with given name {}", instance_name); + return SetInstanceToMainCoordinatorStatus::NO_INSTANCE_WITH_NAME; + } + + registered_replica->client_.PauseFrequentCheck(); + + std::vector repl_clients_info; + repl_clients_info.reserve(registered_instances_.size() - 1); + std::ranges::for_each(registered_instances_, + [registered_replica, &repl_clients_info](const CoordinatorInstance &replica) { + if (replica != *registered_replica) { + repl_clients_info.emplace_back(replica.client_.ReplicationClientInfo()); + } + }); + + // PROMOTE REPLICA TO MAIN + // THIS SHOULD FAIL HERE IF IT IS DOWN + if (auto result = registered_replica->client_.SendPromoteReplicaToMainRpc(std::move(repl_clients_info)); !result) { + registered_replica->client_.ResumeFrequentCheck(); + return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN; + } + + registered_replica->client_.SetSuccCallback(main_succ_cb_); + registered_replica->client_.SetFailCallback(main_fail_cb_); + registered_replica->replication_role_ = replication_coordination_glue::ReplicationRole::MAIN; + registered_replica->client_.ResumeFrequentCheck(); + + return SetInstanceToMainCoordinatorStatus::SUCCESS; +} + +auto 
CoordinatorData::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus { + auto lock = std::lock_guard{coord_data_lock_}; + if (std::ranges::any_of(registered_instances_, [&config](const CoordinatorInstance &instance) { + return instance.InstanceName() == config.instance_name; + })) { + return RegisterInstanceCoordinatorStatus::NAME_EXISTS; + } + + if (std::ranges::any_of(registered_instances_, [&config](const CoordinatorInstance &instance) { + spdlog::trace("Comparing {} with {}", instance.SocketAddress(), config.SocketAddress()); + return instance.SocketAddress() == config.SocketAddress(); + })) { + return RegisterInstanceCoordinatorStatus::END_POINT_EXISTS; + } + + CoordinatorClientConfig::ReplicationClientInfo replication_client_info_copy = config.replication_client_info; + + // TODO (antoniofilipovic) create and then push back + auto *instance = ®istered_instances_.emplace_back(this, std::move(config), replica_succ_cb_, replica_fail_cb_, + replication_coordination_glue::ReplicationRole::REPLICA); + if (auto res = instance->client_.SendSetToReplicaRpc(replication_client_info_copy); !res) { + return RegisterInstanceCoordinatorStatus::RPC_FAILED; + } + + instance->client_.StartFrequentCheck(); + + return RegisterInstanceCoordinatorStatus::SUCCESS; +} + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/coordinator_rpc.cpp b/src/coordination/coordinator_rpc.cpp index da0132a38..e8a16f0e2 100644 --- a/src/coordination/coordinator_rpc.cpp +++ b/src/coordination/coordinator_rpc.cpp @@ -36,6 +36,22 @@ void PromoteReplicaToMainRes::Load(PromoteReplicaToMainRes *self, memgraph::slk: memgraph::slk::Load(self, reader); } +void SetMainToReplicaReq::Save(const SetMainToReplicaReq &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} + +void SetMainToReplicaReq::Load(SetMainToReplicaReq *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} + +void 
SetMainToReplicaRes::Save(const SetMainToReplicaRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} + +void SetMainToReplicaRes::Load(SetMainToReplicaRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} + } // namespace coordination constexpr utils::TypeInfo coordination::PromoteReplicaToMainReq::kType{utils::TypeId::COORD_FAILOVER_REQ, @@ -44,6 +60,12 @@ constexpr utils::TypeInfo coordination::PromoteReplicaToMainReq::kType{utils::Ty constexpr utils::TypeInfo coordination::PromoteReplicaToMainRes::kType{utils::TypeId::COORD_FAILOVER_RES, "CoordPromoteReplicaToMainRes", nullptr}; +constexpr utils::TypeInfo coordination::SetMainToReplicaReq::kType{utils::TypeId::COORD_SET_REPL_MAIN_REQ, + "CoordSetReplMainReq", nullptr}; + +constexpr utils::TypeInfo coordination::SetMainToReplicaRes::kType{utils::TypeId::COORD_SET_REPL_MAIN_RES, + "CoordSetReplMainRes", nullptr}; + namespace slk { void Save(const memgraph::coordination::PromoteReplicaToMainRes &self, memgraph::slk::Builder *builder) { @@ -62,6 +84,22 @@ void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk:: memgraph::slk::Load(&self->replication_clients_info, reader); } +void Save(const memgraph::coordination::SetMainToReplicaReq &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.replication_client_info, builder); +} + +void Load(memgraph::coordination::SetMainToReplicaReq *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->replication_client_info, reader); +} + +void Save(const memgraph::coordination::SetMainToReplicaRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.success, builder); +} + +void Load(memgraph::coordination::SetMainToReplicaRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->success, reader); +} + } // namespace slk } // namespace memgraph diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp 
index 145ac727d..60ec458ac 100644 --- a/src/coordination/coordinator_state.cpp +++ b/src/coordination/coordinator_state.cpp @@ -9,207 +9,75 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -#include "coordination/coordinator_state.hpp" -#include -#include "coordination/coordinator_client.hpp" - #ifdef MG_ENTERPRISE +#include "coordination/coordinator_state.hpp" + #include "coordination/coordinator_config.hpp" -#include "coordination/coordinator_entity_info.hpp" +#include "coordination/register_main_replica_coordinator_status.hpp" #include "flags/replication.hpp" #include "spdlog/spdlog.h" #include "utils/logging.hpp" #include "utils/variant_helpers.hpp" -#include -#include -#include +#include namespace memgraph::coordination { -namespace { - -bool CheckName(const std::list &replicas, const CoordinatorClientConfig &config) { - auto name_matches = [&instance_name = config.instance_name](auto const &replica) { - return replica.InstanceName() == instance_name; - }; - return std::any_of(replicas.begin(), replicas.end(), name_matches); -}; - -} // namespace - CoordinatorState::CoordinatorState() { MG_ASSERT(!(FLAGS_coordinator && FLAGS_coordinator_server_port), "Instance cannot be a coordinator and have registered coordinator server."); + spdlog::info("Executing coordinator constructor"); if (FLAGS_coordinator_server_port) { + spdlog::info("Coordinator server port set"); auto const config = CoordinatorServerConfig{ .ip_address = kDefaultReplicationServerIp, .port = static_cast(FLAGS_coordinator_server_port), }; + spdlog::info("Executing coordinator constructor main replica"); data_ = CoordinatorMainReplicaData{.coordinator_server_ = std::make_unique(config)}; } } -auto CoordinatorState::RegisterReplica(const CoordinatorClientConfig &config) - -> utils::BasicResult { - const auto name_endpoint_status = - std::visit(memgraph::utils::Overloaded{[](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) { - return 
RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR; - }, - [&config](const CoordinatorData &coordinator_data) { - if (memgraph::coordination::CheckName( - coordinator_data.registered_replicas_, config)) { - return RegisterMainReplicaCoordinatorStatus::NAME_EXISTS; - } - return RegisterMainReplicaCoordinatorStatus::SUCCESS; - }}, - data_); +auto CoordinatorState::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus { + MG_ASSERT(std::holds_alternative(data_), + "Coordinator cannot register replica since variant holds wrong alternative"); - if (name_endpoint_status != RegisterMainReplicaCoordinatorStatus::SUCCESS) { - return name_endpoint_status; - } - - // Maybe no need to return client if you can start replica client here - return &std::get(data_).registered_replicas_.emplace_back(config); -} - -auto CoordinatorState::RegisterMain(const CoordinatorClientConfig &config) - -> utils::BasicResult { - const auto endpoint_status = std::visit( + return std::visit( memgraph::utils::Overloaded{ [](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) { - return RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR; + return RegisterInstanceCoordinatorStatus::NOT_COORDINATOR; }, - [](const CoordinatorData & /*coordinator_data*/) { return RegisterMainReplicaCoordinatorStatus::SUCCESS; }}, + [config](CoordinatorData &coordinator_data) { return coordinator_data.RegisterInstance(config); }}, data_); - - if (endpoint_status != RegisterMainReplicaCoordinatorStatus::SUCCESS) { - return endpoint_status; - } - - auto ®istered_main = std::get(data_).registered_main_; - registered_main = std::make_unique(config); - return registered_main.get(); } -auto CoordinatorState::ShowReplicas() const -> std::vector { +auto CoordinatorState::SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus { MG_ASSERT(std::holds_alternative(data_), - "Can't call show replicas on data_, as variant holds wrong alternative"); - 
std::vector result; - const auto ®istered_replicas = std::get(data_).registered_replicas_; - result.reserve(registered_replicas.size()); - std::ranges::transform(registered_replicas, std::back_inserter(result), [](const auto &replica) { - return CoordinatorEntityInfo{replica.InstanceName(), replica.Endpoint()}; - }); - return result; + "Coordinator cannot register replica since variant holds wrong alternative"); + + return std::visit( + memgraph::utils::Overloaded{[](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) { + return SetInstanceToMainCoordinatorStatus::NOT_COORDINATOR; + }, + [&instance_name](CoordinatorData &coordinator_data) { + return coordinator_data.SetInstanceToMain(instance_name); + }}, + data_); } -auto CoordinatorState::ShowMain() const -> std::optional { +auto CoordinatorState::ShowInstances() const -> std::vector { MG_ASSERT(std::holds_alternative(data_), - "Can't call show main on data_, as variant holds wrong alternative"); - const auto ®istered_main = std::get(data_).registered_main_; - if (registered_main) { - return CoordinatorEntityInfo{registered_main->InstanceName(), registered_main->Endpoint()}; - } - return std::nullopt; + "Can't call show instances on data_, as variant holds wrong alternative"); + return std::get(data_).ShowInstances(); } -auto CoordinatorState::PingReplicas() const -> std::unordered_map { - MG_ASSERT(std::holds_alternative(data_), - "Can't call ping replicas on data_, as variant holds wrong alternative"); - std::unordered_map result; - const auto ®istered_replicas = std::get(data_).registered_replicas_; - result.reserve(registered_replicas.size()); - for (const CoordinatorClient &replica_client : registered_replicas) { - result.emplace(replica_client.InstanceName(), replica_client.DoHealthCheck()); - } - - return result; -} - -auto CoordinatorState::PingMain() const -> std::optional { - MG_ASSERT(std::holds_alternative(data_), - "Can't call show main on data_, as variant holds wrong 
alternative"); - const auto ®istered_main = std::get(data_).registered_main_; - if (registered_main) { - return CoordinatorEntityHealthInfo{registered_main->InstanceName(), registered_main->DoHealthCheck()}; - } - return std::nullopt; -} - -auto CoordinatorState::DoFailover() -> DoFailoverStatus { - // 1. MAIN is already down, stop sending frequent checks - // 2. find new replica (coordinator) - // 3. make copy replica's client as potential new main client (coordinator) - // 4. send failover RPC to new main (coordinator and new main) - // 5. exchange old main to new main (coordinator) - // 6. remove replica which was promoted to main from all replicas -> this will shut down RPC frequent check client - // (coordinator) - // 7. for new main start frequent checks (coordinator) - +[[nodiscard]] auto CoordinatorState::DoFailover() -> DoFailoverStatus { MG_ASSERT(std::holds_alternative(data_), "Cannot do failover since variant holds wrong alternative"); - using ReplicationClientInfo = CoordinatorClientConfig::ReplicationClientInfo; - - // 1. - auto ¤t_main = std::get(data_).registered_main_; - - if (!current_main) { - return DoFailoverStatus::CLUSTER_UNINITIALIZED; - } - - if (current_main->DoHealthCheck()) { - return DoFailoverStatus::MAIN_ALIVE; - } - current_main->StopFrequentCheck(); - - // 2. 
- // Get all replicas and find new main - auto ®istered_replicas = std::get(data_).registered_replicas_; - - const auto chosen_replica = std::ranges::find_if( - registered_replicas, [](const CoordinatorClient &replica) { return replica.DoHealthCheck(); }); - if (chosen_replica == registered_replicas.end()) { - return DoFailoverStatus::ALL_REPLICAS_DOWN; - } - - std::vector repl_clients_info; - repl_clients_info.reserve(registered_replicas.size() - 1); - std::ranges::for_each(registered_replicas, [&chosen_replica, &repl_clients_info](const CoordinatorClient &replica) { - if (replica != *chosen_replica) { - repl_clients_info.emplace_back(replica.ReplicationClientInfo()); - } - }); - - // 3. - // Set on coordinator data of new main - // allocate resources for new main, clear replication info on this replica as main - // set last response time - auto potential_new_main = std::make_unique(chosen_replica->Config()); - potential_new_main->ReplicationClientInfo().reset(); - potential_new_main->UpdateTimeCheck(chosen_replica->GetLastTimeResponse()); - - // 4. - if (!chosen_replica->SendPromoteReplicaToMainRpc(std::move(repl_clients_info))) { - spdlog::error("Sent RPC message, but exception was caught, aborting Failover"); - // TODO: new status and rollback all changes that were done... - MG_ASSERT(false, "RPC message failed"); - } - - // 5. - current_main = std::move(potential_new_main); - - // 6. remove old replica - // TODO: Stop pinging chosen_replica before failover. - // Check that it doesn't fail when you call StopFrequentCheck if it is already stopped - registered_replicas.erase(chosen_replica); - - // 7. 
- current_main->StartFrequentCheck(); - - return DoFailoverStatus::SUCCESS; + auto &coord_state = std::get(data_); + return coord_state.DoFailover(); } auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & { diff --git a/src/coordination/include/coordination/coordinator_client.hpp b/src/coordination/include/coordination/coordinator_client.hpp index baf8a380a..1bc361a57 100644 --- a/src/coordination/include/coordination/coordinator_client.hpp +++ b/src/coordination/include/coordination/coordinator_client.hpp @@ -16,53 +16,61 @@ #include "coordination/coordinator_config.hpp" #include "rpc/client.hpp" #include "utils/scheduler.hpp" -#include "utils/thread_pool.hpp" - -#include namespace memgraph::coordination { +class CoordinatorData; +using HealthCheckCallback = std::function; + class CoordinatorClient { public: - explicit CoordinatorClient(const CoordinatorClientConfig &config); + using ReplClientInfo = CoordinatorClientConfig::ReplicationClientInfo; + using ReplicationClientsInfo = std::vector; - ~CoordinatorClient(); + explicit CoordinatorClient(CoordinatorData *coord_data_, CoordinatorClientConfig config, HealthCheckCallback succ_cb, + HealthCheckCallback fail_cb); - CoordinatorClient(CoordinatorClient &other) = delete; - CoordinatorClient &operator=(CoordinatorClient const &other) = delete; + ~CoordinatorClient() = default; + + CoordinatorClient(CoordinatorClient &) = delete; + CoordinatorClient &operator=(CoordinatorClient const &) = delete; CoordinatorClient(CoordinatorClient &&) noexcept = delete; CoordinatorClient &operator=(CoordinatorClient &&) noexcept = delete; void StartFrequentCheck(); void StopFrequentCheck(); + void PauseFrequentCheck(); + void ResumeFrequentCheck(); - auto DoHealthCheck() const -> bool; - auto SendPromoteReplicaToMainRpc( - std::vector replication_clients_info) const -> bool; + auto InstanceName() const -> std::string; + auto SocketAddress() const -> std::string; - auto InstanceName() const -> std::string_view; 
- auto Endpoint() const -> io::network::Endpoint const &; - auto Config() const -> CoordinatorClientConfig const &; - auto ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo const &; - auto ReplicationClientInfo() -> std::optional &; - void UpdateTimeCheck(const std::chrono::system_clock::time_point &last_checked_time); - auto GetLastTimeResponse() -> std::chrono::system_clock::time_point; + auto SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool; + + auto ReplicationClientInfo() const -> const ReplClientInfo &; + auto ResetReplicationClientInfo() -> void; + + auto SendSetToReplicaRpc(ReplClientInfo replication_client_info) const -> bool; + + auto SetSuccCallback(HealthCheckCallback succ_cb) -> void; + auto SetFailCallback(HealthCheckCallback fail_cb) -> void; friend bool operator==(CoordinatorClient const &first, CoordinatorClient const &second) { return first.config_ == second.config_; } private: - utils::ThreadPool thread_pool_{1}; - utils::Scheduler replica_checker_; + utils::Scheduler instance_checker_; + // TODO: (andi) Pimpl? communication::ClientContext rpc_context_; mutable rpc::Client rpc_client_; - CoordinatorClientConfig config_; - std::atomic last_response_time_{}; - static constexpr int alive_response_time_difference_sec_{5}; + CoordinatorClientConfig config_; + CoordinatorData *coord_data_; + HealthCheckCallback succ_cb_; + HealthCheckCallback fail_cb_; }; } // namespace memgraph::coordination diff --git a/src/coordination/include/coordination/coordinator_cluster_config.hpp b/src/coordination/include/coordination/coordinator_cluster_config.hpp new file mode 100644 index 000000000..e1d91ff7d --- /dev/null +++ b/src/coordination/include/coordination/coordinator_cluster_config.hpp @@ -0,0 +1,22 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#ifdef MG_ENTERPRISE +namespace memgraph::coordination { + +struct CoordinatorClusterConfig { + static constexpr int alive_response_time_difference_sec_{5}; +}; + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/include/coordination/coordinator_config.hpp b/src/coordination/include/coordination/coordinator_config.hpp index e3a723177..bbbed9dd7 100644 --- a/src/coordination/include/coordination/coordinator_config.hpp +++ b/src/coordination/include/coordination/coordinator_config.hpp @@ -25,16 +25,16 @@ namespace memgraph::coordination { inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0"; struct CoordinatorClientConfig { - const std::string instance_name; - const std::string ip_address; - const uint16_t port{}; + std::string instance_name; + std::string ip_address; + uint16_t port{}; + std::chrono::seconds health_check_frequency_sec{1}; - // Frequency with which coordinator pings main/replicas about it status - const std::chrono::seconds health_check_frequency_sec{1}; + auto SocketAddress() const -> std::string { return ip_address + ":" + std::to_string(port); } // Info which coordinator will send to new main when performing failover struct ReplicationClientInfo { - // Should be the same as CoordinatorClientConfig's instance_name + // Must be the same as CoordinatorClientConfig's instance_name std::string instance_name; replication_coordination_glue::ReplicationMode replication_mode{}; std::string 
replication_ip_address; @@ -43,16 +43,17 @@ struct CoordinatorClientConfig { friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default; }; - std::optional replication_client_info; + // Each instance has replication config in case it fails + ReplicationClientInfo replication_client_info; struct SSL { - const std::string key_file; - const std::string cert_file; + std::string key_file; + std::string cert_file; friend bool operator==(const SSL &, const SSL &) = default; }; - const std::optional ssl; + std::optional ssl; friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default; }; diff --git a/src/coordination/include/coordination/coordinator_data.hpp b/src/coordination/include/coordination/coordinator_data.hpp new file mode 100644 index 000000000..d14f5e1db --- /dev/null +++ b/src/coordination/include/coordination/coordinator_data.hpp @@ -0,0 +1,49 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_instance.hpp" +#include "coordination/coordinator_instance_status.hpp" +#include "coordination/coordinator_server.hpp" +#include "coordination/failover_status.hpp" +#include "coordination/register_main_replica_coordinator_status.hpp" +#include "utils/rw_lock.hpp" + +#include + +namespace memgraph::coordination { +class CoordinatorData { + public: + CoordinatorData(); + + [[nodiscard]] auto DoFailover() -> DoFailoverStatus; + + [[nodiscard]] auto RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus; + [[nodiscard]] auto SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus; + + auto ShowInstances() const -> std::vector; + + private: + mutable utils::RWLock coord_data_lock_{utils::RWLock::Priority::READ}; + HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_; + // Must be std::list because we rely on pointer stability + std::list registered_instances_; +}; + +struct CoordinatorMainReplicaData { + std::unique_ptr coordinator_server_; +}; + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/include/coordination/coordinator_instance.hpp b/src/coordination/include/coordination/coordinator_instance.hpp new file mode 100644 index 000000000..31a6d8204 --- /dev/null +++ b/src/coordination/include/coordination/coordinator_instance.hpp @@ -0,0 +1,77 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_client.hpp" +#include "coordination/coordinator_cluster_config.hpp" +#include "replication_coordination_glue/role.hpp" + +namespace memgraph::coordination { + +class CoordinatorData; + +class CoordinatorInstance { + public: + CoordinatorInstance(CoordinatorData *data, CoordinatorClientConfig config, HealthCheckCallback succ_cb, + HealthCheckCallback fail_cb, replication_coordination_glue::ReplicationRole replication_role) + : client_(data, std::move(config), std::move(succ_cb), std::move(fail_cb)), + replication_role_(replication_role), + is_alive_(true) {} + + CoordinatorInstance(CoordinatorInstance const &other) = delete; + CoordinatorInstance &operator=(CoordinatorInstance const &other) = delete; + CoordinatorInstance(CoordinatorInstance &&other) noexcept = delete; + CoordinatorInstance &operator=(CoordinatorInstance &&other) noexcept = delete; + ~CoordinatorInstance() = default; + + auto UpdateInstanceStatus() -> bool { + is_alive_ = std::chrono::duration_cast(std::chrono::system_clock::now() - last_response_time_) + .count() < CoordinatorClusterConfig::alive_response_time_difference_sec_; + return is_alive_; + } + auto UpdateLastResponseTime() -> void { last_response_time_ = std::chrono::system_clock::now(); } + + auto InstanceName() const -> std::string { return client_.InstanceName(); } + auto SocketAddress() const -> std::string { return client_.SocketAddress(); } + auto IsAlive() const -> bool { return is_alive_; } + + auto IsReplica() const -> bool { + return replication_role_ == replication_coordination_glue::ReplicationRole::REPLICA; + } + auto IsMain() const -> bool { return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN; } + 
+ auto PrepareForFailover() -> void { client_.PauseFrequentCheck(); } + auto RestoreAfterFailedFailover() -> void { client_.ResumeFrequentCheck(); } + + auto PostFailover(HealthCheckCallback main_succ_cb, HealthCheckCallback main_fail_cb) -> void { + replication_role_ = replication_coordination_glue::ReplicationRole::MAIN; + client_.SetSuccCallback(std::move(main_succ_cb)); + client_.SetFailCallback(std::move(main_fail_cb)); + // Comment with Andi but we shouldn't delete this, what if this MAIN FAILS AGAIN + // client_.ResetReplicationClientInfo(); + client_.ResumeFrequentCheck(); + } + + CoordinatorClient client_; + replication_coordination_glue::ReplicationRole replication_role_; + std::chrono::system_clock::time_point last_response_time_{}; + bool is_alive_{false}; + + friend bool operator==(CoordinatorInstance const &first, CoordinatorInstance const &second) { + return first.client_ == second.client_ && first.replication_role_ == second.replication_role_; + } +}; + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/include/coordination/coordinator_entity_info.hpp b/src/coordination/include/coordination/coordinator_instance_status.hpp similarity index 78% rename from src/coordination/include/coordination/coordinator_entity_info.hpp rename to src/coordination/include/coordination/coordinator_instance_status.hpp index eb4321761..2a0a3a985 100644 --- a/src/coordination/include/coordination/coordinator_entity_info.hpp +++ b/src/coordination/include/coordination/coordinator_instance_status.hpp @@ -15,18 +15,15 @@ #include "io/network/endpoint.hpp" -#include +#include namespace memgraph::coordination { -struct CoordinatorEntityInfo { - std::string_view name; - const io::network::Endpoint &endpoint; -}; - -struct CoordinatorEntityHealthInfo { - std::string_view name; - bool alive; +struct CoordinatorInstanceStatus { + std::string instance_name; + std::string socket_address; + std::string replication_role; + bool is_alive; }; } // namespace 
memgraph::coordination diff --git a/src/coordination/include/coordination/coordinator_rpc.hpp b/src/coordination/include/coordination/coordinator_rpc.hpp index 64836a64f..99996ef52 100644 --- a/src/coordination/include/coordination/coordinator_rpc.hpp +++ b/src/coordination/include/coordination/coordinator_rpc.hpp @@ -48,6 +48,36 @@ struct PromoteReplicaToMainRes { using PromoteReplicaToMainRpc = rpc::RequestResponse; +struct SetMainToReplicaReq { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(SetMainToReplicaReq *self, memgraph::slk::Reader *reader); + static void Save(const SetMainToReplicaReq &self, memgraph::slk::Builder *builder); + + explicit SetMainToReplicaReq(CoordinatorClientConfig::ReplicationClientInfo replication_client_info) + : replication_client_info(std::move(replication_client_info)) {} + + SetMainToReplicaReq() = default; + + CoordinatorClientConfig::ReplicationClientInfo replication_client_info; +}; + +struct SetMainToReplicaRes { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(SetMainToReplicaRes *self, memgraph::slk::Reader *reader); + static void Save(const SetMainToReplicaRes &self, memgraph::slk::Builder *builder); + + explicit SetMainToReplicaRes(bool success) : success(success) {} + SetMainToReplicaRes() = default; + + bool success; +}; + +using SetMainToReplicaRpc = rpc::RequestResponse; + } // namespace memgraph::coordination // SLK serialization declarations @@ -61,6 +91,14 @@ void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph: void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader); +void Save(const memgraph::coordination::SetMainToReplicaRes &self, memgraph::slk::Builder *builder); + +void Load(memgraph::coordination::SetMainToReplicaRes *self, memgraph::slk::Reader *reader); + +void Save(const 
memgraph::coordination::SetMainToReplicaReq &self, memgraph::slk::Builder *builder); + +void Load(memgraph::coordination::SetMainToReplicaReq *self, memgraph::slk::Reader *reader); + } // namespace memgraph::slk #endif diff --git a/src/coordination/include/coordination/coordinator_state.hpp b/src/coordination/include/coordination/coordinator_state.hpp index e3cd26108..9cf2d2471 100644 --- a/src/coordination/include/coordination/coordinator_state.hpp +++ b/src/coordination/include/coordination/coordinator_state.hpp @@ -13,29 +13,16 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_client.hpp" -#include "coordination/coordinator_entity_info.hpp" +#include "coordination/coordinator_data.hpp" +#include "coordination/coordinator_instance_status.hpp" #include "coordination/coordinator_server.hpp" -#include "rpc/server.hpp" -#include "utils/result.hpp" -#include "utils/rw_spin_lock.hpp" -#include "utils/synchronized.hpp" +#include "coordination/failover_status.hpp" +#include "coordination/register_main_replica_coordinator_status.hpp" -#include #include namespace memgraph::coordination { -enum class RegisterMainReplicaCoordinatorStatus : uint8_t { - NAME_EXISTS, - END_POINT_EXISTS, - COULD_NOT_BE_PERSISTED, - NOT_COORDINATOR, - SUCCESS -}; - -enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, CLUSTER_UNINITIALIZED }; - class CoordinatorState { public: CoordinatorState(); @@ -44,49 +31,21 @@ class CoordinatorState { CoordinatorState(const CoordinatorState &) = delete; CoordinatorState &operator=(const CoordinatorState &) = delete; - CoordinatorState(CoordinatorState &&other) noexcept : data_(std::move(other.data_)) {} + CoordinatorState(CoordinatorState &&) noexcept = delete; + CoordinatorState &operator=(CoordinatorState &&) noexcept = delete; - CoordinatorState &operator=(CoordinatorState &&other) noexcept { - if (this == &other) { - return *this; - } - data_ = std::move(other.data_); - return *this; - } + [[nodiscard]] auto 
RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus; - auto RegisterReplica(const CoordinatorClientConfig &config) - -> utils::BasicResult; + [[nodiscard]] auto SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus; - auto RegisterMain(const CoordinatorClientConfig &config) - -> utils::BasicResult; - - auto ShowReplicas() const -> std::vector; - - auto PingReplicas() const -> std::unordered_map; - - auto ShowMain() const -> std::optional; - - auto PingMain() const -> std::optional; + auto ShowInstances() const -> std::vector; // The client code must check that the server exists before calling this method. auto GetCoordinatorServer() const -> CoordinatorServer &; - auto DoFailover() -> DoFailoverStatus; + [[nodiscard]] auto DoFailover() -> DoFailoverStatus; private: - // TODO: Data is not thread safe - - // Coordinator stores registered replicas and main - struct CoordinatorData { - std::list registered_replicas_; - std::unique_ptr registered_main_; - }; - - // Data which each main and replica stores - struct CoordinatorMainReplicaData { - std::unique_ptr coordinator_server_; - }; - std::variant data_; }; diff --git a/src/coordination/include/coordination/failover_status.hpp b/src/coordination/include/coordination/failover_status.hpp new file mode 100644 index 000000000..9cfa0ffe6 --- /dev/null +++ b/src/coordination/include/coordination/failover_status.hpp @@ -0,0 +1,21 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#ifdef MG_ENTERPRISE + +#include + +namespace memgraph::coordination { +enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, RPC_FAILED }; +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp b/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp new file mode 100644 index 000000000..acb191bfd --- /dev/null +++ b/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp @@ -0,0 +1,37 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#ifdef MG_ENTERPRISE + +#include + +namespace memgraph::coordination { + +enum class RegisterInstanceCoordinatorStatus : uint8_t { + NAME_EXISTS, + END_POINT_EXISTS, + COULD_NOT_BE_PERSISTED, + NOT_COORDINATOR, + RPC_FAILED, + SUCCESS +}; + +enum class SetInstanceToMainCoordinatorStatus : uint8_t { + NO_INSTANCE_WITH_NAME, + NOT_COORDINATOR, + SUCCESS, + COULD_NOT_PROMOTE_TO_MAIN, +}; + +} // namespace memgraph::coordination +#endif diff --git a/src/dbms/coordinator_handler.cpp b/src/dbms/coordinator_handler.cpp index 87a426237..1c062c074 100644 --- a/src/dbms/coordinator_handler.cpp +++ b/src/dbms/coordinator_handler.cpp @@ -9,6 +9,7 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. +#include "coordination/register_main_replica_coordinator_status.hpp" #ifdef MG_ENTERPRISE #include "dbms/coordinator_handler.hpp" @@ -19,81 +20,19 @@ namespace memgraph::dbms { CoordinatorHandler::CoordinatorHandler(DbmsHandler &dbms_handler) : dbms_handler_(dbms_handler) {} -auto CoordinatorHandler::RegisterReplicaOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config) - -> utils::BasicResult { - auto instance_client = dbms_handler_.CoordinatorState().RegisterReplica(config); - using repl_status = memgraph::coordination::RegisterMainReplicaCoordinatorStatus; - using dbms_status = memgraph::dbms::RegisterMainReplicaCoordinatorStatus; - if (instance_client.HasError()) { - switch (instance_client.GetError()) { - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR: - MG_ASSERT(false, "Only coordinator instance can register main and replica!"); - return {}; - case repl_status::NAME_EXISTS: - return dbms_status::NAME_EXISTS; - case repl_status::END_POINT_EXISTS: - return dbms_status::END_POINT_EXISTS; - case repl_status::COULD_NOT_BE_PERSISTED: - return dbms_status::COULD_NOT_BE_PERSISTED; - case repl_status::SUCCESS: - break; - } - } - - 
instance_client.GetValue()->StartFrequentCheck(); - return {}; +auto CoordinatorHandler::RegisterInstance(memgraph::coordination::CoordinatorClientConfig config) + -> coordination::RegisterInstanceCoordinatorStatus { + return dbms_handler_.CoordinatorState().RegisterInstance(config); } -auto CoordinatorHandler::RegisterMainOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config) - -> utils::BasicResult { - auto instance_client = dbms_handler_.CoordinatorState().RegisterMain(config); - if (instance_client.HasError()) switch (instance_client.GetError()) { - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR: - MG_ASSERT(false, "Only coordinator instance can register main and replica!"); - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NAME_EXISTS: - return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::NAME_EXISTS; - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::END_POINT_EXISTS: - return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::END_POINT_EXISTS; - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::COULD_NOT_BE_PERSISTED: - return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::COULD_NOT_BE_PERSISTED; - case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::SUCCESS: - break; - } - - instance_client.GetValue()->StartFrequentCheck(); - return {}; +auto CoordinatorHandler::SetInstanceToMain(std::string instance_name) + -> coordination::SetInstanceToMainCoordinatorStatus { + return dbms_handler_.CoordinatorState().SetInstanceToMain(std::move(instance_name)); } -auto CoordinatorHandler::ShowReplicasOnCoordinator() const -> std::vector { - return dbms_handler_.CoordinatorState().ShowReplicas(); +auto CoordinatorHandler::ShowInstances() const -> std::vector { + return dbms_handler_.CoordinatorState().ShowInstances(); } - -auto CoordinatorHandler::PingReplicasOnCoordinator() const -> std::unordered_map { - return 
dbms_handler_.CoordinatorState().PingReplicas(); -} - -auto CoordinatorHandler::ShowMainOnCoordinator() const -> std::optional { - return dbms_handler_.CoordinatorState().ShowMain(); -} - -auto CoordinatorHandler::PingMainOnCoordinator() const -> std::optional { - return dbms_handler_.CoordinatorState().PingMain(); -} - -auto CoordinatorHandler::DoFailover() const -> DoFailoverStatus { - auto status = dbms_handler_.CoordinatorState().DoFailover(); - switch (status) { - case memgraph::coordination::DoFailoverStatus::ALL_REPLICAS_DOWN: - return memgraph::dbms::DoFailoverStatus::ALL_REPLICAS_DOWN; - case memgraph::coordination::DoFailoverStatus::SUCCESS: - return memgraph::dbms::DoFailoverStatus::SUCCESS; - case memgraph::coordination::DoFailoverStatus::MAIN_ALIVE: - return memgraph::dbms::DoFailoverStatus::MAIN_ALIVE; - case memgraph::coordination::DoFailoverStatus::CLUSTER_UNINITIALIZED: - return memgraph::dbms::DoFailoverStatus::CLUSTER_UNINITIALIZED; - } -} - } // namespace memgraph::dbms #endif diff --git a/src/dbms/coordinator_handler.hpp b/src/dbms/coordinator_handler.hpp index 565ee8bf5..233532cbc 100644 --- a/src/dbms/coordinator_handler.hpp +++ b/src/dbms/coordinator_handler.hpp @@ -15,49 +15,29 @@ #include "utils/result.hpp" +#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_instance_status.hpp" +#include "coordination/failover_status.hpp" +#include "coordination/register_main_replica_coordinator_status.hpp" + #include #include #include -namespace memgraph::coordination { -struct CoordinatorEntityInfo; -struct CoordinatorEntityHealthInfo; -struct CoordinatorClientConfig; -} // namespace memgraph::coordination - namespace memgraph::dbms { -enum class RegisterMainReplicaCoordinatorStatus : uint8_t { - NAME_EXISTS, - END_POINT_EXISTS, - COULD_NOT_BE_PERSISTED, - NOT_COORDINATOR, - SUCCESS -}; - -enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, CLUSTER_UNINITIALIZED }; - class DbmsHandler; class 
CoordinatorHandler { public: explicit CoordinatorHandler(DbmsHandler &dbms_handler); - auto RegisterReplicaOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config) - -> utils::BasicResult; + auto RegisterInstance(coordination::CoordinatorClientConfig config) + -> coordination::RegisterInstanceCoordinatorStatus; - auto RegisterMainOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config) - -> utils::BasicResult; + auto SetInstanceToMain(std::string instance_name) -> coordination::SetInstanceToMainCoordinatorStatus; - auto ShowReplicasOnCoordinator() const -> std::vector; - - auto ShowMainOnCoordinator() const -> std::optional; - - auto PingReplicasOnCoordinator() const -> std::unordered_map; - - auto PingMainOnCoordinator() const -> std::optional; - - auto DoFailover() const -> DoFailoverStatus; + auto ShowInstances() const -> std::vector; private: DbmsHandler &dbms_handler_; diff --git a/src/dbms/coordinator_handlers.cpp b/src/dbms/coordinator_handlers.cpp index 317edcd49..5c051408e 100644 --- a/src/dbms/coordinator_handlers.cpp +++ b/src/dbms/coordinator_handlers.cpp @@ -19,6 +19,8 @@ #include "dbms/dbms_handler.hpp" #include "dbms/replication_client.hpp" +#include "range/v3/view.hpp" + namespace memgraph::dbms { void CoordinatorHandlers::Register(DbmsHandler &dbms_handler) { @@ -26,9 +28,40 @@ void CoordinatorHandlers::Register(DbmsHandler &dbms_handler) { server.Register( [&dbms_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void { - spdlog::info("Received PromoteReplicaToMainRpc from coordinator server"); + spdlog::info("Received PromoteReplicaToMainRpc"); CoordinatorHandlers::PromoteReplicaToMainHandler(dbms_handler, req_reader, res_builder); }); + + server.Register( + [&dbms_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void { + spdlog::info("Received SetMainToReplicaRpc from coordinator server"); + CoordinatorHandlers::SetMainToReplicaHandler(dbms_handler, req_reader, res_builder); + 
}); +} + +void CoordinatorHandlers::SetMainToReplicaHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader, + slk::Builder *res_builder) { + auto &repl_state = dbms_handler.ReplicationState(); + + if (!repl_state.IsMain()) { + spdlog::error("Setting to replica must be performed on main."); + slk::Save(coordination::SetMainToReplicaRes{false}, res_builder); + return; + } + + coordination::SetMainToReplicaReq req; + slk::Load(&req, req_reader); + + replication::ReplicationServerConfig clients_config{.ip_address = req.replication_client_info.replication_ip_address, + .port = req.replication_client_info.replication_port}; + + if (bool success = memgraph::dbms::SetReplicationRoleReplica(dbms_handler, clients_config); !success) { + spdlog::error("Setting main to replica failed!"); + slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); + return; + } + + slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder); } void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader, @@ -41,6 +74,10 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, return; } + auto repl_server_config = std::get(repl_state.ReplicationData()).config; + + // This can fail because of disk. If it does, the cluster state could get inconsistent. + // We don't handle disk issues. 
if (bool success = memgraph::dbms::DoReplicaToMainPromotion(dbms_handler); !success) { spdlog::error("Promoting replica to main failed!"); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); @@ -50,34 +87,40 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, coordination::PromoteReplicaToMainReq req; slk::Load(&req, req_reader); - std::vector clients_config; - clients_config.reserve(req.replication_clients_info.size()); - std::ranges::transform(req.replication_clients_info, std::back_inserter(clients_config), - [](const auto &repl_info_config) { - return replication::ReplicationClientConfig{ - .name = repl_info_config.instance_name, - .mode = repl_info_config.replication_mode, - .ip_address = repl_info_config.replication_ip_address, - .port = repl_info_config.replication_port, - }; - }); + auto const converter = [](const auto &repl_info_config) { + return replication::ReplicationClientConfig{ + .name = repl_info_config.instance_name, + .mode = repl_info_config.replication_mode, + .ip_address = repl_info_config.replication_ip_address, + .port = repl_info_config.replication_port, + }; + }; - std::ranges::for_each(clients_config, [&dbms_handler, &repl_state, &res_builder](const auto &config) { + MG_ASSERT( + std::get(repl_state.ReplicationData()).registered_replicas_.empty(), + "No replicas should be registered after promoting replica to main and before registering replication clients!"); + + // registering replicas + for (auto const &config : req.replication_clients_info | ranges::views::transform(converter)) { auto instance_client = repl_state.RegisterReplica(config); if (instance_client.HasError()) { switch (instance_client.GetError()) { + // Can't happen, we are already replica case memgraph::replication::RegisterReplicaError::NOT_MAIN: spdlog::error("Failover must be performed to main!"); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); return; + // Can't happen, checked on the coordinator 
side case memgraph::replication::RegisterReplicaError::NAME_EXISTS: spdlog::error("Replica with the same name already exists!"); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); return; - case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS: + // Can't happen, checked on the coordinator side + case memgraph::replication::RegisterReplicaError::ENDPOINT_EXISTS: spdlog::error("Replica with the same endpoint already exists!"); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); return; + // We don't handle disk issues case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: spdlog::error("Registered replica could not be persisted!"); slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); @@ -86,18 +129,22 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, break; } } - - auto &instance_client_ref = *instance_client.GetValue(); - const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler, instance_client_ref); - - if (!all_clients_good) { - spdlog::error("Failed to register all databases to the REPLICA \"{}\"", config.name); - slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder); - return; + if (!allow_mt_repl && dbms_handler.All().size() > 1) { + spdlog::warn("Multi-tenant replication is currently not supported!"); } + auto &instance_client_ref = *instance_client.GetValue(); + + // Update system before enabling individual storage <-> replica clients + dbms_handler.SystemRestore(instance_client_ref); + + // TODO: (andi) Policy for register all databases + // Will be resolved after deciding about choosing new replica + const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler, instance_client_ref); + MG_ASSERT(all_clients_good, "Failed to register one or more databases to the REPLICA \"{}\".", config.name); + StartReplicaClient(dbms_handler, instance_client_ref); - }); + } 
slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder); } diff --git a/src/dbms/coordinator_handlers.hpp b/src/dbms/coordinator_handlers.hpp index d08fccb6a..ae4c59a0a 100644 --- a/src/dbms/coordinator_handlers.hpp +++ b/src/dbms/coordinator_handlers.hpp @@ -26,6 +26,7 @@ class CoordinatorHandlers { private: static void PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); + static void SetMainToReplicaHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder); }; } // namespace memgraph::dbms diff --git a/src/dbms/database.hpp b/src/dbms/database.hpp index 0d87165b1..2d7d3fe88 100644 --- a/src/dbms/database.hpp +++ b/src/dbms/database.hpp @@ -110,7 +110,7 @@ class Database { * @param force_directory Use the configured directory, do not try to decipher the multi-db version * @return DatabaseInfo */ - DatabaseInfo GetInfo(bool force_directory, replication::ReplicationRole replication_role) const { + DatabaseInfo GetInfo(bool force_directory, replication_coordination_glue::ReplicationRole replication_role) const { DatabaseInfo info; info.storage_info = storage_->GetInfo(force_directory, replication_role); info.triggers = trigger_store_.GetTriggerInfo().size(); diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index 24ddcfd02..cef2bf8c6 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -23,7 +23,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/inmemory/unique_constraints.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using memgraph::storage::Delta; using memgraph::storage::EdgeAccessor; using memgraph::storage::EdgeRef; diff --git a/src/dbms/replication_handler.cpp b/src/dbms/replication_handler.cpp index 5eb9f97b1..285752f76 100644 --- a/src/dbms/replication_handler.cpp +++ 
b/src/dbms/replication_handler.cpp @@ -38,8 +38,8 @@ std::string RegisterReplicaErrorToString(RegisterReplicaError error) { using enum RegisterReplicaError; case NAME_EXISTS: return "NAME_EXISTS"; - case END_POINT_EXISTS: - return "END_POINT_EXISTS"; + case ENDPOINT_EXISTS: + return "ENDPOINT_EXISTS"; case CONNECTION_FAILED: return "CONNECTION_FAILED"; case COULD_NOT_BE_PERSISTED: @@ -100,16 +100,16 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio -> memgraph::utils::BasicResult { MG_ASSERT(dbms_handler_.ReplicationState().IsMain(), "Only main instance can register a replica!"); - auto instance_client = dbms_handler_.ReplicationState().RegisterReplica(config); - if (instance_client.HasError()) { - switch (instance_client.GetError()) { + auto maybe_client = dbms_handler_.ReplicationState().RegisterReplica(config); + if (maybe_client.HasError()) { + switch (maybe_client.GetError()) { case memgraph::replication::RegisterReplicaError::NOT_MAIN: MG_ASSERT(false, "Only main instance can register a replica!"); return {}; case memgraph::replication::RegisterReplicaError::NAME_EXISTS: return memgraph::dbms::RegisterReplicaError::NAME_EXISTS; - case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS: - return memgraph::dbms::RegisterReplicaError::END_POINT_EXISTS; + case memgraph::replication::RegisterReplicaError::ENDPOINT_EXISTS: + return memgraph::dbms::RegisterReplicaError::ENDPOINT_EXISTS; case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: return memgraph::dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED; case memgraph::replication::RegisterReplicaError::SUCCESS: @@ -123,14 +123,14 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio #ifdef MG_ENTERPRISE // Update system before enabling individual storage <-> replica clients - dbms_handler_.SystemRestore(*instance_client.GetValue()); + dbms_handler_.SystemRestore(*maybe_client.GetValue()); #endif - const auto dbms_error 
= memgraph::dbms::HandleErrorOnReplicaClient(instance_client); + const auto dbms_error = memgraph::dbms::HandleRegisterReplicaStatus(maybe_client); if (dbms_error.has_value()) { return *dbms_error; } - auto &instance_client_ptr = instance_client.GetValue(); + auto &instance_client_ptr = maybe_client.GetValue(); const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler_, *instance_client_ptr); // NOTE Currently if any databases fails, we revert back @@ -141,7 +141,7 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio } // No client error, start instance level client - StartReplicaClient(dbms_handler_, *instance_client.GetValue()); + StartReplicaClient(dbms_handler_, *instance_client_ptr); return {}; } @@ -169,7 +169,7 @@ auto ReplicationHandler::UnregisterReplica(std::string_view name) -> UnregisterR dbms_handler_.ReplicationState().ReplicationData()); } -auto ReplicationHandler::GetRole() const -> memgraph::replication::ReplicationRole { +auto ReplicationHandler::GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole { return dbms_handler_.ReplicationState().GetRole(); } diff --git a/src/dbms/replication_handler.hpp b/src/dbms/replication_handler.hpp index 7743ab4ed..53c64e34b 100644 --- a/src/dbms/replication_handler.hpp +++ b/src/dbms/replication_handler.hpp @@ -11,8 +11,8 @@ #pragma once +#include "replication_coordination_glue/role.hpp" #include "dbms/database.hpp" -#include "replication/role.hpp" #include "utils/result.hpp" namespace memgraph::replication { @@ -25,7 +25,7 @@ namespace memgraph::dbms { class DbmsHandler; -enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED }; +enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED }; enum class UnregisterReplicaResult : uint8_t { NOT_MAIN, @@ -53,7 +53,7 @@ struct ReplicationHandler { auto 
UnregisterReplica(std::string_view name) -> UnregisterReplicaResult; // Helper pass-through (TODO: remove) - auto GetRole() const -> memgraph::replication::ReplicationRole; + auto GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole; bool IsMain() const; bool IsReplica() const; diff --git a/src/dbms/utils.hpp b/src/dbms/utils.hpp index 7c465487f..fd5db9cf1 100644 --- a/src/dbms/utils.hpp +++ b/src/dbms/utils.hpp @@ -18,6 +18,7 @@ namespace memgraph::dbms { inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) { + auto &repl_state = dbms_handler.ReplicationState(); // STEP 1) bring down all REPLICA servers dbms_handler.ForEach([](DatabaseAccess db_acc) { auto *storage = db_acc->storage(); @@ -27,7 +28,7 @@ inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) { // STEP 2) Change to MAIN // TODO: restore replication servers if false? - if (!dbms_handler.ReplicationState().SetReplicationRoleMain()) { + if (!repl_state.SetReplicationRoleMain()) { // TODO: Handle recovery on failure??? return false; } @@ -43,6 +44,38 @@ inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) { return true; }; +inline bool SetReplicationRoleReplica(dbms::DbmsHandler &dbms_handler, + const memgraph::replication::ReplicationServerConfig &config) { + if (dbms_handler.ReplicationState().IsReplica()) { + return false; + } + + // TODO StorageState needs to be synched. Could have a dangling reference if someone adds a database as we are + // deleting the replica. 
+ // Remove database specific clients + dbms_handler.ForEach([&](DatabaseAccess db_acc) { + auto *storage = db_acc->storage(); + storage->repl_storage_state_.replication_clients_.WithLock([](auto &clients) { clients.clear(); }); + }); + // Remove instance level clients + std::get(dbms_handler.ReplicationState().ReplicationData()).registered_replicas_.clear(); + + // Creates the server + dbms_handler.ReplicationState().SetReplicationRoleReplica(config); + + // Start + const auto success = std::visit(utils::Overloaded{[](replication::RoleMainData const &) { + // ASSERT + return false; + }, + [&dbms_handler](replication::RoleReplicaData const &data) { + return StartRpcServer(dbms_handler, data); + }}, + dbms_handler.ReplicationState().ReplicationData()); + // TODO Handle error (restore to main?) + return success; +} + inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler, replication::ReplicationClient &instance_client) { if (!allow_mt_repl && dbms_handler.All().size() > 1) { @@ -69,7 +102,7 @@ inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler, // MAYBE_BEHIND isn't a statement of the current state, this is the default value // Failed to start due an error like branching of MAIN and REPLICA if (client->State() == storage::replication::ReplicaState::MAYBE_BEHIND) { - return false; + return false; // TODO: sometimes we need to still add to storage_clients } storage_clients.push_back(std::move(client)); return true; @@ -79,7 +112,7 @@ inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler, return all_clients_good; } -inline std::optional HandleErrorOnReplicaClient( +inline std::optional HandleRegisterReplicaStatus( utils::BasicResult &instance_client) { if (instance_client.HasError()) switch (instance_client.GetError()) { case replication::RegisterReplicaError::NOT_MAIN: @@ -87,8 +120,8 @@ inline std::optional HandleErrorOnReplicaClient( return {}; case replication::RegisterReplicaError::NAME_EXISTS: return 
dbms::RegisterReplicaError::NAME_EXISTS; - case replication::RegisterReplicaError::END_POINT_EXISTS: - return dbms::RegisterReplicaError::END_POINT_EXISTS; + case replication::RegisterReplicaError::ENDPOINT_EXISTS: + return dbms::RegisterReplicaError::ENDPOINT_EXISTS; case replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: return dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED; case replication::RegisterReplicaError::SUCCESS: diff --git a/src/mg_import_csv.cpp b/src/mg_import_csv.cpp index abf289fa3..cbfb905aa 100644 --- a/src/mg_import_csv.cpp +++ b/src/mg_import_csv.cpp @@ -32,7 +32,7 @@ #include "utils/timer.hpp" #include "version.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; bool ValidateControlCharacter(const char *flagname, const std::string &value) { if (value.empty()) { diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index 98ba4da49..0cbb790d0 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -3072,14 +3072,11 @@ class CoordinatorQuery : public memgraph::query::Query { const utils::TypeInfo &GetTypeInfo() const override { return kType; } enum class Action { - REGISTER_MAIN_COORDINATOR_SERVER, - REGISTER_REPLICA_COORDINATOR_SERVER, + REGISTER_INSTANCE, + SET_INSTANCE_TO_MAIN, SHOW_REPLICATION_CLUSTER, - DO_FAILOVER }; - enum class ReplicationRole { MAIN, REPLICA }; - enum class SyncMode { SYNC, ASYNC }; CoordinatorQuery() = default; @@ -3087,18 +3084,17 @@ class CoordinatorQuery : public memgraph::query::Query { DEFVISITABLE(QueryVisitor); memgraph::query::CoordinatorQuery::Action action_; - memgraph::query::CoordinatorQuery::ReplicationRole role_; std::string instance_name_; - memgraph::query::Expression *socket_address_{nullptr}; + memgraph::query::Expression *replication_socket_address_{nullptr}; memgraph::query::Expression *coordinator_socket_address_{nullptr}; memgraph::query::CoordinatorQuery::SyncMode 
sync_mode_; CoordinatorQuery *Clone(AstStorage *storage) const override { auto *object = storage->Create(); object->action_ = action_; - object->role_ = role_; object->instance_name_ = instance_name_; - object->socket_address_ = socket_address_ ? socket_address_->Clone(storage) : nullptr; + object->replication_socket_address_ = + replication_socket_address_ ? replication_socket_address_->Clone(storage) : nullptr; object->sync_mode_ = sync_mode_; object->coordinator_socket_address_ = coordinator_socket_address_ ? coordinator_socket_address_->Clone(storage) : nullptr; diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 0c4d499c4..5735326ac 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -375,10 +375,28 @@ antlrcpp::Any CypherMainVisitor::visitRegisterReplica(MemgraphCypher::RegisterRe } // License check is done in the interpreter. -antlrcpp::Any CypherMainVisitor::visitRegisterCoordinatorServer(MemgraphCypher::RegisterCoordinatorServerContext *ctx) { - MG_ASSERT(ctx->children.size() == 1, "RegisterCoordinatorServerQuery should have exactly one child!"); - auto *coordinator_query = std::any_cast(ctx->children[0]->accept(this)); - query_ = coordinator_query; +antlrcpp::Any CypherMainVisitor::visitRegisterInstanceOnCoordinator( + MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) { + auto *coordinator_query = storage_->Create(); + if (!ctx->replicationSocketAddress()->literal()->StringLiteral()) { + throw SemanticException("Replication socket address should be a string literal!"); + } + + if (!ctx->coordinatorSocketAddress()->literal()->StringLiteral()) { + throw SemanticException("Coordinator socket address should be a string literal!"); + } + coordinator_query->action_ = CoordinatorQuery::Action::REGISTER_INSTANCE; + coordinator_query->replication_socket_address_ = + std::any_cast(ctx->replicationSocketAddress()->accept(this)); + 
coordinator_query->coordinator_socket_address_ = + std::any_cast(ctx->coordinatorSocketAddress()->accept(this)); + coordinator_query->instance_name_ = std::any_cast(ctx->instanceName()->symbolicName()->accept(this)); + if (ctx->ASYNC()) { + coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::ASYNC; + } else { + coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::SYNC; + } + return coordinator_query; } @@ -389,48 +407,6 @@ antlrcpp::Any CypherMainVisitor::visitShowReplicationCluster(MemgraphCypher::Sho return coordinator_query; } -// License check is done in the interpreter -antlrcpp::Any CypherMainVisitor::visitRegisterReplicaCoordinatorServer( - MemgraphCypher::RegisterReplicaCoordinatorServerContext *ctx) { - auto *coordinator_query = storage_->Create(); - if (!ctx->socketAddress()->literal()->StringLiteral()) { - throw SemanticException("Socket address should be a string literal!"); - } - - if (!ctx->coordinatorSocketAddress()->literal()->StringLiteral()) { - throw SemanticException("Coordinator socket address should be a string literal!"); - } - coordinator_query->action_ = CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER; - coordinator_query->role_ = CoordinatorQuery::ReplicationRole::REPLICA; - coordinator_query->socket_address_ = std::any_cast(ctx->socketAddress()->accept(this)); - coordinator_query->coordinator_socket_address_ = - std::any_cast(ctx->coordinatorSocketAddress()->accept(this)); - coordinator_query->instance_name_ = std::any_cast(ctx->instanceName()->symbolicName()->accept(this)); - if (ctx->SYNC()) { - coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::SYNC; - } else if (ctx->ASYNC()) { - coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::ASYNC; - } - - return coordinator_query; -} - -// License check is done in the interpreter -antlrcpp::Any CypherMainVisitor::visitRegisterMainCoordinatorServer( - 
MemgraphCypher::RegisterMainCoordinatorServerContext *ctx) { - if (!ctx->coordinatorSocketAddress()->literal()->StringLiteral()) { - throw SemanticException("Coordinator socket address should be a string literal!"); - } - auto *coordinator_query = storage_->Create(); - coordinator_query->action_ = CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER; - coordinator_query->role_ = CoordinatorQuery::ReplicationRole::MAIN; - coordinator_query->coordinator_socket_address_ = - std::any_cast(ctx->coordinatorSocketAddress()->accept(this)); - coordinator_query->instance_name_ = std::any_cast(ctx->instanceName()->symbolicName()->accept(this)); - - return coordinator_query; -} - antlrcpp::Any CypherMainVisitor::visitDropReplica(MemgraphCypher::DropReplicaContext *ctx) { auto *replication_query = storage_->Create(); replication_query->action_ = ReplicationQuery::Action::DROP_REPLICA; @@ -445,9 +421,10 @@ antlrcpp::Any CypherMainVisitor::visitShowReplicas(MemgraphCypher::ShowReplicasC } // License check is done in the interpreter -antlrcpp::Any CypherMainVisitor::visitDoFailover(MemgraphCypher::DoFailoverContext * /*ctx*/) { +antlrcpp::Any CypherMainVisitor::visitSetInstanceToMain(MemgraphCypher::SetInstanceToMainContext *ctx) { auto *coordinator_query = storage_->Create(); - coordinator_query->action_ = CoordinatorQuery::Action::DO_FAILOVER; + coordinator_query->action_ = CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN; + coordinator_query->instance_name_ = std::any_cast(ctx->instanceName()->symbolicName()->accept(this)); query_ = coordinator_query; return coordinator_query; } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 174588bbb..e9da98f71 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -241,29 +241,18 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { /** * @return CoordinatorQuery* */ - antlrcpp::Any 
visitRegisterCoordinatorServer(MemgraphCypher::RegisterCoordinatorServerContext *ctx) override; + antlrcpp::Any visitRegisterInstanceOnCoordinator(MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) override; /** * @return CoordinatorQuery* */ - antlrcpp::Any visitRegisterMainCoordinatorServer(MemgraphCypher::RegisterMainCoordinatorServerContext *ctx) override; - - /** - * @return CoordinatorQuery* - */ - antlrcpp::Any visitRegisterReplicaCoordinatorServer( - MemgraphCypher::RegisterReplicaCoordinatorServerContext *ctx) override; + antlrcpp::Any visitSetInstanceToMain(MemgraphCypher::SetInstanceToMainContext *ctx) override; /** * @return CoordinatorQuery* */ antlrcpp::Any visitShowReplicationCluster(MemgraphCypher::ShowReplicationClusterContext *ctx) override; - /** - * @return CoordinatorQuery* - */ - antlrcpp::Any visitDoFailover(MemgraphCypher::DoFailoverContext *ctx) override; - /** * @return LockPathQuery* */ diff --git a/src/query/frontend/opencypher/grammar/CypherLexer.g4 b/src/query/frontend/opencypher/grammar/CypherLexer.g4 index 3428a2191..3e3c640d6 100644 --- a/src/query/frontend/opencypher/grammar/CypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/CypherLexer.g4 @@ -102,6 +102,7 @@ FILTER : F I L T E R ; IN : I N ; INDEX : I N D E X ; INFO : I N F O ; +INSTANCE : I N S T A N C E ; IS : I S ; KB : K B ; KEY : K E Y ; @@ -122,6 +123,7 @@ PROCEDURE : P R O C E D U R E ; PROFILE : P R O F I L E ; QUERY : Q U E R Y ; REDUCE : R E D U C E ; +REGISTER : R E G I S T E R; REMOVE : R E M O V E ; RETURN : R E T U R N ; SET : S E T ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index 6be2aef86..e41184468 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -63,6 +63,7 @@ memgraphCypherKeyword : cypherKeyword | GRANT | HEADER | IDENTIFIED + | INSTANCE | NODE_LABELS | NULLIF | IMPORT @@ 
-186,9 +187,9 @@ replicationQuery : setReplicationRole | showReplicas ; -coordinatorQuery : registerCoordinatorServer +coordinatorQuery : registerInstanceOnCoordinator + | setInstanceToMain | showReplicationCluster - | doFailover ; triggerQuery : createTrigger @@ -252,8 +253,6 @@ transactionQueueQuery : showTransactions showTransactions : SHOW TRANSACTIONS ; -doFailover : DO FAILOVER ; - terminateTransactions : TERMINATE TRANSACTIONS transactionIdList; loadCsv : LOAD CSV FROM csvFile ( WITH | NO ) HEADER @@ -382,15 +381,14 @@ instanceName : symbolicName ; socketAddress : literal ; coordinatorSocketAddress : literal ; +replicationSocketAddress : literal ; registerReplica : REGISTER REPLICA instanceName ( SYNC | ASYNC ) TO socketAddress ; -registerReplicaCoordinatorServer: REGISTER REPLICA instanceName ( ASYNC | SYNC ) TO socketAddress WITH COORDINATOR SERVER ON coordinatorSocketAddress ; +registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ON coordinatorSocketAddress ( AS ASYNC ) ? 
WITH replicationSocketAddress ; -registerMainCoordinatorServer: REGISTER MAIN instanceName WITH COORDINATOR SERVER ON coordinatorSocketAddress ; - -registerCoordinatorServer : registerMainCoordinatorServer | registerReplicaCoordinatorServer ; +setInstanceToMain : SET INSTANCE instanceName TO MAIN ; dropReplica : DROP REPLICA instanceName ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 5ffd5aadd..b0febc4af 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -79,6 +79,7 @@ IMPORT : I M P O R T ; INACTIVE : I N A C T I V E ; IN_MEMORY_ANALYTICAL : I N UNDERSCORE M E M O R Y UNDERSCORE A N A L Y T I C A L ; IN_MEMORY_TRANSACTIONAL : I N UNDERSCORE M E M O R Y UNDERSCORE T R A N S A C T I O N A L ; +INSTANCE : I N S T A N C E ; ISOLATION : I S O L A T I O N ; KAFKA : K A F K A ; LABELS : L A B E L S ; diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index 21a14ae83..bd6ab7971 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -218,7 +218,8 @@ const trie::Trie kKeywords = {"union", "directory", "lock", "unlock", - "build"}; + "build", + "instance"}; // Unicode codepoints that are allowed at the start of the unescaped name. 
const std::bitset kUnescapedNameAllowedStarts( diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index baf4eb551..1576df7c4 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -110,7 +110,6 @@ #ifdef MG_ENTERPRISE #include "coordination/constants.hpp" -#include "coordination/coordinator_entity_info.hpp" #endif namespace memgraph::metrics { @@ -337,9 +336,9 @@ class ReplQueryHandler { /// @throw QueryRuntimeException if an error ocurred. ReplicationQuery::ReplicationRole ShowReplicationRole() const { switch (handler_.GetRole()) { - case memgraph::replication::ReplicationRole::MAIN: + case memgraph::replication_coordination_glue::ReplicationRole::MAIN: return ReplicationQuery::ReplicationRole::MAIN; - case memgraph::replication::ReplicationRole::REPLICA: + case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: return ReplicationQuery::ReplicationRole::REPLICA; } throw QueryRuntimeException("Couldn't show replication role - invalid role set!"); @@ -462,11 +461,9 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { #ifdef MG_ENTERPRISE /// @throw QueryRuntimeException if an error ocurred. 
- void RegisterReplicaCoordinatorServer(const std::string &replication_socket_address, - const std::string &coordinator_socket_address, - const std::chrono::seconds instance_check_frequency, - const std::string &instance_name, - CoordinatorQuery::SyncMode sync_mode) override { + void RegisterInstance(const std::string &coordinator_socket_address, const std::string &replication_socket_address, + const std::chrono::seconds instance_check_frequency, const std::string &instance_name, + CoordinatorQuery::SyncMode sync_mode) override { const auto maybe_replication_ip_port = io::network::Endpoint::ParseSocketOrAddress(replication_socket_address, std::nullopt); if (!maybe_replication_ip_port) { @@ -487,7 +484,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { .replication_ip_address = replication_ip, .replication_port = replication_port}; - const auto coordinator_client_config = + auto coordinator_client_config = coordination::CoordinatorClientConfig{.instance_name = instance_name, .ip_address = coordinator_server_ip, .port = coordinator_server_port, @@ -495,87 +492,50 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { .replication_client_info = repl_config, .ssl = std::nullopt}; - if (const auto ret = coordinator_handler_.RegisterReplicaOnCoordinator(coordinator_client_config); ret.HasError()) { - throw QueryRuntimeException("Couldn't register replica on coordinator!"); - } - } - - void RegisterMainCoordinatorServer(const std::string &coordinator_socket_address, - const std::chrono::seconds instance_check_frequency, - const std::string &instance_name) override { - const auto maybe_ip_and_port = - io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address, std::nullopt); - if (!maybe_ip_and_port) { - throw QueryRuntimeException("Invalid socket address!"); - } - const auto [ip, port] = *maybe_ip_and_port; - const auto config = coordination::CoordinatorClientConfig{.instance_name = instance_name, - .ip_address = 
ip, - .port = port, - .health_check_frequency_sec = instance_check_frequency, - .ssl = std::nullopt}; - - if (const auto ret = coordinator_handler_.RegisterMainOnCoordinator(config); ret.HasError()) { - throw QueryRuntimeException("Couldn't register main on coordinator!"); - } - } - - /// @throw QueryRuntimeException if an error ocurred. - void DoFailover() const override { - if (!FLAGS_coordinator) { - throw QueryRuntimeException("Only coordinator can register coordinator server!"); - } - - auto status = coordinator_handler_.DoFailover(); + auto status = coordinator_handler_.RegisterInstance(coordinator_client_config); switch (status) { - using enum memgraph::dbms::DoFailoverStatus; - case ALL_REPLICAS_DOWN: - throw QueryRuntimeException("Failover aborted since all replicas are down!"); - case MAIN_ALIVE: - throw QueryRuntimeException("Failover aborted since main is alive!"); - case CLUSTER_UNINITIALIZED: - throw QueryRuntimeException("Failover aborted since cluster is uninitialized!"); + using enum memgraph::coordination::RegisterInstanceCoordinatorStatus; + case NAME_EXISTS: + throw QueryRuntimeException("Couldn't register replica instance since instance with such name already exists!"); + case END_POINT_EXISTS: + throw QueryRuntimeException( + "Couldn't register replica instance since instance with such endpoint already exists!"); + case COULD_NOT_BE_PERSISTED: + throw QueryRuntimeException("Couldn't register replica instance since it couldn't be persisted!"); + case NOT_COORDINATOR: + throw QueryRuntimeException("Couldn't register replica instance since this instance is not a coordinator!"); + case RPC_FAILED: + throw QueryRuntimeException( + "Couldn't register replica because promotion on replica failed! 
Check logs on replica to find out more " + "info!"); case SUCCESS: break; } } - std::vector ShowMainReplicaStatus( - const std::vector &replicas, - const std::unordered_map &health_check_replicas, - const std::optional &main, - const std::optional &health_check_main) const override { - std::vector result{}; - result.reserve(replicas.size() + 1); // replicas + 1 main - std::ranges::transform( - replicas, std::back_inserter(result), [&health_check_replicas](const auto &replica) -> MainReplicaStatus { - return {replica.name, replica.endpoint.SocketAddress(), health_check_replicas.at(replica.name), false}; - }); - if (main) { - bool is_main_alive = health_check_main.has_value() ? health_check_main.value().alive : false; - result.emplace_back(main->name, main->endpoint.SocketAddress(), is_main_alive, true); + void SetInstanceToMain(const std::string &instance_name) override { + auto status = coordinator_handler_.SetInstanceToMain(instance_name); + switch (status) { + using enum memgraph::coordination::SetInstanceToMainCoordinatorStatus; + case NO_INSTANCE_WITH_NAME: + throw QueryRuntimeException("No instance with such name!"); + case NOT_COORDINATOR: + throw QueryRuntimeException("Couldn't set replica instance to main since this instance is not a coordinator!"); + case COULD_NOT_PROMOTE_TO_MAIN: + throw QueryRuntimeException( + "Couldn't set replica instance to main. 
Check coordinator and replica for more logs"); + case SUCCESS: + break; } - return result; } #endif #ifdef MG_ENTERPRISE - std::vector ShowReplicasOnCoordinator() const override { - return coordinator_handler_.ShowReplicasOnCoordinator(); + std::vector ShowInstances() const override { + return coordinator_handler_.ShowInstances(); } - std::unordered_map PingReplicasOnCoordinator() const override { - return coordinator_handler_.PingReplicasOnCoordinator(); - } - - std::optional ShowMainOnCoordinator() const override { - return coordinator_handler_.ShowMainOnCoordinator(); - } - - std::optional PingMainOnCoordinator() const override { - return coordinator_handler_.PingMainOnCoordinator(); - } #endif private: @@ -890,10 +850,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & case ReplicationQuery::Action::SET_REPLICATION_ROLE: { #ifdef MG_ENTERPRISE if (FLAGS_coordinator) { - if (repl_query->role_ == ReplicationQuery::ReplicationRole::REPLICA) { - throw QueryRuntimeException("Coordinator cannot become a replica!"); - } - throw QueryRuntimeException("Coordinator cannot become main!"); + throw QueryRuntimeException("Coordinator can't set roles!"); + } + if (FLAGS_coordinator_server_port) { + throw QueryRuntimeException("Can't set role manually on instance with coordinator server port."); } #endif @@ -938,6 +898,11 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & return callback; } case ReplicationQuery::Action::REGISTER_REPLICA: { +#ifdef MG_ENTERPRISE + if (FLAGS_coordinator_server_port) { + throw QueryRuntimeException("Can't register replica manually on instance with coordinator server port."); + } +#endif const auto &name = repl_query->instance_name_; const auto &sync_mode = repl_query->sync_mode_; auto socket_address = repl_query->socket_address_->Accept(evaluator); @@ -954,6 +919,11 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case 
ReplicationQuery::Action::DROP_REPLICA: { +#ifdef MG_ENTERPRISE + if (FLAGS_coordinator_server_port) { + throw QueryRuntimeException("Can't drop replica manually on instance with coordinator server port."); + } +#endif const auto &name = repl_query->instance_name_; callback.fn = [handler = ReplQueryHandler{dbms_handler}, name]() mutable { handler.DropReplica(name); @@ -1026,7 +996,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param std::vector *notifications) { Callback callback; switch (coordinator_query->action_) { - case CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER: { + case CoordinatorQuery::Action::REGISTER_INSTANCE: { if (!license::global_license_checker.IsEnterpriseValidFast()) { throw QueryException("Trying to use enterprise feature without a valid license."); } @@ -1045,11 +1015,14 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context}; auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator); + auto replication_socket_address_tv = coordinator_query->replication_socket_address_->Accept(evaluator); callback.fn = [handler = CoordQueryHandler{dbms_handler}, coordinator_socket_address_tv, - main_check_frequency = config.replication_replica_check_frequency, - instance_name = coordinator_query->instance_name_]() mutable { - handler.RegisterMainCoordinatorServer(std::string(coordinator_socket_address_tv.ValueString()), - main_check_frequency, instance_name); + replication_socket_address_tv, main_check_frequency = config.replication_replica_check_frequency, + instance_name = coordinator_query->instance_name_, + sync_mode = coordinator_query->sync_mode_]() mutable { + handler.RegisterInstance(std::string(coordinator_socket_address_tv.ValueString()), + std::string(replication_socket_address_tv.ValueString()), main_check_frequency, + instance_name, sync_mode); return 
std::vector>(); }; @@ -1060,7 +1033,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param return callback; #endif } - case CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER: { + case CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN: { if (!license::global_license_checker.IsEnterpriseValidFast()) { throw QueryException("Trying to use enterprise feature without a valid license."); } @@ -1077,22 +1050,13 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param // the argument to Callback. EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters}; auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context}; - auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator); - auto replication_socket_address_tv = coordinator_query->socket_address_->Accept(evaluator); - callback.fn = [handler = CoordQueryHandler{dbms_handler}, coordinator_socket_address_tv, - replication_socket_address_tv, main_check_frequency = config.replication_replica_check_frequency, - instance_name = coordinator_query->instance_name_, - sync_mode = coordinator_query->sync_mode_]() mutable { - handler.RegisterReplicaCoordinatorServer(std::string(replication_socket_address_tv.ValueString()), - std::string(coordinator_socket_address_tv.ValueString()), - main_check_frequency, instance_name, sync_mode); + + callback.fn = [handler = CoordQueryHandler{dbms_handler}, + instance_name = coordinator_query->instance_name_]() mutable { + handler.SetInstanceToMain(instance_name); return std::vector>(); }; - notifications->emplace_back( - SeverityLevel::INFO, NotificationCode::REGISTER_COORDINATOR_SERVER, - fmt::format("Coordinator has registered coordinator server on {} for instance {}.", - coordinator_socket_address_tv.ValueString(), coordinator_query->instance_name_)); return callback; #endif } @@ -1112,57 +1076,19 @@ Callback 
HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param callback.header = {"name", "socket_address", "alive", "role"}; callback.fn = [handler = CoordQueryHandler{dbms_handler}, replica_nfields = callback.header.size()]() mutable { - const auto main = handler.ShowMainOnCoordinator(); - const auto health_check_main = main ? handler.PingMainOnCoordinator() : std::nullopt; - const auto result_status = handler.ShowMainReplicaStatus( - handler.ShowReplicasOnCoordinator(), handler.PingReplicasOnCoordinator(), main, health_check_main); + auto const instances = handler.ShowInstances(); std::vector> result{}; - result.reserve(result_status.size()); + result.reserve(instances.size()); - std::ranges::transform(result_status, std::back_inserter(result), + std::ranges::transform(instances, std::back_inserter(result), [](const auto &status) -> std::vector { - return {TypedValue{status.name}, TypedValue{status.socket_address}, - TypedValue{status.alive}, TypedValue{status.is_main ? "main" : "replica"}}; + return {TypedValue{status.instance_name}, TypedValue{status.socket_address}, + TypedValue{status.is_alive}, TypedValue{status.replication_role}}; }); + return result; }; return callback; -#endif - } - case CoordinatorQuery::Action::DO_FAILOVER: { - if (!license::global_license_checker.IsEnterpriseValidFast()) { - throw QueryException("Trying to use enterprise feature without a valid license."); - } -#ifdef MG_ENTERPRISE - if constexpr (!coordination::allow_ha) { - throw QueryRuntimeException( - "High availability is experimental feature. 
Please set MG_EXPERIMENTAL_HIGH_AVAILABILITY compile flag to " - "be able to use this functionality."); - } - if (!FLAGS_coordinator) { - throw QueryRuntimeException("Only coordinator can run DO FAILOVER!"); - } - - callback.header = {"name", "socket_address", "alive", "role"}; - callback.fn = [handler = CoordQueryHandler{dbms_handler}]() mutable { - handler.DoFailover(); - const auto main = handler.ShowMainOnCoordinator(); - const auto health_check_main = main ? handler.PingMainOnCoordinator() : std::nullopt; - const auto result_status = handler.ShowMainReplicaStatus( - handler.ShowReplicasOnCoordinator(), handler.PingReplicasOnCoordinator(), main, health_check_main); - std::vector> result{}; - result.reserve(result_status.size()); - - std::ranges::transform(result_status, std::back_inserter(result), - [](const auto &status) -> std::vector { - return {TypedValue{status.name}, TypedValue{status.socket_address}, - TypedValue{status.alive}, TypedValue{status.is_main ? "main" : "replica"}}; - }); - return result; - }; - notifications->emplace_back(SeverityLevel::INFO, NotificationCode::DO_FAILOVER, - "DO FAILOVER called on coordinator."); - return callback; #endif } return callback; @@ -3157,7 +3083,7 @@ PreparedQuery PrepareEdgeImportModeQuery(ParsedQuery parsed_query, CurrentDB &cu } PreparedQuery PrepareCreateSnapshotQuery(ParsedQuery parsed_query, bool in_explicit_transaction, CurrentDB ¤t_db, - replication::ReplicationRole replication_role) { + replication_coordination_glue::ReplicationRole replication_role) { if (in_explicit_transaction) { throw CreateSnapshotInMulticommandTxException(); } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 4cd7b3992..42100059c 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -15,7 +15,6 @@ #include -#include "coordination/coordinator_entity_info.hpp" #include "dbms/database.hpp" #include "dbms/dbms_handler.hpp" #include "memory/query_memory_control.hpp" @@ -53,6 +52,10 @@ #include 
"utils/timer.hpp" #include "utils/tsc.hpp" +#ifdef MG_ENTERPRISE +#include "coordination/coordinator_instance_status.hpp" +#endif + namespace memgraph::metrics { extern const Event FailedQuery; extern const Event FailedPrepare; @@ -93,47 +96,27 @@ class CoordinatorQueryHandler { #ifdef MG_ENTERPRISE struct MainReplicaStatus { std::string_view name; - std::string socket_address; + std::string_view socket_address; bool alive; bool is_main; - MainReplicaStatus(std::string_view name, std::string socket_address, bool alive, bool is_main) - : name{name}, socket_address{std::move(socket_address)}, alive{alive}, is_main{is_main} {} + MainReplicaStatus(std::string_view name, std::string_view socket_address, bool alive, bool is_main) + : name{name}, socket_address{socket_address}, alive{alive}, is_main{is_main} {} }; #endif #ifdef MG_ENTERPRISE /// @throw QueryRuntimeException if an error ocurred. - virtual void RegisterReplicaCoordinatorServer(const std::string &replication_socket_address, - const std::string &coordinator_socket_address, - const std::chrono::seconds instance_check_frequency, - const std::string &instance_name, - CoordinatorQuery::SyncMode sync_mode) = 0; - virtual void RegisterMainCoordinatorServer(const std::string &socket_address, - const std::chrono::seconds instance_check_frequency, - const std::string &instance_name) = 0; + virtual void RegisterInstance(const std::string &coordinator_socket_address, + const std::string &replication_socket_address, + const std::chrono::seconds instance_check_frequency, const std::string &instance_name, + CoordinatorQuery::SyncMode sync_mode) = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual std::vector ShowReplicasOnCoordinator() const = 0; + virtual void SetInstanceToMain(const std::string &instance_name) = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual std::optional ShowMainOnCoordinator() const = 0; - - /// @throw QueryRuntimeException if an error ocurred. 
- virtual std::unordered_map PingReplicasOnCoordinator() const = 0; - - /// @throw QueryRuntimeException if an error ocurred. - virtual std::optional PingMainOnCoordinator() const = 0; - - /// @throw QueryRuntimeException if an error ocurred. - virtual void DoFailover() const = 0; - - /// @throw QueryRuntimeException if an error ocurred. - virtual std::vector ShowMainReplicaStatus( - const std::vector &replicas, - const std::unordered_map &health_check_replicas, - const std::optional &main, - const std::optional &health_check_main) const = 0; + virtual std::vector ShowInstances() const = 0; #endif }; diff --git a/src/query/metadata.cpp b/src/query/metadata.cpp index 6995285f7..56ef57431 100644 --- a/src/query/metadata.cpp +++ b/src/query/metadata.cpp @@ -69,8 +69,6 @@ constexpr std::string_view GetCodeString(const NotificationCode code) { #ifdef MG_ENTERPRISE case NotificationCode::REGISTER_COORDINATOR_SERVER: return "RegisterCoordinatorServer"sv; - case NotificationCode::DO_FAILOVER: - return "DoFailover"sv; #endif case NotificationCode::REPLICA_PORT_WARNING: return "ReplicaPortWarning"sv; diff --git a/src/query/metadata.hpp b/src/query/metadata.hpp index 8e61280c0..8e82ad1e3 100644 --- a/src/query/metadata.hpp +++ b/src/query/metadata.hpp @@ -44,7 +44,6 @@ enum class NotificationCode : uint8_t { REGISTER_REPLICA, #ifdef MG_ENTERPRISE REGISTER_COORDINATOR_SERVER, - DO_FAILOVER, #endif SET_REPLICA, START_STREAM, diff --git a/src/replication/CMakeLists.txt b/src/replication/CMakeLists.txt index 97d4aba24..e19ba7061 100644 --- a/src/replication/CMakeLists.txt +++ b/src/replication/CMakeLists.txt @@ -5,7 +5,6 @@ target_sources(mg-replication include/replication/state.hpp include/replication/epoch.hpp include/replication/config.hpp - include/replication/role.hpp include/replication/status.hpp include/replication/messages.hpp include/replication/replication_client.hpp diff --git a/src/replication/include/replication/config.hpp 
b/src/replication/include/replication/config.hpp index 9ac9edbd0..822e09f72 100644 --- a/src/replication/include/replication/config.hpp +++ b/src/replication/include/replication/config.hpp @@ -40,7 +40,7 @@ struct ReplicationClientConfig { friend bool operator==(const SSL &, const SSL &) = default; }; - std::optional ssl; + std::optional ssl{}; friend bool operator==(ReplicationClientConfig const &, ReplicationClientConfig const &) = default; }; diff --git a/src/replication/include/replication/state.hpp b/src/replication/include/replication/state.hpp index 32be51ef6..a53885aff 100644 --- a/src/replication/include/replication/state.hpp +++ b/src/replication/include/replication/state.hpp @@ -15,8 +15,8 @@ #include "replication/config.hpp" #include "replication/epoch.hpp" #include "replication/replication_client.hpp" -#include "replication/role.hpp" #include "replication_coordination_glue/mode.hpp" +#include "replication_coordination_glue/role.hpp" #include "replication_server.hpp" #include "status.hpp" #include "utils/result.hpp" @@ -32,7 +32,8 @@ namespace memgraph::replication { enum class RolePersisted : uint8_t { UNKNOWN_OR_NO, YES }; -enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, COULD_NOT_BE_PERSISTED, NOT_MAIN, SUCCESS }; +// TODO: (andi) Rename Error to Status +enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, COULD_NOT_BE_PERSISTED, NOT_MAIN, SUCCESS }; struct RoleMainData { RoleMainData() = default; @@ -72,12 +73,13 @@ struct ReplicationState { using FetchReplicationResult_t = utils::BasicResult; auto FetchReplicationData() -> FetchReplicationResult_t; - auto GetRole() const -> ReplicationRole { - return std::holds_alternative(replication_data_) ? ReplicationRole::REPLICA - : ReplicationRole::MAIN; + auto GetRole() const -> replication_coordination_glue::ReplicationRole { + return std::holds_alternative(replication_data_) + ? 
replication_coordination_glue::ReplicationRole::REPLICA + : replication_coordination_glue::ReplicationRole::MAIN; } - bool IsMain() const { return GetRole() == ReplicationRole::MAIN; } - bool IsReplica() const { return GetRole() == ReplicationRole::REPLICA; } + bool IsMain() const { return GetRole() == replication_coordination_glue::ReplicationRole::MAIN; } + bool IsReplica() const { return GetRole() == replication_coordination_glue::ReplicationRole::REPLICA; } bool HasDurability() const { return nullptr != durability_; } @@ -92,7 +94,6 @@ struct ReplicationState { utils::BasicResult RegisterReplica(const ReplicationClientConfig &config); bool SetReplicationRoleMain(); - bool SetReplicationRoleReplica(const ReplicationServerConfig &config); private: diff --git a/src/replication/include/replication/status.hpp b/src/replication/include/replication/status.hpp index 880086fdb..4dfba6aaa 100644 --- a/src/replication/include/replication/status.hpp +++ b/src/replication/include/replication/status.hpp @@ -21,7 +21,7 @@ #include "replication/config.hpp" #include "replication/epoch.hpp" -#include "replication/role.hpp" +#include "replication_coordination_glue/role.hpp" namespace memgraph::replication::durability { diff --git a/src/replication/state.cpp b/src/replication/state.cpp index 47ab221c8..d04a3d245 100644 --- a/src/replication/state.cpp +++ b/src/replication/state.cpp @@ -260,7 +260,7 @@ utils::BasicResult ReplicationState:: return std::any_of(replicas.begin(), replicas.end(), endpoint_matches); }; if (endpoint_check(mainData.registered_replicas_)) { - return RegisterReplicaError::END_POINT_EXISTS; + return RegisterReplicaError::ENDPOINT_EXISTS; } // Durability @@ -279,4 +279,5 @@ utils::BasicResult ReplicationState:: } return res; } + } // namespace memgraph::replication diff --git a/src/replication/status.cpp b/src/replication/status.cpp index acdade47b..de1af9589 100644 --- a/src/replication/status.cpp +++ b/src/replication/status.cpp @@ -29,12 +29,14 @@ constexpr 
auto *kVersion = "durability_version"; void to_json(nlohmann::json &j, const ReplicationRoleEntry &p) { auto processMAIN = [&](MainRole const &main) { - j = nlohmann::json{{kVersion, p.version}, {kReplicationRole, ReplicationRole::MAIN}, {kEpoch, main.epoch.id()}}; + j = nlohmann::json{{kVersion, p.version}, + {kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN}, + {kEpoch, main.epoch.id()}}; }; auto processREPLICA = [&](ReplicaRole const &replica) { j = nlohmann::json{ {kVersion, p.version}, - {kReplicationRole, ReplicationRole::REPLICA}, + {kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA}, {kIpAddress, replica.config.ip_address}, {kPort, replica.config.port} // TODO: SSL @@ -47,17 +49,17 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) { // This value did not exist in V1, hence default DurabilityVersion::V1 DurabilityVersion version = j.value(kVersion, DurabilityVersion::V1); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - ReplicationRole role; + replication_coordination_glue::ReplicationRole role; j.at(kReplicationRole).get_to(role); switch (role) { - case ReplicationRole::MAIN: { + case replication_coordination_glue::ReplicationRole::MAIN: { auto json_epoch = j.value(kEpoch, std::string{}); auto epoch = ReplicationEpoch{}; if (!json_epoch.empty()) epoch.SetEpoch(json_epoch); p = ReplicationRoleEntry{.version = version, .role = MainRole{.epoch = std::move(epoch)}}; break; } - case ReplicationRole::REPLICA: { + case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: { std::string ip_address; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) uint16_t port; diff --git a/src/replication_coordination_glue/CMakeLists.txt b/src/replication_coordination_glue/CMakeLists.txt index 768f13eac..010a7b596 100644 --- a/src/replication_coordination_glue/CMakeLists.txt +++ b/src/replication_coordination_glue/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources(mg-repl_coord_glue PUBLIC messages.hpp 
mode.hpp + role.hpp PRIVATE messages.cpp diff --git a/src/replication/include/replication/role.hpp b/src/replication_coordination_glue/role.hpp similarity index 87% rename from src/replication/include/replication/role.hpp rename to src/replication_coordination_glue/role.hpp index bb720f8e0..d472cb454 100644 --- a/src/replication/include/replication/role.hpp +++ b/src/replication_coordination_glue/role.hpp @@ -12,8 +12,8 @@ #pragma once #include -namespace memgraph::replication { +namespace memgraph::replication_coordination_glue { // TODO: figure out a way of ensuring that usage of this type is never uninitialed/defaulted incorrectly to MAIN enum class ReplicationRole : uint8_t { MAIN, REPLICA }; -} // namespace memgraph::replication +} // namespace memgraph::replication_coordination_glue diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index c62125b70..f3c3aa0f4 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -288,7 +288,8 @@ DiskStorage::~DiskStorage() { DiskStorage::DiskAccessor::DiskAccessor(auto tag, DiskStorage *storage, IsolationLevel isolation_level, StorageMode storage_mode) - : Accessor(tag, storage, isolation_level, storage_mode, memgraph::replication::ReplicationRole::MAIN) { + : Accessor(tag, storage, isolation_level, storage_mode, + memgraph::replication_coordination_glue::ReplicationRole::MAIN) { rocksdb::WriteOptions write_options; auto txOptions = rocksdb::TransactionOptions{.set_snapshot = true}; transaction_.disk_transaction_ = storage->kvstore_->db_->BeginTransaction(write_options, txOptions); @@ -837,7 +838,8 @@ StorageInfo DiskStorage::GetBaseInfo(bool /* unused */) { return info; } -StorageInfo DiskStorage::GetInfo(bool force_dir, memgraph::replication::ReplicationRole replication_role) { +StorageInfo DiskStorage::GetInfo(bool force_dir, + memgraph::replication_coordination_glue::ReplicationRole replication_role) { StorageInfo info = GetBaseInfo(force_dir); { auto access = 
Access(replication_role); @@ -2007,7 +2009,7 @@ UniqueConstraints::DeletionStatus DiskStorage::DiskAccessor::DropUniqueConstrain } Transaction DiskStorage::CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole /*is_main*/) { + memgraph::replication_coordination_glue::ReplicationRole /*is_main*/) { /// We acquire the transaction engine lock here because we access (and /// modify) the transaction engine variables (`transaction_id` and /// `timestamp`) below. @@ -2032,8 +2034,9 @@ uint64_t DiskStorage::CommitTimestamp(const std::optional desired_comm return *desired_commit_timestamp; } -std::unique_ptr DiskStorage::Access(memgraph::replication::ReplicationRole /*replication_role*/, - std::optional override_isolation_level) { +std::unique_ptr DiskStorage::Access( + memgraph::replication_coordination_glue::ReplicationRole /*replication_role*/, + std::optional override_isolation_level) { auto isolation_level = override_isolation_level.value_or(isolation_level_); if (isolation_level != IsolationLevel::SNAPSHOT_ISOLATION) { throw utils::NotYetImplemented("Disk storage supports only SNAPSHOT isolation level."); @@ -2042,7 +2045,7 @@ std::unique_ptr DiskStorage::Access(memgraph::replication::Re new DiskAccessor{Storage::Accessor::shared_access, this, isolation_level, storage_mode_}); } std::unique_ptr DiskStorage::UniqueAccess( - memgraph::replication::ReplicationRole /*replication_role*/, + memgraph::replication_coordination_glue::ReplicationRole /*replication_role*/, std::optional override_isolation_level) { auto isolation_level = override_isolation_level.value_or(isolation_level_); if (isolation_level != IsolationLevel::SNAPSHOT_ISOLATION) { diff --git a/src/storage/v2/disk/storage.hpp b/src/storage/v2/disk/storage.hpp index 54cf81496..293e102b1 100644 --- a/src/storage/v2/disk/storage.hpp +++ b/src/storage/v2/disk/storage.hpp @@ -176,11 +176,11 @@ class DiskStorage final : public Storage { }; using 
Storage::Access; - std::unique_ptr Access(memgraph::replication::ReplicationRole replication_role, + std::unique_ptr Access(memgraph::replication_coordination_glue::ReplicationRole replication_role, std::optional override_isolation_level) override; using Storage::UniqueAccess; - std::unique_ptr UniqueAccess(memgraph::replication::ReplicationRole replication_role, + std::unique_ptr UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role, std::optional override_isolation_level) override; /// Flushing methods @@ -285,7 +285,7 @@ class DiskStorage final : public Storage { RocksDBStorage *GetRocksDBStorage() const { return kvstore_.get(); } Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role) override; + memgraph::replication_coordination_glue::ReplicationRole replication_role) override; void SetEdgeImportMode(EdgeImportMode edge_import_status); @@ -308,7 +308,8 @@ class DiskStorage final : public Storage { PropertyId property); StorageInfo GetBaseInfo(bool force_directory) override; - StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) override; + StorageInfo GetInfo(bool force_directory, + memgraph::replication_coordination_glue::ReplicationRole replication_role) override; void FreeMemory(std::unique_lock /*lock*/) override {} diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index 0a810ae3b..381a67d3f 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -178,7 +178,7 @@ InMemoryStorage::~InMemoryStorage() { InMemoryStorage::InMemoryAccessor::InMemoryAccessor(auto tag, InMemoryStorage *storage, IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role) + memgraph::replication_coordination_glue::ReplicationRole replication_role) : Accessor(tag, storage, 
isolation_level, storage_mode, replication_role), config_(storage->config_.salient.items) {} InMemoryStorage::InMemoryAccessor::InMemoryAccessor(InMemoryAccessor &&other) noexcept @@ -1280,8 +1280,9 @@ VerticesIterable InMemoryStorage::InMemoryAccessor::Vertices( mem_label_property_index->Vertices(label, property, lower_bound, upper_bound, view, storage_, &transaction_)); } -Transaction InMemoryStorage::CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role) { +Transaction InMemoryStorage::CreateTransaction( + IsolationLevel isolation_level, StorageMode storage_mode, + memgraph::replication_coordination_glue::ReplicationRole replication_role) { // We acquire the transaction engine lock here because we access (and // modify) the transaction engine variables (`transaction_id` and // `timestamp`) below. @@ -1296,7 +1297,7 @@ Transaction InMemoryStorage::CreateTransaction(IsolationLevel isolation_level, S // of any query on replica to the last commited transaction // which is timestamp_ as only commit of transaction with writes // can change the value of it. - if (replication_role == memgraph::replication::ReplicationRole::MAIN) { + if (replication_role == memgraph::replication_coordination_glue::ReplicationRole::MAIN) { start_timestamp = timestamp_++; } else { start_timestamp = timestamp_; @@ -1683,7 +1684,8 @@ StorageInfo InMemoryStorage::GetBaseInfo(bool force_directory) { return info; } -StorageInfo InMemoryStorage::GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) { +StorageInfo InMemoryStorage::GetInfo(bool force_directory, + memgraph::replication_coordination_glue::ReplicationRole replication_role) { StorageInfo info = GetBaseInfo(force_directory); { auto access = Access(replication_role); // TODO: override isolation level? 
@@ -2004,15 +2006,15 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera } utils::BasicResult InMemoryStorage::CreateSnapshot( - memgraph::replication::ReplicationRole replication_role) { - if (replication_role == memgraph::replication::ReplicationRole::REPLICA) { + memgraph::replication_coordination_glue::ReplicationRole replication_role) { + if (replication_role == memgraph::replication_coordination_glue::ReplicationRole::REPLICA) { return InMemoryStorage::CreateSnapshotError::DisabledForReplica; } auto const &epoch = repl_storage_state_.epoch_; auto snapshot_creator = [this, &epoch]() { utils::Timer timer; auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION, storage_mode_, - memgraph::replication::ReplicationRole::MAIN); + memgraph::replication_coordination_glue::ReplicationRole::MAIN); durability::CreateSnapshot(this, &transaction, recovery_.snapshot_directory_, recovery_.wal_directory_, &vertices_, &edges_, uuid_, epoch, repl_storage_state_.history, &file_retainer_); // Finalize snapshot transaction. 
@@ -2100,14 +2102,16 @@ utils::FileRetainer::FileLockerAccessor::ret_type InMemoryStorage::UnlockPath() return true; } -std::unique_ptr InMemoryStorage::Access(memgraph::replication::ReplicationRole replication_role, - std::optional override_isolation_level) { +std::unique_ptr InMemoryStorage::Access( + memgraph::replication_coordination_glue::ReplicationRole replication_role, + std::optional override_isolation_level) { return std::unique_ptr(new InMemoryAccessor{Storage::Accessor::shared_access, this, override_isolation_level.value_or(isolation_level_), storage_mode_, replication_role}); } std::unique_ptr InMemoryStorage::UniqueAccess( - memgraph::replication::ReplicationRole replication_role, std::optional override_isolation_level) { + memgraph::replication_coordination_glue::ReplicationRole replication_role, + std::optional override_isolation_level) { return std::unique_ptr(new InMemoryAccessor{Storage::Accessor::unique_access, this, override_isolation_level.value_or(isolation_level_), storage_mode_, replication_role}); diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index b5cb2e174..6f8806c26 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -73,7 +73,8 @@ class InMemoryStorage final : public Storage { friend class InMemoryStorage; explicit InMemoryAccessor(auto tag, InMemoryStorage *storage, IsolationLevel isolation_level, - StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role); + StorageMode storage_mode, + memgraph::replication_coordination_glue::ReplicationRole replication_role); public: InMemoryAccessor(const InMemoryAccessor &) = delete; @@ -322,10 +323,10 @@ class InMemoryStorage final : public Storage { }; using Storage::Access; - std::unique_ptr Access(memgraph::replication::ReplicationRole replication_role, + std::unique_ptr Access(memgraph::replication_coordination_glue::ReplicationRole replication_role, std::optional 
override_isolation_level) override; using Storage::UniqueAccess; - std::unique_ptr UniqueAccess(memgraph::replication::ReplicationRole replication_role, + std::unique_ptr UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role, std::optional override_isolation_level) override; void FreeMemory(std::unique_lock main_guard) override; @@ -335,12 +336,12 @@ class InMemoryStorage final : public Storage { utils::FileRetainer::FileLockerAccessor::ret_type UnlockPath(); utils::BasicResult CreateSnapshot( - memgraph::replication::ReplicationRole replication_role); + memgraph::replication_coordination_glue::ReplicationRole replication_role); void CreateSnapshotHandler(std::function()> cb); Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role) override; + memgraph::replication_coordination_glue::ReplicationRole replication_role) override; void SetStorageMode(StorageMode storage_mode); @@ -365,7 +366,8 @@ class InMemoryStorage final : public Storage { void FinalizeWalFile(); StorageInfo GetBaseInfo(bool force_directory) override; - StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) override; + StorageInfo GetInfo(bool force_directory, + memgraph::replication_coordination_glue::ReplicationRole replication_role) override; /// Return true in all cases excepted if any sync replicas have not sent confirmation. [[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp, diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 5b29e8b15..b68618e04 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -67,6 +67,7 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce "now hold unique data. 
Please resolve data conflicts and start the " "replication on a clean instance.", client_.name_, client_.name_, client_.name_); + // TODO: (andi) Talk about renaming MAYBE_BEHIND to branching // State not updated, hence in MAYBE_BEHIND state return; } diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 9045f20e9..536a504a0 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -49,7 +49,8 @@ Storage::Storage(Config config, StorageMode storage_mode) } Storage::Accessor::Accessor(SharedAccess /* tag */, Storage *storage, IsolationLevel isolation_level, - StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role) + StorageMode storage_mode, + memgraph::replication_coordination_glue::ReplicationRole replication_role) : storage_(storage), // The lock must be acquired before creating the transaction object to // prevent freshly created transactions from dangling in an active state @@ -61,7 +62,8 @@ Storage::Accessor::Accessor(SharedAccess /* tag */, Storage *storage, IsolationL creation_storage_mode_(storage_mode) {} Storage::Accessor::Accessor(UniqueAccess /* tag */, Storage *storage, IsolationLevel isolation_level, - StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role) + StorageMode storage_mode, + memgraph::replication_coordination_glue::ReplicationRole replication_role) : storage_(storage), // The lock must be acquired before creating the transaction object to // prevent freshly created transactions from dangling in an active state diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 94c4ccdf9..a096f27fd 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -145,9 +145,9 @@ class Storage { } unique_access; Accessor(SharedAccess /* tag */, Storage *storage, IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role); + 
memgraph::replication_coordination_glue::ReplicationRole replication_role); Accessor(UniqueAccess /* tag */, Storage *storage, IsolationLevel isolation_level, StorageMode storage_mode, - memgraph::replication::ReplicationRole replication_role); + memgraph::replication_coordination_glue::ReplicationRole replication_role); Accessor(const Accessor &) = delete; Accessor &operator=(const Accessor &) = delete; Accessor &operator=(Accessor &&other) = delete; @@ -328,16 +328,17 @@ class Storage { void FreeMemory() { FreeMemory({}); } - virtual std::unique_ptr Access(memgraph::replication::ReplicationRole replication_role, + virtual std::unique_ptr Access(memgraph::replication_coordination_glue::ReplicationRole replication_role, std::optional override_isolation_level) = 0; - std::unique_ptr Access(memgraph::replication::ReplicationRole replication_role) { + std::unique_ptr Access(memgraph::replication_coordination_glue::ReplicationRole replication_role) { return Access(replication_role, {}); } - virtual std::unique_ptr UniqueAccess(memgraph::replication::ReplicationRole replication_role, - std::optional override_isolation_level) = 0; - std::unique_ptr UniqueAccess(memgraph::replication::ReplicationRole replication_role) { + virtual std::unique_ptr UniqueAccess( + memgraph::replication_coordination_glue::ReplicationRole replication_role, + std::optional override_isolation_level) = 0; + std::unique_ptr UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role) { return UniqueAccess(replication_role, {}); } @@ -356,10 +357,11 @@ class Storage { return GetBaseInfo(force_dir); } - virtual StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) = 0; + virtual StorageInfo GetInfo(bool force_directory, + memgraph::replication_coordination_glue::ReplicationRole replication_role) = 0; virtual Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, - 
memgraph::replication::ReplicationRole replication_role) = 0; + memgraph::replication_coordination_glue::ReplicationRole replication_role) = 0; virtual void PrepareForNewEpoch() = 0; diff --git a/src/utils/scheduler.hpp b/src/utils/scheduler.hpp index d96178598..742271a95 100644 --- a/src/utils/scheduler.hpp +++ b/src/utils/scheduler.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -58,28 +58,40 @@ class Scheduler { // the start of the program. Since Server will log some messages on // the program start we let him log first and we make sure by first // waiting that funcion f will not log before it. + // Check for pause also. std::unique_lock lk(mutex_); auto now = std::chrono::system_clock::now(); start_time += pause; if (start_time > now) { - condition_variable_.wait_until(lk, start_time, [&] { return is_working_.load() == false; }); + condition_variable_.wait_until(lk, start_time, [&] { return !is_working_.load(); }); } else { start_time = now; } + pause_cv_.wait(lk, [&] { return !is_paused_.load(); }); + if (!is_working_) break; f(); } }); } + void Resume() { + is_paused_.store(false); + pause_cv_.notify_one(); + } + + void Pause() { is_paused_.store(true); } + /** * @brief Stops the thread execution. This is a blocking call and may take as * much time as one call to the function given previously to Run takes. * @throw std::system_error */ void Stop() { + is_paused_.store(false); is_working_.store(false); + pause_cv_.notify_one(); condition_variable_.notify_one(); if (thread_.joinable()) thread_.join(); } @@ -97,6 +109,16 @@ class Scheduler { */ std::atomic is_working_{false}; + /** + * Variable is true when thread is paused. + */ + std::atomic is_paused_{false}; + + /* + * Wait until the thread is resumed. 
+ */ + std::condition_variable pause_cv_; + /** * Mutex used to synchronize threads using condition variable. */ diff --git a/src/utils/typeinfo.hpp b/src/utils/typeinfo.hpp index aae88c63a..fd0d1fdeb 100644 --- a/src/utils/typeinfo.hpp +++ b/src/utils/typeinfo.hpp @@ -97,6 +97,8 @@ enum class TypeId : uint64_t { // Coordinator COORD_FAILOVER_REQ, COORD_FAILOVER_RES, + COORD_SET_REPL_MAIN_REQ, + COORD_SET_REPL_MAIN_RES, // AST AST_LABELIX = 3000, diff --git a/tests/benchmark/query/eval.cpp b/tests/benchmark/query/eval.cpp index 09e789137..92ba67cd6 100644 --- a/tests/benchmark/query/eval.cpp +++ b/tests/benchmark/query/eval.cpp @@ -17,7 +17,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // The following classes are wrappers for memgraph::utils::MemoryResource, so that we can // use BENCHMARK_TEMPLATE diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index 750dd5564..d49b14fc3 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -33,7 +33,7 @@ #include "query/interpreter.hpp" #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // The following classes are wrappers for memgraph::utils::MemoryResource, so that we can // use BENCHMARK_TEMPLATE diff --git a/tests/benchmark/query/planner.cpp b/tests/benchmark/query/planner.cpp index b64c4c39f..c70de0869 100644 --- a/tests/benchmark/query/planner.cpp +++ b/tests/benchmark/query/planner.cpp @@ -20,7 +20,7 @@ #include "query/plan/vertex_count_cache.hpp" #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // Add chained MATCH (node1) -- (node2), MATCH (node2) -- (node3) ... clauses. 
static memgraph::query::CypherQuery *AddChainedMatches(int num_matches, memgraph::query::AstStorage &storage) { diff --git a/tests/benchmark/storage_v2_gc.cpp b/tests/benchmark/storage_v2_gc.cpp index 246df09f6..6f0e5712d 100644 --- a/tests/benchmark/storage_v2_gc.cpp +++ b/tests/benchmark/storage_v2_gc.cpp @@ -17,7 +17,7 @@ #include "storage/v2/storage.hpp" #include "utils/timer.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // This benchmark should be run for a fixed amount of time that is // large compared to GC interval to make the output relevant. diff --git a/tests/benchmark/storage_v2_gc2.cpp b/tests/benchmark/storage_v2_gc2.cpp index 52aa01870..f3986edd3 100644 --- a/tests/benchmark/storage_v2_gc2.cpp +++ b/tests/benchmark/storage_v2_gc2.cpp @@ -17,7 +17,7 @@ #include "storage/v2/storage.hpp" #include "utils/timer.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // This benchmark should be run for a fixed amount of time that is // large compared to GC interval to make the output relevant. 
diff --git a/tests/concurrent/storage_indices.cpp b/tests/concurrent/storage_indices.cpp index 967e98c91..fc4d75a76 100644 --- a/tests/concurrent/storage_indices.cpp +++ b/tests/concurrent/storage_indices.cpp @@ -19,7 +19,7 @@ #include "storage/v2/storage_error.hpp" #include "utils/thread.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; const uint64_t kNumVerifiers = 5; const uint64_t kNumMutators = 1; diff --git a/tests/concurrent/storage_unique_constraints.cpp b/tests/concurrent/storage_unique_constraints.cpp index 7defb4211..dc3b30146 100644 --- a/tests/concurrent/storage_unique_constraints.cpp +++ b/tests/concurrent/storage_unique_constraints.cpp @@ -16,7 +16,7 @@ #include "storage/v2/constraints/constraints.hpp" #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; const int kNumThreads = 8; diff --git a/tests/e2e/high_availability_experimental/CMakeLists.txt b/tests/e2e/high_availability_experimental/CMakeLists.txt index 2975ce588..76e1a6956 100644 --- a/tests/e2e/high_availability_experimental/CMakeLists.txt +++ b/tests/e2e/high_availability_experimental/CMakeLists.txt @@ -1,8 +1,8 @@ find_package(gflags REQUIRED) copy_e2e_python_files(ha_experimental coordinator.py) -copy_e2e_python_files(ha_experimental client_initiated_failover.py) -copy_e2e_python_files(ha_experimental uninitialized_cluster.py) +copy_e2e_python_files(ha_experimental automatic_failover.py) +copy_e2e_python_files(ha_experimental manual_setting_replicas.py) copy_e2e_python_files(ha_experimental common.py) copy_e2e_python_files(ha_experimental conftest.py) copy_e2e_python_files(ha_experimental workloads.yaml) diff --git a/tests/e2e/high_availability_experimental/client_initiated_failover.py b/tests/e2e/high_availability_experimental/automatic_failover.py similarity index 55% rename from 
tests/e2e/high_availability_experimental/client_initiated_failover.py rename to tests/e2e/high_availability_experimental/automatic_failover.py index f54e67cd6..f3ffadfe8 100644 --- a/tests/e2e/high_availability_experimental/client_initiated_failover.py +++ b/tests/e2e/high_availability_experimental/automatic_failover.py @@ -1,4 +1,4 @@ -# Copyright 2022 Memgraph Ltd. +# Copyright 2024 Memgraph Ltd. # # Use of this software is governed by the Business Source License # included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -28,33 +28,88 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "instance_1": { "args": ["--bolt-port", "7688", "--log-level", "TRACE", "--coordinator-server-port", "10011"], "log_file": "replica1.log", - "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], + "setup_queries": [], }, "instance_2": { "args": ["--bolt-port", "7689", "--log-level", "TRACE", "--coordinator-server-port", "10012"], "log_file": "replica2.log", - "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], + "setup_queries": [], }, "instance_3": { "args": ["--bolt-port", "7687", "--log-level", "TRACE", "--coordinator-server-port", "10013"], "log_file": "main.log", - "setup_queries": [ - "REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001'", - "REGISTER REPLICA instance_2 SYNC TO '127.0.0.1:10002'", - ], + "setup_queries": [], }, "coordinator": { "args": ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"], "log_file": "replica3.log", "setup_queries": [ - "REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011';", - "REGISTER REPLICA instance_2 SYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012';", - "REGISTER MAIN instance_3 WITH COORDINATOR SERVER ON '127.0.0.1:10013';", + "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", + "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH 
'127.0.0.1:10002';", + "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "SET INSTANCE instance_3 TO MAIN", ], }, } +def test_replication_works_on_failover(connection): + # Goal of this test is to check the replication works after failover command. + # 1. We start all replicas, main and coordinator manually: we want to be able to kill them ourselves without relying on external tooling to kill processes. + # 2. We check that main has correct state + # 3. We kill main + # 4. We check that coordinator and new main have correct state + # 5. We insert one vertex on new main + # 6. We check that vertex appears on new replica + + # 1 + interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + + # 2 + main_cursor = connection(7687, "instance_3").cursor() + expected_data_on_main = [ + ("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ] + actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + assert actual_data_on_main == expected_data_on_main + + # 3 + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") + + # 4 + coord_cursor = connection(7690, "coordinator").cursor() + + def retrieve_data_show_repl_cluster(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;"))) + + expected_data_on_coord = [ + ("instance_1", "127.0.0.1:10011", True, "main"), + ("instance_2", "127.0.0.1:10012", True, "replica"), + ("instance_3", "127.0.0.1:10013", False, ""), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) + + new_main_cursor = connection(7688, "instance_1").cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) + + expected_data_on_new_main = [ + ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ] + mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas) + + 
# 5 + execute_and_fetch_all(new_main_cursor, "CREATE ();") + + # 6 + alive_replica_cursror = connection(7689, "instance_2").cursor() + res = execute_and_fetch_all(alive_replica_cursror, "MATCH (n) RETURN count(n) as count;")[0][0] + assert res == 1, "Vertex should be replicated" + interactive_mg_runner.stop_all(MEMGRAPH_INSTANCES_DESCRIPTION) + + def test_show_replication_cluster(connection): # Goal of this test is to check the SHOW REPLICATION CLUSTER command. # 1. We start all replicas, main and coordinator manually: we want to be able to kill them ourselves without relying on external tooling to kill processes. @@ -71,153 +126,97 @@ def test_show_replication_cluster(connection): # We leave some time for the coordinator to realise the replicas are down. def retrieve_data(): - return set(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")) + return sorted(list(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;"))) - expected_data = { + expected_data = [ ("instance_1", "127.0.0.1:10011", True, "replica"), ("instance_2", "127.0.0.1:10012", True, "replica"), ("instance_3", "127.0.0.1:10013", True, "main"), - } + ] mg_sleep_and_assert(expected_data, retrieve_data) # 3. interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") - expected_data = { + expected_data = [ + ("instance_1", "127.0.0.1:10011", False, ""), ("instance_2", "127.0.0.1:10012", True, "replica"), ("instance_3", "127.0.0.1:10013", True, "main"), - ("instance_1", "127.0.0.1:10011", False, "replica"), - } + ] mg_sleep_and_assert(expected_data, retrieve_data) # 4. 
- interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") - expected_data = { - ("instance_2", "127.0.0.1:10012", True, "replica"), - ("instance_1", "127.0.0.1:10011", False, "replica"), - ("instance_3", "127.0.0.1:10013", False, "main"), - } + expected_data = [ + ("instance_1", "127.0.0.1:10011", False, ""), + ("instance_2", "127.0.0.1:10012", False, ""), + ("instance_3", "127.0.0.1:10013", True, "main"), + ] mg_sleep_and_assert(expected_data, retrieve_data) -def test_simple_client_initiated_failover(connection): - # 1. Start all instances - # 2. Kill main - # 3. Run DO FAILOVER on COORDINATOR - # 4. Assert new config on coordinator by running show replication cluster - # 5. Assert replicas on new main - - # 1. +def test_simple_automatic_failover(connection): interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - # 2. main_cursor = connection(7687, "instance_3").cursor() - expected_data_on_main = { + expected_data_on_main = [ ("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), - } - actual_data_on_main = set(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;")) + ] + actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) assert actual_data_on_main == expected_data_on_main interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") + coord_cursor = connection(7690, "coordinator").cursor() def retrieve_data_show_repl_cluster(): - return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;")) + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;"))) - expected_data_on_coord = { - ("instance_1", "127.0.0.1:10011", True, "replica"), - ("instance_2", "127.0.0.1:10012", True, "replica"), - ("instance_3", "127.0.0.1:10013", False, "main"), - } - mg_sleep_and_assert(expected_data_on_coord, 
retrieve_data_show_repl_cluster) - - # 3. - execute_and_fetch_all(coord_cursor, "DO FAILOVER") - - expected_data_on_coord = { + expected_data_on_coord = [ ("instance_1", "127.0.0.1:10011", True, "main"), ("instance_2", "127.0.0.1:10012", True, "replica"), - } + ("instance_3", "127.0.0.1:10013", False, ""), + ] mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) - # 4. new_main_cursor = connection(7688, "instance_1").cursor() def retrieve_data_show_replicas(): - return set(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;")) + return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) - expected_data_on_new_main = { + expected_data_on_new_main = [ ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), - } + ] mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas) -def test_failover_fails_all_replicas_down(connection): - # 1. Start all instances - # 2. Kill all replicas - # 3. Kill main - # 4. Run DO FAILOVER on COORDINATOR. Assert exception is being thrown due to all replicas being down - # 5. Assert cluster status didn't change - - # 1. +def test_registering_replica_fails_name_exists(connection): interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - # 2. - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") - - # 3. - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") - coord_cursor = connection(7690, "coordinator").cursor() - # 4. with pytest.raises(Exception) as e: - execute_and_fetch_all(coord_cursor, "DO FAILOVER;") - assert str(e.value) == "Failover aborted since all replicas are down!" - - # 5. 
- - def retrieve_data(): - return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;")) - - expected_data_on_coord = { - ("instance_1", "127.0.0.1:10011", False, "replica"), - ("instance_2", "127.0.0.1:10012", False, "replica"), - ("instance_3", "127.0.0.1:10013", False, "main"), - } - mg_sleep_and_assert(expected_data_on_coord, retrieve_data) + execute_and_fetch_all( + coord_cursor, + "REGISTER INSTANCE instance_1 ON '127.0.0.1:10051' WITH '127.0.0.1:10111';", + ) + assert str(e.value) == "Couldn't register replica instance since instance with such name already exists!" -def test_failover_fails_main_is_alive(connection): - # 1. Start all instances - # 2. Run DO FAILOVER on COORDINATOR. Assert exception is being thrown due to main is still live. - # 3. Assert cluster status didn't change - - # 1. +def test_registering_replica_fails_endpoint_exists(connection): interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - # 2. coord_cursor = connection(7690, "coordinator").cursor() - - def retrieve_data(): - return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;")) - - expected_data_on_coord = { - ("instance_1", "127.0.0.1:10011", True, "replica"), - ("instance_2", "127.0.0.1:10012", True, "replica"), - ("instance_3", "127.0.0.1:10013", True, "main"), - } - mg_sleep_and_assert(expected_data_on_coord, retrieve_data) - - # 4. with pytest.raises(Exception) as e: - execute_and_fetch_all(coord_cursor, "DO FAILOVER;") - assert str(e.value) == "Failover aborted since main is alive!" - - # 5. - mg_sleep_and_assert(expected_data_on_coord, retrieve_data) + execute_and_fetch_all( + coord_cursor, + "REGISTER INSTANCE instance_5 ON '127.0.0.1:10001' WITH '127.0.0.1:10013';", + ) + assert ( + str(e.value) + == "Couldn't register replica because promotion on replica failed! Check logs on replica to find out more info!" 
+ ) if __name__ == "__main__": diff --git a/tests/e2e/high_availability_experimental/coordinator.py b/tests/e2e/high_availability_experimental/coordinator.py index 711ccb8d0..e34e9f069 100644 --- a/tests/e2e/high_availability_experimental/coordinator.py +++ b/tests/e2e/high_availability_experimental/coordinator.py @@ -1,4 +1,4 @@ -# Copyright 2022 Memgraph Ltd. +# Copyright 2024 Memgraph Ltd. # # Use of this software is governed by the Business Source License # included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -41,13 +41,13 @@ def test_coordinator_show_replication_cluster(connection): cursor = connection(7690, "coordinator").cursor() def retrieve_data(): - return set(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")) + return sorted(list(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;"))) - expected_data = { - ("main", "127.0.0.1:10013", True, "main"), - ("replica_1", "127.0.0.1:10011", True, "replica"), - ("replica_2", "127.0.0.1:10012", True, "replica"), - } + expected_data = [ + ("instance_1", "127.0.0.1:10011", True, "replica"), + ("instance_2", "127.0.0.1:10012", True, "replica"), + ("instance_3", "127.0.0.1:10013", True, "main"), + ] mg_sleep_and_assert(expected_data, retrieve_data) @@ -78,21 +78,10 @@ def test_main_and_replicas_cannot_register_coord_server(port, role, connection): with pytest.raises(Exception) as e: execute_and_fetch_all( cursor, - "REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011';", + "REGISTER INSTANCE instance_1 ON '127.0.0.1:10001' WITH '127.0.0.1:10011';", ) assert str(e.value) == "Only coordinator can register coordinator server!" 
-@pytest.mark.parametrize( - "port, role", - [(7687, "main"), (7688, "replica"), (7689, "replica")], -) -def test_main_and_replicas_cannot_run_do_failover(port, role, connection): - cursor = connection(port, role).cursor() - with pytest.raises(Exception) as e: - execute_and_fetch_all(cursor, "DO FAILOVER;") - assert str(e.value) == "Only coordinator can run DO FAILOVER!" - - if __name__ == "__main__": sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/high_availability_experimental/manual_setting_replicas.py b/tests/e2e/high_availability_experimental/manual_setting_replicas.py new file mode 100644 index 000000000..f2d48ffd7 --- /dev/null +++ b/tests/e2e/high_availability_experimental/manual_setting_replicas.py @@ -0,0 +1,57 @@ +# Copyright 2024 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import os +import sys + +import interactive_mg_runner +import pytest +from common import execute_and_fetch_all +from mg_utils import mg_sleep_and_assert + +interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +interactive_mg_runner.PROJECT_DIR = os.path.normpath( + os.path.join(interactive_mg_runner.SCRIPT_DIR, "..", "..", "..", "..") +) +interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build")) +interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph")) + +MEMGRAPH_INSTANCES_DESCRIPTION = { + "instance_3": { + "args": ["--bolt-port", "7687", "--log-level", "TRACE", "--coordinator-server-port", "10013"], + "log_file": "main.log", + "setup_queries": [], + }, +} + + +def test_no_manual_setup_on_main(connection): + # Goal of this test is to check that all manual registration actions are disabled on instances with coordinator server port + + # 1 + interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + + any_main = connection(7687, "instance_3").cursor() + with pytest.raises(Exception) as e: + execute_and_fetch_all(any_main, "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001';") + assert str(e.value) == "Can't register replica manually on instance with coordinator server port." + + with pytest.raises(Exception) as e: + execute_and_fetch_all(any_main, "DROP REPLICA replica_1;") + assert str(e.value) == "Can't drop replica manually on instance with coordinator server port." + + with pytest.raises(Exception) as e: + execute_and_fetch_all(any_main, "SET REPLICATION ROLE TO REPLICA WITH PORT 10002;") + assert str(e.value) == "Can't set role manually on instance with coordinator server port." 
+ + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/high_availability_experimental/uninitialized_cluster.py b/tests/e2e/high_availability_experimental/uninitialized_cluster.py deleted file mode 100644 index 6c0f77bb7..000000000 --- a/tests/e2e/high_availability_experimental/uninitialized_cluster.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2022 Memgraph Ltd. -# -# Use of this software is governed by the Business Source License -# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source -# License, and you may not use this file except in compliance with the Business Source License. -# -# As of the Change Date specified in that file, in accordance with -# the Business Source License, use of this software will be governed -# by the Apache License, Version 2.0, included in the file -# licenses/APL.txt. - -import sys - -import pytest -from common import execute_and_fetch_all - - -def test_failover_on_non_setup_cluster(connection): - cursor = connection(7690, "coordinator").cursor() - with pytest.raises(Exception) as e: - execute_and_fetch_all(cursor, "DO FAILOVER;") - assert str(e.value) == "Failover aborted since cluster is uninitialized!" 
- - -if __name__ == "__main__": - sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/high_availability_experimental/workloads.yaml b/tests/e2e/high_availability_experimental/workloads.yaml index dbb6457ff..1d692084a 100644 --- a/tests/e2e/high_availability_experimental/workloads.yaml +++ b/tests/e2e/high_availability_experimental/workloads.yaml @@ -3,49 +3,24 @@ ha_cluster: &ha_cluster replica_1: args: ["--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"] log_file: "replication-e2e-replica1.log" - setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"] - replica_2: - args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"] - log_file: "replication-e2e-replica2.log" - setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"] - main: - args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"] - log_file: "replication-e2e-main.log" - setup_queries: [ - "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001'", - "REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002'", - ] - coordinator: - args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"] - log_file: "replication-e2e-coordinator.log" - setup_queries: [ - "REGISTER MAIN main WITH COORDINATOR SERVER ON '127.0.0.1:10013'", - "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011'", - "REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012'", - ] - -noninitialized_cluster: &noninitialized_cluster - cluster: - replica_1: - args: ["--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"] - log_file: "replication-e2e-replica1.log" - setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"] - replica_2: - args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"] - log_file: "replication-e2e-replica2.log" - setup_queries: ["SET REPLICATION ROLE TO REPLICA 
WITH PORT 10002;"] - main: - args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"] - log_file: "replication-e2e-main.log" - setup_queries: [ - "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001'", - "REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002'", - ] - coordinator: - args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"] - log_file: "replication-e2e-coordinator.log" setup_queries: [] - + replica_2: + args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"] + log_file: "replication-e2e-replica2.log" + setup_queries: [] + main: + args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"] + log_file: "replication-e2e-main.log" + setup_queries: [] + coordinator: + args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"] + log_file: "replication-e2e-coordinator.log" + setup_queries: [ + "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", + "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", + "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "SET INSTANCE instance_3 TO MAIN;" + ] workloads: - name: "Coordinator" @@ -53,11 +28,10 @@ workloads: args: ["high_availability_experimental/coordinator.py"] <<: *ha_cluster - - name: "Uninitialized cluster" + - name: "Automatic failover" binary: "tests/e2e/pytest_runner.sh" - args: ["high_availability_experimental/uninitialized_cluster.py"] - <<: *noninitialized_cluster + args: ["high_availability_experimental/automatic_failover.py"] - - name: "Client initiated failover" + - name: "Disabled manual setting of replication cluster" binary: "tests/e2e/pytest_runner.sh" - args: ["high_availability_experimental/client_initiated_failover.py"] + args: ["high_availability_experimental/manual_setting_replicas.py"] diff --git a/tests/manual/query_planner.cpp b/tests/manual/query_planner.cpp index f455bf716..8f2c107bc 100644 --- 
a/tests/manual/query_planner.cpp +++ b/tests/manual/query_planner.cpp @@ -14,7 +14,7 @@ #include #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; DECLARE_int32(min_log_level); int main(int argc, char *argv[]) { diff --git a/tests/property_based/random_graph.cpp b/tests/property_based/random_graph.cpp index ae71a68c8..097c2dc0e 100644 --- a/tests/property_based/random_graph.cpp +++ b/tests/property_based/random_graph.cpp @@ -23,7 +23,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/storage.hpp" #include "storage/v2/vertex_accessor.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; /** * It is possible to run test with custom seed with: * RC_PARAMS="seed=1" ./random_graph diff --git a/tests/unit/auth_checker.cpp b/tests/unit/auth_checker.cpp index 0122a4440..f4c499cd7 100644 --- a/tests/unit/auth_checker.cpp +++ b/tests/unit/auth_checker.cpp @@ -22,7 +22,7 @@ #include "storage/v2/disk/storage.hpp" #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/view.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; #ifdef MG_ENTERPRISE template class FineGrainedAuthCheckerFixture : public testing::Test { diff --git a/tests/unit/bfs_fine_grained.cpp b/tests/unit/bfs_fine_grained.cpp index 1557910fe..568206dfd 100644 --- a/tests/unit/bfs_fine_grained.cpp +++ b/tests/unit/bfs_fine_grained.cpp @@ -43,7 +43,7 @@ class VertexDb : public Database { } std::unique_ptr Access() override { - return db_->Access(memgraph::replication::ReplicationRole::MAIN); + return db_->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); } std::unique_ptr MakeBfsOperator(Symbol source_sym, Symbol sink_sym, Symbol edge_sym, diff --git a/tests/unit/bfs_single_node.cpp b/tests/unit/bfs_single_node.cpp index 
a518a7729..a6816242d 100644 --- a/tests/unit/bfs_single_node.cpp +++ b/tests/unit/bfs_single_node.cpp @@ -32,7 +32,7 @@ class SingleNodeDb : public Database { } std::unique_ptr Access() override { - return db_->Access(memgraph::replication::ReplicationRole::MAIN); + return db_->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); } std::unique_ptr MakeBfsOperator(Symbol source_sym, Symbol sink_sym, Symbol edge_sym, diff --git a/tests/unit/bolt_encoder.cpp b/tests/unit/bolt_encoder.cpp index dd275a2fa..19a958118 100644 --- a/tests/unit/bolt_encoder.cpp +++ b/tests/unit/bolt_encoder.cpp @@ -182,7 +182,7 @@ void TestVertexAndEdgeWithDifferentStorages(std::unique_ptrAccess(memgraph::replication::ReplicationRole::MAIN); + auto dba = db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); auto va1 = dba->CreateVertex(); auto va2 = dba->CreateVertex(); auto l1 = dba->NameToLabel("label1"); diff --git a/tests/unit/clearing_old_disk_data.cpp b/tests/unit/clearing_old_disk_data.cpp index 76315115a..395391e12 100644 --- a/tests/unit/clearing_old_disk_data.cpp +++ b/tests/unit/clearing_old_disk_data.cpp @@ -20,7 +20,7 @@ #include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; class ClearingOldDiskDataTest : public ::testing::Test { public: diff --git a/tests/unit/cpp_api.cpp b/tests/unit/cpp_api.cpp index ce968b13e..84ca0b195 100644 --- a/tests/unit/cpp_api.cpp +++ b/tests/unit/cpp_api.cpp @@ -43,7 +43,8 @@ struct CppApiTestFixture : public ::testing::Test { } memgraph::query::DbAccessor &CreateDbAccessor(const memgraph::storage::IsolationLevel isolationLevel) { - accessors_.push_back(storage->Access(memgraph::replication::ReplicationRole::MAIN, isolationLevel)); + accessors_.push_back( + storage->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN, isolationLevel)); 
db_accessors_.emplace_back(accessors_.back().get()); return db_accessors_.back(); } diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 6ef902186..1353a56dd 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -2632,77 +2632,6 @@ TEST_P(CypherMainVisitorTest, TestRegisterReplicationQuery) { ReplicationQuery::SyncMode::SYNC); } -#ifdef MG_ENTERPRISE -TEST_P(CypherMainVisitorTest, TestRegisterCoordinatorServer) { - auto &ast_generator = *GetParam(); - - { - const std::string faulty_query_1 = "REGISTER MAIN COORDINATOR SERVER TO"; - ASSERT_THROW(ast_generator.ParseQuery(faulty_query_1), SyntaxException); - } - - { - const std::string faulty_query_2 = "REGISTER MAIN COORDINATOR SERVER TO MAIN"; - ASSERT_THROW(ast_generator.ParseQuery(faulty_query_2), SyntaxException); - } - - { - std::string full_query = "REGISTER MAIN main WITH COORDINATOR SERVER ON '127.0.0.1:10011';"; - - auto *full_query_parsed = dynamic_cast(ast_generator.ParseQuery(full_query)); - - ASSERT_TRUE(full_query_parsed); - EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER); - EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::MAIN); - EXPECT_EQ(full_query_parsed->instance_name_, "main"); - ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10011"); - ASSERT_EQ(full_query_parsed->socket_address_, nullptr); - } - - { - std::string full_query = - R"(REGISTER REPLICA replica_1 SYNC TO "127.0.0.1:10002" WITH COORDINATOR SERVER ON "127.0.0.1:10012")"; - auto *full_query_parsed = dynamic_cast(ast_generator.ParseQuery(full_query)); - ASSERT_TRUE(full_query_parsed); - EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER); - EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::REPLICA); - ast_generator.CheckLiteral(full_query_parsed->socket_address_, "127.0.0.1:10002"); - 
ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10012"); - EXPECT_EQ(full_query_parsed->instance_name_, "replica_1"); - EXPECT_EQ(full_query_parsed->sync_mode_, CoordinatorQuery::SyncMode::SYNC); - } - - { - std::string full_query = - R"(REGISTER REPLICA replica_1 ASYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012')"; - auto *full_query_parsed = dynamic_cast(ast_generator.ParseQuery(full_query)); - ASSERT_TRUE(full_query_parsed); - EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER); - EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::REPLICA); - ast_generator.CheckLiteral(full_query_parsed->socket_address_, "127.0.0.1:10002"); - ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10012"); - EXPECT_EQ(full_query_parsed->instance_name_, "replica_1"); - EXPECT_EQ(full_query_parsed->sync_mode_, CoordinatorQuery::SyncMode::ASYNC); - } -} - -TEST_P(CypherMainVisitorTest, TestDoFailover) { - auto &ast_generator = *GetParam(); - - { - std::string invalid_query = "DO FAILO"; - ASSERT_THROW(ast_generator.ParseQuery(invalid_query), SyntaxException); - } - - { - std::string correct_query = "DO FAILOVER"; - auto *correct_query_parsed = dynamic_cast(ast_generator.ParseQuery(correct_query)); - ASSERT_TRUE(correct_query_parsed); - EXPECT_EQ(correct_query_parsed->action_, CoordinatorQuery::Action::DO_FAILOVER); - } -} -#endif - TEST_P(CypherMainVisitorTest, TestDeleteReplica) { auto &ast_generator = *GetParam(); diff --git a/tests/unit/database_get_info.cpp b/tests/unit/database_get_info.cpp index 8a268580e..a8a275a61 100644 --- a/tests/unit/database_get_info.cpp +++ b/tests/unit/database_get_info.cpp @@ -165,8 +165,8 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_FALSE(unique_acc->Commit().HasError()); } - const auto &info = - db_acc->GetInfo(true, memgraph::replication::ReplicationRole::MAIN); // force to use configured 
directory + const auto &info = db_acc->GetInfo( + true, memgraph::replication_coordination_glue::ReplicationRole::MAIN); // force to use configured directory ASSERT_EQ(info.storage_info.vertex_count, 5); ASSERT_EQ(info.storage_info.edge_count, 2); diff --git a/tests/unit/plan_pretty_print.cpp b/tests/unit/plan_pretty_print.cpp index 4a513e82c..ef2395931 100644 --- a/tests/unit/plan_pretty_print.cpp +++ b/tests/unit/plan_pretty_print.cpp @@ -43,7 +43,7 @@ class PrintToJsonTest : public ::testing::Test { PrintToJsonTest() : config(disk_test_utils::GenerateOnDiskConfig(testSuite)), db(new StorageType(config)), - dba_storage(db->Access(memgraph::replication::ReplicationRole::MAIN)), + dba_storage(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)), dba(dba_storage.get()) {} ~PrintToJsonTest() override { diff --git a/tests/unit/query_cost_estimator.cpp b/tests/unit/query_cost_estimator.cpp index 631d17414..702b6e759 100644 --- a/tests/unit/query_cost_estimator.cpp +++ b/tests/unit/query_cost_estimator.cpp @@ -23,7 +23,7 @@ using namespace memgraph::query; using namespace memgraph::query::plan; -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using CardParam = CostEstimator::CardParam; using CostParam = CostEstimator::CostParam; using MiscParam = CostEstimator::MiscParam; diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp index 1817f1671..23eab17e0 100644 --- a/tests/unit/query_dump.cpp +++ b/tests/unit/query_dump.cpp @@ -141,7 +141,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) { // Capture all vertices std::map gid_mapping; std::set vertices; - auto dba = db->Access(memgraph::replication::ReplicationRole::MAIN); + auto dba = db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); for (const auto &vertex : dba->Vertices(memgraph::storage::View::NEW)) { std::set> labels; auto maybe_labels = vertex.Labels(memgraph::storage::View::NEW); @@ 
-1105,7 +1105,7 @@ TYPED_TEST(DumpTest, MultiplePartialPulls) { } TYPED_TEST(DumpTest, DumpDatabaseWithTriggers) { - auto acc = this->db->storage()->Access(memgraph::replication::ReplicationRole::MAIN); + auto acc = this->db->storage()->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); memgraph::query::DbAccessor dba(acc.get()); { auto trigger_store = this->db.get()->trigger_store(); diff --git a/tests/unit/query_expression_evaluator.cpp b/tests/unit/query_expression_evaluator.cpp index b2a7c1f7a..c725d7e54 100644 --- a/tests/unit/query_expression_evaluator.cpp +++ b/tests/unit/query_expression_evaluator.cpp @@ -67,7 +67,7 @@ class ExpressionEvaluatorTest : public ::testing::Test { ExpressionEvaluatorTest() : config(disk_test_utils::GenerateOnDiskConfig(testSuite)), db(new StorageType(config)), - storage_dba(db->Access(memgraph::replication::ReplicationRole::MAIN)), + storage_dba(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)), dba(storage_dba.get()) {} ~ExpressionEvaluatorTest() override { diff --git a/tests/unit/query_hint_provider.cpp b/tests/unit/query_hint_provider.cpp index 5510812f1..4165ef9d2 100644 --- a/tests/unit/query_hint_provider.cpp +++ b/tests/unit/query_hint_provider.cpp @@ -39,7 +39,7 @@ class HintProviderSuite : public ::testing::Test { int symbol_count = 0; void SetUp() { - storage_dba.emplace(db->Access(memgraph::replication::ReplicationRole::MAIN)); + storage_dba.emplace(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)); dba.emplace(storage_dba->get()); } diff --git a/tests/unit/query_plan_accumulate_aggregate.cpp b/tests/unit/query_plan_accumulate_aggregate.cpp index 68498cc40..c8f1c30c9 100644 --- a/tests/unit/query_plan_accumulate_aggregate.cpp +++ b/tests/unit/query_plan_accumulate_aggregate.cpp @@ -25,7 +25,7 @@ #include "storage/v2/disk/storage.hpp" #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using 
memgraph::replication_coordination_glue::ReplicationRole; using namespace memgraph::query; using namespace memgraph::query::plan; diff --git a/tests/unit/query_plan_bag_semantics.cpp b/tests/unit/query_plan_bag_semantics.cpp index 1bdaf68c1..4f3bd5256 100644 --- a/tests/unit/query_plan_bag_semantics.cpp +++ b/tests/unit/query_plan_bag_semantics.cpp @@ -31,7 +31,7 @@ #include "query_plan_common.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using namespace memgraph::query; using namespace memgraph::query::plan; diff --git a/tests/unit/query_plan_create_set_remove_delete.cpp b/tests/unit/query_plan_create_set_remove_delete.cpp index fcb98cbd9..1fa400940 100644 --- a/tests/unit/query_plan_create_set_remove_delete.cpp +++ b/tests/unit/query_plan_create_set_remove_delete.cpp @@ -38,7 +38,7 @@ using namespace memgraph::query; using namespace memgraph::query::plan; -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; template class QueryPlanTest : public testing::Test { diff --git a/tests/unit/query_plan_match_filter_return.cpp b/tests/unit/query_plan_match_filter_return.cpp index e97b10742..d5468b6b5 100644 --- a/tests/unit/query_plan_match_filter_return.cpp +++ b/tests/unit/query_plan_match_filter_return.cpp @@ -42,7 +42,7 @@ using namespace memgraph::query; using namespace memgraph::query::plan; -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; const std::string testSuite = "query_plan_match_filter_return"; diff --git a/tests/unit/query_plan_operator_to_string.cpp b/tests/unit/query_plan_operator_to_string.cpp index 4430e6d23..694552cf0 100644 --- a/tests/unit/query_plan_operator_to_string.cpp +++ b/tests/unit/query_plan_operator_to_string.cpp @@ -37,7 +37,7 @@ class OperatorToStringTest : public ::testing::Test { OperatorToStringTest() : 
config(disk_test_utils::GenerateOnDiskConfig(testSuite)), db(new StorageType(config)), - dba_storage(db->Access(memgraph::replication::ReplicationRole::MAIN)), + dba_storage(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)), dba(dba_storage.get()) {} ~OperatorToStringTest() override { diff --git a/tests/unit/query_plan_read_write_typecheck.cpp b/tests/unit/query_plan_read_write_typecheck.cpp index 99b3c3da7..f9f14902b 100644 --- a/tests/unit/query_plan_read_write_typecheck.cpp +++ b/tests/unit/query_plan_read_write_typecheck.cpp @@ -37,7 +37,7 @@ class ReadWriteTypeCheckTest : public ::testing::Test { memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite); std::unique_ptr db{new StorageType(config)}; std::unique_ptr dba_storage{ - db->Access(memgraph::replication::ReplicationRole::MAIN)}; + db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)}; memgraph::query::DbAccessor dba{dba_storage.get()}; void TearDown() override { diff --git a/tests/unit/query_plan_v2_create_set_remove_delete.cpp b/tests/unit/query_plan_v2_create_set_remove_delete.cpp index c6a7b3627..b82454682 100644 --- a/tests/unit/query_plan_v2_create_set_remove_delete.cpp +++ b/tests/unit/query_plan_v2_create_set_remove_delete.cpp @@ -18,7 +18,7 @@ #include "query/plan/operator.hpp" #include "storage/v2/disk/storage.hpp" #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; template class QueryPlan : public testing::Test { diff --git a/tests/unit/query_pretty_print.cpp b/tests/unit/query_pretty_print.cpp index 4382176be..ac789b1da 100644 --- a/tests/unit/query_pretty_print.cpp +++ b/tests/unit/query_pretty_print.cpp @@ -37,7 +37,7 @@ class ExpressionPrettyPrinterTest : public ::testing::Test { memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite); std::unique_ptr db{new StorageType(config)}; 
std::unique_ptr storage_dba{ - db->Access(memgraph::replication::ReplicationRole::MAIN)}; + db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)}; memgraph::query::DbAccessor dba{storage_dba.get()}; AstStorage storage; diff --git a/tests/unit/query_procedure_mgp_type.cpp b/tests/unit/query_procedure_mgp_type.cpp index 9018d5997..e12a61f28 100644 --- a/tests/unit/query_procedure_mgp_type.cpp +++ b/tests/unit/query_procedure_mgp_type.cpp @@ -23,7 +23,7 @@ #include "disk_test_utils.hpp" #include "test_utils.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; template class CypherType : public testing::Test { diff --git a/tests/unit/query_procedure_py_module.cpp b/tests/unit/query_procedure_py_module.cpp index 90d9cb669..baef2e1c8 100644 --- a/tests/unit/query_procedure_py_module.cpp +++ b/tests/unit/query_procedure_py_module.cpp @@ -21,7 +21,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "test_utils.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; template class PyModule : public testing::Test { diff --git a/tests/unit/query_procedures_mgp_graph.cpp b/tests/unit/query_procedures_mgp_graph.cpp index 22ea64cfd..cf3b5a137 100644 --- a/tests/unit/query_procedures_mgp_graph.cpp +++ b/tests/unit/query_procedures_mgp_graph.cpp @@ -34,7 +34,7 @@ #include "utils/memory.hpp" #include "utils/variant_helpers.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; #define EXPECT_SUCCESS(...) 
EXPECT_EQ(__VA_ARGS__, mgp_error::MGP_ERROR_NO_ERROR) diff --git a/tests/unit/query_semantic.cpp b/tests/unit/query_semantic.cpp index 4f27fa1e2..c4bb966eb 100644 --- a/tests/unit/query_semantic.cpp +++ b/tests/unit/query_semantic.cpp @@ -35,7 +35,7 @@ class TestSymbolGenerator : public ::testing::Test { memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite); std::unique_ptr db{new StorageType(config)}; std::unique_ptr storage_dba{ - db->Access(memgraph::replication::ReplicationRole::MAIN)}; + db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)}; memgraph::query::DbAccessor dba{storage_dba.get()}; AstStorage storage; diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp index 040af0a22..1b2ca5e9c 100644 --- a/tests/unit/query_trigger.cpp +++ b/tests/unit/query_trigger.cpp @@ -29,7 +29,7 @@ #include "utils/exceptions.hpp" #include "utils/memory.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; namespace { const std::unordered_set kAllEventTypes{ diff --git a/tests/unit/query_variable_start_planner.cpp b/tests/unit/query_variable_start_planner.cpp index ef08e8cca..df7173db2 100644 --- a/tests/unit/query_variable_start_planner.cpp +++ b/tests/unit/query_variable_start_planner.cpp @@ -28,7 +28,7 @@ #include "formatters.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using namespace memgraph::query::plan; using memgraph::query::AstStorage; using Type = memgraph::query::EdgeAtom::Type; diff --git a/tests/unit/storage_rocks.cpp b/tests/unit/storage_rocks.cpp index 365f46ad3..5cdaf4691 100644 --- a/tests/unit/storage_rocks.cpp +++ b/tests/unit/storage_rocks.cpp @@ -30,7 +30,7 @@ #include "storage/v2/view.hpp" #include "utils/rocksdb_serialization.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // 
NOLINTNEXTLINE(google-build-using-namespace) using namespace memgraph::storage; diff --git a/tests/unit/storage_v2.cpp b/tests/unit/storage_v2.cpp index dc3ec1512..7db51ddd4 100644 --- a/tests/unit/storage_v2.cpp +++ b/tests/unit/storage_v2.cpp @@ -23,7 +23,7 @@ #include "storage/v2/vertex_accessor.hpp" #include "storage_test_utils.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::Types; using testing::UnorderedElementsAre; diff --git a/tests/unit/storage_v2_constraints.cpp b/tests/unit/storage_v2_constraints.cpp index b36bc59c9..7f03f40d1 100644 --- a/tests/unit/storage_v2_constraints.cpp +++ b/tests/unit/storage_v2_constraints.cpp @@ -23,7 +23,7 @@ #include "disk_test_utils.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; // NOLINTNEXTLINE(google-build-using-namespace) using namespace memgraph::storage; diff --git a/tests/unit/storage_v2_durability_inmemory.cpp b/tests/unit/storage_v2_durability_inmemory.cpp index 433242c0f..54671077f 100644 --- a/tests/unit/storage_v2_durability_inmemory.cpp +++ b/tests/unit/storage_v2_durability_inmemory.cpp @@ -48,7 +48,7 @@ #include "utils/timer.hpp" #include "utils/uuid.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::Contains; using testing::UnorderedElementsAre; diff --git a/tests/unit/storage_v2_edge_inmemory.cpp b/tests/unit/storage_v2_edge_inmemory.cpp index 96fa1debe..50ae1f14f 100644 --- a/tests/unit/storage_v2_edge_inmemory.cpp +++ b/tests/unit/storage_v2_edge_inmemory.cpp @@ -17,7 +17,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::UnorderedElementsAre; class StorageEdgeTest : public ::testing::TestWithParam {}; diff --git 
a/tests/unit/storage_v2_edge_ondisk.cpp b/tests/unit/storage_v2_edge_ondisk.cpp index 823edf16e..7f3357b10 100644 --- a/tests/unit/storage_v2_edge_ondisk.cpp +++ b/tests/unit/storage_v2_edge_ondisk.cpp @@ -18,7 +18,7 @@ #include "storage/v2/disk/storage.hpp" #include "storage/v2/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::UnorderedElementsAre; class StorageEdgeTest : public ::testing::TestWithParam {}; diff --git a/tests/unit/storage_v2_gc.cpp b/tests/unit/storage_v2_gc.cpp index 770d570bc..e619f3723 100644 --- a/tests/unit/storage_v2_gc.cpp +++ b/tests/unit/storage_v2_gc.cpp @@ -14,7 +14,7 @@ #include "storage/v2/inmemory/storage.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::UnorderedElementsAre; // TODO: The point of these is not to test GC fully, these are just simple diff --git a/tests/unit/storage_v2_get_info.cpp b/tests/unit/storage_v2_get_info.cpp index aa864d7cd..c0f7e2dbc 100644 --- a/tests/unit/storage_v2_get_info.cpp +++ b/tests/unit/storage_v2_get_info.cpp @@ -22,7 +22,7 @@ // NOLINTNEXTLINE(google-build-using-namespace) using namespace memgraph::storage; -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; constexpr auto testSuite = "storage_v2_get_info"; const std::filesystem::path storage_directory{std::filesystem::temp_directory_path() / testSuite}; diff --git a/tests/unit/storage_v2_indices.cpp b/tests/unit/storage_v2_indices.cpp index 10ccb7660..8ee053087 100644 --- a/tests/unit/storage_v2_indices.cpp +++ b/tests/unit/storage_v2_indices.cpp @@ -25,7 +25,7 @@ // NOLINTNEXTLINE(google-build-using-namespace) using namespace memgraph::storage; -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; using testing::IsEmpty; using testing::Types; using 
testing::UnorderedElementsAre; diff --git a/tests/unit/storage_v2_isolation_level.cpp b/tests/unit/storage_v2_isolation_level.cpp index 5efedf7f9..39d7a92ec 100644 --- a/tests/unit/storage_v2_isolation_level.cpp +++ b/tests/unit/storage_v2_isolation_level.cpp @@ -16,7 +16,7 @@ #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/isolation_level.hpp" #include "utils/on_scope_exit.hpp" -using memgraph::replication::ReplicationRole; +using memgraph::replication_coordination_glue::ReplicationRole; namespace { int64_t VerticesCount(memgraph::storage::Storage::Accessor *accessor) { diff --git a/tests/unit/storage_v2_replication.cpp b/tests/unit/storage_v2_replication.cpp index 008494436..e572440ca 100644 --- a/tests/unit/storage_v2_replication.cpp +++ b/tests/unit/storage_v2_replication.cpp @@ -41,9 +41,9 @@ using memgraph::dbms::RegisterReplicaError; using memgraph::dbms::ReplicationHandler; using memgraph::dbms::UnregisterReplicaResult; using memgraph::replication::ReplicationClientConfig; -using memgraph::replication::ReplicationRole; using memgraph::replication::ReplicationServerConfig; using memgraph::replication_coordination_glue::ReplicationMode; +using memgraph::replication_coordination_glue::ReplicationRole; using memgraph::storage::Config; using memgraph::storage::EdgeAccessor; using memgraph::storage::Gid; @@ -940,7 +940,7 @@ TEST_F(ReplicationTest, ReplicationReplicaWithExistingEndPoint) { .ip_address = local_host, .port = common_port, }) - .GetError() == RegisterReplicaError::END_POINT_EXISTS); + .GetError() == RegisterReplicaError::ENDPOINT_EXISTS); } TEST_F(ReplicationTest, RestoringReplicationAtStartupAfterDroppingReplica) { diff --git a/tests/unit/storage_v2_show_storage_info.cpp b/tests/unit/storage_v2_show_storage_info.cpp index 2fb750eb8..73d33a77d 100644 --- a/tests/unit/storage_v2_show_storage_info.cpp +++ b/tests/unit/storage_v2_show_storage_info.cpp @@ -44,7 +44,7 @@ class ShowStorageInfoTest : public testing::Test { }; 
TEST_F(ShowStorageInfoTest, CountOnAbort) { - auto acc = this->storage->Access(memgraph::replication::ReplicationRole::MAIN); + auto acc = this->storage->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); auto src_vertex = acc->CreateVertex(); auto dest_vertex = acc->CreateVertex(); auto et = acc->NameToEdgeType("et5"); diff --git a/tests/unit/storage_v2_storage_mode.cpp b/tests/unit/storage_v2_storage_mode.cpp index dbf3394d3..487319d3c 100644 --- a/tests/unit/storage_v2_storage_mode.cpp +++ b/tests/unit/storage_v2_storage_mode.cpp @@ -44,8 +44,8 @@ TEST_P(StorageModeTest, Mode) { .transaction{.isolation_level = memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION}}); static_cast(storage.get())->SetStorageMode(storage_mode); - auto creator = storage->Access(memgraph::replication::ReplicationRole::MAIN); - auto other_analytics_mode_reader = storage->Access(memgraph::replication::ReplicationRole::MAIN); + auto creator = storage->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); + auto other_analytics_mode_reader = storage->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN); ASSERT_EQ(CountVertices(*creator, memgraph::storage::View::OLD), 0); ASSERT_EQ(CountVertices(*other_analytics_mode_reader, memgraph::storage::View::OLD), 0); diff --git a/tests/unit/typed_value.cpp b/tests/unit/typed_value.cpp index fa2d3cb95..41dd6e3ba 100644 --- a/tests/unit/typed_value.cpp +++ b/tests/unit/typed_value.cpp @@ -38,7 +38,7 @@ class AllTypesFixture : public testing::Test { memgraph::storage::Config config_{disk_test_utils::GenerateOnDiskConfig(testSuite)}; std::unique_ptr db{new StorageType(config_)}; std::unique_ptr storage_dba{ - db->Access(memgraph::replication::ReplicationRole::MAIN)}; + db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)}; memgraph::query::DbAccessor dba{storage_dba.get()}; void SetUp() override {