From f699c0b37f1f24db27401cbbe0560e96409260d3 Mon Sep 17 00:00:00 2001 From: Andi Date: Thu, 21 Mar 2024 07:41:26 +0100 Subject: [PATCH 1/5] Support bolt+routing (#1796) --- .github/workflows/diff.yaml | 13 +- release/package/mgbuild.sh | 20 +- src/communication/bolt/v1/session.hpp | 8 +- .../bolt/v1/states/executing.hpp | 6 +- src/communication/bolt/v1/states/handlers.hpp | 20 +- src/coordination/CMakeLists.txt | 4 +- src/coordination/coordinator_client.cpp | 13 +- .../coordinator_cluster_state.cpp | 74 +- ...p => coordinator_communication_config.cpp} | 38 +- src/coordination/coordinator_handlers.cpp | 8 +- src/coordination/coordinator_instance.cpp | 67 +- src/coordination/coordinator_server.cpp | 5 +- src/coordination/coordinator_state.cpp | 17 +- .../coordinator_state_machine.cpp | 25 +- .../coordinator_state_manager.cpp | 6 + .../coordination/coordinator_client.hpp | 10 +- ...p => coordinator_communication_config.hpp} | 68 +- .../coordination/coordinator_exceptions.hpp | 11 + .../coordination/coordinator_instance.hpp | 17 +- .../include/coordination/coordinator_rpc.hpp | 11 +- .../coordination/coordinator_server.hpp | 4 +- .../include/coordination/coordinator_slk.hpp | 24 +- .../coordination/coordinator_state.hpp | 6 +- .../include/coordination/raft_state.hpp | 13 +- .../coordination/replication_instance.hpp | 4 +- .../nuraft/coordinator_cluster_state.hpp | 37 +- .../nuraft/coordinator_state_machine.hpp | 12 +- .../include/nuraft/raft_log_action.hpp | 16 +- src/coordination/raft_state.cpp | 45 +- src/coordination/replication_instance.cpp | 4 +- src/dbms/coordinator_handler.cpp | 7 +- src/dbms/coordinator_handler.hpp | 6 +- src/glue/SessionHL.cpp | 34 + src/glue/SessionHL.hpp | 7 + src/io/network/endpoint.cpp | 19 +- src/io/network/endpoint.hpp | 11 +- src/query/interpreter.cpp | 120 ++- src/query/interpreter.hpp | 19 +- tests/drivers/go/v5/docs_quick_start.go | 20 +- tests/drivers/go/v5/go.mod | 2 +- tests/drivers/go/v5/go.sum | 2 + tests/drivers/go/v5/read_routing.go | 51 ++ tests/drivers/go/v5/run.sh | 1 - tests/drivers/go/v5/run_cluster_tests.sh | 21 + tests/drivers/go/v5/write_routing.go | 51 ++ tests/drivers/java/v5_8/pom.xml | 39 + tests/drivers/java/v5_8/run.sh | 1 - tests/drivers/java/v5_8/run_cluster_tests.sh | 37 + .../src/main/java/memgraph/ReadRouting.java | 35 + .../src/main/java/memgraph/WriteRouting.java | 44 + tests/drivers/node/v5_8/read_routing.js | 59 ++ tests/drivers/node/v5_8/run.sh | 1 - tests/drivers/node/v5_8/run_cluster_tests.sh | 17 + tests/drivers/node/v5_8/write_routing.js | 59 ++ tests/drivers/python/v5_8/read_routing.py | 41 + .../drivers/python/v5_8/run_cluster_tests.sh | 25 + tests/drivers/python/v5_8/write_routing.py | 41 + tests/drivers/run_cluster.sh | 203 +++++ tests/e2e/high_availability/common.py | 11 - .../coord_cluster_registration.py | 818 +++++++++--------- .../disable_writing_on_main_after_restart.py | 6 +- tests/e2e/replication/common.hpp | 7 +- tests/unit/CMakeLists.txt | 9 +- tests/unit/bolt_session.cpp | 211 +++-- tests/unit/coordinator_cluster_state.cpp | 135 +-- tests/unit/raft_log_serialization.cpp | 54 +- tests/unit/routing_table.cpp | 176 ++++ tests/unit/slk_advanced.cpp | 22 +- 68 files changed, 2131 insertions(+), 897 deletions(-) rename src/coordination/{coordinator_config.cpp => coordinator_communication_config.cpp} (57%) rename src/coordination/include/coordination/{coordinator_config.hpp => coordinator_communication_config.hpp} (51%) create mode 100644 tests/drivers/go/v5/read_routing.go create mode 100755 
tests/drivers/go/v5/run_cluster_tests.sh create mode 100644 tests/drivers/go/v5/write_routing.go create mode 100755 tests/drivers/java/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java create mode 100644 tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java create mode 100644 tests/drivers/node/v5_8/read_routing.js create mode 100755 tests/drivers/node/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/node/v5_8/write_routing.js create mode 100644 tests/drivers/python/v5_8/read_routing.py create mode 100755 tests/drivers/python/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/python/v5_8/write_routing.py create mode 100755 tests/drivers/run_cluster.sh create mode 100644 tests/unit/routing_table.cpp diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 49b7d4273..8b8ee607f 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -257,6 +257,17 @@ jobs: --organization-name $MEMGRAPH_ORGANIZATION_NAME \ test-memgraph drivers + - name: Run HA driver tests + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph drivers-high-availability + - name: Run integration tests run: | ./release/package/mgbuild.sh \ @@ -278,7 +289,7 @@ jobs: --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ --organization-name $MEMGRAPH_ORGANIZATION_NAME \ test-memgraph cppcheck-and-clang-format - + - name: Save cppcheck and clang-format errors uses: actions/upload-artifact@v4 with: diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh index e24776f60..934e962b7 100755 --- a/release/package/mgbuild.sh +++ b/release/package/mgbuild.sh @@ -48,9 +48,9 @@ SUPPORTED_ARCHS=( ) SUPPORTED_TESTS=( clang-tidy cppcheck-and-clang-format code-analysis - code-coverage drivers durability e2e gql-behave + code-coverage drivers drivers-high-availability durability e2e gql-behave integration leftover-CTest macro-benchmark - mgbench stress-plain stress-ssl + mgbench stress-plain stress-ssl unit unit-coverage upload-to-bench-graph ) @@ -116,7 +116,7 @@ print_help () { echo -e "\nToolchain v5 supported OSs:" echo -e " \"${SUPPORTED_OS_V5[*]}\"" - + echo -e "\nExample usage:" echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run" echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community" @@ -296,7 +296,7 @@ build_memgraph () { docker cp "$PROJECT_ROOT/." "$build_container:$MGBUILD_ROOT_DIR/" fi # Change ownership of copied files so the mg user inside container can access them - docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" + docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" @@ -318,10 +318,9 @@ build_memgraph () { # Define cmake command local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .." 
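# Run the configured cmake command inside the build container, with the toolchain and cargo environments activated (descriptive note added by the editor; the command itself is unchanged below).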
docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && $cmake_cmd" - # ' is used instead of " because we need to run make within the allowed # container resources. - # Default value for $threads is 0 instead of $(nproc) because macos + # Default value for $threads is 0 instead of $(nproc) because macos # doesn't support the nproc command. # 0 is set for default value and checked here because mgbuild containers # support nproc @@ -363,7 +362,7 @@ copy_memgraph() { local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph" local host_output_path="$PROJECT_ROOT/build/memgraph" mkdir -p "$PROJECT_ROOT/build" - docker cp -L $build_container:$container_output_path $host_output_path + docker cp -L $build_container:$container_output_path $host_output_path echo "Binary saved to $host_output_path" ;; --build-logs) @@ -371,7 +370,7 @@ copy_memgraph() { local container_output_path="$MGBUILD_ROOT_DIR/build/logs" local host_output_path="$PROJECT_ROOT/build/logs" mkdir -p "$PROJECT_ROOT/build" - docker cp -L $build_container:$container_output_path $host_output_path + docker cp -L $build_container:$container_output_path $host_output_path echo "Build logs saved to $host_output_path" ;; --package) @@ -418,6 +417,9 @@ test_memgraph() { drivers) docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh' ;; + drivers-high-availability) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run_cluster.sh' + ;; integration) docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh' ;; @@ -664,4 +666,4 @@ case $command in echo "Error: Unknown command '$command'" exit 1 ;; -esac +esac diff --git a/src/communication/bolt/v1/session.hpp b/src/communication/bolt/v1/session.hpp index 2261a3234..55d8a7a54 100644 --- a/src/communication/bolt/v1/session.hpp +++ b/src/communication/bolt/v1/session.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -88,6 +88,12 @@ class Session { virtual void Configure(const std::map &run_time_info) = 0; +#ifdef MG_ENTERPRISE + virtual auto Route(std::map const &routing, + std::vector const &bookmarks, + std::map const &extra) -> std::map = 0; +#endif + /** * Put results of the processed query in the `encoder`. * diff --git a/src/communication/bolt/v1/states/executing.hpp b/src/communication/bolt/v1/states/executing.hpp index b58b3c39b..2ab2cacc2 100644 --- a/src/communication/bolt/v1/states/executing.hpp +++ b/src/communication/bolt/v1/states/executing.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -79,9 +79,9 @@ State RunHandlerV4(Signature signature, TSession &session, State state, Marker m } case Signature::Route: { if constexpr (bolt_minor >= 3) { - if (signature == Signature::Route) return HandleRoute(session, marker); + return HandleRoute(session, marker); } else { - spdlog::trace("Supported only in bolt v4.3"); + spdlog::trace("Supported only in bolt versions >= 4.3"); return State::Close; } } diff --git a/src/communication/bolt/v1/states/handlers.hpp b/src/communication/bolt/v1/states/handlers.hpp index 3ffcb6f55..afdc37ad9 100644 --- a/src/communication/bolt/v1/states/handlers.hpp +++ b/src/communication/bolt/v1/states/handlers.hpp @@ -478,9 +478,6 @@ State HandleGoodbye() { template State HandleRoute(TSession &session, const Marker marker) { - // Route message is not implemented since it is Neo4j specific, therefore we will receive it and inform user that - // there is no implementation. Before that, we have to read out the fields from the buffer to leave it in a clean - // state. if (marker != Marker::TinyStruct3) { spdlog::trace("Expected TinyStruct3 marker, but received 0x{:02x}!", utils::UnderlyingCast(marker)); return State::Close; @@ -496,11 +493,27 @@ State HandleRoute(TSession &session, const Marker marker) { spdlog::trace("Couldn't read bookmarks field!"); return State::Close; } + + // TODO: (andi) Fix Bolt versions Value db; if (!session.decoder_.ReadValue(&db)) { spdlog::trace("Couldn't read db field!"); return State::Close; } + +#ifdef MG_ENTERPRISE + try { + auto res = session.Route(routing.ValueMap(), bookmarks.ValueList(), {}); + if (!session.encoder_.MessageSuccess(std::move(res))) { + spdlog::trace("Couldn't send result of routing!"); + return State::Close; + } + return State::Idle; + } catch (const std::exception &e) { + return HandleFailure(session, e); + } + +#else session.encoder_buffer_.Clear(); bool fail_sent = session.encoder_.MessageFailure({{"code", "66"}, {"message", "Route message is not supported in Memgraph!"}}); @@ -509,6 +522,7 @@ State HandleRoute(TSession &session, const Marker marker) { return State::Close; } return State::Error; +#endif } template diff --git a/src/coordination/CMakeLists.txt b/src/coordination/CMakeLists.txt index ef9376a70..d39d3e738 100644 --- a/src/coordination/CMakeLists.txt +++ b/src/coordination/CMakeLists.txt @@ -6,7 +6,7 @@ target_sources(mg-coordination include/coordination/coordinator_state.hpp include/coordination/coordinator_rpc.hpp include/coordination/coordinator_server.hpp - include/coordination/coordinator_config.hpp + include/coordination/coordinator_communication_config.hpp include/coordination/coordinator_exceptions.hpp include/coordination/coordinator_slk.hpp include/coordination/coordinator_instance.hpp @@ -23,7 +23,7 @@ target_sources(mg-coordination include/nuraft/coordinator_state_manager.hpp PRIVATE - coordinator_config.cpp + coordinator_communication_config.cpp coordinator_client.cpp coordinator_state.cpp coordinator_rpc.cpp diff --git a/src/coordination/coordinator_client.cpp b/src/coordination/coordinator_client.cpp index 8530faff3..44817ccfe 100644 --- a/src/coordination/coordinator_client.cpp +++ b/src/coordination/coordinator_client.cpp @@ -14,7 +14,7 @@ #include "coordination/coordinator_client.hpp" -#include "coordination/coordinator_config.hpp" +#include 
"coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_rpc.hpp" #include "replication_coordination_glue/common.hpp" #include "replication_coordination_glue/messages.hpp" @@ -23,18 +23,17 @@ namespace memgraph::coordination { namespace { -auto CreateClientContext(memgraph::coordination::CoordinatorClientConfig const &config) +auto CreateClientContext(memgraph::coordination::CoordinatorToReplicaConfig const &config) -> communication::ClientContext { return (config.ssl) ? communication::ClientContext{config.ssl->key_file, config.ssl->cert_file} : communication::ClientContext{}; } } // namespace -CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, +CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb) : rpc_context_{CreateClientContext(config)}, - rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), - &rpc_context_}, + rpc_client_{config.mgt_server, &rpc_context_}, config_{std::move(config)}, coord_instance_{coord_instance}, succ_cb_{std::move(succ_cb)}, @@ -86,7 +85,9 @@ void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); } void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); } void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); } -auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; } +auto CoordinatorClient::ReplicationClientInfo() const -> coordination::ReplicationClientInfo { + return config_.replication_client_info; +} auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid, ReplicationClientsInfo replication_clients_info) const -> bool { diff --git a/src/coordination/coordinator_cluster_state.cpp b/src/coordination/coordinator_cluster_state.cpp index cf6e1a574..00bbc1336 100644 --- a/src/coordination/coordinator_cluster_state.cpp +++ b/src/coordination/coordinator_cluster_state.cpp @@ -18,86 +18,88 @@ namespace memgraph::coordination { -void to_json(nlohmann::json &j, InstanceState const &instance_state) { +void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state) { j = nlohmann::json{{"config", instance_state.config}, {"status", instance_state.status}}; } -void from_json(nlohmann::json const &j, InstanceState &instance_state) { +void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state) { j.at("config").get_to(instance_state.config); j.at("status").get_to(instance_state.status); } -CoordinatorClusterState::CoordinatorClusterState(std::map> instances) - : instances_{std::move(instances)} {} +CoordinatorClusterState::CoordinatorClusterState(std::map> instances) + : repl_instances_{std::move(instances)} {} -CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) : instances_{other.instances_} {} +CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) + : repl_instances_{other.repl_instances_} {} CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState const &other) { if (this == &other) { return *this; } - instances_ = other.instances_; + repl_instances_ = other.repl_instances_; return *this; } CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState &&other) noexcept - : instances_{std::move(other.instances_)} {} + : 
repl_instances_{std::move(other.repl_instances_)} {} CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState &&other) noexcept { if (this == &other) { return *this; } - instances_ = std::move(other.instances_); + repl_instances_ = std::move(other.repl_instances_); return *this; } auto CoordinatorClusterState::MainExists() const -> bool { auto lock = std::shared_lock{log_lock_}; - return std::ranges::any_of(instances_, + return std::ranges::any_of(repl_instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); } auto CoordinatorClusterState::IsMain(std::string_view instance_name) const -> bool { auto lock = std::shared_lock{log_lock_}; - auto const it = instances_.find(instance_name); - return it != instances_.end() && it->second.status == ReplicationRole::MAIN; + auto const it = repl_instances_.find(instance_name); + return it != repl_instances_.end() && it->second.status == ReplicationRole::MAIN; } auto CoordinatorClusterState::IsReplica(std::string_view instance_name) const -> bool { auto lock = std::shared_lock{log_lock_}; - auto const it = instances_.find(instance_name); - return it != instances_.end() && it->second.status == ReplicationRole::REPLICA; + auto const it = repl_instances_.find(instance_name); + return it != repl_instances_.end() && it->second.status == ReplicationRole::REPLICA; } -auto CoordinatorClusterState::InsertInstance(std::string instance_name, InstanceState instance_state) -> void { +auto CoordinatorClusterState::InsertInstance(std::string instance_name, ReplicationInstanceState instance_state) + -> void { auto lock = std::lock_guard{log_lock_}; - instances_.insert_or_assign(std::move(instance_name), std::move(instance_state)); + repl_instances_.insert_or_assign(std::move(instance_name), std::move(instance_state)); } auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void { auto lock = std::lock_guard{log_lock_}; switch (log_action) { case RaftLogAction::REGISTER_REPLICATION_INSTANCE: { - auto const &config = std::get(log_entry); - instances_[config.instance_name] = InstanceState{config, ReplicationRole::REPLICA}; + auto const &config = std::get(log_entry); + repl_instances_[config.instance_name] = ReplicationInstanceState{config, ReplicationRole::REPLICA}; break; } case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: { auto const instance_name = std::get(log_entry); - instances_.erase(instance_name); + repl_instances_.erase(instance_name); break; } case RaftLogAction::SET_INSTANCE_AS_MAIN: { auto const instance_name = std::get(log_entry); - auto it = instances_.find(instance_name); - MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + auto it = repl_instances_.find(instance_name); + MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part of raft state!"); it->second.status = ReplicationRole::MAIN; break; } case RaftLogAction::SET_INSTANCE_AS_REPLICA: { auto const instance_name = std::get(log_entry); - auto it = instances_.find(instance_name); - MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + auto it = repl_instances_.find(instance_name); + MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part of raft state!"); it->second.status = ReplicationRole::REPLICA; break; } @@ -105,13 +107,18 @@ auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_act uuid_ = std::get(log_entry); break; } + case RaftLogAction::ADD_COORDINATOR_INSTANCE: { + auto 
const &config = std::get(log_entry); + coordinators_.emplace_back(CoordinatorInstanceState{config}); + break; + } } } auto CoordinatorClusterState::Serialize(ptr &data) -> void { auto lock = std::shared_lock{log_lock_}; - auto const log = nlohmann::json(instances_).dump(); + auto const log = nlohmann::json(repl_instances_).dump(); data = buffer::alloc(sizeof(uint32_t) + log.size()); buffer_serializer bs(data); @@ -121,27 +128,22 @@ auto CoordinatorClusterState::Serialize(ptr &data) -> void { auto CoordinatorClusterState::Deserialize(buffer &data) -> CoordinatorClusterState { buffer_serializer bs(data); auto const j = nlohmann::json::parse(bs.get_str()); - auto instances = j.get>>(); + auto instances = j.get>>(); return CoordinatorClusterState{std::move(instances)}; } -auto CoordinatorClusterState::GetInstances() const -> std::vector { +auto CoordinatorClusterState::GetReplicationInstances() const -> std::vector { auto lock = std::shared_lock{log_lock_}; - return instances_ | ranges::views::values | ranges::to>; + return repl_instances_ | ranges::views::values | ranges::to>; +} + +auto CoordinatorClusterState::GetCoordinatorInstances() const -> std::vector { + auto lock = std::shared_lock{log_lock_}; + return coordinators_; } auto CoordinatorClusterState::GetUUID() const -> utils::UUID { return uuid_; } -auto CoordinatorClusterState::FindCurrentMainInstanceName() const -> std::optional { - auto lock = std::shared_lock{log_lock_}; - auto const it = - std::ranges::find_if(instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); - if (it == instances_.end()) { - return {}; - } - return it->first; -} - } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_config.cpp b/src/coordination/coordinator_communication_config.cpp similarity index 57% rename from src/coordination/coordinator_config.cpp rename to src/coordination/coordinator_communication_config.cpp index a1147d3b6..31ed20fd0 100644 --- a/src/coordination/coordinator_config.cpp +++ b/src/coordination/coordinator_communication_config.cpp @@ -11,43 +11,53 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" namespace memgraph::coordination { -void to_json(nlohmann::json &j, ReplClientInfo const &config) { +void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config) { + j = nlohmann::json{{"coordinator_server_id", config.coordinator_server_id}, + {"coordinator_server", config.coordinator_server}, + {"bolt_server", config.bolt_server}}; +} + +void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config) { + config.coordinator_server_id = j.at("coordinator_server_id").get(); + config.coordinator_server = j.at("coordinator_server").get(); + config.bolt_server = j.at("bolt_server").get(); +} + +void to_json(nlohmann::json &j, ReplicationClientInfo const &config) { j = nlohmann::json{{"instance_name", config.instance_name}, {"replication_mode", config.replication_mode}, - {"replication_ip_address", config.replication_ip_address}, - {"replication_port", config.replication_port}}; + {"replication_server", config.replication_server}}; } -void from_json(nlohmann::json const &j, ReplClientInfo &config) { +void from_json(nlohmann::json const &j, ReplicationClientInfo &config) { config.instance_name = j.at("instance_name").get(); config.replication_mode = j.at("replication_mode").get(); - config.replication_ip_address = j.at("replication_ip_address").get(); - 
config.replication_port = j.at("replication_port").get(); + config.replication_server = j.at("replication_server").get(); } -void to_json(nlohmann::json &j, CoordinatorClientConfig const &config) { +void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config) { j = nlohmann::json{{"instance_name", config.instance_name}, - {"ip_address", config.ip_address}, - {"port", config.port}, + {"mgt_server", config.mgt_server}, + {"bolt_server", config.bolt_server}, {"instance_health_check_frequency_sec", config.instance_health_check_frequency_sec.count()}, {"instance_down_timeout_sec", config.instance_down_timeout_sec.count()}, {"instance_get_uuid_frequency_sec", config.instance_get_uuid_frequency_sec.count()}, {"replication_client_info", config.replication_client_info}}; } -void from_json(nlohmann::json const &j, CoordinatorClientConfig &config) { +void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config) { config.instance_name = j.at("instance_name").get(); - config.ip_address = j.at("ip_address").get(); - config.port = j.at("port").get(); + config.mgt_server = j.at("mgt_server").get(); + config.bolt_server = j.at("bolt_server").get(); config.instance_health_check_frequency_sec = std::chrono::seconds{j.at("instance_health_check_frequency_sec").get()}; config.instance_down_timeout_sec = std::chrono::seconds{j.at("instance_down_timeout_sec").get()}; config.instance_get_uuid_frequency_sec = std::chrono::seconds{j.at("instance_get_uuid_frequency_sec").get()}; - config.replication_client_info = j.at("replication_client_info").get(); + config.replication_client_info = j.at("replication_client_info").get(); } } // namespace memgraph::coordination diff --git a/src/coordination/coordinator_handlers.cpp b/src/coordination/coordinator_handlers.cpp index 637360267..e5b7a663f 100644 --- a/src/coordination/coordinator_handlers.cpp +++ b/src/coordination/coordinator_handlers.cpp @@ -95,8 +95,8 @@ void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHan slk::Load(&req, req_reader); const replication::ReplicationServerConfig clients_config{ - .ip_address = req.replication_client_info.replication_ip_address, - .port = req.replication_client_info.replication_port}; + .ip_address = req.replication_client_info.replication_server.address, + .port = req.replication_client_info.replication_server.port}; if (!replication_handler.SetReplicationRoleReplica(clients_config, std::nullopt)) { spdlog::error("Demoting main to replica failed!"); @@ -136,8 +136,8 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa return replication::ReplicationClientConfig{ .name = repl_info_config.instance_name, .mode = repl_info_config.replication_mode, - .ip_address = repl_info_config.replication_ip_address, - .port = repl_info_config.replication_port, + .ip_address = repl_info_config.replication_server.address, + .port = repl_info_config.replication_server.port, }; }; diff --git a/src/coordination/coordinator_instance.cpp b/src/coordination/coordinator_instance.cpp index 791ffbc59..2182e2405 100644 --- a/src/coordination/coordinator_instance.cpp +++ b/src/coordination/coordinator_instance.cpp @@ -14,7 +14,6 @@ #include "coordination/coordinator_instance.hpp" #include "coordination/coordinator_exceptions.hpp" -#include "coordination/fmt.hpp" #include "dbms/constants.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "nuraft/coordinator_state_manager.hpp" @@ -34,7 +33,7 @@ CoordinatorInstance::CoordinatorInstance() : 
raft_state_(RaftState::MakeRaftState( [this]() { spdlog::info("Leader changed, starting all replication instances!"); - auto const instances = raft_state_.GetInstances(); + auto const instances = raft_state_.GetReplicationInstances(); auto replicas = instances | ranges::views::filter([](auto const &instance) { return instance.status == ReplicationRole::REPLICA; }); @@ -133,7 +132,7 @@ auto CoordinatorInstance::ShowInstances() const -> std::vector<InstanceStatus> { .health = "unknown"}; }; - std::ranges::transform(raft_state_.GetInstances(), std::back_inserter(instances_status), + std::ranges::transform(raft_state_.GetReplicationInstances(), std::back_inserter(instances_status), process_repl_instance_as_follower); } @@ -288,7 +287,7 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance return SetInstanceToMainCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig const &config) +auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus { auto lock = std::lock_guard{coord_instance_lock_}; @@ -382,9 +381,12 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instanc return UnregisterInstanceCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, - std::string_view raft_address) -> void { - raft_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); +auto CoordinatorInstance::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + raft_state_.AddCoordinatorInstance(config); + // NOTE: We ignore the error case in which the coordinator instance was added to the networking layer but not to the Raft log. + if (!raft_state_.AppendAddCoordinatorInstanceLog(config)) { + spdlog::error("Failed to append add coordinator instance log"); + } } void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name) { @@ -557,5 +559,56 @@ auto CoordinatorInstance::IsReplica(std::string_view instance_name) const -> boo return raft_state_.IsReplica(instance_name); } +auto CoordinatorInstance::GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable { + auto res = RoutingTable{}; + + auto const repl_instance_to_bolt = [](ReplicationInstanceState const &instance) { + return instance.config.BoltSocketAddress(); + }; + + // TODO: (andi) This check is wrong; Fico will correct it in #1819.
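+ // The routing table built below is a list of (addresses, role) pairs: one WRITE entry holding the
+ // current main's bolt address, one READ entry holding the replicas' bolt addresses, and one ROUTE
+ // entry holding the coordinators' bolt addresses. Illustrative shape (addresses are made up):
+ //   {{"localhost:7687"}, "WRITE"}, {{"localhost:7688"}, "READ"}, {{"localhost:7690"}, "ROUTE"}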
+ auto const is_instance_main = [&](ReplicationInstanceState const &instance) { + return instance.status == ReplicationRole::MAIN; + }; + + auto const is_instance_replica = [&](ReplicationInstanceState const &instance) { + return instance.status == ReplicationRole::REPLICA; + }; + + auto const &raft_log_repl_instances = raft_state_.GetReplicationInstances(); + + auto bolt_mains = raft_log_repl_instances | ranges::views::filter(is_instance_main) | + ranges::views::transform(repl_instance_to_bolt) | ranges::to(); + MG_ASSERT(bolt_mains.size() <= 1, "There can be at most one main instance active!"); + + if (!std::ranges::empty(bolt_mains)) { + res.emplace_back(std::move(bolt_mains), "WRITE"); + } + + auto bolt_replicas = raft_log_repl_instances | ranges::views::filter(is_instance_replica) | + ranges::views::transform(repl_instance_to_bolt) | ranges::to(); + if (!std::ranges::empty(bolt_replicas)) { + res.emplace_back(std::move(bolt_replicas), "READ"); + } + + auto const coord_instance_to_bolt = [](CoordinatorInstanceState const &instance) { + return instance.config.bolt_server.SocketAddress(); + }; + + auto const &raft_log_coord_instances = raft_state_.GetCoordinatorInstances(); + auto bolt_coords = + raft_log_coord_instances | ranges::views::transform(coord_instance_to_bolt) | ranges::to(); + + auto const &local_bolt_coord = routing.find("address"); + if (local_bolt_coord == routing.end()) { + throw InvalidRoutingTableException("No bolt address found in routing table for the current coordinator!"); + } + + bolt_coords.push_back(local_bolt_coord->second); + res.emplace_back(std::move(bolt_coords), "ROUTE"); + + return res; +} + } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_server.cpp b/src/coordination/coordinator_server.cpp index 60dc5e348..327097830 100644 --- a/src/coordination/coordinator_server.cpp +++ b/src/coordination/coordinator_server.cpp @@ -18,8 +18,7 @@ namespace memgraph::coordination { namespace { -auto CreateServerContext(const memgraph::coordination::CoordinatorServerConfig &config) - -> communication::ServerContext { +auto CreateServerContext(const memgraph::coordination::ManagementServerConfig &config) -> communication::ServerContext { return (config.ssl) ? 
communication::ServerContext{config.ssl->key_file, config.ssl->cert_file, config.ssl->ca_file, config.ssl->verify_peer} : communication::ServerContext{}; @@ -32,7 +31,7 @@ constexpr auto kCoordinatorServerThreads = 1; } // namespace -CoordinatorServer::CoordinatorServer(const CoordinatorServerConfig &config) +CoordinatorServer::CoordinatorServer(const ManagementServerConfig &config) : rpc_server_context_{CreateServerContext(config)}, rpc_server_{io::network::Endpoint{config.ip_address, config.port}, &rpc_server_context_, kCoordinatorServerThreads} { diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp index f429cd5a7..149a9cb97 100644 --- a/src/coordination/coordinator_state.cpp +++ b/src/coordination/coordinator_state.cpp @@ -13,7 +13,7 @@ #include "coordination/coordinator_state.hpp" -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/register_main_replica_coordinator_status.hpp" #include "flags/replication.hpp" #include "spdlog/spdlog.h" @@ -31,7 +31,7 @@ CoordinatorState::CoordinatorState() { spdlog::info("Executing coordinator constructor"); if (FLAGS_coordinator_server_port) { spdlog::info("Coordinator server port set"); - auto const config = CoordinatorServerConfig{ + auto const config = ManagementServerConfig{ .ip_address = kDefaultReplicationServerIp, .port = static_cast(FLAGS_coordinator_server_port), }; @@ -41,7 +41,7 @@ CoordinatorState::CoordinatorState() { } } -auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig const &config) +auto CoordinatorState::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus { MG_ASSERT(std::holds_alternative(data_), "Coordinator cannot register replica since variant holds wrong alternative"); @@ -98,11 +98,16 @@ auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & { return *std::get(data_).coordinator_server_; } -auto CoordinatorState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, - std::string_view raft_address) -> void { +auto CoordinatorState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { MG_ASSERT(std::holds_alternative(data_), "Coordinator cannot register replica since variant holds wrong alternative"); - return std::get(data_).AddCoordinatorInstance(raft_server_id, raft_port, raft_address); + return std::get(data_).AddCoordinatorInstance(config); +} + +auto CoordinatorState::GetRoutingTable(std::map const &routing) -> RoutingTable { + MG_ASSERT(std::holds_alternative(data_), + "Coordinator cannot get routing table since variant holds wrong alternative"); + return std::get(data_).GetRoutingTable(routing); } } // namespace memgraph::coordination diff --git a/src/coordination/coordinator_state_machine.cpp b/src/coordination/coordinator_state_machine.cpp index 631c3c4d2..789ac2e5e 100644 --- a/src/coordination/coordinator_state_machine.cpp +++ b/src/coordination/coordinator_state_machine.cpp @@ -20,10 +20,6 @@ constexpr int MAX_SNAPSHOTS = 3; namespace memgraph::coordination { -auto CoordinatorStateMachine::FindCurrentMainInstanceName() const -> std::optional { - return cluster_state_.FindCurrentMainInstanceName(); -} - auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); } auto CoordinatorStateMachine::IsMain(std::string_view instance_name) const -> bool { @@ -42,7 +38,7 @@ auto 
CoordinatorStateMachine::CreateLog(nlohmann::json &&log) -> ptr { return log_buf; } -auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr { +auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr { return CreateLog({{"action", RaftLogAction::REGISTER_REPLICATION_INSTANCE}, {"info", config}}); } @@ -62,6 +58,11 @@ auto CoordinatorStateMachine::SerializeUpdateUUID(utils::UUID const &uuid) -> pt return CreateLog({{"action", RaftLogAction::UPDATE_UUID}, {"info", uuid}}); } +auto CoordinatorStateMachine::SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) + -> ptr { + return CreateLog({{"action", RaftLogAction::ADD_COORDINATOR_INSTANCE}, {"info", config}}); +} + auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair { buffer_serializer bs(data); auto const json = nlohmann::json::parse(bs.get_str()); @@ -71,7 +72,7 @@ auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair(), action}; + return {info.get(), action}; case RaftLogAction::UPDATE_UUID: return {info.get(), action}; case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: @@ -79,6 +80,8 @@ auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair(), action}; + case RaftLogAction::ADD_COORDINATOR_INSTANCE: + return {info.get(), action}; } throw std::runtime_error("Unknown action"); } @@ -133,6 +136,7 @@ auto CoordinatorStateMachine::read_logical_snp_obj(snapshot &snapshot, void *& / } else { // Object ID > 0: second object, put actual value. ctx->cluster_state_.Serialize(data_out); + is_last_obj = true; } return 0; @@ -155,6 +159,7 @@ auto CoordinatorStateMachine::save_logical_snp_obj(snapshot &snapshot, ulong &ob DMG_ASSERT(entry != snapshots_.end()); entry->second->cluster_state_ = cluster_state; } + obj_id++; } auto CoordinatorStateMachine::apply_snapshot(snapshot &s) -> bool { @@ -205,8 +210,12 @@ auto CoordinatorStateMachine::create_snapshot_internal(ptr snapshot) - } } -auto CoordinatorStateMachine::GetInstances() const -> std::vector { - return cluster_state_.GetInstances(); +auto CoordinatorStateMachine::GetReplicationInstances() const -> std::vector { + return cluster_state_.GetReplicationInstances(); +} + +auto CoordinatorStateMachine::GetCoordinatorInstances() const -> std::vector { + return cluster_state_.GetCoordinatorInstances(); } auto CoordinatorStateMachine::GetUUID() const -> utils::UUID { return cluster_state_.GetUUID(); } diff --git a/src/coordination/coordinator_state_manager.cpp b/src/coordination/coordinator_state_manager.cpp index b2fb81ea1..db49b1f21 100644 --- a/src/coordination/coordinator_state_manager.cpp +++ b/src/coordination/coordinator_state_manager.cpp @@ -33,6 +33,7 @@ CoordinatorStateManager::CoordinatorStateManager(int srv_id, std::string const & auto CoordinatorStateManager::load_config() -> ptr { // Just return in-memory data in this example. // May require reading from disk here, if it has been written to disk. + spdlog::trace("Loading cluster config"); return cluster_config_; } @@ -41,6 +42,11 @@ auto CoordinatorStateManager::save_config(cluster_config const &config) -> void // Need to write to disk here, if want to make it durable. 
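// (Until that is implemented, a restarted coordinator starts from its initial cluster config rather than the last one saved here.)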
ptr buf = config.serialize(); cluster_config_ = cluster_config::deserialize(*buf); + spdlog::info("Saving cluster config."); + auto servers = cluster_config_->get_servers(); + for (auto const &server : servers) { + spdlog::trace("Server id: {}, endpoint: {}", server->get_id(), server->get_endpoint()); + } } auto CoordinatorStateManager::save_state(srv_state const &state) -> void { diff --git a/src/coordination/include/coordination/coordinator_client.hpp b/src/coordination/include/coordination/coordinator_client.hpp index 5d4795f81..875efaa45 100644 --- a/src/coordination/include/coordination/coordinator_client.hpp +++ b/src/coordination/include/coordination/coordinator_client.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "rpc/client.hpp" #include "rpc_errors.hpp" @@ -25,11 +25,11 @@ namespace memgraph::coordination { class CoordinatorInstance; using HealthCheckClientCallback = std::function; -using ReplicationClientsInfo = std::vector; +using ReplicationClientsInfo = std::vector; class CoordinatorClient { public: - explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, + explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb); ~CoordinatorClient() = default; @@ -62,7 +62,7 @@ class CoordinatorClient { auto SendGetInstanceUUIDRpc() const -> memgraph::utils::BasicResult>; - auto ReplicationClientInfo() const -> ReplClientInfo; + auto ReplicationClientInfo() const -> ReplicationClientInfo; auto SendGetInstanceTimestampsRpc() const -> utils::BasicResult; @@ -83,7 +83,7 @@ class CoordinatorClient { communication::ClientContext rpc_context_; mutable rpc::Client rpc_client_; - CoordinatorClientConfig config_; + CoordinatorToReplicaConfig config_; CoordinatorInstance *coord_instance_; HealthCheckClientCallback succ_cb_; HealthCheckClientCallback fail_cb_; diff --git a/src/coordination/include/coordination/coordinator_config.hpp b/src/coordination/include/coordination/coordinator_communication_config.hpp similarity index 51% rename from src/coordination/include/coordination/coordinator_config.hpp rename to src/coordination/include/coordination/coordinator_communication_config.hpp index 127a365eb..4f11b188f 100644 --- a/src/coordination/include/coordination/coordinator_config.hpp +++ b/src/coordination/include/coordination/coordinator_communication_config.hpp @@ -13,6 +13,7 @@ #ifdef MG_ENTERPRISE +#include "io/network/endpoint.hpp" #include "replication_coordination_glue/mode.hpp" #include "utils/string.hpp" @@ -28,46 +29,50 @@ namespace memgraph::coordination { inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0"; -struct CoordinatorClientConfig { - std::string instance_name; - std::string ip_address; - uint16_t port{}; +struct ReplicationClientInfo { + std::string instance_name{}; + replication_coordination_glue::ReplicationMode replication_mode{}; + io::network::Endpoint replication_server; + + friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default; +}; + +struct CoordinatorToReplicaConfig { + auto BoltSocketAddress() const -> std::string { return bolt_server.SocketAddress(); } + auto CoordinatorSocketAddress() const -> std::string { return mgt_server.SocketAddress(); } + auto ReplicationSocketAddress() const -> std::string { + 
return replication_client_info.replication_server.SocketAddress(); + } + + std::string instance_name{}; + io::network::Endpoint mgt_server; + io::network::Endpoint bolt_server; + ReplicationClientInfo replication_client_info; + std::chrono::seconds instance_health_check_frequency_sec{1}; std::chrono::seconds instance_down_timeout_sec{5}; std::chrono::seconds instance_get_uuid_frequency_sec{10}; - auto CoordinatorSocketAddress() const -> std::string { return fmt::format("{}:{}", ip_address, port); } - auto ReplicationSocketAddress() const -> std::string { - return fmt::format("{}:{}", replication_client_info.replication_ip_address, - replication_client_info.replication_port); - } - - struct ReplicationClientInfo { - std::string instance_name; - replication_coordination_glue::ReplicationMode replication_mode{}; - std::string replication_ip_address; - uint16_t replication_port{}; - - friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default; - }; - - ReplicationClientInfo replication_client_info; - struct SSL { std::string key_file; std::string cert_file; - friend bool operator==(const SSL &, const SSL &) = default; }; std::optional ssl; - friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default; + friend bool operator==(CoordinatorToReplicaConfig const &, CoordinatorToReplicaConfig const &) = default; }; -using ReplClientInfo = CoordinatorClientConfig::ReplicationClientInfo; +struct CoordinatorToCoordinatorConfig { + uint32_t coordinator_server_id{0}; + io::network::Endpoint bolt_server; + io::network::Endpoint coordinator_server; -struct CoordinatorServerConfig { + friend bool operator==(CoordinatorToCoordinatorConfig const &, CoordinatorToCoordinatorConfig const &) = default; +}; + +struct ManagementServerConfig { std::string ip_address; uint16_t port{}; struct SSL { @@ -80,14 +85,17 @@ struct CoordinatorServerConfig { std::optional ssl; - friend bool operator==(CoordinatorServerConfig const &, CoordinatorServerConfig const &) = default; + friend bool operator==(ManagementServerConfig const &, ManagementServerConfig const &) = default; }; -void to_json(nlohmann::json &j, CoordinatorClientConfig const &config); -void from_json(nlohmann::json const &j, CoordinatorClientConfig &config); +void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config); +void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config); -void to_json(nlohmann::json &j, ReplClientInfo const &config); -void from_json(nlohmann::json const &j, ReplClientInfo &config); +void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config); +void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config); + +void to_json(nlohmann::json &j, ReplicationClientInfo const &config); +void from_json(nlohmann::json const &j, ReplicationClientInfo &config); } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_exceptions.hpp b/src/coordination/include/coordination/coordinator_exceptions.hpp index 7a967f80b..6cff2e8c1 100644 --- a/src/coordination/include/coordination/coordinator_exceptions.hpp +++ b/src/coordination/include/coordination/coordinator_exceptions.hpp @@ -94,5 +94,16 @@ class InvalidRaftLogActionException final : public utils::BasicException { SPECIALIZE_GET_EXCEPTION_NAME(InvalidRaftLogActionException) }; +class InvalidRoutingTableException final : public utils::BasicException { + public: + explicit 
InvalidRoutingTableException(std::string_view what) noexcept : BasicException(what) {} + + template + explicit InvalidRoutingTableException(fmt::format_string fmt, Args &&...args) noexcept + : InvalidRoutingTableException(fmt::format(fmt, std::forward(args)...)) {} + + SPECIALIZE_GET_EXCEPTION_NAME(InvalidRoutingTableException) +}; + } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_instance.hpp b/src/coordination/include/coordination/coordinator_instance.hpp index 10549f468..a778d1238 100644 --- a/src/coordination/include/coordination/coordinator_instance.hpp +++ b/src/coordination/include/coordination/coordinator_instance.hpp @@ -26,6 +26,8 @@ namespace memgraph::coordination { +using RoutingTable = std::vector, std::string>>; + struct NewMainRes { std::string most_up_to_date_instance; std::string latest_epoch; @@ -36,8 +38,14 @@ using InstanceNameDbHistories = std::pair RegisterInstanceCoordinatorStatus; [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) -> UnregisterInstanceCoordinatorStatus; @@ -48,15 +56,15 @@ class CoordinatorInstance { auto TryFailover() -> void; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; + + auto GetRoutingTable(std::map const &routing) -> RoutingTable; static auto ChooseMostUpToDateInstance(std::span histories) -> NewMainRes; private: HealthCheckClientCallback client_succ_cb_, client_fail_cb_; - auto OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void; - auto FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &; void MainFailCallback(std::string_view); @@ -71,7 +79,6 @@ class CoordinatorInstance { auto IsReplica(std::string_view instance_name) const -> bool; // NOTE: Must be std::list because we rely on pointer stability. 
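// Unlike std::vector, std::list never invalidates pointers or references to existing elements when other elements are inserted or erased.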
- // Leader and followers should both have same view on repl_instances_ std::list repl_instances_; mutable utils::ResourceLock coord_instance_lock_{}; diff --git a/src/coordination/include/coordination/coordinator_rpc.hpp b/src/coordination/include/coordination/coordinator_rpc.hpp index d799b2955..b0b466859 100644 --- a/src/coordination/include/coordination/coordinator_rpc.hpp +++ b/src/coordination/include/coordination/coordinator_rpc.hpp @@ -14,7 +14,7 @@ #include "utils/uuid.hpp" #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "rpc/messages.hpp" #include "slk/serialization.hpp" @@ -28,14 +28,13 @@ struct PromoteReplicaToMainReq { static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader); static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder); - explicit PromoteReplicaToMainReq(const utils::UUID &uuid, - std::vector replication_clients_info) + explicit PromoteReplicaToMainReq(const utils::UUID &uuid, std::vector replication_clients_info) : main_uuid_(uuid), replication_clients_info(std::move(replication_clients_info)) {} PromoteReplicaToMainReq() = default; // get uuid here utils::UUID main_uuid_; - std::vector replication_clients_info; + std::vector replication_clients_info; }; struct PromoteReplicaToMainRes { @@ -60,12 +59,12 @@ struct DemoteMainToReplicaReq { static void Load(DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader); static void Save(const DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder); - explicit DemoteMainToReplicaReq(CoordinatorClientConfig::ReplicationClientInfo replication_client_info) + explicit DemoteMainToReplicaReq(ReplicationClientInfo replication_client_info) : replication_client_info(std::move(replication_client_info)) {} DemoteMainToReplicaReq() = default; - CoordinatorClientConfig::ReplicationClientInfo replication_client_info; + ReplicationClientInfo replication_client_info; }; struct DemoteMainToReplicaRes { diff --git a/src/coordination/include/coordination/coordinator_server.hpp b/src/coordination/include/coordination/coordinator_server.hpp index 2a261bc32..52a0befc5 100644 --- a/src/coordination/include/coordination/coordinator_server.hpp +++ b/src/coordination/include/coordination/coordinator_server.hpp @@ -13,14 +13,14 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "rpc/server.hpp" namespace memgraph::coordination { class CoordinatorServer { public: - explicit CoordinatorServer(const CoordinatorServerConfig &config); + explicit CoordinatorServer(const ManagementServerConfig &config); CoordinatorServer(const CoordinatorServer &) = delete; CoordinatorServer(CoordinatorServer &&) = delete; CoordinatorServer &operator=(const CoordinatorServer &) = delete; diff --git a/src/coordination/include/coordination/coordinator_slk.hpp b/src/coordination/include/coordination/coordinator_slk.hpp index ee393b7b6..3d809da26 100644 --- a/src/coordination/include/coordination/coordinator_slk.hpp +++ b/src/coordination/include/coordination/coordinator_slk.hpp @@ -13,27 +13,37 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "slk/serialization.hpp" #include "slk/streams.hpp" namespace memgraph::slk { -using 
ReplicationClientInfo = coordination::CoordinatorClientConfig::ReplicationClientInfo; +using ReplicationClientInfo = coordination::ReplicationClientInfo; -inline void Save(const ReplicationClientInfo &obj, Builder *builder) { +inline void Save(io::network::Endpoint const &obj, Builder *builder) { + Save(obj.address, builder); + Save(obj.port, builder); + Save(obj.family, builder); +} + +inline void Load(io::network::Endpoint *obj, Reader *reader) { + Load(&obj->address, reader); + Load(&obj->port, reader); + Load(&obj->family, reader); +} + +inline void Save(ReplicationClientInfo const &obj, Builder *builder) { Save(obj.instance_name, builder); Save(obj.replication_mode, builder); - Save(obj.replication_ip_address, builder); - Save(obj.replication_port, builder); + Save(obj.replication_server, builder); } inline void Load(ReplicationClientInfo *obj, Reader *reader) { Load(&obj->instance_name, reader); Load(&obj->replication_mode, reader); - Load(&obj->replication_ip_address, reader); - Load(&obj->replication_port, reader); + Load(&obj->replication_server, reader); } inline void Save(const replication_coordination_glue::DatabaseHistory &obj, Builder *builder) { diff --git a/src/coordination/include/coordination/coordinator_state.hpp b/src/coordination/include/coordination/coordinator_state.hpp index 400c36940..f2a88e9b8 100644 --- a/src/coordination/include/coordination/coordinator_state.hpp +++ b/src/coordination/include/coordination/coordinator_state.hpp @@ -33,7 +33,7 @@ class CoordinatorState { CoordinatorState(CoordinatorState &&) noexcept = delete; CoordinatorState &operator=(CoordinatorState &&) noexcept = delete; - [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config) + [[nodiscard]] auto RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus; [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) -> UnregisterInstanceCoordinatorStatus; @@ -42,11 +42,13 @@ class CoordinatorState { auto ShowInstances() const -> std::vector; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; // NOTE: The client code must check that the server exists before calling this method. 
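// (The getter below fetches the server via std::get on the internal variant, so calling it in the wrong state throws std::bad_variant_access.)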
auto GetCoordinatorServer() const -> CoordinatorServer &; + auto GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable; + private: struct CoordinatorMainReplicaData { std::unique_ptr<CoordinatorServer> coordinator_server_; diff --git a/src/coordination/include/coordination/raft_state.hpp b/src/coordination/include/coordination/raft_state.hpp index 34da3e2a6..6e322ab78 100644 --- a/src/coordination/include/coordination/raft_state.hpp +++ b/src/coordination/include/coordination/raft_state.hpp @@ -23,7 +23,7 @@ namespace memgraph::coordination { class CoordinatorInstance; -struct CoordinatorClientConfig; +struct CoordinatorToReplicaConfig; using BecomeLeaderCb = std::function<void()>; using BecomeFollowerCb = std::function<void()>; @@ -58,24 +58,27 @@ class RaftState { auto InstanceName() const -> std::string; auto RaftSocketAddress() const -> std::string; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>; auto RequestLeadership() -> bool; auto IsLeader() const -> bool; - auto FindCurrentMainInstanceName() const -> std::optional<std::string>; auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; - auto AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool; + auto AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool; auto AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool; auto AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool; auto AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool; auto AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool; + auto AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool; + + auto GetReplicationInstances() const -> std::vector<ReplicationInstanceState>; + // TODO: (andi) Do we still need GetAllCoordinators then?
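+ // GetAllCoordinators returns the server configs that NuRaft itself tracks, whereas GetCoordinatorInstances
+ // below returns the coordinator entries recorded in the replicated cluster state.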
+ auto GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState>; - auto GetInstances() const -> std::vector<InstanceState>; auto GetUUID() const -> utils::UUID; private: diff --git a/src/coordination/include/coordination/replication_instance.hpp b/src/coordination/include/coordination/replication_instance.hpp index 7b5d73b81..1e6c042c5 100644 --- a/src/coordination/include/coordination/replication_instance.hpp +++ b/src/coordination/include/coordination/replication_instance.hpp @@ -32,7 +32,7 @@ using HealthCheckInstanceCallback = void (CoordinatorInstance::*)(std::string_vi class ReplicationInstance { public: - ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, HealthCheckClientCallback succ_cb, + ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb, HealthCheckInstanceCallback fail_instance_cb); @@ -67,7 +67,7 @@ class ReplicationInstance { auto PauseFrequentCheck() -> void; auto ResumeFrequentCheck() -> void; - auto ReplicationClientInfo() const -> ReplClientInfo; + auto ReplicationClientInfo() const -> ReplicationClientInfo; auto EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool; diff --git a/src/coordination/include/nuraft/coordinator_cluster_state.hpp b/src/coordination/include/nuraft/coordinator_cluster_state.hpp index 11d539a14..5d9afe89e 100644 --- a/src/coordination/include/nuraft/coordinator_cluster_state.hpp +++ b/src/coordination/include/nuraft/coordinator_cluster_state.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "nuraft/raft_log_action.hpp" #include "replication_coordination_glue/role.hpp" #include "utils/resource_lock.hpp" @@ -32,19 +32,29 @@ namespace memgraph::coordination { using replication_coordination_glue::ReplicationRole; -struct InstanceState { - CoordinatorClientConfig config; +struct ReplicationInstanceState { + CoordinatorToReplicaConfig config; ReplicationRole status; - friend auto operator==(InstanceState const &lhs, InstanceState const &rhs) -> bool { + friend auto operator==(ReplicationInstanceState const &lhs, ReplicationInstanceState const &rhs) -> bool { return lhs.config == rhs.config && lhs.status == rhs.status; } }; -void to_json(nlohmann::json &j, InstanceState const &instance_state); -void from_json(nlohmann::json const &j, InstanceState &instance_state); +// NOTE: Currently, a coordinator instance does not change after registration, so we just wrap +// CoordinatorToCoordinatorConfig.
+struct CoordinatorInstanceState { + CoordinatorToCoordinatorConfig config; -using TRaftLog = std::variant; + friend auto operator==(CoordinatorInstanceState const &lhs, CoordinatorInstanceState const &rhs) -> bool { + return lhs.config == rhs.config; + } +}; + +void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state); +void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state); + +using TRaftLog = std::variant; using nuraft::buffer; using nuraft::buffer_serializer; @@ -53,7 +63,7 @@ using nuraft::ptr; class CoordinatorClusterState { public: CoordinatorClusterState() = default; - explicit CoordinatorClusterState(std::map> instances); + explicit CoordinatorClusterState(std::map> instances); CoordinatorClusterState(CoordinatorClusterState const &); CoordinatorClusterState &operator=(CoordinatorClusterState const &); @@ -62,15 +72,13 @@ class CoordinatorClusterState { CoordinatorClusterState &operator=(CoordinatorClusterState &&other) noexcept; ~CoordinatorClusterState() = default; - auto FindCurrentMainInstanceName() const -> std::optional; - auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; - auto InsertInstance(std::string instance_name, InstanceState instance_state) -> void; + auto InsertInstance(std::string instance_name, ReplicationInstanceState instance_state) -> void; auto DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void; @@ -78,12 +86,15 @@ class CoordinatorClusterState { static auto Deserialize(buffer &data) -> CoordinatorClusterState; - auto GetInstances() const -> std::vector; + auto GetReplicationInstances() const -> std::vector; + + auto GetCoordinatorInstances() const -> std::vector; auto GetUUID() const -> utils::UUID; private: - std::map> instances_{}; + std::vector coordinators_{}; + std::map> repl_instances_{}; utils::UUID uuid_{}; mutable utils::ResourceLock log_lock_{}; }; diff --git a/src/coordination/include/nuraft/coordinator_state_machine.hpp b/src/coordination/include/nuraft/coordinator_state_machine.hpp index 836ac17a6..6340cf604 100644 --- a/src/coordination/include/nuraft/coordinator_state_machine.hpp +++ b/src/coordination/include/nuraft/coordinator_state_machine.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "nuraft/coordinator_cluster_state.hpp" #include "nuraft/raft_log_action.hpp" @@ -42,17 +42,18 @@ class CoordinatorStateMachine : public state_machine { CoordinatorStateMachine &operator=(CoordinatorStateMachine &&) = delete; ~CoordinatorStateMachine() override {} - auto FindCurrentMainInstanceName() const -> std::optional; + // TODO: (andi) Check API of this class. 
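Every serialized log entry is later decoded and applied against the replicated cluster state, so the Serialize*/DecodeLog/DoAction trio must stay in sync for each RaftLogAction. A sketch of the intended round trip for the new ADD_COORDINATOR_INSTANCE action; the free function is invented for illustration, only the types and static methods come from these headers:

    #include "coordination/coordinator_communication_config.hpp"
    #include "nuraft/coordinator_cluster_state.hpp"
    #include "nuraft/coordinator_state_machine.hpp"

    namespace example {

    using namespace memgraph::coordination;

    // Hypothetical round trip: what the state machine does once the entry commits.
    void ApplyAddCoordinator(CoordinatorClusterState &cluster_state,
                             CoordinatorToCoordinatorConfig const &config) {
      auto entry = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config);
      // DecodeLog yields the payload variant plus the action tag...
      auto [payload, action] = CoordinatorStateMachine::DecodeLog(*entry);
      // ...and DoAction dispatches on the tag, presumably appending to coordinators_.
      cluster_state.DoAction(payload, action);
    }

    }  // namespace example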
auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; static auto CreateLog(nlohmann::json &&log) -> ptr; - static auto SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr; + static auto SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr; static auto SerializeUnregisterInstance(std::string_view instance_name) -> ptr; static auto SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr; static auto SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr; static auto SerializeUpdateUUID(utils::UUID const &uuid) -> ptr; + static auto SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> ptr; static auto DecodeLog(buffer &data) -> std::pair; @@ -80,7 +81,10 @@ class CoordinatorStateMachine : public state_machine { auto create_snapshot(snapshot &s, async_result::handler_type &when_done) -> void override; - auto GetInstances() const -> std::vector; + auto GetReplicationInstances() const -> std::vector; + + auto GetCoordinatorInstances() const -> std::vector; + auto GetUUID() const -> utils::UUID; private: diff --git a/src/coordination/include/nuraft/raft_log_action.hpp b/src/coordination/include/nuraft/raft_log_action.hpp index 3f1b26dfa..b9cdd233a 100644 --- a/src/coordination/include/nuraft/raft_log_action.hpp +++ b/src/coordination/include/nuraft/raft_log_action.hpp @@ -27,16 +27,16 @@ enum class RaftLogAction : uint8_t { UNREGISTER_REPLICATION_INSTANCE, SET_INSTANCE_AS_MAIN, SET_INSTANCE_AS_REPLICA, - UPDATE_UUID + UPDATE_UUID, + ADD_COORDINATOR_INSTANCE }; -NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, { - {RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"}, - {RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"}, - {RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"}, - {RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"}, - {RaftLogAction::UPDATE_UUID, "update_uuid"}, - }) +NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, {{RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"}, + {RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"}, + {RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"}, + {RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"}, + {RaftLogAction::UPDATE_UUID, "update_uuid"}, + {RaftLogAction::ADD_COORDINATOR_INSTANCE, "add_coordinator_instance"}}) } // namespace memgraph::coordination #endif diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp index fd93160b6..6175fda4b 100644 --- a/src/coordination/raft_state.cpp +++ b/src/coordination/raft_state.cpp @@ -13,7 +13,7 @@ #include #include -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_exceptions.hpp" #include "coordination/raft_state.hpp" #include "utils/counter.hpp" @@ -113,10 +113,9 @@ auto RaftState::InstanceName() const -> std::string { auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); } -auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) - -> void { - auto const endpoint = fmt::format("{}:{}", raft_address, raft_port); - srv_config const srv_config_to_add(static_cast(raft_server_id), endpoint); +auto RaftState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + auto const endpoint = config.coordinator_server.SocketAddress(); + srv_config const 
srv_config_to_add(static_cast(config.coordinator_server_id), endpoint); auto cmd_result = raft_server_->add_srv(srv_config_to_add); @@ -134,9 +133,9 @@ auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_po bool added{false}; while (!maybe_stop()) { std::this_thread::sleep_for(std::chrono::milliseconds(waiting_period)); - const auto server_config = raft_server_->get_srv_config(static_cast(raft_server_id)); + const auto server_config = raft_server_->get_srv_config(static_cast(config.coordinator_server_id)); if (server_config) { - spdlog::trace("Server with id {} added to cluster", raft_server_id); + spdlog::trace("Server with id {} added to cluster", config.coordinator_server_id); added = true; break; } @@ -158,7 +157,7 @@ auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); } auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); } -auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool { +auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool { auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config); auto const res = raft_server_->append_entries({new_log}); @@ -261,8 +260,26 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool { return true; } -auto RaftState::FindCurrentMainInstanceName() const -> std::optional { - return state_machine_->FindCurrentMainInstanceName(); +auto RaftState::AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool { + auto new_log = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for adding coordinator instance {}. 
Most likely the reason is that the instance is " + "not the leader.", + config.coordinator_server_id); + return false; + } + + spdlog::info("Request for adding coordinator instance {} accepted", config.coordinator_server_id); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to add coordinator instance {} with error code {}", config.coordinator_server_id, + static_cast(res->get_result_code())); + return false; + } + + return true; } auto RaftState::MainExists() const -> bool { return state_machine_->MainExists(); } @@ -273,7 +290,13 @@ auto RaftState::IsReplica(std::string_view instance_name) const -> bool { return state_machine_->IsReplica(instance_name); } -auto RaftState::GetInstances() const -> std::vector { return state_machine_->GetInstances(); } +auto RaftState::GetReplicationInstances() const -> std::vector { + return state_machine_->GetReplicationInstances(); +} + +auto RaftState::GetCoordinatorInstances() const -> std::vector { + return state_machine_->GetCoordinatorInstances(); +} auto RaftState::GetUUID() const -> utils::UUID { return state_machine_->GetUUID(); } diff --git a/src/coordination/replication_instance.cpp b/src/coordination/replication_instance.cpp index ca7572ea7..34d889775 100644 --- a/src/coordination/replication_instance.cpp +++ b/src/coordination/replication_instance.cpp @@ -20,7 +20,7 @@ namespace memgraph::coordination { -ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, +ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb, HealthCheckInstanceCallback fail_instance_cb) @@ -82,7 +82,7 @@ auto ReplicationInstance::StopFrequentCheck() -> void { client_.StopFrequentChec auto ReplicationInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); } auto ReplicationInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); } -auto ReplicationInstance::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo { +auto ReplicationInstance::ReplicationClientInfo() const -> coordination::ReplicationClientInfo { return client_.ReplicationClientInfo(); } diff --git a/src/dbms/coordinator_handler.cpp b/src/dbms/coordinator_handler.cpp index 292d50d3d..1f64892bc 100644 --- a/src/dbms/coordinator_handler.cpp +++ b/src/dbms/coordinator_handler.cpp @@ -20,7 +20,7 @@ namespace memgraph::dbms { CoordinatorHandler::CoordinatorHandler(coordination::CoordinatorState &coordinator_state) : coordinator_state_(coordinator_state) {} -auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) +auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config) -> coordination::RegisterInstanceCoordinatorStatus { return coordinator_state_.RegisterReplicationInstance(config); } @@ -39,9 +39,8 @@ auto CoordinatorHandler::ShowInstances() const -> std::vector void { - coordinator_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); +auto CoordinatorHandler::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + coordinator_state_.AddCoordinatorInstance(config); } } // namespace memgraph::dbms diff --git a/src/dbms/coordinator_handler.hpp b/src/dbms/coordinator_handler.hpp index 1c456134d..f3640736a 100644 --- a/src/dbms/coordinator_handler.hpp +++ 
b/src/dbms/coordinator_handler.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_state.hpp" #include "coordination/instance_status.hpp" #include "coordination/register_main_replica_coordinator_status.hpp" @@ -30,7 +30,7 @@ class CoordinatorHandler { // TODO: (andi) When moving coordinator state on same instances, rename from RegisterReplicationInstance to // RegisterInstance - auto RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) + auto RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config) -> coordination::RegisterInstanceCoordinatorStatus; auto UnregisterReplicationInstance(std::string_view instance_name) @@ -40,7 +40,7 @@ class CoordinatorHandler { auto ShowInstances() const -> std::vector; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; private: coordination::CoordinatorState &coordinator_state_; diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp index 6a48f15ca..51a444a30 100644 --- a/src/glue/SessionHL.cpp +++ b/src/glue/SessionHL.cpp @@ -249,6 +249,40 @@ std::pair, std::optional> SessionHL::Interpret( } } +using memgraph::communication::bolt::Value; + +#ifdef MG_ENTERPRISE +auto SessionHL::Route(std::map const &routing, + std::vector const & /*bookmarks*/, + std::map const & /*extra*/) -> std::map { + auto routing_map = ranges::views::transform( + routing, [](auto const &pair) { return std::pair(pair.first, pair.second.ValueString()); }) | + ranges::to>(); + + auto routing_table_res = interpreter_.Route(routing_map); + + auto create_server = [](auto const &server_info) -> Value { + auto const &[addresses, role] = server_info; + std::map server_map; + auto bolt_addresses = ranges::views::transform(addresses, [](auto const &addr) { return Value{addr}; }) | + ranges::to>(); + + server_map["addresses"] = std::move(bolt_addresses); + server_map["role"] = memgraph::communication::bolt::Value{role}; + return Value{std::move(server_map)}; + }; + + std::map communication_res; + communication_res["ttl"] = Value{routing_table_res.ttl}; + communication_res["db"] = Value{}; + + auto servers = ranges::views::transform(routing_table_res.servers, create_server) | ranges::to>(); + communication_res["servers"] = memgraph::communication::bolt::Value{std::move(servers)}; + + return {{"rt", memgraph::communication::bolt::Value{std::move(communication_res)}}}; +} +#endif + void SessionHL::RollbackTransaction() { try { interpreter_.RollbackTransaction(); diff --git a/src/glue/SessionHL.hpp b/src/glue/SessionHL.hpp index cf0280fcc..9360f96b2 100644 --- a/src/glue/SessionHL.hpp +++ b/src/glue/SessionHL.hpp @@ -55,6 +55,13 @@ class SessionHL final : public memgraph::communication::bolt::Session ¶ms, const std::map &extra) override; +#ifdef MG_ENTERPRISE + auto Route(std::map const &routing, + std::vector const &bookmarks, + std::map const &extra) + -> std::map override; +#endif + std::map Pull(TEncoder *encoder, std::optional n, std::optional qid) override; diff --git a/src/io/network/endpoint.cpp b/src/io/network/endpoint.cpp index 6ed4a6753..c996055ff 100644 --- a/src/io/network/endpoint.cpp +++ b/src/io/network/endpoint.cpp @@ -82,8 +82,7 @@ bool Endpoint::IsResolvableAddress(std::string_view address, uint16_t port) { return status == 0; 
} -std::optional Endpoint::ParseSocketOrAddress(std::string_view address, - std::optional default_port) { +std::optional Endpoint::ParseSocketOrAddress(std::string_view address, std::optional default_port) { auto const parts = utils::SplitView(address, delimiter); if (parts.size() > 2) { @@ -109,13 +108,13 @@ std::optional Endpoint::ParseSocketOrAddress(std::string_view add }(); if (GetIpFamily(addr) == IpFamily::NONE) { - if (IsResolvableAddress(addr, *port)) { // NOLINT - return std::pair{addr, *port}; // NOLINT + if (IsResolvableAddress(addr, *port)) { // NOLINT + return Endpoint{std::string(addr), *port}; // NOLINT } return std::nullopt; } - return std::pair{addr, *port}; // NOLINT + return Endpoint{std::string(addr), *port}; // NOLINT } auto Endpoint::ValidatePort(std::optional port) -> bool { @@ -138,4 +137,14 @@ auto Endpoint::ValidatePort(std::optional port) -> bool { return true; } +void to_json(nlohmann::json &j, Endpoint const &config) { + j = nlohmann::json{{"address", config.address}, {"port", config.port}, {"family", config.family}}; +} + +void from_json(nlohmann::json const &j, Endpoint &config) { + config.address = j.at("address").get(); + config.port = j.at("port").get(); + config.family = j.at("family").get(); +} + } // namespace memgraph::io::network diff --git a/src/io/network/endpoint.hpp b/src/io/network/endpoint.hpp index f46d28ace..c47c736ee 100644 --- a/src/io/network/endpoint.hpp +++ b/src/io/network/endpoint.hpp @@ -17,9 +17,9 @@ #include #include -namespace memgraph::io::network { +#include "json/json.hpp" -using ParsedAddress = std::pair; +namespace memgraph::io::network { struct Endpoint { static const struct needs_resolving_t { @@ -39,8 +39,8 @@ struct Endpoint { enum class IpFamily : std::uint8_t { NONE, IP4, IP6 }; - static std::optional ParseSocketOrAddress(std::string_view address, - std::optional default_port = {}); + static std::optional ParseSocketOrAddress(std::string_view address, + std::optional default_port = {}); std::string SocketAddress() const; @@ -59,4 +59,7 @@ struct Endpoint { static auto ValidatePort(std::optional port) -> bool; }; +void to_json(nlohmann::json &j, Endpoint const &config); +void from_json(nlohmann::json const &j, Endpoint &config); + } // namespace memgraph::io::network diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 332054485..2fba0addb 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -328,15 +328,14 @@ class ReplQueryHandler { const auto repl_mode = convertToReplicationMode(sync_mode); - const auto maybe_ip_and_port = + auto maybe_endpoint = io::network::Endpoint::ParseSocketOrAddress(socket_address, memgraph::replication::kDefaultReplicationPort); - if (maybe_ip_and_port) { - const auto [ip, port] = *maybe_ip_and_port; + if (maybe_endpoint) { const auto replication_config = replication::ReplicationClientConfig{.name = name, .mode = repl_mode, - .ip_address = std::string(ip), - .port = port, + .ip_address = std::move(maybe_endpoint->address), + .port = maybe_endpoint->port, .replica_check_frequency = replica_check_frequency, .ssl = std::nullopt}; @@ -413,39 +412,41 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - void RegisterReplicationInstance(std::string_view coordinator_socket_address, - std::string_view replication_socket_address, + void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server, + std::string_view replication_server, std::chrono::seconds const &instance_check_frequency, 
std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, std::string_view instance_name, CoordinatorQuery::SyncMode sync_mode) override { - const auto maybe_replication_ip_port = io::network::Endpoint::ParseSocketOrAddress(replication_socket_address); - if (!maybe_replication_ip_port) { + auto const maybe_bolt_server = io::network::Endpoint::ParseSocketOrAddress(bolt_server); + if (!maybe_bolt_server) { + throw QueryRuntimeException("Invalid bolt socket address!"); + } + + auto const maybe_management_server = io::network::Endpoint::ParseSocketOrAddress(management_server); + if (!maybe_management_server) { + throw QueryRuntimeException("Invalid management socket address!"); + } + + auto const maybe_replication_server = io::network::Endpoint::ParseSocketOrAddress(replication_server); + if (!maybe_replication_server) { throw QueryRuntimeException("Invalid replication socket address!"); } - const auto maybe_coordinator_ip_port = io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address); - if (!maybe_replication_ip_port) { - throw QueryRuntimeException("Invalid replication socket address!"); - } - - const auto [replication_ip, replication_port] = *maybe_replication_ip_port; - const auto [coordinator_server_ip, coordinator_server_port] = *maybe_coordinator_ip_port; - const auto repl_config = coordination::CoordinatorClientConfig::ReplicationClientInfo{ - .instance_name = std::string(instance_name), - .replication_mode = convertFromCoordinatorToReplicationMode(sync_mode), - .replication_ip_address = std::string(replication_ip), - .replication_port = replication_port}; + auto const repl_config = + coordination::ReplicationClientInfo{.instance_name = std::string(instance_name), + .replication_mode = convertFromCoordinatorToReplicationMode(sync_mode), + .replication_server = *maybe_replication_server}; auto coordinator_client_config = - coordination::CoordinatorClientConfig{.instance_name = std::string(instance_name), - .ip_address = std::string(coordinator_server_ip), - .port = coordinator_server_port, - .instance_health_check_frequency_sec = instance_check_frequency, - .instance_down_timeout_sec = instance_down_timeout, - .instance_get_uuid_frequency_sec = instance_get_uuid_frequency, - .replication_client_info = repl_config, - .ssl = std::nullopt}; + coordination::CoordinatorToReplicaConfig{.instance_name = std::string(instance_name), + .mgt_server = *maybe_management_server, + .bolt_server = *maybe_bolt_server, + .replication_client_info = repl_config, + .instance_health_check_frequency_sec = instance_check_frequency, + .instance_down_timeout_sec = instance_down_timeout, + .instance_get_uuid_frequency_sec = instance_get_uuid_frequency, + .ssl = std::nullopt}; auto status = coordinator_handler_.RegisterReplicationInstance(coordinator_client_config); switch (status) { @@ -473,15 +474,25 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view raft_socket_address) -> void override { - auto const maybe_ip_and_port = io::network::Endpoint::ParseSocketOrAddress(raft_socket_address); - if (maybe_ip_and_port) { - auto const [ip, port] = *maybe_ip_and_port; - spdlog::info("Adding instance {} with raft socket address {}:{}.", raft_server_id, ip, port); - coordinator_handler_.AddCoordinatorInstance(raft_server_id, port, ip); - } else { - spdlog::error("Invalid raft socket address {}.", raft_socket_address); + auto AddCoordinatorInstance(uint32_t 
raft_server_id, std::string_view bolt_server, + std::string_view coordinator_server) -> void override { + auto const maybe_coordinator_server = io::network::Endpoint::ParseSocketOrAddress(coordinator_server); + if (!maybe_coordinator_server) { + throw QueryRuntimeException("Invalid coordinator socket address!"); } + + auto const maybe_bolt_server = io::network::Endpoint::ParseSocketOrAddress(bolt_server); + if (!maybe_bolt_server) { + throw QueryRuntimeException("Invalid bolt socket address!"); + } + + auto const coord_coord_config = + coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = raft_server_id, + .bolt_server = *maybe_bolt_server, + .coordinator_server = *maybe_coordinator_server}; + + coordinator_handler_.AddCoordinatorInstance(coord_coord_config); + spdlog::info("Added instance on coordinator server {}", maybe_coordinator_server->SocketAddress()); } void SetReplicationInstanceToMain(std::string_view instance_name) override { @@ -1197,8 +1208,9 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param auto coord_server_id = coordinator_query->coordinator_server_id_->Accept(evaluator).ValueInt(); callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coord_server_id, + bolt_server = bolt_server_it->second, coordinator_server = coordinator_server_it->second]() mutable { - handler.AddCoordinatorInstance(coord_server_id, coordinator_server); + handler.AddCoordinatorInstance(coord_server_id, bolt_server, coordinator_server); return std::vector>(); }; @@ -1243,15 +1255,15 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param callback.fn = [handler = CoordQueryHandler{*coordinator_state}, instance_health_check_frequency_sec = config.instance_health_check_frequency_sec, - management_server = management_server_it->second, - replication_server = replication_server_it->second, bolt_server = bolt_server_it->second, + bolt_server = bolt_server_it->second, management_server = management_server_it->second, + replication_server = replication_server_it->second, instance_name = coordinator_query->instance_name_, instance_down_timeout_sec = config.instance_down_timeout_sec, instance_get_uuid_frequency_sec = config.instance_get_uuid_frequency_sec, sync_mode = coordinator_query->sync_mode_]() mutable { - handler.RegisterReplicationInstance(management_server, replication_server, instance_health_check_frequency_sec, - instance_down_timeout_sec, instance_get_uuid_frequency_sec, instance_name, - sync_mode); + handler.RegisterReplicationInstance(bolt_server, management_server, replication_server, + instance_health_check_frequency_sec, instance_down_timeout_sec, + instance_get_uuid_frequency_sec, instance_name, sync_mode); return std::vector>(); }; @@ -4266,6 +4278,28 @@ void Interpreter::RollbackTransaction() { ResetInterpreter(); } +#ifdef MG_ENTERPRISE +auto Interpreter::Route(std::map const &routing) -> RouteResult { + // TODO: (andi) Test + if (!FLAGS_raft_server_id) { + auto const &address = routing.find("address"); + if (address == routing.end()) { + throw QueryException("Routing table must contain address field."); + } + + auto result = RouteResult{}; + if (interpreter_context_->repl_state->IsMain()) { + result.servers.emplace_back(std::vector{address->second}, "WRITE"); + } else { + result.servers.emplace_back(std::vector{address->second}, "READ"); + } + return result; + } + + return RouteResult{.servers = interpreter_context_->coordinator_state_->GetRoutingTable(routing)}; +} +#endif + #if MG_ENTERPRISE // Before 
Prepare or during Prepare, but single-threaded. // TODO: Is there any cleanup? diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 5366b4472..5d10a24de 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -143,8 +143,8 @@ class CoordinatorQueryHandler { }; /// @throw QueryRuntimeException if an error occurred. - virtual void RegisterReplicationInstance(std::string_view coordinator_socket_address, - std::string_view replication_socket_address, + virtual void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server, + std::string_view replication_server, std::chrono::seconds const &instance_health_check_frequency, std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, @@ -160,7 +160,8 @@ class CoordinatorQueryHandler { virtual std::vector<coordination::InstanceStatus> ShowInstances() const = 0; /// @throw QueryRuntimeException if an error occurred. - virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view coordinator_socket_address) -> void = 0; + virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + std::string_view coordinator_server) -> void = 0; }; #endif @@ -247,6 +248,14 @@ class Interpreter final { std::optional<std::string> db; }; +#ifdef MG_ENTERPRISE + struct RouteResult { + int ttl{300}; + std::string db{}; // Currently not used since we don't have any specific replication groups etc. + coordination::RoutingTable servers{}; + }; +#endif + std::shared_ptr<QueryUserOrRole> user_or_role_{}; bool in_explicit_transaction_{false}; CurrentDB current_db_; @@ -272,6 +281,10 @@ class Interpreter final { const std::map<std::string, storage::PropertyValue> &params, QueryExtras const &extras); +#ifdef MG_ENTERPRISE + auto Route(std::map<std::string, std::string> const &routing) -> RouteResult; +#endif + /** * Execute the last prepared query and stream *all* of the results into the * given stream.
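Taken together, Interpreter::Route and SessionHL::Route implement the server side of the bolt ROUTE message: the coordinator's routing table is wrapped into the "rt" response map with a ttl and a list of servers grouped by role. For the three-coordinator cluster that the driver tests below spin up, the table would look roughly like this; a hand-written illustration assuming coordinators advertise the standard bolt ROUTE role and the ports used by run_cluster.sh:

    #include <string>
    #include <utility>
    #include <vector>

    // Assumes coordination::RoutingTable is a vector of (addresses, role) pairs;
    // this mirrors the shape SessionHL::Route consumes to build the "servers" list.
    using RoutingTable = std::vector<std::pair<std::vector<std::string>, std::string>>;

    RoutingTable ExampleRoutingTable() {
      return {
          {{"127.0.0.1:7687"}, "WRITE"},                                      // current main
          {{"127.0.0.1:7688", "127.0.0.1:7689"}, "READ"},                     // replicas
          {{"127.0.0.1:7690", "127.0.0.1:7691", "127.0.0.1:7692"}, "ROUTE"},  // coordinators
      };
    }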
diff --git a/tests/drivers/go/v5/docs_quick_start.go b/tests/drivers/go/v5/docs_quick_start.go index 69805acc1..5788ed703 100644 --- a/tests/drivers/go/v5/docs_quick_start.go +++ b/tests/drivers/go/v5/docs_quick_start.go @@ -13,12 +13,13 @@ func handle_if_error(err error) { } func main() { - dbUri := "bolt://localhost:7687" - driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", "")) - if err != nil { - log.Fatal("An error occurred opening conn: %s", err) - } - defer driver.Close() + fmt.Println("Started running docs_quick_start.go test") + dbUri := "bolt://localhost:7687" + driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", "")) + if err != nil { + log.Fatalf("An error occurred opening conn: %s", err) + } + defer driver.Close() session := driver.NewSession(neo4j.SessionConfig{}) defer session.Close() @@ -33,7 +34,7 @@ func main() { _,err = session.WriteTransaction(testAll) handle_if_error(err) - fmt.Println("All ok!") + fmt.Println("docs_quick_start.go test finished successfully.") } func clearDatabase(tx neo4j.Transaction) (interface{}, error) { @@ -75,15 +76,14 @@ func testAll(tx neo4j.Transaction) (interface{}, error) { handle_if_error(err) age, err := neo4j.GetProperty[int64](node_value, "age") handle_if_error(err) - + if label != "Person" || name != "Alice" || age != 22 { return nil, fmt.Errorf("Data doesn't match.") } - + fmt.Println("Label", label) fmt.Println("name", name) fmt.Println("age", age) return result.Consume() } - diff --git a/tests/drivers/go/v5/go.mod b/tests/drivers/go/v5/go.mod index a44baf405..f05f98dc6 100644 --- a/tests/drivers/go/v5/go.mod +++ b/tests/drivers/go/v5/go.mod @@ -3,6 +3,6 @@ module bolt-test go 1.18 require ( - github.com/neo4j/neo4j-go-driver/v5 v5.13.0 // indirect + github.com/neo4j/neo4j-go-driver/v5 v5.18.0 // indirect golang.org/dl v0.0.0-20230502172222-5216546bad51 // indirect ) diff --git a/tests/drivers/go/v5/go.sum b/tests/drivers/go/v5/go.sum index dc85aef95..1c956d94a 100644 --- a/tests/drivers/go/v5/go.sum +++ b/tests/drivers/go/v5/go.sum @@ -8,5 +8,7 @@ github.com/neo4j/neo4j-go-driver/v5 v5.9.0 h1:TYxT0RSiwnvVFia90V7TLnRXv8HkdQQ6rT github.com/neo4j/neo4j-go-driver/v5 v5.9.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= github.com/neo4j/neo4j-go-driver/v5 v5.13.0 h1:NmyUxh4LYTdcJdI6EnazHyUKu1f0/BPiHCYUZUZIGQw= github.com/neo4j/neo4j-go-driver/v5 v5.13.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= +github.com/neo4j/neo4j-go-driver/v5 v5.18.0 h1:3dmYsCYt/Fc/bPeSyGRGGfn/T6h06/OmHm72OFQKa3c= +github.com/neo4j/neo4j-go-driver/v5 v5.18.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= golang.org/dl v0.0.0-20230502172222-5216546bad51 h1:Bmo/kmR2hzyhGt3jjtl1ghkCqa5LINbB9D3QTkiLJIY= golang.org/dl v0.0.0-20230502172222-5216546bad51/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ= diff --git a/tests/drivers/go/v5/read_routing.go b/tests/drivers/go/v5/read_routing.go new file mode 100644 index 000000000..e8c2ffba2 --- /dev/null +++ b/tests/drivers/go/v5/read_routing.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" +) + +func read_messages(uri string) { + username := "" + password := "" + + // Connect to Memgraph + driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + if err != nil { + panic(err) + } + defer driver.Close() + + // Use AccessModeRead for read transactions + session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead}) + defer session.Close() + + greeting, err := 
session.ReadTransaction(func(transaction neo4j.Transaction) (interface{}, error) { + result, err := transaction.Run("MATCH (n:Greeting) RETURN n.message AS message LIMIT 1", nil) + if err != nil { + return nil, err + } + + if result.Next() { + return result.Record().Values[0], nil + } + + return nil, result.Err() + }) + + if err != nil { + panic(err) + } + + fmt.Println(greeting) + +} + +// Test checks that you can use bolt+routing for connecting to main and coordinators for reading. +func main() { + fmt.Println("Started running read_routing.go test") + read_messages("neo4j://localhost:7690") // coordinator_1 + read_messages("neo4j://localhost:7691") // coordinator_2 + read_messages("neo4j://localhost:7692") // coordinator_3 + fmt.Println("Successfully finished running read_routing.go test") +} diff --git a/tests/drivers/go/v5/run.sh b/tests/drivers/go/v5/run.sh index cbe31bd26..344495f15 100755 --- a/tests/drivers/go/v5/run.sh +++ b/tests/drivers/go/v5/run.sh @@ -18,4 +18,3 @@ done go get github.com/neo4j/neo4j-go-driver/v5 go run docs_quick_start.go -# go run parallel_edge_import.go diff --git a/tests/drivers/go/v5/run_cluster_tests.sh b/tests/drivers/go/v5/run_cluster_tests.sh new file mode 100755 index 000000000..9ccd7b0c0 --- /dev/null +++ b/tests/drivers/go/v5/run_cluster_tests.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e + +GO_VERSION="1.18.9" +GO_VERSION_DIR="/opt/go$GO_VERSION" +if [ -f "$GO_VERSION_DIR/go/bin/go" ]; then + export GOROOT="$GO_VERSION_DIR/go" + export GOPATH="$HOME/go$GO_VERSION" + export PATH="$GO_VERSION_DIR/go/bin:$PATH" +fi + +# check if go is installed +for i in go; do + if ! which $i >/dev/null; then + echo "Please install $i!" + exit 1 + fi +done + +go get github.com/neo4j/neo4j-go-driver/v5 +go run write_routing.go +go run read_routing.go diff --git a/tests/drivers/go/v5/write_routing.go b/tests/drivers/go/v5/write_routing.go new file mode 100644 index 000000000..f77dd29ca --- /dev/null +++ b/tests/drivers/go/v5/write_routing.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" +) + +func create_message(uri string) { + username := "" + password := "" + + // Connect to Memgraph + driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + if err != nil { + panic(err) + } + defer driver.Close() + + session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeWrite}) + defer session.Close() + + greeting, err := session.WriteTransaction(func(transaction neo4j.Transaction) (interface{}, error) { + result, err := transaction.Run("CREATE (n:Greeting) SET n.message = $message RETURN n.message", map[string]interface{}{ + "message": "Hello, World!", + }) + if err != nil { + return nil, err + } + + if result.Next() { + return result.Record().Values[0], nil + } + + return nil, result.Err() + }) + + if err != nil { + panic(err) + } + + fmt.Println(greeting) +} + +// Test checks that you can use bolt+routing for connecting to main and coordinators for writing. 
+func main() { + fmt.Println("Started running write_routing.go test") + create_message("neo4j://localhost:7690") // coordinator_1 + create_message("neo4j://localhost:7691") // coordinator_2 + create_message("neo4j://localhost:7692") // coordinator_3 + fmt.Println("Successfully finished running write_routing.go test") +} diff --git a/tests/drivers/java/v5_8/pom.xml b/tests/drivers/java/v5_8/pom.xml index 6db6a6ded..6db821683 100644 --- a/tests/drivers/java/v5_8/pom.xml +++ b/tests/drivers/java/v5_8/pom.xml @@ -104,6 +104,45 @@ <goal>single</goal> </goals> </execution> + <execution> + <id>build-e</id> + <configuration> + <archive> + <manifest> + <mainClass>memgraph.WriteRouting</mainClass> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + <appendAssemblyId>false</appendAssemblyId> + <finalName>WriteRouting</finalName> + </configuration> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + <execution> + <id>build-f</id> + <configuration> + <archive> + <manifest> + <mainClass>memgraph.ReadRouting</mainClass> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + <appendAssemblyId>false</appendAssemblyId> + <finalName>ReadRouting</finalName> + </configuration> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> </executions> </plugin> diff --git a/tests/drivers/java/v5_8/run.sh b/tests/drivers/java/v5_8/run.sh index 03400e385..cb3ebb2ca 100755 --- a/tests/drivers/java/v5_8/run.sh +++ b/tests/drivers/java/v5_8/run.sh @@ -36,4 +36,3 @@ mvn clean package java -jar target/DocsHowToQuery.jar java -jar target/MaxQueryLength.jar java -jar target/Transactions.jar -# java -jar target/ParallelEdgeImport.jar diff --git a/tests/drivers/java/v5_8/run_cluster_tests.sh b/tests/drivers/java/v5_8/run_cluster_tests.sh new file mode 100755 index 000000000..0b01d5de4 --- /dev/null +++ b/tests/drivers/java/v5_8/run_cluster_tests.sh @@ -0,0 +1,37 @@ +#!/bin/bash -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +if [ -d "/usr/lib/jvm/java-17-oracle" ]; then + export JAVA_HOME="/usr/lib/jvm/java-17-oracle" +fi +if [ -d "/usr/lib/jvm/java-17-openjdk-amd64" ]; then + export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" +fi +if [ -d "/opt/apache-maven-3.9.3" ]; then + export M2_HOME="/opt/apache-maven-3.9.3" +fi +export PATH="$JAVA_HOME/bin:$M2_HOME/bin:$PATH" + +for i in java mvn; do + if ! which $i >/dev/null; then + echo "Please install $i!" + exit 1 + fi +done + +JAVA_VER=$(java -version 2>&1 >/dev/null | grep 'version' | cut -d "\"" -f2 | cut -d "." -f1) +if [ $JAVA_VER -ne 17 ] +then + echo "neo4j-java-driver v5.8 requires Java 17. Please install it!" + exit 1 +fi + +# CentOS 7 doesn't have Java version that supports var keyword +source ../../../../environment/util.sh + +mvn clean package + +java -jar target/WriteRouting.jar +java -jar target/ReadRouting.jar diff --git a/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java b/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java new file mode 100644 index 000000000..b8654a890 --- /dev/null +++ b/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java @@ -0,0 +1,35 @@ +package memgraph; + +import static org.neo4j.driver.Values.parameters; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import org.neo4j.driver.AuthTokens; +import org.neo4j.driver.Driver; +import org.neo4j.driver.GraphDatabase; +import org.neo4j.driver.Session; +import org.neo4j.driver.Transaction; + +public class ReadRouting { + private Driver driver; + + private void readMessage(String uri) { + driver = GraphDatabase.driver(uri, AuthTokens.basic("", "")); + try (Session session = driver.session()) { + String greeting = session.readTransaction(tx -> { + var result = tx.run("MATCH (n:Greeting) RETURN n.message AS message"); + System.out.println("Read txn passed!"); + return "OK"; + }); + } + } + + public static void main(String... 
args) { + System.out.println("Started running ReadRoutingTest..."); + ReadRouting greeter = new ReadRouting(); + greeter.readMessage("neo4j://localhost:7690"); // coordinator_1 + greeter.readMessage("neo4j://localhost:7691"); // coordinator_2 + greeter.readMessage("neo4j://localhost:7692"); // coordinator_3 + System.out.println("All good!"); + } +} diff --git a/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java b/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java new file mode 100644 index 000000000..df3948558 --- /dev/null +++ b/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java @@ -0,0 +1,44 @@ +package memgraph; + +import static org.neo4j.driver.Values.parameters; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import org.neo4j.driver.AuthTokens; +import org.neo4j.driver.Config; +import org.neo4j.driver.Driver; +import org.neo4j.driver.GraphDatabase; +import org.neo4j.driver.Result; +import org.neo4j.driver.Session; +import org.neo4j.driver.Transaction; +import org.neo4j.driver.TransactionWork; +import org.neo4j.driver.exceptions.ClientException; +import org.neo4j.driver.exceptions.TransientException; + +public class WriteRouting { + private Driver driver; + + private void createMessage(String uri) { + driver = GraphDatabase.driver(uri, AuthTokens.basic("", "")); + try (Session session = driver.session()) { + String greeting = session.writeTransaction(tx -> { + var result = tx.run("CREATE (n:Greeting) SET n.message = $message RETURN n.message", + parameters("message", "Hello, World!")); + if (result.hasNext()) { + return result.single().get(0).asString(); + } + throw new RuntimeException("No result found."); + }); + System.out.println(greeting); + } + } + + public static void main(String... 
args) { + System.out.println("Started running WriteRoutingTest..."); + WriteRouting greeter = new WriteRouting(); + greeter.createMessage("neo4j://localhost:7690"); // coordinator_1 + greeter.createMessage("neo4j://localhost:7691"); // coordinator_2 + greeter.createMessage("neo4j://localhost:7692"); // coordinator_3 + System.out.println("All good!"); + } +} diff --git a/tests/drivers/node/v5_8/read_routing.js b/tests/drivers/node/v5_8/read_routing.js new file mode 100644 index 000000000..905b184d3 --- /dev/null +++ b/tests/drivers/node/v5_8/read_routing.js @@ -0,0 +1,59 @@ +const neo4j = require('neo4j-driver'); + +function die() { + process.exit(1); +} + +function Neo4jService(uri) { + const driver = neo4j.driver(uri, neo4j.auth.basic("", "")); + + async function readGreeting() { + const session = driver.session({ defaultAccessMode: neo4j.session.READ }); + try { + const result = await session.readTransaction(tx => + tx.run('MATCH (n:Greeting) RETURN n.message AS message') + ); + console.log("Read txn finished"); + } finally { + await session.close(); + } + } + + async function close() { + await driver.close(); + } + + return { + readGreeting, + close + }; +} + +async function readGreetingsFromUri(uri) { + const service = Neo4jService(uri); + await service.readGreeting(); + await service.close(); +} + +async function main() { + console.log("Started reading route"); + const uris = [ + 'neo4j://localhost:7690', + 'neo4j://localhost:7691', + 'neo4j://localhost:7692' + ]; + + try { + for (const uri of uris) { + await readGreetingsFromUri(uri); + } + } catch (error) { + console.error('An error occurred:', error); + die(); + } + console.log("Finished reading route"); +} + +main().catch(error => console.error(error)); diff --git a/tests/drivers/node/v5_8/run.sh b/tests/drivers/node/v5_8/run.sh index 276fdbb2b..a24c5110c 100755 --- a/tests/drivers/node/v5_8/run.sh +++ b/tests/drivers/node/v5_8/run.sh @@ -15,4 +15,3 @@ fi node docs_how_to_query.js node max_query_length.js -# node parallel_edge_import.js diff --git a/tests/drivers/node/v5_8/run_cluster_tests.sh b/tests/drivers/node/v5_8/run_cluster_tests.sh new file mode 100755 index 000000000..3f4fee5ff --- /dev/null +++ b/tests/drivers/node/v5_8/run_cluster_tests.sh @@ -0,0 +1,17 @@ +#!/bin/bash -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +if ! which node >/dev/null; then + echo "Please install nodejs!" + exit 1 +fi + +if [ ! 
-d node_modules ]; then + # Driver generated with: `npm install neo4j-driver` + npm install --no-package-lock --no-save neo4j-driver@5.8.0 +fi + +node write_routing.js +node read_routing.js diff --git a/tests/drivers/node/v5_8/write_routing.js b/tests/drivers/node/v5_8/write_routing.js new file mode 100644 index 000000000..fdb4b74d9 --- /dev/null +++ b/tests/drivers/node/v5_8/write_routing.js @@ -0,0 +1,59 @@ +const neo4j = require('neo4j-driver'); + +function die() { + process.exit(1); +} + +function Neo4jService(uri) { + const driver = neo4j.driver(uri, neo4j.auth.basic("", "")); + + async function createGreeting() { + const session = driver.session({ defaultAccessMode: neo4j.session.WRITE }); + try { + const result = await session.writeTransaction(tx => + tx.run('CREATE (n:Greeting {message: "Hello NodeJs"}) RETURN n.message AS message') + ); + console.log("Write txn finished"); + } finally { + await session.close(); + } + } + + async function close() { + await driver.close(); + } + + return { + createGreeting, + close + }; +} + +async function createGreetingsFromUri(uri) { + const service = Neo4jService(uri); + await service.createGreeting(); + await service.close(); +} + +async function main() { + console.log("Started writing route"); + const uris = [ + 'neo4j://localhost:7690', + 'neo4j://localhost:7691', + 'neo4j://localhost:7692' + ]; + + try { + for (const uri of uris) { + await createGreetingsFromUri(uri); + } + } catch (error) { + console.error('An error occurred:', error); + die(); + } + console.log("Finished writing route"); +} + +main().catch(error => console.error(error)); diff --git a/tests/drivers/python/v5_8/read_routing.py b/tests/drivers/python/v5_8/read_routing.py new file mode 100644 index 000000000..b08982aa3 --- /dev/null +++ b/tests/drivers/python/v5_8/read_routing.py @@ -0,0 +1,41 @@ +from neo4j import GraphDatabase + + +class Neo4jService: + def __init__(self, uri, user="", password=""): + self.driver = GraphDatabase.driver(uri, auth=(user, password)) + + def close(self): + self.driver.close() + + def read_greeting(self): + with self.driver.session() as session: + session.execute_read(self._read_and_return_greeting) + print("Read txn passed!") + + @staticmethod + def _read_and_return_greeting(tx): + tx.run("MATCH (n:Greeting) RETURN n.message AS message") + + +def read_greetings_from_uri(uri): + service = Neo4jService(uri) + service.read_greeting() + service.close() + + +def main(): + print("Started reading route") + uris = ["neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692"] + + try: + for uri in uris: + read_greetings_from_uri(uri) + except Exception as error: + print(f"An error occurred: {error}") + exit(-1) + print("Finished reading route") + + +if __name__ == "__main__": + main() diff --git a/tests/drivers/python/v5_8/run_cluster_tests.sh b/tests/drivers/python/v5_8/run_cluster_tests.sh new file mode 100755 index 000000000..f22c1a8da --- /dev/null +++ b/tests/drivers/python/v5_8/run_cluster_tests.sh @@ -0,0 +1,25 @@ +#!/bin/bash -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +# system check +if ! which virtualenv >/dev/null; then + echo "Please install virtualenv!" + exit 1 +fi + +# setup virtual environment +if [ ! 
-d "ve3" ]; then + virtualenv -p python3 ve3 || exit 1 + source ve3/bin/activate + python3 -m pip install neo4j==5.8.0 || exit 1 + deactivate +fi + +# activate virtualenv +source ve3/bin/activate + +# execute test +python3 write_routing.py || exit 1 +python3 read_routing.py || exit 1 diff --git a/tests/drivers/python/v5_8/write_routing.py b/tests/drivers/python/v5_8/write_routing.py new file mode 100644 index 000000000..427d6e6f2 --- /dev/null +++ b/tests/drivers/python/v5_8/write_routing.py @@ -0,0 +1,41 @@ +from neo4j import GraphDatabase + + +class Neo4jService: + def __init__(self, uri, user="", password=""): + self.driver = GraphDatabase.driver(uri, auth=(user, password)) + + def close(self): + self.driver.close() + + def create_greeting(self): + with self.driver.session() as session: + session.execute_write(self._create_and_return_greeting) + print("Write txn passed!") + + @staticmethod + def _create_and_return_greeting(tx): + tx.run("CREATE (n:Greeting {message: 'Hello from Python'}) RETURN n.message AS message") + + +def create_greetings_from_uri(uri): + service = Neo4jService(uri) + service.create_greeting() + service.close() + + +def main(): + print("Started writing route") + uris = ["neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692"] + + try: + for uri in uris: + create_greetings_from_uri(uri) + except Exception as error: + print(f"An error occurred: {error}") + exit(-1) + print("Finished writing route") + + +if __name__ == "__main__": + main() diff --git a/tests/drivers/run_cluster.sh b/tests/drivers/run_cluster.sh new file mode 100755 index 000000000..b5f75f2ef --- /dev/null +++ b/tests/drivers/run_cluster.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +pushd () { command pushd "$@" > /dev/null; } +popd () { command popd "$@" > /dev/null; } + +function wait_for_server { + port=$1 + while ! nc -z -w 1 127.0.0.1 $port; do + sleep 0.1 + done + sleep 1 +} + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +# create a temporary directory. +tmpdir=/tmp/memgraph_drivers +if [ -d $tmpdir ]; then + rm -rf $tmpdir +fi + +mkdir -p $tmpdir + +# find memgraph binaries. +binary_dir="$DIR/../../build" + +# Start instance_1 +$binary_dir/memgraph \ + --bolt-port=7687 \ + --data-directory=$tmpdir/instance_1/ \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance1.log \ + --also-log-to-stderr \ + --coordinator-server-port=10011 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_1=$! +wait_for_server 7687 + +# Start instance_2 +$binary_dir/memgraph \ + --bolt-port=7688 \ + --data-directory=$tmpdir/instance_2 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance2.log \ + --also-log-to-stderr \ + --coordinator-server-port=10012 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_2=$! +wait_for_server 7688 + +# Start instance_3 +$binary_dir/memgraph \ + --bolt-port=7689 \ + --data-directory=$tmpdir/instance_3 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance3.log \ + --also-log-to-stderr \ + --coordinator-server-port=10013 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_3=$! 
+wait_for_server 7689 + + +# Start coordinator_1 +$binary_dir/memgraph \ + --bolt-port=7690 \ + --data-directory=$tmpdir/coordinator_1 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/coordinator1.log \ + --also-log-to-stderr \ + --raft-server-id=1 \ + --raft-server-port=10111 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_coordinator_1=$! +wait_for_server 7690 + +# Start coordinator_2 +$binary_dir/memgraph \ + --bolt-port=7691 \ + --data-directory=$tmpdir/coordinator_2 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/coordinator2.log \ + --also-log-to-stderr \ + --raft-server-id=2 \ + --raft-server-port=10112 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_coordinator_2=$! +wait_for_server 7691 + +# Start coordinator_3 +$binary_dir/memgraph \ + --bolt-port=7692 \ + --data-directory=$tmpdir/coordinator_3 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/coordinator3.log \ + --also-log-to-stderr \ + --raft-server-id=3 \ + --raft-server-port=10113 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_coordinator_3=$! +wait_for_server 7692 + +sleep 5 + +echo 'ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "127.0.0.1:7691", "coordinator_server": "127.0.0.1:10112"};' | $binary_dir/bin/mgconsole --port 7690 +echo 'ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "127.0.0.1:7692", "coordinator_server": "127.0.0.1:10113"};' | $binary_dir/bin/mgconsole --port 7690 +echo 'REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "127.0.0.1:7687", "management_server": "127.0.0.1:10011", "replication_server": "127.0.0.1:10001"};' | $binary_dir/bin/mgconsole --port 7690 +echo 'REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "127.0.0.1:7688", "management_server": "127.0.0.1:10012", "replication_server": "127.0.0.1:10002"};' | $binary_dir/bin/mgconsole --port 7690 +echo 'REGISTER INSTANCE instance_3 WITH CONFIG {"bolt_server": "127.0.0.1:7689", "management_server": "127.0.0.1:10013", "replication_server": "127.0.0.1:10003"};' | $binary_dir/bin/mgconsole --port 7690 +echo 'SET INSTANCE instance_1 TO MAIN;' | $binary_dir/bin/mgconsole --port 7690 + + +code_test=0 +for lang in *; do + if [ ! -d $lang ]; then continue; fi + pushd $lang + echo "Running tests for language: $lang" + for version in *; do + if [ ! -d $version ]; then continue; fi + pushd $version + if [ -f "run_cluster_tests.sh" ]; then + echo "Running version: $version" + ./run_cluster_tests.sh + code_test=$? + if [ $code_test -ne 0 ]; then + echo "FAILED: $lang-$version" + break + fi + fi + popd + done; + popd +done + + +# Function to stop a process by PID and check its exit code +stop_process() { + local pid=$1 # Capture the PID from the first argument + + # Stop the process + kill $pid + wait $pid + local exit_code=$? # Capture the exit code + + # Check the process's exit code + if [ $exit_code -ne 0 ]; then + echo "The process with PID $pid didn't terminate properly!" + exit $exit_code + else + echo "Process with PID $pid terminated successfully." 
+ fi +} + +echo "Stopping coordinator1" +stop_process $pid_coordinator_1 +echo "Stopping coordinator2" +stop_process $pid_coordinator_2 +echo "Stopping coordinator3" +stop_process $pid_coordinator_3 + +echo "Stopping instance1" +stop_process $pid_instance_1 +echo "Stopping instance2" +stop_process $pid_instance_2 +echo "Stopping instance3" +stop_process $pid_instance_3 + + +# Check test exit code. +if [ $code_test -ne 0 ]; then + echo "One of the tests failed!" + exit $code_test +fi + +# Temporary directory cleanup. +if [ -d $tmpdir ]; then + rm -rf $tmpdir +fi diff --git a/tests/e2e/high_availability/common.py b/tests/e2e/high_availability/common.py index 2157b29ca..adfabd87a 100644 --- a/tests/e2e/high_availability/common.py +++ b/tests/e2e/high_availability/common.py @@ -30,14 +30,3 @@ def safe_execute(function, *args): function(*args) except: pass - - -# NOTE: Repeated execution because it can fail if Raft server is not up -def add_coordinator(cursor, query): - for _ in range(10): - try: - execute_and_fetch_all(cursor, query) - return True - except Exception: - pass - return False diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 13aaf27fe..89279b23d 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -16,7 +16,7 @@ import tempfile import interactive_mg_runner import pytest -from common import add_coordinator, connect, execute_and_fetch_all, safe_execute +from common import connect, execute_and_fetch_all, safe_execute from mg_utils import mg_sleep_and_assert interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -110,134 +110,134 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { } -def test_register_repl_instances_then_coordinators(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), 
- ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - - -def test_register_coordinator_then_repl_instances(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - - 
coordinator2_cursor = connect(host="localhost", port=7691).cursor() - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# def test_register_repl_instances_then_coordinators(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_coord3 = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# +# +# def test_register_coordinator_then_repl_instances(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, 
+# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_coord3 = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) def test_coordinators_communication_with_restarts(): @@ -246,11 +246,11 @@ def test_coordinators_communication_with_restarts(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator( + execute_and_fetch_all( coordinator3_cursor, "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", ) - assert add_coordinator( + execute_and_fetch_all( coordinator3_cursor, "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) @@ -310,284 +310,284 @@ def test_coordinators_communication_with_restarts(): # # TODO: (andi) Test when dealing with distributed coordinators that you can register on one coordinator and unregister from any other coordinator -@pytest.mark.parametrize( - "kill_instance", - [True, False], -) -def test_unregister_replicas(kill_instance): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - coordinator3_cursor = 
connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - main_cursor = connect(host="localhost", port=7689).cursor() - - def check_main(): - return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - expected_replicas = [ - ( - "instance_1", - "127.0.0.1:10001", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - if kill_instance: - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - 
expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - expected_replicas = [ - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - if kill_instance: - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_3", "", "", "unknown", "main"), - ] - expected_replicas = [] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - -def test_unregister_main(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) 
- - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - - try: - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") - except Exception as e: - assert ( - str(e) - == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!" - ) - - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "main"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "main"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "main"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "main"), - ("instance_2", "", "", "unknown", "replica"), - ] - - expected_replicas = [ - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - main_cursor = connect(host="localhost", port=7687).cursor() - - def check_main(): - return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, 
check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) +# @pytest.mark.parametrize( +# "kill_instance", +# [True, False], +# ) +# def test_unregister_replicas(kill_instance): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# main_cursor = connect(host="localhost", port=7689).cursor() +# +# def check_main(): +# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# expected_replicas = [ +# ( +# "instance_1", +# "127.0.0.1:10001", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# 
mg_sleep_and_assert(expected_replicas, check_main) +# +# if kill_instance: +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# expected_replicas = [ +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) +# +# if kill_instance: +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# expected_replicas = [] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) +# +# +# def test_unregister_main(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 
'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# +# try: +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") +# except Exception as e: +# assert ( +# str(e) +# == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!" 
+# ) +# +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "main"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "main"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "main"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "main"), +# ("instance_2", "", "", "unknown", "replica"), +# ] +# +# expected_replicas = [ +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# main_cursor = connect(host="localhost", port=7687).cursor() +# +# def check_main(): +# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) if __name__ == "__main__": diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index 517bf346f..e61eb4eb8 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -16,7 +16,7 @@ import tempfile import interactive_mg_runner import pytest -from common import add_coordinator, connect, execute_and_fetch_all, safe_execute +from common import connect, execute_and_fetch_all, safe_execute from mg_utils import mg_sleep_and_assert interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -137,11 +137,11 @@ def test_writing_disabled_on_main_restart(): "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator( + 
execute_and_fetch_all(
         coordinator3_cursor,
         "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
     )
-    assert add_coordinator(
+    execute_and_fetch_all(
         coordinator3_cursor,
         "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
     )
diff --git a/tests/e2e/replication/common.hpp b/tests/e2e/replication/common.hpp
index 1938eb0f3..e2ec43978 100644
--- a/tests/e2e/replication/common.hpp
+++ b/tests/e2e/replication/common.hpp
@@ -37,10 +37,9 @@ auto ParseDatabaseEndpoints(const std::string &database_endpoints_str) {
   const auto db_endpoints_strs = memgraph::utils::SplitView(database_endpoints_str, ",");
   std::vector<memgraph::io::network::Endpoint> database_endpoints;
   for (const auto &db_endpoint_str : db_endpoints_strs) {
-    const auto maybe_host_port = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
-    MG_ASSERT(maybe_host_port);
-    auto const [ip, port] = *maybe_host_port;
-    database_endpoints.emplace_back(std::string(ip), port);
+    auto maybe_endpoint = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
+    MG_ASSERT(maybe_endpoint);
+    database_endpoints.emplace_back(std::move(*maybe_endpoint));
   }
   return database_endpoints;
 }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 44b24b6f6..008211af3 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -446,9 +446,16 @@ target_link_libraries(${test_prefix}raft_log_serialization gflags mg-coordinatio
 target_include_directories(${test_prefix}raft_log_serialization PRIVATE ${CMAKE_SOURCE_DIR}/include)
 endif()
 
-# Test Raft log serialization
+# Test CoordinatorClusterState
 if(MG_ENTERPRISE)
 add_unit_test(coordinator_cluster_state.cpp)
 target_link_libraries(${test_prefix}coordinator_cluster_state gflags mg-coordination mg-repl_coord_glue)
 target_include_directories(${test_prefix}coordinator_cluster_state PRIVATE ${CMAKE_SOURCE_DIR}/include)
 endif()
+
+# Test routing table
+if(MG_ENTERPRISE)
+add_unit_test(routing_table.cpp)
+target_link_libraries(${test_prefix}routing_table gflags mg-coordination mg-repl_coord_glue)
+target_include_directories(${test_prefix}routing_table PRIVATE ${CMAKE_SOURCE_DIR}/include)
+endif()
diff --git a/tests/unit/bolt_session.cpp b/tests/unit/bolt_session.cpp
index f0f3ae14c..411e13e3d 100644
--- a/tests/unit/bolt_session.cpp
+++ b/tests/unit/bolt_session.cpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -114,6 +114,14 @@ class TestSession final : public Session { bool Authenticate(const std::string & /*username*/, const std::string & /*password*/) override { return true; } +#ifdef MG_ENTERPRISE + auto Route(std::map const & /*routing*/, + std::vector const & /*bookmarks*/, + std::map const & /*extra*/) -> std::map override { + return {}; + } +#endif + std::optional GetServerNameForInit() override { return std::nullopt; } void Configure(const std::map &) override {} @@ -1027,104 +1035,115 @@ TEST(BoltSession, Noop) { } } -TEST(BoltSession, Route) { - // Memgraph does not support route message, but it handles it - { - SCOPED_TRACE("v1"); - INIT_VARS; +TEST(BoltSession, Route){{SCOPED_TRACE("v1"); +INIT_VARS; - ExecuteHandshake(input_stream, session, output); - ExecuteInit(input_stream, session, output); - ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException); - EXPECT_EQ(session.state_, State::Close); - } - { - SCOPED_TRACE("v4"); - INIT_VARS; +ExecuteHandshake(input_stream, session, output); +ExecuteInit(input_stream, session, output); +ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException); +EXPECT_EQ(session.state_, State::Close); +} +#ifdef MG_ENTERPRISE +{ + SCOPED_TRACE("v4"); + INIT_VARS; - ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp); - ExecuteInit(input_stream, session, output, true); - ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route))); - static constexpr uint8_t expected_resp[] = { - 0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/, - 0x40, - 0xb1 /*TinyStruct1*/, - 0x7f /*Failure*/, - 0xa2 /*TinyMap with 2 items*/, - 0x84 /*TinyString with 4 chars*/, - 'c', - 'o', - 'd', - 'e', - 0x82 /*TinyString with 2 chars*/, - '6', - '6', - 0x87 /*TinyString with 7 chars*/, - 'm', - 'e', - 's', - 's', - 'a', - 'g', - 'e', - 0xd0 /*String*/, - 0x2b /*With 43 chars*/, - 'R', - 'o', - 'u', - 't', - 'e', - ' ', - 'm', - 'e', - 's', - 's', - 'a', - 'g', - 'e', - ' ', - 'i', - 's', - ' ', - 'n', - 'o', - 't', - ' ', - 's', - 'u', - 'p', - 'p', - 'o', - 'r', - 't', - 'e', - 'd', - ' ', - 'i', - 'n', - ' ', - 'M', - 'e', - 'm', - 'g', - 'r', - 'a', - 'p', - 'h', - '!', - 0x00 /*Terminating zeros*/, - 0x00, - }; - EXPECT_EQ(input_stream.size(), 0U); - CheckOutput(output, expected_resp, sizeof(expected_resp)); - EXPECT_EQ(session.state_, State::Error); + ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp); + ExecuteInit(input_stream, session, output, true); + ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route))); - SCOPED_TRACE("Try to reset connection after ROUTE failed"); - ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req))); - EXPECT_EQ(input_stream.size(), 0U); - CheckOutput(output, success_resp, sizeof(success_resp)); - EXPECT_EQ(session.state_, State::Idle); - } + EXPECT_EQ(session.state_, State::Idle); + CheckSuccessMessage(output); +} +#else +{ + SCOPED_TRACE("v4"); + INIT_VARS; + + ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp); + ExecuteInit(input_stream, session, output, true); + ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, 
sizeof(v4_3::route))); + static constexpr uint8_t expected_resp[] = { + 0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/, + 0x40, + 0xb1 /*TinyStruct1*/, + 0x7f /*Failure*/, + 0xa2 /*TinyMap with 2 items*/, + 0x84 /*TinyString with 4 chars*/, + 'c', + 'o', + 'd', + 'e', + 0x82 /*TinyString with 2 chars*/, + '6', + '6', + 0x87 /*TinyString with 7 chars*/, + 'm', + 'e', + 's', + 's', + 'a', + 'g', + 'e', + 0xd0 /*String*/, + 0x2b /*With 43 chars*/, + 'R', + 'o', + 'u', + 't', + 'e', + ' ', + 'm', + 'e', + 's', + 's', + 'a', + 'g', + 'e', + ' ', + 'i', + 's', + ' ', + 'n', + 'o', + 't', + ' ', + 's', + 'u', + 'p', + 'p', + 'o', + 'r', + 't', + 'e', + 'd', + ' ', + 'i', + 'n', + ' ', + 'M', + 'e', + 'm', + 'g', + 'r', + 'a', + 'p', + 'h', + '!', + 0x00 /*Terminating zeros*/, + 0x00, + }; + EXPECT_EQ(input_stream.size(), 0U); + CheckOutput(output, expected_resp, sizeof(expected_resp)); + EXPECT_EQ(session.state_, State::Error); + + SCOPED_TRACE("Try to reset connection after ROUTE failed"); + ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req))); + EXPECT_EQ(input_stream.size(), 0U); + CheckOutput(output, success_resp, sizeof(success_resp)); + EXPECT_EQ(session.state_, State::Idle); +} +#endif } TEST(BoltSession, Rollback) { diff --git a/tests/unit/coordinator_cluster_state.cpp b/tests/unit/coordinator_cluster_state.cpp index 8df2797f2..e7ccf2ada 100644 --- a/tests/unit/coordinator_cluster_state.cpp +++ b/tests/unit/coordinator_cluster_state.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. #include "nuraft/coordinator_cluster_state.hpp" +#include "io/network/endpoint.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "replication_coordination_glue/role.hpp" @@ -21,11 +22,12 @@ #include "libnuraft/nuraft.hxx" -using memgraph::coordination::CoordinatorClientConfig; using memgraph::coordination::CoordinatorClusterState; using memgraph::coordination::CoordinatorStateMachine; -using memgraph::coordination::InstanceState; +using memgraph::coordination::CoordinatorToReplicaConfig; using memgraph::coordination::RaftLogAction; +using memgraph::coordination::ReplicationInstanceState; +using memgraph::io::network::Endpoint; using memgraph::replication_coordination_glue::ReplicationMode; using memgraph::replication_coordination_glue::ReplicationRole; using nuraft::buffer; @@ -42,20 +44,22 @@ class CoordinatorClusterStateTest : public ::testing::Test { "MG_tests_unit_coordinator_cluster_state"}; }; -TEST_F(CoordinatorClusterStateTest, InstanceStateSerialization) { - InstanceState instance_state{ - CoordinatorClientConfig{"instance3", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, - .ssl = std::nullopt}, +TEST_F(CoordinatorClusterStateTest, ReplicationInstanceStateSerialization) { + ReplicationInstanceState instance_state{ + CoordinatorToReplicaConfig{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = {.instance_name = "instance_name", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, + .ssl = std::nullopt}, ReplicationRole::MAIN}; nlohmann::json j = instance_state; - InstanceState 
deserialized_instance_state = j.get(); + ReplicationInstanceState deserialized_instance_state = j.get(); EXPECT_EQ(instance_state.config, deserialized_instance_state.config); EXPECT_EQ(instance_state.status, deserialized_instance_state.status); @@ -65,13 +69,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { auto coordinator_cluster_state = memgraph::coordination::CoordinatorClusterState{}; { - CoordinatorClientConfig config{"instance1", - "127.0.0.1", - 10111, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance1", + .mgt_server = Endpoint{"127.0.0.1", 10111}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = {.instance_name = "instance1", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -80,13 +87,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance2", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10002}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance2", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .bolt_server = Endpoint{"127.0.0.1", 7688}, + .replication_client_info = {.instance_name = "instance2", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10002}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -95,13 +105,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance3", - "127.0.0.1", - 10113, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10003}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10113}, + .bolt_server = Endpoint{"127.0.0.1", 7689}, + .replication_client_info = {.instance_name = "instance3", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10003}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -110,13 +123,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance4", - "127.0.0.1", - 10114, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", 
ReplicationMode::ASYNC, "replication_ip_address", 10004}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance4", + .mgt_server = Endpoint{"127.0.0.1", 10114}, + .bolt_server = Endpoint{"127.0.0.1", 7690}, + .replication_client_info = {.instance_name = "instance4", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10004}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -125,13 +141,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance5", - "127.0.0.1", - 10115, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10005}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance5", + .mgt_server = Endpoint{"127.0.0.1", 10115}, + .bolt_server = Endpoint{"127.0.0.1", 7691}, + .replication_client_info = {.instance_name = "instance5", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10005}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -140,13 +159,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance6", - "127.0.0.1", - 10116, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10006}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance6", + .mgt_server = Endpoint{"127.0.0.1", 10116}, + .bolt_server = Endpoint{"127.0.0.1", 7692}, + .replication_client_info = {.instance_name = "instance6", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10006}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -159,5 +181,6 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.Serialize(data); auto deserialized_coordinator_cluster_state = CoordinatorClusterState::Deserialize(*data); - ASSERT_EQ(coordinator_cluster_state.GetInstances(), deserialized_coordinator_cluster_state.GetInstances()); + ASSERT_EQ(coordinator_cluster_state.GetReplicationInstances(), + deserialized_coordinator_cluster_state.GetReplicationInstances()); } diff --git a/tests/unit/raft_log_serialization.cpp b/tests/unit/raft_log_serialization.cpp index 8550cf5b8..bda690855 100644 --- a/tests/unit/raft_log_serialization.cpp +++ b/tests/unit/raft_log_serialization.cpp @@ -9,7 +9,8 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
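For orientation before the serialization changes below: both this file and coordinator_cluster_state.cpp round-trip the renamed config structs through nlohmann::json. The serialized form of a CoordinatorToReplicaConfig presumably looks roughly like the following sketch; the keys mirror the designated initializers used in the tests, but the exact key names and the encodings of Endpoint, ReplicationMode, and the std::chrono durations are assumptions, not something this patch pins down:

{
  "instance_name": "instance3",
  "mgt_server": {"ip_address": "127.0.0.1", "port": 10112},
  "replication_client_info": {
    "instance_name": "instance_name",
    "replication_mode": "ASYNC",
    "replication_server": {"ip_address": "127.0.0.1", "port": 10001}
  },
  "instance_health_check_frequency_sec": 1,
  "instance_down_timeout_sec": 5,
  "instance_get_uuid_frequency_sec": 10,
  "ssl": null
}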
-#include "coordination/coordinator_config.hpp"
+#include "coordination/coordinator_communication_config.hpp"
+#include "io/network/endpoint.hpp"
 #include "nuraft/coordinator_state_machine.hpp"
 #include "nuraft/raft_log_action.hpp"
 #include "utils/file.hpp"
@@ -19,10 +20,11 @@
 #include <gtest/gtest.h>
 #include "json/json.hpp"
 
-using memgraph::coordination::CoordinatorClientConfig;
 using memgraph::coordination::CoordinatorStateMachine;
+using memgraph::coordination::CoordinatorToReplicaConfig;
 using memgraph::coordination::RaftLogAction;
-using memgraph::coordination::ReplClientInfo;
+using memgraph::coordination::ReplicationClientInfo;
+using memgraph::io::network::Endpoint;
 using memgraph::replication_coordination_glue::ReplicationMode;
 using memgraph::utils::UUID;
@@ -36,26 +38,29 @@ class RaftLogSerialization : public ::testing::Test {
 };
 
 TEST_F(RaftLogSerialization, ReplClientInfo) {
-  ReplClientInfo info{"instance_name", ReplicationMode::SYNC, "127.0.0.1", 10111};
+  ReplicationClientInfo info{.instance_name = "instance_name",
+                             .replication_mode = ReplicationMode::SYNC,
+                             .replication_server = Endpoint{"127.0.0.1", 10111}};
 
   nlohmann::json j = info;
-  ReplClientInfo info2 = j.get<ReplClientInfo>();
+  ReplicationClientInfo info2 = j.get<ReplicationClientInfo>();
 
   ASSERT_EQ(info, info2);
 }
 
-TEST_F(RaftLogSerialization, CoordinatorClientConfig) {
-  CoordinatorClientConfig config{"instance3",
-                                 "127.0.0.1",
-                                 10112,
-                                 std::chrono::seconds{1},
-                                 std::chrono::seconds{5},
-                                 std::chrono::seconds{10},
-                                 {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
-                                 .ssl = std::nullopt};
+TEST_F(RaftLogSerialization, CoordinatorToReplicaConfig) {
+  CoordinatorToReplicaConfig config{.instance_name = "instance3",
+                                    .mgt_server = Endpoint{"127.0.0.1", 10112},
+                                    .replication_client_info = {.instance_name = "instance_name",
+                                                                .replication_mode = ReplicationMode::ASYNC,
+                                                                .replication_server = Endpoint{"127.0.0.1", 10001}},
+                                    .instance_health_check_frequency_sec = std::chrono::seconds{1},
+                                    .instance_down_timeout_sec = std::chrono::seconds{5},
+                                    .instance_get_uuid_frequency_sec = std::chrono::seconds{10},
+                                    .ssl = std::nullopt};
 
   nlohmann::json j = config;
-  CoordinatorClientConfig config2 = j.get<CoordinatorClientConfig>();
+  CoordinatorToReplicaConfig config2 = j.get<CoordinatorToReplicaConfig>();
 
   ASSERT_EQ(config, config2);
 }
@@ -106,19 +111,20 @@ TEST_F(RaftLogSerialization, RaftLogActionUpdateUUID) {
 }
 
 TEST_F(RaftLogSerialization, RegisterInstance) {
-  CoordinatorClientConfig config{"instance3",
-                                 "127.0.0.1",
-                                 10112,
-                                 std::chrono::seconds{1},
-                                 std::chrono::seconds{5},
-                                 std::chrono::seconds{10},
-                                 {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
-                                 .ssl = std::nullopt};
+  CoordinatorToReplicaConfig config{.instance_name = "instance3",
+                                    .mgt_server = Endpoint{"127.0.0.1", 10112},
+                                    .replication_client_info = {.instance_name = "instance_name",
+                                                                .replication_mode = ReplicationMode::ASYNC,
+                                                                .replication_server = Endpoint{"127.0.0.1", 10001}},
+                                    .instance_health_check_frequency_sec = std::chrono::seconds{1},
+                                    .instance_down_timeout_sec = std::chrono::seconds{5},
+                                    .instance_get_uuid_frequency_sec = std::chrono::seconds{10},
+                                    .ssl = std::nullopt};
 
   auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
   auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer);
   ASSERT_EQ(action, RaftLogAction::REGISTER_REPLICATION_INSTANCE);
-  ASSERT_EQ(config, std::get<CoordinatorClientConfig>(payload));
+  ASSERT_EQ(config, std::get<CoordinatorToReplicaConfig>(payload));
 }
 
 TEST_F(RaftLogSerialization, UnregisterInstance) {
diff --git a/tests/unit/routing_table.cpp b/tests/unit/routing_table.cpp
new file mode 100644
index 000000000..42815d461
--- /dev/null
+++ b/tests/unit/routing_table.cpp
@@ -0,0 +1,176 @@
+// Copyright 2024 Memgraph Ltd.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+// License, and you may not use this file except in compliance with the Business Source License.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+#include "auth/auth.hpp"
+#include "coordination/coordinator_instance.hpp"
+#include "flags/run_time_configurable.hpp"
+#include "interpreter_faker.hpp"
+#include "io/network/endpoint.hpp"
+#include "license/license.hpp"
+#include "replication_handler/replication_handler.hpp"
+#include "storage/v2/config.hpp"
+
+#include "utils/file.hpp"
+
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+#include "json/json.hpp"
+
+using memgraph::coordination::CoordinatorInstance;
+using memgraph::coordination::CoordinatorToCoordinatorConfig;
+using memgraph::coordination::CoordinatorToReplicaConfig;
+using memgraph::coordination::RaftState;
+using memgraph::coordination::ReplicationClientInfo;
+using memgraph::io::network::Endpoint;
+using memgraph::replication::ReplicationHandler;
+using memgraph::replication_coordination_glue::ReplicationMode;
+using memgraph::storage::Config;
+
+// class MockCoordinatorInstance : CoordinatorInstance {
+//   auto AddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> void override {}
+// };
+
+class RoutingTableTest : public ::testing::Test {
+ protected:
+  std::filesystem::path main_data_directory{std::filesystem::temp_directory_path() /
+                                            "MG_tests_unit_coordinator_cluster_state"};
+  std::filesystem::path repl1_data_directory{std::filesystem::temp_directory_path() /
+                                             "MG_test_unit_storage_v2_replication_repl"};
+  std::filesystem::path repl2_data_directory{std::filesystem::temp_directory_path() /
+                                             "MG_test_unit_storage_v2_replication_repl2"};
+  void SetUp() override { Clear(); }
+
+  void TearDown() override { Clear(); }
+
+  Config main_conf = [&] {
+    Config config{
+        .durability =
+            {
+                .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
+            },
+        .salient.items = {.properties_on_edges = true},
+    };
+    UpdatePaths(config, main_data_directory);
+    return config;
+  }();
+  Config repl1_conf = [&] {
+    Config config{
+        .durability =
+            {
+                .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
+            },
+        .salient.items = {.properties_on_edges = true},
+    };
+    UpdatePaths(config, repl1_data_directory);
+    return config;
+  }();
+  Config repl2_conf = [&] {
+    Config config{
+        .durability =
+            {
+                .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
+            },
+        .salient.items = {.properties_on_edges = true},
+    };
+    UpdatePaths(config, repl2_data_directory);
+    return config;
+  }();
+
+  const std::string local_host = ("127.0.0.1");
+  const std::array<uint16_t, 2> ports{10000, 20000};
+  const std::array<std::string, 2> replicas = {"REPLICA1", "REPLICA2"};
+
+ private:
+  void Clear() {
+    if (std::filesystem::exists(main_data_directory)) std::filesystem::remove_all(main_data_directory);
+    if (std::filesystem::exists(repl1_data_directory)) std::filesystem::remove_all(repl1_data_directory);
+    if (std::filesystem::exists(repl2_data_directory)) std::filesystem::remove_all(repl2_data_directory);
+ } +}; + +struct MinMemgraph { + MinMemgraph(const memgraph::storage::Config &conf) + : auth{conf.durability.storage_directory / "auth", memgraph::auth::Auth::Config{/* default */}}, + repl_state{ReplicationStateRootPath(conf)}, + dbms{conf, repl_state +#ifdef MG_ENTERPRISE + , + auth, true +#endif + }, + db_acc{dbms.Get()}, + db{*db_acc.get()}, + repl_handler(repl_state, dbms +#ifdef MG_ENTERPRISE + , + system_, auth +#endif + ) { + } + memgraph::auth::SynchedAuth auth; + memgraph::system::System system_; + memgraph::replication::ReplicationState repl_state; + memgraph::dbms::DbmsHandler dbms; + memgraph::dbms::DatabaseAccess db_acc; + memgraph::dbms::Database &db; + ReplicationHandler repl_handler; +}; +; + +TEST_F(RoutingTableTest, GetSingleRouterRoutingTable) { + CoordinatorInstance instance1; + auto routing = std::map{{"address", "localhost:7688"}}; + auto routing_table = instance1.GetRoutingTable(routing); + + ASSERT_EQ(routing_table.size(), 1); + + auto const routers = routing_table[0]; + ASSERT_EQ(routers.first, std::vector{"localhost:7688"}); + ASSERT_EQ(routers.second, "ROUTE"); +} + +TEST_F(RoutingTableTest, GetMixedRoutingTable) { + auto instance1 = RaftState::MakeRaftState([]() {}, []() {}); + auto routing = std::map{{"address", "localhost:7690"}}; + instance1.AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig{ + .instance_name = "instance2", + .mgt_server = Endpoint{"127.0.0.1", 10011}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = ReplicationClientInfo{.instance_name = "instance2", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}}); + instance1.GetAllCoordinators(); + // auto routing_table = instance1.GetRoutingTable(routing); + + // ASSERT_EQ(routing_table.size(), 1); + // auto const routers = routing_table[0]; + // ASSERT_EQ(routers.second, "ROUTE"); +} + +// TEST_F(RoutingTableTest, GetMultipleRoutersRoutingTable) { +// +// CoordinatorInstance instance1; +// instance1.AddCoordinatorInstance(CoordinatorToCoordinatorConfig{.coordinator_server_id = 1, +// .bolt_server = Endpoint{"127.0.0.1", 7689}, +// .coordinator_server = Endpoint{"127.0.0.1", +// 10111}}); +// +// auto routing = std::map{{"address", "localhost:7688"}}; +// auto routing_table = instance1.GetRoutingTable(routing); +// +// ASSERT_EQ(routing_table.size(), 1); +// +// auto const routers = routing_table[0]; +// ASSERT_EQ(routers.second, "ROUTE"); +// ASSERT_EQ(routers.first.size(), 2); +// auto const expected_routers = std::vector{"localhost:7689", "localhost:7688"}; +// ASSERT_EQ(routers.first, expected_routers); +// } diff --git a/tests/unit/slk_advanced.cpp b/tests/unit/slk_advanced.cpp index f41946388..46254746a 100644 --- a/tests/unit/slk_advanced.cpp +++ b/tests/unit/slk_advanced.cpp @@ -11,8 +11,9 @@ #include -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_slk.hpp" +#include "io/network/endpoint.hpp" #include "replication/config.hpp" #include "replication_coordination_glue/mode.hpp" #include "slk_common.hpp" @@ -20,6 +21,8 @@ #include "storage/v2/replication/slk.hpp" #include "storage/v2/temporal.hpp" +using memgraph::io::network::Endpoint; + TEST(SlkAdvanced, PropertyValueList) { std::vector original{ memgraph::storage::PropertyValue("hello world!"), @@ -119,24 +122,19 @@ TEST(SlkAdvanced, PropertyValueComplex) { } TEST(SlkAdvanced, ReplicationClientConfigs) { - using ReplicationClientInfo = 
+  using ReplicationClientInfo = memgraph::coordination::ReplicationClientInfo;
   using ReplicationClientInfoVec = std::vector<ReplicationClientInfo>;
   using ReplicationMode = memgraph::replication_coordination_glue::ReplicationMode;
 
   ReplicationClientInfoVec original{ReplicationClientInfo{.instance_name = "replica1",
                                                           .replication_mode = ReplicationMode::SYNC,
-                                                          .replication_ip_address = "127.0.0.1",
-                                                          .replication_port = 10000},
+                                                          .replication_server = Endpoint{"127.0.0.1", 10000}},
                                     ReplicationClientInfo{.instance_name = "replica2",
                                                           .replication_mode = ReplicationMode::ASYNC,
-                                                          .replication_ip_address = "127.0.1.1",
-                                                          .replication_port = 10010},
-                                    ReplicationClientInfo{
-                                        .instance_name = "replica3",
-                                        .replication_mode = ReplicationMode::ASYNC,
-                                        .replication_ip_address = "127.1.1.1",
-                                        .replication_port = 1110,
-                                    }};
+                                                          .replication_server = Endpoint{"127.0.0.1", 10010}},
+                                    ReplicationClientInfo{.instance_name = "replica3",
+                                                          .replication_mode = ReplicationMode::ASYNC,
+                                                          .replication_server = Endpoint{"127.0.0.1", 10011}}};
 
   memgraph::slk::Loopback loopback;
   auto builder = loopback.GetBuilder();
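The routing-table entries exercised above pair a list of socket addresses with a role string such as "ROUTE"; this is what a Bolt client receives when it asks the coordinator for a routing table. On the driver side the feature is opted into by connecting with the neo4j:// URI scheme instead of bolt://, after which the driver fetches the routing table before dispatching queries. A minimal sketch with the Neo4j Python driver, assuming a coordinator listening for Bolt on localhost:7690 and authentication disabled:

    from neo4j import GraphDatabase

    # neo4j:// (rather than bolt://) makes the driver fetch the routing
    # table from the coordinator before dispatching any queries.
    URI = "neo4j://localhost:7690"

    def create_node(tx):
        tx.run("CREATE (:Node {name: $name})", name="n1").consume()

    def count_nodes(tx):
        return tx.run("MATCH (n:Node) RETURN count(n) AS cnt").single()["cnt"]

    with GraphDatabase.driver(URI, auth=("", "")) as driver:
        with driver.session() as session:
            session.execute_write(create_node)        # routed to the writer (MAIN)
            print(session.execute_read(count_nodes))  # may be routed to a replica

With a plain bolt:// URI the driver talks only to the single address it was given and the routing table is never consulted.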
From 0913e951678260a0ede3ef8256085336998aba67 Mon Sep 17 00:00:00 2001
From: Andi
Date: Thu, 21 Mar 2024 10:12:28 +0100
Subject: [PATCH 2/5] Rename HA startup flags (#1820)

---
 src/coordination/coordinator_state.cpp        |  6 +-
 .../include/coordination/raft_state.hpp       |  4 +-
 src/coordination/raft_state.cpp               | 14 ++--
 src/flags/replication.cpp                     |  6 +-
 src/flags/replication.hpp                     |  6 +-
 src/memgraph.cpp                              |  2 +-
 src/query/interpreter.cpp                     | 32 ++++----
 src/query/interpreter.hpp                     |  2 +-
 src/replication/state.cpp                     |  2 +-
 .../replication_handler.hpp                   |  2 +-
 src/storage/v2/config.hpp                     |  2 +-
 .../v2/replication/replication_client.cpp     |  2 +-
 tests/drivers/run_cluster.sh                  | 18 ++--
 tests/e2e/configuration/default_config.py     |  6 +-
 .../coord_cluster_registration.py             | 18 ++--
 .../disable_writing_on_main_after_restart.py  | 18 ++--
 .../high_availability/distributed_coords.py   | 82 +++++++++----------
 .../manual_setting_replicas.py                |  2 +-
 .../not_replicate_from_old_main.py            | 14 ++--
 .../high_availability/single_coordinator.py   | 56 ++++++-------
 tests/e2e/high_availability/workloads.yaml    |  8 +-
 21 files changed, 151 insertions(+), 151 deletions(-)

diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp
index 149a9cb97..0d6ce17c4 100644
--- a/src/coordination/coordinator_state.cpp
+++ b/src/coordination/coordinator_state.cpp
@@ -25,15 +25,15 @@
 namespace memgraph::coordination {
 
 CoordinatorState::CoordinatorState() {
-  MG_ASSERT(!(FLAGS_raft_server_id && FLAGS_coordinator_server_port),
+  MG_ASSERT(!(FLAGS_coordinator_id && FLAGS_management_port),
             "Instance cannot be a coordinator and have registered coordinator server.");
 
   spdlog::info("Executing coordinator constructor");
-  if (FLAGS_coordinator_server_port) {
+  if (FLAGS_management_port) {
     spdlog::info("Coordinator server port set");
     auto const config = ManagementServerConfig{
         .ip_address = kDefaultReplicationServerIp,
-        .port = static_cast<uint16_t>(FLAGS_coordinator_server_port),
+        .port = static_cast<uint16_t>(FLAGS_management_port),
     };
 
     spdlog::info("Executing coordinator constructor main replica");
diff --git a/src/coordination/include/coordination/raft_state.hpp b/src/coordination/include/coordination/raft_state.hpp
index 6e322ab78..c4958a5ba 100644
--- a/src/coordination/include/coordination/raft_state.hpp
+++ b/src/coordination/include/coordination/raft_state.hpp
@@ -40,7 +40,7 @@ using raft_result = nuraft::cmd_result<ptr<buffer>>;
 
 class RaftState {
  private:
-  explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
+  explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id,
                      uint32_t raft_port, std::string raft_address);
 
   auto InitRaftServer() -> void;
@@ -84,7 +84,7 @@ class RaftState {
  private:
   // TODO: (andi) I think variables below can be abstracted/clean them.
   io::network::Endpoint raft_endpoint_;
-  uint32_t raft_server_id_;
+  uint32_t coordinator_id_;
 
   ptr<CoordinatorStateMachine> state_machine_;
   ptr<CoordinatorStateManager> state_manager_;
diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp
index 6175fda4b..3c1cbd158 100644
--- a/src/coordination/raft_state.cpp
+++ b/src/coordination/raft_state.cpp
@@ -31,12 +31,12 @@ using nuraft::raft_server;
 using nuraft::srv_config;
 using raft_result = cmd_result<ptr<buffer>>;
 
-RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
+RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id,
                      uint32_t raft_port, std::string raft_address)
     : raft_endpoint_(raft_address, raft_port),
-      raft_server_id_(raft_server_id),
+      coordinator_id_(coordinator_id),
       state_machine_(cs_new<CoordinatorStateMachine>()),
-      state_manager_(cs_new<CoordinatorStateManager>(raft_server_id_, raft_endpoint_.SocketAddress())),
+      state_manager_(cs_new<CoordinatorStateManager>(coordinator_id_, raft_endpoint_.SocketAddress())),
       logger_(nullptr),
       become_leader_cb_(std::move(become_leader_cb)),
       become_follower_cb_(std::move(become_follower_cb)) {}
@@ -95,11 +95,11 @@ auto RaftState::InitRaftServer() -> void {
 }
 
 auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState {
-  uint32_t raft_server_id = FLAGS_raft_server_id;
-  uint32_t raft_port = FLAGS_raft_server_port;
+  uint32_t coordinator_id = FLAGS_coordinator_id;
+  uint32_t raft_port = FLAGS_coordinator_port;
 
   auto raft_state =
-      RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_server_id, raft_port, "127.0.0.1");
+      RaftState(std::move(become_leader_cb), std::move(become_follower_cb), coordinator_id, raft_port, "127.0.0.1");
 
   raft_state.InitRaftServer();
   return raft_state;
@@ -108,7 +108,7 @@ auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerC
 RaftState::~RaftState() { launcher_.shutdown(); }
 
 auto RaftState::InstanceName() const -> std::string {
-  return fmt::format("coordinator_{}", std::to_string(raft_server_id_));
+  return fmt::format("coordinator_{}", std::to_string(coordinator_id_));
 }
 
 auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); }
diff --git a/src/flags/replication.cpp b/src/flags/replication.cpp
index e6b71b942..3f8fd2400 100644
--- a/src/flags/replication.cpp
+++ b/src/flags/replication.cpp
@@ -13,11 +13,11 @@
 #ifdef MG_ENTERPRISE
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-DEFINE_uint32(coordinator_server_port, 0, "Port on which coordinator servers will be started.");
+DEFINE_uint32(management_port, 0, "Port on which coordinator servers will be started.");
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-DEFINE_uint32(raft_server_port, 0, "Port on which raft servers will be started.");
+DEFINE_uint32(coordinator_port, 0, "Port on which raft servers will be started.");
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-DEFINE_uint32(raft_server_id, 0, "Unique ID of the raft server.");
+DEFINE_uint32(coordinator_id, 0, "Unique ID of the raft server."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_uint32(instance_down_timeout_sec, 5, "Time duration after which an instance is considered down."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) diff --git a/src/flags/replication.hpp b/src/flags/replication.hpp index 0a4982f12..e0d1aff8c 100644 --- a/src/flags/replication.hpp +++ b/src/flags/replication.hpp @@ -15,11 +15,11 @@ #ifdef MG_ENTERPRISE // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(coordinator_server_port); +DECLARE_uint32(management_port); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(raft_server_port); +DECLARE_uint32(coordinator_port); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(raft_server_id); +DECLARE_uint32(coordinator_id); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_uint32(instance_down_timeout_sec); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 9bf50131d..107cccb59 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -429,7 +429,7 @@ int main(int argc, char **argv) { #ifdef MG_ENTERPRISE // MAIN or REPLICA instance - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { memgraph::dbms::CoordinatorHandlers::Register(coordinator_state.GetCoordinatorServer(), replication_handler); MG_ASSERT(coordinator_state.GetCoordinatorServer().Start(), "Failed to start coordinator server!"); } diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 2fba0addb..87eccca87 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -474,7 +474,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server, std::string_view coordinator_server) -> void override { auto const maybe_coordinator_server = io::network::Endpoint::ParseSocketOrAddress(coordinator_server); if (!maybe_coordinator_server) { @@ -487,7 +487,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } auto const coord_coord_config = - coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = raft_server_id, + coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = coordinator_id, .bolt_server = *maybe_bolt_server, .coordinator_server = *maybe_coordinator_server}; @@ -942,10 +942,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & switch (repl_query->action_) { case ReplicationQuery::Action::SET_REPLICATION_ROLE: { #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id) { + if (FLAGS_coordinator_id) { throw QueryRuntimeException("Coordinator can't set roles!"); } - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { throw QueryRuntimeException("Can't set role manually on instance with coordinator server port."); } #endif @@ -972,7 +972,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: { #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id) { + if (FLAGS_coordinator_id) { throw QueryRuntimeException("Coordinator doesn't have a replication role!"); } #endif @@ -993,7 +993,7 @@ Callback HandleReplicationQuery(ReplicationQuery 
*repl_query, const Parameters &
 }
     case ReplicationQuery::Action::REGISTER_REPLICA: {
 #ifdef MG_ENTERPRISE
-      if (FLAGS_coordinator_server_port) {
+      if (FLAGS_management_port) {
         throw QueryRuntimeException("Can't register replica manually on instance with coordinator server port.");
       }
 #endif
@@ -1014,7 +1014,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
 
     case ReplicationQuery::Action::DROP_REPLICA: {
 #ifdef MG_ENTERPRISE
-      if (FLAGS_coordinator_server_port) {
+      if (FLAGS_management_port) {
         throw QueryRuntimeException("Can't drop replica manually on instance with coordinator server port.");
       }
 #endif
@@ -1029,7 +1029,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
     }
     case ReplicationQuery::Action::SHOW_REPLICAS: {
 #ifdef MG_ENTERPRISE
-      if (FLAGS_raft_server_id) {
+      if (FLAGS_coordinator_id) {
         throw QueryRuntimeException("Coordinator cannot call SHOW REPLICAS! Use SHOW INSTANCES instead.");
       }
 #endif
@@ -1176,7 +1176,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
   Callback callback;
   switch (coordinator_query->action_) {
     case CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE: {
-      if (!FLAGS_raft_server_id) {
+      if (!FLAGS_coordinator_id) {
        throw QueryRuntimeException("Only coordinator can add coordinator instance!");
       }
 
@@ -1220,7 +1220,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
       return callback;
     }
     case CoordinatorQuery::Action::REGISTER_INSTANCE: {
-      if (!FLAGS_raft_server_id) {
+      if (!FLAGS_coordinator_id) {
         throw QueryRuntimeException("Only coordinator can register coordinator server!");
       }
       // TODO: MemoryResource for EvaluationContext, it should probably be passed as
@@ -1273,7 +1273,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
       return callback;
     }
     case CoordinatorQuery::Action::UNREGISTER_INSTANCE:
-      if (!FLAGS_raft_server_id) {
+      if (!FLAGS_coordinator_id) {
         throw QueryRuntimeException("Only coordinator can register coordinator server!");
       }
       callback.fn = [handler = CoordQueryHandler{*coordinator_state},
@@ -1288,7 +1288,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
 
       return callback;
     case CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN: {
-      if (!FLAGS_raft_server_id) {
+      if (!FLAGS_coordinator_id) {
         throw QueryRuntimeException("Only coordinator can register coordinator server!");
       }
       // TODO: MemoryResource for EvaluationContext, it should probably be passed as
@@ -1305,7 +1305,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
       return callback;
     }
     case CoordinatorQuery::Action::SHOW_INSTANCES: {
-      if (!FLAGS_raft_server_id) {
+      if (!FLAGS_coordinator_id) {
         throw QueryRuntimeException("Only coordinator can run SHOW INSTANCES.");
       }
 
@@ -4281,7 +4281,7 @@ void Interpreter::RollbackTransaction() {
 #ifdef MG_ENTERPRISE
 auto Interpreter::Route(std::map<std::string, std::string> const &routing) -> RouteResult {
   // TODO: (andi) Test
-  if (!FLAGS_raft_server_id) {
+  if (!FLAGS_coordinator_id) {
     auto const &address = routing.find("address");
     if (address == routing.end()) {
       throw QueryException("Routing table must contain address field.");
     }
@@ -4417,7 +4417,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
   }
 
 #ifdef MG_ENTERPRISE
-  if (FLAGS_raft_server_id && !utils::Downcast<CoordinatorQuery>(parsed_query.query) &&
+  if (FLAGS_coordinator_id && !utils::Downcast<CoordinatorQuery>(parsed_query.query) &&
       !utils::Downcast<SettingQuery>(parsed_query.query)) {
     throw QueryRuntimeException("Coordinator can run only coordinator queries!");
QueryRuntimeException("Coordinator can run only coordinator queries!"); } @@ -4548,7 +4548,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, throw QueryException("Write query forbidden on the replica!"); } #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port && !interpreter_context_->repl_state->IsMainWriteable()) { + if (FLAGS_management_port && !interpreter_context_->repl_state->IsMainWriteable()) { query_execution = nullptr; throw QueryException( "Write query forbidden on the main! Coordinator needs to enable writing on main by sending RPC message."); diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 5d10a24de..b6cb869a4 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -160,7 +160,7 @@ class CoordinatorQueryHandler { virtual std::vector ShowInstances() const = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + virtual auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server, std::string_view coordinator_server) -> void = 0; }; #endif diff --git a/src/replication/state.cpp b/src/replication/state.cpp index 1155fdb51..2e00670ec 100644 --- a/src/replication/state.cpp +++ b/src/replication/state.cpp @@ -56,7 +56,7 @@ ReplicationState::ReplicationState(std::optional durabili } auto replication_data = std::move(fetched_replication_data).GetValue(); #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port && std::holds_alternative(replication_data)) { + if (FLAGS_management_port && std::holds_alternative(replication_data)) { spdlog::trace("Restarted replication uuid for replica"); std::get(replication_data).uuid_.reset(); } diff --git a/src/replication_handler/include/replication_handler/replication_handler.hpp b/src/replication_handler/include/replication_handler/replication_handler.hpp index e1da19bfa..452ccce19 100644 --- a/src/replication_handler/include/replication_handler/replication_handler.hpp +++ b/src/replication_handler/include/replication_handler/replication_handler.hpp @@ -213,7 +213,7 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { // We force sync replicas in other situation if (state == storage::replication::ReplicaState::DIVERGED_FROM_MAIN) { #ifdef MG_ENTERPRISE - return FLAGS_coordinator_server_port != 0; + return FLAGS_management_port != 0; #else return false; #endif diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index 419f29b85..2d06ffe0d 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -132,7 +132,7 @@ struct Config { inline auto ReplicationStateRootPath(memgraph::storage::Config const &config) -> std::optional { if (!config.durability.restore_replication_state_on_startup #ifdef MG_ENTERPRISE - && !FLAGS_coordinator_server_port + && !FLAGS_management_port #endif ) { spdlog::warn( diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index ee1394fdb..008d4b619 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -92,7 +92,7 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce client_name, client_name, client_name); }; #ifdef MG_ENTERPRISE - if (!FLAGS_coordinator_server_port) { + if (!FLAGS_management_port) { log_error(); return; } diff --git a/tests/drivers/run_cluster.sh b/tests/drivers/run_cluster.sh index 
b5f75f2ef..6931c082b 100755 --- a/tests/drivers/run_cluster.sh +++ b/tests/drivers/run_cluster.sh @@ -35,7 +35,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance1.log \ --also-log-to-stderr \ - --coordinator-server-port=10011 \ + --management-port=10011 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_1=$! @@ -51,7 +51,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance2.log \ --also-log-to-stderr \ - --coordinator-server-port=10012 \ + --management-port=10012 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_2=$! @@ -67,7 +67,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance3.log \ --also-log-to-stderr \ - --coordinator-server-port=10013 \ + --management-port=10013 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_3=$! @@ -84,8 +84,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator1.log \ --also-log-to-stderr \ - --raft-server-id=1 \ - --raft-server-port=10111 \ + --coordinator-id=1 \ + --coordinator-port=10111 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_1=$! @@ -101,8 +101,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator2.log \ --also-log-to-stderr \ - --raft-server-id=2 \ - --raft-server-port=10112 \ + --coordinator-id=2 \ + --coordinator-port=10112 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_2=$! @@ -118,8 +118,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator3.log \ --also-log-to-stderr \ - --raft-server-id=3 \ - --raft-server-port=10113 \ + --coordinator-id=3 \ + --coordinator-port=10113 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_3=$! 
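Once run_cluster.sh has all six processes up, the cluster topology still has to be registered through one of the coordinators: the data instances are addressed by the management ports configured above, and the coordinator itself is reached over its own Bolt port. A minimal sketch, again with the Neo4j Python driver; the port pairings and exact query strings follow the e2e setup queries used elsewhere in this patch and should be treated as illustrative rather than canonical:

    from neo4j import GraphDatabase

    # Coordinator queries go over a direct bolt:// connection to the coordinator.
    COORD_URI = "bolt://localhost:7690"

    SETUP_QUERIES = [
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', "
        "'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
        # ... instance_2 and instance_3 follow the same pattern with their own ports ...
        "SET INSTANCE instance_3 TO MAIN;",
    ]

    with GraphDatabase.driver(COORD_URI, auth=("", "")) as driver:
        with driver.session() as session:
            for query in SETUP_QUERIES:
                session.run(query).consume()
            # The coordinator reports each instance's role and health.
            for record in session.run("SHOW INSTANCES;"):
                print(record)

This is also where the flag rename shows up for users: --management-port on a data instance is what the management_server field of REGISTER INSTANCE must match, while --coordinator-id and --coordinator-port identify the coordinator inside the Raft cluster.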
diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 11435da65..d2ba5c279 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -59,9 +59,9 @@ startup_config_dict = { "Time in seconds after which inactive Bolt sessions will be closed.", ), "cartesian_product_enabled": ("true", "true", "Enable cartesian product expansion."), - "coordinator_server_port": ("0", "0", "Port on which coordinator servers will be started."), - "raft_server_port": ("0", "0", "Port on which raft servers will be started."), - "raft_server_id": ("0", "0", "Unique ID of the raft server."), + "management_port": ("0", "0", "Port on which coordinator servers will be started."), + "coordinator_port": ("0", "0", "Port on which raft servers will be started."), + "coordinator_id": ("0", "0", "Unique ID of the raft server."), "instance_down_timeout_sec": ("5", "5", "Time duration after which an instance is considered down."), "instance_health_check_frequency_sec": ("1", "1", "The time duration between two health checks/pings."), "instance_get_uuid_frequency_sec": ("10", "10", "The time duration between two instance uuid checks."), diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 89279b23d..16f91214d 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -50,7 +50,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -64,7 +64,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -77,8 +77,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -89,8 +89,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -101,8 +101,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index e61eb4eb8..66264fe0d 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -55,7 +55,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", 
"--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -74,7 +74,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -92,8 +92,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -104,8 +104,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -116,8 +116,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", "--also-log-to-stderr", ], "log_file": "coordinator3.log", diff --git a/tests/e2e/high_availability/distributed_coords.py b/tests/e2e/high_availability/distributed_coords.py index 59e083545..b863ca519 100644 --- a/tests/e2e/high_availability/distributed_coords.py +++ b/tests/e2e/high_availability/distributed_coords.py @@ -40,7 +40,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -54,7 +54,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -68,7 +68,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -81,8 +81,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -93,8 +93,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -105,8 +105,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [ @@ -130,7 +130,7 @@ def get_instances_description_no_setup(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -144,7 +144,7 @@ def get_instances_description_no_setup(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -158,7 +158,7 @@ def get_instances_description_no_setup(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -171,8 +171,8 @@ def get_instances_description_no_setup(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "data_directory": f"{TEMP_DIR}/coordinator_1", @@ -184,8 +184,8 @@ def 
get_instances_description_no_setup(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "data_directory": f"{TEMP_DIR}/coordinator_2", @@ -197,8 +197,8 @@ def get_instances_description_no_setup(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "data_directory": f"{TEMP_DIR}/coordinator_3", @@ -640,7 +640,7 @@ def test_registering_4_coords(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -654,7 +654,7 @@ def test_registering_4_coords(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -668,7 +668,7 @@ def test_registering_4_coords(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -681,8 +681,8 @@ def test_registering_4_coords(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -693,8 +693,8 @@ def test_registering_4_coords(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -705,8 +705,8 @@ def test_registering_4_coords(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], @@ -717,8 +717,8 @@ def test_registering_4_coords(): "--bolt-port", "7693", "--log-level=TRACE", - "--raft-server-id=4", - "--raft-server-port=10114", + "--coordinator-id=4", + "--coordinator-port=10114", ], "log_file": "coordinator4.log", "setup_queries": [ @@ -775,7 +775,7 @@ def test_registering_coord_log_store(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -789,7 +789,7 @@ def test_registering_coord_log_store(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -803,7 +803,7 @@ def test_registering_coord_log_store(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -816,8 +816,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -828,8 +828,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -840,8 +840,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], @@ -852,8 +852,8 @@ def 
test_registering_coord_log_store(): "--bolt-port", "7693", "--log-level=TRACE", - "--raft-server-id=4", - "--raft-server-port=10114", + "--coordinator-id=4", + "--coordinator-port=10114", ], "log_file": "coordinator4.log", "setup_queries": [ @@ -911,7 +911,7 @@ def test_registering_coord_log_store(): bolt_port = f"--bolt-port={bolt_port_id}" - manag_server_port = f"--coordinator-server-port={manag_port_id}" + manag_server_port = f"--management-port={manag_port_id}" args_desc.append(bolt_port) args_desc.append(manag_server_port) diff --git a/tests/e2e/high_availability/manual_setting_replicas.py b/tests/e2e/high_availability/manual_setting_replicas.py index b0b0965bc..02d0ea4e9 100644 --- a/tests/e2e/high_availability/manual_setting_replicas.py +++ b/tests/e2e/high_availability/manual_setting_replicas.py @@ -31,7 +31,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "main.log", diff --git a/tests/e2e/high_availability/not_replicate_from_old_main.py b/tests/e2e/high_availability/not_replicate_from_old_main.py index d9729f650..3e328a544 100644 --- a/tests/e2e/high_availability/not_replicate_from_old_main.py +++ b/tests/e2e/high_availability/not_replicate_from_old_main.py @@ -153,7 +153,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -167,7 +167,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -180,8 +180,8 @@ def test_not_replicate_old_main_register_new_cluster(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -220,7 +220,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -233,8 +233,8 @@ def test_not_replicate_old_main_register_new_cluster(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10112", + "--coordinator-id=1", + "--coordinator-port=10112", ], "log_file": "coordinator.log", "setup_queries": [], diff --git a/tests/e2e/high_availability/single_coordinator.py b/tests/e2e/high_availability/single_coordinator.py index 1d839b4fc..6582ddfec 100644 --- a/tests/e2e/high_availability/single_coordinator.py +++ b/tests/e2e/high_availability/single_coordinator.py @@ -35,7 +35,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -52,7 +52,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -69,7 +69,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -85,8 +85,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - 
"--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -126,7 +126,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -144,7 +144,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -162,7 +162,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -180,8 +180,8 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -337,7 +337,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -355,7 +355,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -373,7 +373,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -392,7 +392,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -410,8 +410,8 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -624,7 +624,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -642,7 +642,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -660,7 +660,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -679,7 +679,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -697,8 +697,8 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco 
"--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -911,7 +911,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -929,7 +929,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -947,7 +947,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -966,7 +966,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -984,8 +984,8 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ diff --git a/tests/e2e/high_availability/workloads.yaml b/tests/e2e/high_availability/workloads.yaml index aaf76fc6b..9d3bd3126 100644 --- a/tests/e2e/high_availability/workloads.yaml +++ b/tests/e2e/high_availability/workloads.yaml @@ -1,19 +1,19 @@ ha_cluster: &ha_cluster cluster: replica_1: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--management-port=10011"] log_file: "replication-e2e-replica1.log" setup_queries: [] replica_2: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--management-port=10012"] log_file: "replication-e2e-replica2.log" setup_queries: [] main: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--management-port=10013"] log_file: "replication-e2e-main.log" setup_queries: [] coordinator: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--raft-server-id=1", "--raft-server-port=10111"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--coordinator-id=1", "--coordinator-port=10111"] log_file: "replication-e2e-coordinator.log" setup_queries: [ "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", From a3d2474c5b68a5bbef60667caa8c9e4829a37479 Mon Sep 17 00:00:00 2001 From: DavIvek Date: Thu, 21 Mar 2024 11:50:55 +0100 Subject: [PATCH 3/5] Fix timestamps saving on-disk (#1811) --- src/storage/v2/disk/durable_metadata.cpp | 2 +- src/storage/v2/disk/durable_metadata.hpp | 2 +- 
 src/storage/v2/disk/storage.cpp          |  7 +++---
 src/storage/v2/disk/storage.hpp          |  2 ++
 tests/unit/clearing_old_disk_data.cpp    | 32 ++++++++++++++++++++++++
 5 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/storage/v2/disk/durable_metadata.cpp b/src/storage/v2/disk/durable_metadata.cpp
index 13d515af2..c1f44a587 100644
--- a/src/storage/v2/disk/durable_metadata.cpp
+++ b/src/storage/v2/disk/durable_metadata.cpp
@@ -42,7 +42,7 @@ DurableMetadata::DurableMetadata(const Config &config)
 DurableMetadata::DurableMetadata(DurableMetadata &&other) noexcept
     : durability_kvstore_(std::move(other.durability_kvstore_)), config_(std::move(other.config_)) {}
 
-void DurableMetadata::SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) {
+void DurableMetadata::UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) {
   durability_kvstore_.Put(kLastTransactionStartTimeStamp, std::to_string(timestamp));
   durability_kvstore_.Put(kVertexCountDescr, std::to_string(vertex_count));
   durability_kvstore_.Put(kEdgeDountDescr, std::to_string(edge_count));
diff --git a/src/storage/v2/disk/durable_metadata.hpp b/src/storage/v2/disk/durable_metadata.hpp
index 4aaa8a707..06a26ac15 100644
--- a/src/storage/v2/disk/durable_metadata.hpp
+++ b/src/storage/v2/disk/durable_metadata.hpp
@@ -41,7 +41,7 @@ class DurableMetadata {
   std::optional<std::vector<std::string>> LoadExistenceConstraintInfoIfExists() const;
 
   std::optional<std::vector<std::string>> LoadUniqueConstraintInfoIfExists() const;
 
-  void SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count);
+  void UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count);
 
   bool PersistLabelIndexCreation(LabelId label);
diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp
index 4dbd248f7..9aa6613c7 100644
--- a/src/storage/v2/disk/storage.cpp
+++ b/src/storage/v2/disk/storage.cpp
@@ -274,8 +274,8 @@ DiskStorage::DiskStorage(Config config)
 }
 
 DiskStorage::~DiskStorage() {
-  durable_metadata_.SaveBeforeClosingDB(timestamp_, vertex_count_.load(std::memory_order_acquire),
-                                        edge_count_.load(std::memory_order_acquire));
+  durable_metadata_.UpdateMetaData(timestamp_, vertex_count_.load(std::memory_order_acquire),
+                                   edge_count_.load(std::memory_order_acquire));
   logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->vertex_chandle));
   logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->edge_chandle));
   logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->out_edges_chandle));
@@ -1786,7 +1786,8 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Commit(
   if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
     disk_storage->indices_.text_index_.Commit();
   }
-
+  disk_storage->durable_metadata_.UpdateMetaData(disk_storage->timestamp_, disk_storage->vertex_count_,
+                                                 disk_storage->edge_count_);
   is_transaction_active_ = false;
 
   return {};
diff --git a/src/storage/v2/disk/storage.hpp b/src/storage/v2/disk/storage.hpp
index 349a7454a..74f4f4136 100644
--- a/src/storage/v2/disk/storage.hpp
+++ b/src/storage/v2/disk/storage.hpp
@@ -301,6 +301,8 @@ class DiskStorage final : public Storage {
 
   EdgeImportMode GetEdgeImportMode() const;
 
+  DurableMetadata *GetDurableMetadata() { return &durable_metadata_; }
+
  private:
   void LoadPersistingMetadataInfo();
 
diff --git a/tests/unit/clearing_old_disk_data.cpp b/tests/unit/clearing_old_disk_data.cpp
index 395391e12..58682a845 100644
--- a/tests/unit/clearing_old_disk_data.cpp
+++
b/tests/unit/clearing_old_disk_data.cpp @@ -179,3 +179,35 @@ TEST_F(ClearingOldDiskDataTest, TestNumOfEntriesWithEdgeValueUpdate) { ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 5); } + +TEST_F(ClearingOldDiskDataTest, TestTimestampAfterCommit) { + auto *tx_db = disk_storage->GetRocksDBStorage()->db_; + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 0); + + auto acc1 = disk_storage->Access(ReplicationRole::MAIN); + auto vertex1 = acc1->CreateVertex(); + auto label1 = acc1->NameToLabel("DiskLabel"); + auto property1 = acc1->NameToProperty("DiskProperty"); + ASSERT_TRUE(vertex1.AddLabel(label1).HasValue()); + ASSERT_TRUE(vertex1.SetProperty(property1, memgraph::storage::PropertyValue(10)).HasValue()); + ASSERT_FALSE(acc1->Commit().HasError()); + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 1); + + auto saved_timestamp = disk_storage->GetDurableMetadata()->LoadTimestampIfExists(); + ASSERT_EQ(saved_timestamp.has_value(), true); + ASSERT_EQ(disk_storage->timestamp_, saved_timestamp); + + auto acc2 = disk_storage->Access(ReplicationRole::MAIN); + auto vertex2 = acc2->CreateVertex(); + auto label2 = acc2->NameToLabel("DiskLabel2"); + auto property2 = acc2->NameToProperty("DiskProperty2"); + + ASSERT_TRUE(vertex2.AddLabel(label2).HasValue()); + ASSERT_TRUE(vertex2.SetProperty(property2, memgraph::storage::PropertyValue(10)).HasValue()); + ASSERT_FALSE(acc2->Commit().HasError()); + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 2); + + saved_timestamp = disk_storage->GetDurableMetadata()->LoadTimestampIfExists(); + ASSERT_EQ(saved_timestamp.has_value(), true); + ASSERT_EQ(disk_storage->timestamp_, saved_timestamp); +} From 56be736d30fc9df48b840e421a8ce7afea997947 Mon Sep 17 00:00:00 2001 From: DavIvek Date: Thu, 21 Mar 2024 13:34:59 +0100 Subject: [PATCH 4/5] Fix and update mgbench (#1838) --- tests/mgbench/benchmark.py | 6 +- tests/mgbench/graph_bench.py | 2 - tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh | 99 +++++++++++++++++++ tests/mgbench/runners.py | 2 +- tests/mgbench/workloads/base.py | 30 +++--- tests/mgbench/workloads/disk_pokec.py | 52 +++++++--- .../importers/disk_importer_pokec.py | 2 +- tests/mgbench/workloads/pokec.py | 38 ++++++- 8 files changed, 195 insertions(+), 36 deletions(-) create mode 100644 tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh diff --git a/tests/mgbench/benchmark.py b/tests/mgbench/benchmark.py index cd3fb846f..9c8f1a7d2 100755 --- a/tests/mgbench/benchmark.py +++ b/tests/mgbench/benchmark.py @@ -632,10 +632,12 @@ def run_isolated_workload_without_authorization(vendor_runner, client, queries, def setup_indices_and_import_dataset(client, vendor_runner, generated_queries, workload, storage_mode): - vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT) + if benchmark_context.vendor_name == "memgraph": + # Neo4j will get started just before import -> without this if statement it would try to start it twice + vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT) log.info("Executing database index setup") start_time = time.time() - + import_results = None if generated_queries: client.execute(queries=workload.indexes_generator(), num_workers=1) log.info("Finished setting up indexes.") diff --git a/tests/mgbench/graph_bench.py b/tests/mgbench/graph_bench.py index f329cfcb7..bcba55324 100644 --- a/tests/mgbench/graph_bench.py +++ b/tests/mgbench/graph_bench.py @@ -127,8 +127,6 @@ def run_full_benchmarks( ], ] - assert not realistic or not mixed, "Cannot run both realistic and mixed workload, please select 
one!" - if realistic: # Configurations for full workload for count, write, read, update, analytical in realistic: diff --git a/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh b/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh new file mode 100644 index 000000000..0381448fa --- /dev/null +++ b/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Currently only pokec dataset is modified to be used with memgraph on-disk storage + +pushd () { command pushd "$@" > /dev/null; } +popd () { command popd "$@" > /dev/null; } +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +pushd "$SCRIPT_DIR" + +# Help function +function show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " -n, --neo4j-path Path to Neo4j binary" + echo " -m, --memgraph-path Path to Memgraph binary" + echo " -w, --num-workers Number of workers for benchmark and import" + echo " -d, --dataset_size dataset_size (small, medium, large)" + echo " -h, --help Show this help message" + exit 0 +} + +# Default values +neo4j_path="/usr/share/neo4j/bin/neo4j" +memgraph_path="../../build/memgraph" +num_workers=12 +dataset_size="small" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -n|--neo4j-path) + neo4j_path="$2" + shift + shift + ;; + -m|--memgraph-path) + memgraph_path="$2" + shift + shift + ;; + -w|--num-workers) + num_workers="$2" + shift + shift + ;; + -d|--dataset_size) + dataset_size="$2" + shift + shift + ;; + -h|--help) + show_help + ;; + *) + echo "Invalid option: $1" + show_help + ;; + esac +done + +if [ ! -d "pokec_${dataset_size}_results" ]; then + mkdir "pokec_${dataset_size}_results" +fi + +# Run Python: Mgbench - Neo4j +echo "Running Python: Mgbench - Neo4j" +python3 benchmark.py vendor-native \ + --vendor-binary "$neo4j_path" \ + --vendor-name neo4j \ + --num-workers-for-benchmark "$num_workers" \ + --num-workers-for-import "$num_workers" \ + --no-load-query-counts \ + --export-results "pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \ + "pokec_disk/${dataset_size}/*/*" \ + --vendor-specific "config=$neo4j_path/conf/neo4j.conf" \ + --no-authorization + +# Run Python: Mgbench - Memgraph - on-disk +echo "Running Python: Mgbench - Memgraph - on-disk" +python3 benchmark.py vendor-native \ + --vendor-binary "$memgraph_path" \ + --vendor-name memgraph \ + --num-workers-for-benchmark "$num_workers" \ + --num-workers-for-import "$num_workers" \ + --no-load-query-counts \ + --export-results-on-disk-txn "pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \ + --export-results "pokec_${dataset_size}_results/on_disk_export_${dataset_size}_pokec.json" \ + "pokec_disk/${dataset_size}/*/*" \ + --no-authorization \ + --vendor-specific "data-directory=benchmark_datadir" "storage-mode=ON_DISK_TRANSACTIONAL" + +echo "Comparing results" +python3 compare_results.py --compare \ + "pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \ + "pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \ + --output \ + "pokec_${dataset_size}_results/neo4j_vs_mg_ondisk_results.html" \ + --different-vendors diff --git a/tests/mgbench/runners.py b/tests/mgbench/runners.py index 155ceac06..005bcb60f 100644 --- a/tests/mgbench/runners.py +++ b/tests/mgbench/runners.py @@ -634,7 +634,7 @@ class Neo4j(BaseRunner): exit_proc = subprocess.run(args=[self._neo4j_binary, "stop"], capture_output=True, check=True) return exit_proc.returncode, usage else: - return 0 + return 0, 0 def start_db_init(self, workload): if 
self._performance_tracking: diff --git a/tests/mgbench/workloads/base.py b/tests/mgbench/workloads/base.py index 5264dcba9..ab4c21059 100644 --- a/tests/mgbench/workloads/base.py +++ b/tests/mgbench/workloads/base.py @@ -160,12 +160,7 @@ class Workload(ABC): raise ValueError("Vendor does not have INDEX for dataset!") def _set_local_files(self) -> None: - if not self.disk_workload: - if self.LOCAL_FILE is not None: - self._local_file = self.LOCAL_FILE.get(self._variant, None) - else: - self._local_file = None - else: + if self.disk_workload and self._vendor != "neo4j": if self.LOCAL_FILE_NODES is not None: self._local_file_nodes = self.LOCAL_FILE_NODES.get(self._variant, None) else: @@ -175,14 +170,14 @@ class Workload(ABC): self._local_file_edges = self.LOCAL_FILE_EDGES.get(self._variant, None) else: self._local_file_edges = None + else: + if self.LOCAL_FILE is not None: + self._local_file = self.LOCAL_FILE.get(self._variant, None) + else: + self._local_file = None def _set_url_files(self) -> None: - if not self.disk_workload: - if self.URL_FILE is not None: - self._url_file = self.URL_FILE.get(self._variant, None) - else: - self._url_file = None - else: + if self.disk_workload and self._vendor != "neo4j": if self.URL_FILE_NODES is not None: self._url_file_nodes = self.URL_FILE_NODES.get(self._variant, None) else: @@ -191,6 +186,11 @@ class Workload(ABC): self._url_file_edges = self.URL_FILE_EDGES.get(self._variant, None) else: self._url_file_edges = None + else: + if self.URL_FILE is not None: + self._url_file = self.URL_FILE.get(self._variant, None) + else: + self._url_file = None def _set_local_index_file(self) -> None: if self.LOCAL_INDEX_FILE is not None: @@ -205,10 +205,10 @@ class Workload(ABC): self._url_index = None def prepare(self, directory): - if not self.disk_workload: - self._prepare_dataset_for_in_memory_workload(directory) - else: + if self.disk_workload and self._vendor != "neo4j": self._prepare_dataset_for_on_disk_workload(directory) + else: + self._prepare_dataset_for_in_memory_workload(directory) if self._local_index is not None: print("Using local index file:", self._local_index) diff --git a/tests/mgbench/workloads/disk_pokec.py b/tests/mgbench/workloads/disk_pokec.py index f19110a0c..a296e4836 100644 --- a/tests/mgbench/workloads/disk_pokec.py +++ b/tests/mgbench/workloads/disk_pokec.py @@ -13,7 +13,8 @@ import random from benchmark_context import BenchmarkContext from workloads.base import Workload -from workloads.importers.disk_importer_pokec import ImporterPokec +from workloads.importers.disk_importer_pokec import DiskImporterPokec +from workloads.importers.importer_pokec import ImporterPokec class Pokec(Workload): @@ -22,6 +23,12 @@ class Pokec(Workload): DEFAULT_VARIANT = "small" FILE = None + URL_FILE = { + "small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_small_import.cypher", + "medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_medium_import.cypher", + "large": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_large.setup.cypher.gz", + } + URL_FILE_NODES = { "small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_small_import_nodes.cypher", "medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_medium_import_nodes.cypher", @@ -42,7 +49,7 @@ class Pokec(Workload): URL_INDEX_FILE = { "memgraph": 
"https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/memgraph.cypher", - "neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/neo4j.cypher", + "neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/neo4j.cypher", } PROPERTIES_ON_EDGES = False @@ -51,15 +58,26 @@ class Pokec(Workload): super().__init__(variant, benchmark_context=benchmark_context, disk_workload=True) def custom_import(self) -> bool: - importer = ImporterPokec( - benchmark_context=self.benchmark_context, - dataset_name=self.NAME, - index_file=self._file_index, - dataset_nodes_file=self._node_file, - dataset_edges_file=self._edge_file, - variant=self._variant, - ) - return importer.execute_import() + if self._vendor == "neo4j": + importer = ImporterPokec( + benchmark_context=self.benchmark_context, + dataset_name=self.NAME, + index_file=self._file_index, + dataset_file=self._file, + variant=self._variant, + ) + return importer.execute_import() + + else: + importer = DiskImporterPokec( + benchmark_context=self.benchmark_context, + dataset_name=self.NAME, + index_file=self._file_index, + dataset_nodes_file=self._node_file, + dataset_edges_file=self._edge_file, + variant=self._variant, + ) + return importer.execute_import() # Helpers used to generate the queries def _get_random_vertex(self): @@ -214,12 +232,22 @@ class Pokec(Workload): # OK def benchmark__arango__allshortest_paths(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p = allShortestPaths((n)-[*..2]->(m)) " + "RETURN [node in nodes(p) | node.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "neo4j": + return neo4j + else: + return memgraph # Our benchmark queries diff --git a/tests/mgbench/workloads/importers/disk_importer_pokec.py b/tests/mgbench/workloads/importers/disk_importer_pokec.py index 560d7da9e..f487dc8f3 100644 --- a/tests/mgbench/workloads/importers/disk_importer_pokec.py +++ b/tests/mgbench/workloads/importers/disk_importer_pokec.py @@ -17,7 +17,7 @@ from constants import * from runners import BaseRunner -class ImporterPokec: +class DiskImporterPokec: def __init__( self, benchmark_context: BenchmarkContext, diff --git a/tests/mgbench/workloads/pokec.py b/tests/mgbench/workloads/pokec.py index 6733d38f2..4c05796b2 100644 --- a/tests/mgbench/workloads/pokec.py +++ b/tests/mgbench/workloads/pokec.py @@ -167,30 +167,62 @@ class Pokec(Workload): def benchmark__arango__shortest_path(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*bfs..15]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p=shortestPath((n)-[*..15]->(m)) " + "RETURN [n in nodes(p) | n.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j def benchmark__arango__shortest_path_with_filter(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " 
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p=shortestPath((n)-[*..15]->(m)) " + "WHERE all(node in nodes(p) WHERE node.age >= 18) " + "RETURN [n in nodes(p) | n.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j + def benchmark__arango__allshortest_paths(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p = allShortestPaths((n)-[*..2]->(m)) " + "RETURN [node in nodes(p) | node.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j # Our benchmark queries From 89e13109d77003e4d28835d981765e55a52ce6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Bari=C5=A1i=C4=87?= <48765171+MarkoBarisic@users.noreply.github.com> Date: Thu, 21 Mar 2024 18:39:40 +0100 Subject: [PATCH 5/5] Fix jepsen nodes not starting up healthy (#1846) * add a loop to check if all nodes started correctly and restart if any failed --- tests/jepsen/run.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/jepsen/run.sh b/tests/jepsen/run.sh index 040491c3f..d94dbf8de 100755 --- a/tests/jepsen/run.sh +++ b/tests/jepsen/run.sh @@ -178,8 +178,16 @@ PROCESS_RESULTS() { CLUSTER_UP() { PRINT_CONTEXT - "$script_dir/jepsen/docker/bin/up" --daemon - sleep 10 + local cnt=0 + while [[ "$cnt" < 5 ]]; do + if ! "$script_dir/jepsen/docker/bin/up" --daemon; then + cnt=$((cnt + 1)) + continue + else + sleep 10 + break + fi + done # Ensure all SSH connections between Jepsen containers work for node in $(docker ps --filter name=jepsen* --filter status=running --format "{{.Names}}"); do if [ "$node" == "jepsen-control" ]; then