Compare commits
35 Commits
master
...
support-re
Author | SHA1 | Date | |
---|---|---|---|
|
ccabbe7a63 | ||
|
aac169c5b3 | ||
|
d0cb85e642 | ||
|
34a7fed59a | ||
|
8124226ba9 | ||
|
eea21dd73e | ||
|
a6483fc6a7 | ||
|
45def388c7 | ||
|
da3db9f3bc | ||
|
34b2e360a9 | ||
|
9bc0d9425e | ||
|
62d1b68c2f | ||
|
ec46cca7a3 | ||
|
326a04c6a3 | ||
|
b21f9e71ed | ||
|
c2e0b310f5 | ||
|
b492c7d34f | ||
|
303608982a | ||
|
c0f979fdcb | ||
|
5242427686 | ||
|
d3168ded5a | ||
|
0a28fee34b | ||
|
dc9a2c45c4 | ||
|
1133bb8ecb | ||
|
e9c5cc3b82 | ||
|
3f4ac0dd58 | ||
|
a0ecea7d1c | ||
|
567e1fa1cb | ||
|
ef37c44149 | ||
|
ab34b060c0 | ||
|
67c1874e81 | ||
|
9d457eafa8 | ||
|
986ea37ead | ||
|
afe7d47a5c | ||
|
8884a0ea78 |
2
.github/workflows/diff.yaml
vendored
2
.github/workflows/diff.yaml
vendored
@ -96,7 +96,7 @@ jobs:
|
||||
|
||||
- name: Python code analysis
|
||||
run: |
|
||||
CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... --name-only)
|
||||
CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... --name-only --diff-filter=d)
|
||||
for file in ${CHANGED_FILES}; do
|
||||
echo ${file}
|
||||
if [[ ${file} == *.py ]]; then
|
||||
|
@ -7,19 +7,24 @@ target_sources(mg-coordination
|
||||
include/coordination/coordinator_rpc.hpp
|
||||
include/coordination/coordinator_server.hpp
|
||||
include/coordination/coordinator_config.hpp
|
||||
include/coordination/coordinator_entity_info.hpp
|
||||
include/coordination/coordinator_exceptions.hpp
|
||||
include/coordination/coordinator_instance.hpp
|
||||
include/coordination/coordinator_slk.hpp
|
||||
include/coordination/coordinator_data.hpp
|
||||
include/coordination/constants.hpp
|
||||
include/coordination/failover_status.hpp
|
||||
include/coordination/coordinator_cluster_config.hpp
|
||||
|
||||
PRIVATE
|
||||
coordinator_client.cpp
|
||||
coordinator_state.cpp
|
||||
coordinator_rpc.cpp
|
||||
coordinator_server.cpp
|
||||
coordinator_data.cpp
|
||||
coordinator_instance.cpp
|
||||
)
|
||||
target_include_directories(mg-coordination PUBLIC include)
|
||||
|
||||
target_link_libraries(mg-coordination
|
||||
PUBLIC mg::utils mg::rpc mg::slk mg::io mg::repl_coord_glue
|
||||
PUBLIC mg::utils mg::rpc mg::slk mg::io mg::repl_coord_glue lib::rangev3
|
||||
)
|
||||
|
@ -27,84 +27,77 @@ auto CreateClientContext(const memgraph::coordination::CoordinatorClientConfig &
|
||||
}
|
||||
} // namespace
|
||||
|
||||
CoordinatorClient::CoordinatorClient(const CoordinatorClientConfig &config)
|
||||
CoordinatorClient::CoordinatorClient(CoordinatorData *coord_data, CoordinatorClientConfig config,
|
||||
HealthCheckCallback succ_cb, HealthCheckCallback fail_cb)
|
||||
: rpc_context_{CreateClientContext(config)},
|
||||
rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port),
|
||||
&rpc_context_},
|
||||
config_{config} {}
|
||||
config_{std::move(config)},
|
||||
coord_data_{coord_data},
|
||||
succ_cb_{std::move(succ_cb)},
|
||||
fail_cb_{std::move(fail_cb)} {}
|
||||
|
||||
CoordinatorClient::~CoordinatorClient() {
|
||||
auto exit_job = utils::OnScopeExit([&] {
|
||||
StopFrequentCheck();
|
||||
thread_pool_.Shutdown();
|
||||
});
|
||||
const auto endpoint = rpc_client_.Endpoint();
|
||||
// Logging can throw
|
||||
spdlog::trace("Closing replication client on {}:{}", endpoint.address, endpoint.port);
|
||||
}
|
||||
auto CoordinatorClient::InstanceName() const -> std::string { return config_.instance_name; }
|
||||
auto CoordinatorClient::SocketAddress() const -> std::string { return rpc_client_.Endpoint().SocketAddress(); }
|
||||
|
||||
void CoordinatorClient::StartFrequentCheck() {
|
||||
MG_ASSERT(config_.health_check_frequency_sec > std::chrono::seconds(0),
|
||||
"Health check frequency must be greater than 0");
|
||||
replica_checker_.Run(
|
||||
"Coord checker", config_.health_check_frequency_sec,
|
||||
[last_response_time = &last_response_time_, rpc_client = &rpc_client_] {
|
||||
|
||||
instance_checker_.Run(
|
||||
config_.instance_name, config_.health_check_frequency_sec, [this, instance_name = config_.instance_name] {
|
||||
try {
|
||||
{
|
||||
auto stream{rpc_client->Stream<memgraph::replication_coordination_glue::FrequentHeartbeatRpc>()};
|
||||
spdlog::trace("Sending frequent heartbeat to machine {} on {}", instance_name,
|
||||
rpc_client_.Endpoint().SocketAddress());
|
||||
auto stream{rpc_client_.Stream<memgraph::replication_coordination_glue::FrequentHeartbeatRpc>()};
|
||||
stream.AwaitResponse();
|
||||
last_response_time->store(std::chrono::system_clock::now(), std::memory_order_acq_rel);
|
||||
}
|
||||
succ_cb_(coord_data_, instance_name);
|
||||
} catch (const rpc::RpcFailedException &) {
|
||||
// Nothing to do...wait for a reconnect
|
||||
fail_cb_(coord_data_, instance_name);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void CoordinatorClient::StopFrequentCheck() { replica_checker_.Stop(); }
|
||||
void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); }
|
||||
// Suspends the periodic health check by forwarding to instance_checker_.Pause().
void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); }
|
||||
// Resumes the periodic health check by forwarding to instance_checker_.Resume().
void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); }
|
||||
|
||||
bool CoordinatorClient::DoHealthCheck() const {
|
||||
auto current_time = std::chrono::system_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::seconds>(current_time -
|
||||
last_response_time_.load(std::memory_order_acquire));
|
||||
return duration.count() <= alive_response_time_difference_sec_;
|
||||
auto CoordinatorClient::SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void {
|
||||
succ_cb_ = std::move(succ_cb);
|
||||
fail_cb_ = std::move(fail_cb);
|
||||
}
|
||||
|
||||
auto CoordinatorClient::InstanceName() const -> std::string_view { return config_.instance_name; }
|
||||
auto CoordinatorClient::Endpoint() const -> io::network::Endpoint const & { return rpc_client_.Endpoint(); }
|
||||
auto CoordinatorClient::Config() const -> CoordinatorClientConfig const & { return config_; }
|
||||
|
||||
auto CoordinatorClient::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo const & {
|
||||
MG_ASSERT(config_.replication_client_info.has_value(), "No ReplicationClientInfo for MAIN instance!");
|
||||
return *config_.replication_client_info;
|
||||
}
|
||||
|
||||
////// AF design choice
|
||||
auto CoordinatorClient::ReplicationClientInfo() -> std::optional<CoordinatorClientConfig::ReplicationClientInfo> & {
|
||||
MG_ASSERT(config_.replication_client_info.has_value(), "No ReplicationClientInfo for MAIN instance!");
|
||||
auto CoordinatorClient::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo {
|
||||
return config_.replication_client_info;
|
||||
}
|
||||
|
||||
void CoordinatorClient::UpdateTimeCheck(const std::chrono::system_clock::time_point &last_checked_time) {
|
||||
last_response_time_.store(last_checked_time, std::memory_order_acq_rel);
|
||||
}
|
||||
|
||||
auto CoordinatorClient::GetLastTimeResponse() -> std::chrono::system_clock::time_point { return last_response_time_; }
|
||||
|
||||
auto CoordinatorClient::SendPromoteReplicaToMainRpc(
|
||||
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info) const -> bool {
|
||||
try {
|
||||
{
|
||||
auto stream{rpc_client_.Stream<PromoteReplicaToMainRpc>(std::move(replication_clients_info))};
|
||||
if (!stream.AwaitResponse().success) {
|
||||
spdlog::error("Failed to perform failover!");
|
||||
spdlog::error("Failed to receive successful RPC failover response!");
|
||||
return false;
|
||||
}
|
||||
spdlog::info("Sent failover RPC from coordinator to new main!");
|
||||
return true;
|
||||
}
|
||||
} catch (const rpc::RpcFailedException &) {
|
||||
spdlog::error("Failed to send failover RPC from coordinator to new main!");
|
||||
spdlog::error("RPC error occurred while sending failover RPC!");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
auto CoordinatorClient::DemoteToReplica() const -> bool {
|
||||
const auto instance_name = config_.instance_name;
|
||||
try {
|
||||
auto stream{rpc_client_.Stream<SetMainToReplicaRpc>(config_.replication_client_info)};
|
||||
if (!stream.AwaitResponse().success) {
|
||||
spdlog::error("Failed to receive successful RPC response for setting instance {} to replica!", instance_name);
|
||||
return false;
|
||||
}
|
||||
spdlog::info("Sent request RPC from coordinator to instance to set it as replica!");
|
||||
return true;
|
||||
} catch (const rpc::RpcFailedException &) {
|
||||
spdlog::error("Failed to set instance {} to replica!", instance_name);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
224
src/coordination/coordinator_data.cpp
Normal file
224
src/coordination/coordinator_data.cpp
Normal file
@ -0,0 +1,224 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#include "coordination/coordinator_instance.hpp"
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_data.hpp"
|
||||
|
||||
#include <range/v3/view.hpp>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
// Builds the four health-check callbacks (replica/main x success/failure) that are handed to registered
// instances.
//
// Each callback receives the owning CoordinatorData and the name of the pinged instance, holds
// coord_data_lock_ for its whole duration, and resolves the instance by name before updating it.
CoordinatorData::CoordinatorData() {
  // Resolves a registered instance by name and asserts that it exists. Every callback below takes
  // coord_data_lock_ before calling it.
  auto find_instance = [](CoordinatorData *coord_data, std::string_view instance_name) -> CoordinatorInstance & {
    auto instance = std::ranges::find_if(
        coord_data->registered_instances_,
        [instance_name](const CoordinatorInstance &instance) { return instance.InstanceName() == instance_name; });

    MG_ASSERT(instance != coord_data->registered_instances_.end(), "Instance {} not found during callback!",
              instance_name);
    return *instance;
  };

  // Replica answered the heartbeat: record the successful ping.
  replica_succ_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
    auto lock = std::lock_guard{coord_data->coord_data_lock_};
    spdlog::trace("Instance {} performing replica successful callback", instance_name);
    auto &instance = find_instance(coord_data, instance_name);
    instance.OnSuccessPing();
  };

  // Replica missed the heartbeat: record the failed ping.
  replica_fail_cb_ = [find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
    auto lock = std::lock_guard{coord_data->coord_data_lock_};
    spdlog::trace("Instance {} performing replica failure callback", instance_name);
    auto &instance = find_instance(coord_data, instance_name);
    instance.OnFailPing();
  };

  // Main answered the heartbeat. If it was already considered alive this is a plain successful ping.
  // If it was considered dead and the cluster meanwhile has another alive main, the instance is demoted
  // to replica on the thread pool; otherwise it keeps the main role and is marked alive again.
  main_succ_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
    auto lock = std::lock_guard{coord_data->coord_data_lock_};
    spdlog::trace("Instance {} performing main successful callback", instance_name);

    auto &instance = find_instance(coord_data, instance_name);

    if (instance.IsAlive()) {
      instance.OnSuccessPing();
    } else {
      auto const new_role = coord_data->ClusterHasAliveMain() ? replication_coordination_glue::ReplicationRole::REPLICA
                                                              : replication_coordination_glue::ReplicationRole::MAIN;
      if (new_role == replication_coordination_glue::ReplicationRole::REPLICA) {
        // The task runs on the thread pool after this callback has returned, so it must own the name:
        // capturing the borrowed std::string_view would tie the task to storage it does not control.
        thread_pool_.AddTask([&instance, coord_data, instance_name = std::string{instance_name}]() {
          auto lock = std::lock_guard{coord_data->coord_data_lock_};
          spdlog::info("Demoting instance {} to replica", instance_name);
          // No heartbeats while the demotion RPC is in flight; resumed on every exit path.
          instance.PauseFrequentCheck();
          utils::OnScopeExit scope_exit{[&instance] { instance.ResumeFrequentCheck(); }};
          auto const status = instance.DemoteToReplica(coord_data->replica_succ_cb_, coord_data->replica_fail_cb_);
          if (!status) {
            spdlog::error("Instance {} failed to demote to replica", instance_name);
          } else {
            spdlog::info("Instance {} demoted to replica", instance_name);
            instance.OnSuccessPing();
          }
        });
      } else {
        instance.OnSuccessPing();
      }
    }
  };

  // Main missed the heartbeat: record the failed ping and, if no alive main is left, attempt a failover.
  main_fail_cb_ = [this, find_instance](CoordinatorData *coord_data, std::string_view instance_name) -> void {
    auto lock = std::lock_guard{coord_data->coord_data_lock_};
    spdlog::trace("Instance {} performing main failure callback", instance_name);
    auto &instance = find_instance(coord_data, instance_name);
    instance.OnFailPing();

    if (!ClusterHasAliveMain()) {
      spdlog::info("Cluster without main instance, starting automatic failover");
      switch (auto failover_status = DoFailover(); failover_status) {
        using enum DoFailoverStatus;
        case ALL_REPLICAS_DOWN:
          spdlog::warn("Failover aborted since all replicas are down!");
          break;
        case MAIN_ALIVE:
          spdlog::warn("Failover aborted since main is alive!");
          break;
        case RPC_FAILED:
          spdlog::warn("Failover aborted since promoting replica to main failed!");
          break;
        case SUCCESS:
          break;
      }
    }
  };
}
|
||||
|
||||
auto CoordinatorData::ClusterHasAliveMain() const -> bool {
|
||||
auto const alive_main = [](const CoordinatorInstance &instance) { return instance.IsMain() && instance.IsAlive(); };
|
||||
return std::ranges::any_of(registered_instances_, alive_main);
|
||||
}
|
||||
|
||||
// Promotes the first alive replica to main.
//
// Returns ALL_REPLICAS_DOWN when no registered replica is alive, RPC_FAILED when the promotion fails, and
// SUCCESS otherwise. The chosen replica's frequent check is paused for the duration of the call and
// resumed on every exit path after the pause.
auto CoordinatorData::DoFailover() -> DoFailoverStatus {
  auto replicas = registered_instances_ | ranges::views::filter(&CoordinatorInstance::IsReplica);

  auto promoted = std::ranges::find_if(replicas, &CoordinatorInstance::IsAlive);
  if (promoted == replicas.end()) {
    return DoFailoverStatus::ALL_REPLICAS_DOWN;
  }

  promoted->PauseFrequentCheck();
  utils::OnScopeExit resume_checks{[&promoted] { promoted->ResumeFrequentCheck(); }};

  // Replication info of every replica except the one being promoted, handed over to the new main.
  std::vector<ReplClientInfo> remaining_replicas_info;
  remaining_replicas_info.reserve(std::ranges::distance(replicas));
  for (const CoordinatorInstance &replica : replicas) {
    if (replica != *promoted) {
      remaining_replicas_info.push_back(replica.ReplicationClientInfo());
    }
  }

  const bool promotion_succeeded =
      promoted->PromoteToMain(std::move(remaining_replicas_info), main_succ_cb_, main_fail_cb_);
  return promotion_succeeded ? DoFailoverStatus::SUCCESS : DoFailoverStatus::RPC_FAILED;
}
|
||||
|
||||
// Returns one CoordinatorInstanceStatus (name, socket address, role string, liveness) per registered
// instance.
//
// The role string is "unknown" for an instance that is not alive, otherwise "main" or "replica".
// The whole read of registered_instances_, including the size() used for reserve, happens under a shared
// lock on coord_data_lock_, the same lock the mutating members of this class take exclusively.
auto CoordinatorData::ShowInstances() const -> std::vector<CoordinatorInstanceStatus> {
  auto const stringify_repl_role = [](const CoordinatorInstance &instance) -> std::string {
    if (!instance.IsAlive()) return "unknown";
    if (instance.IsMain()) return "main";
    return "replica";
  };

  auto const instance_to_status =
      [&stringify_repl_role](const CoordinatorInstance &instance) -> CoordinatorInstanceStatus {
    return {.instance_name = instance.InstanceName(),
            .socket_address = instance.SocketAddress(),
            .replication_role = stringify_repl_role(instance),
            .is_alive = instance.IsAlive()};
  };

  // Take the lock before touching the container at all; size() is a read of shared state too.
  auto lock = std::shared_lock{coord_data_lock_};

  std::vector<CoordinatorInstanceStatus> instances_status;
  instances_status.reserve(registered_instances_.size());
  std::ranges::transform(registered_instances_, std::back_inserter(instances_status), instance_to_status);

  return instances_status;
}
|
||||
|
||||
// Promotes the registered instance called `instance_name` to main, passing it the replication info of
// every other registered instance.
//
// Returns NO_INSTANCE_WITH_NAME when no such instance is registered, COULD_NOT_PROMOTE_TO_MAIN when the
// promotion fails, and SUCCESS otherwise. Holds coord_data_lock_ for the whole call; the target's frequent
// check is paused while the promotion runs and resumed on every exit path after the pause.
auto CoordinatorData::SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus {
  auto guard = std::lock_guard{coord_data_lock_};

  auto const has_requested_name = [&instance_name](const CoordinatorInstance &candidate) {
    return candidate.InstanceName() == instance_name;
  };

  auto promoted = std::ranges::find_if(registered_instances_, has_requested_name);
  if (promoted == registered_instances_.end()) {
    spdlog::error("Instance {} not registered. Please register it using REGISTER INSTANCE {}", instance_name,
                  instance_name);
    return SetInstanceToMainCoordinatorStatus::NO_INSTANCE_WITH_NAME;
  }

  promoted->PauseFrequentCheck();
  utils::OnScopeExit resume_checks{[&promoted] { promoted->ResumeFrequentCheck(); }};

  std::vector<CoordinatorClientConfig::ReplicationClientInfo> other_instances_info;
  other_instances_info.reserve(registered_instances_.size() - 1);
  for (const CoordinatorInstance &candidate : registered_instances_) {
    if (!has_requested_name(candidate)) {
      other_instances_info.push_back(candidate.ReplicationClientInfo());
    }
  }

  const bool promotion_succeeded =
      promoted->PromoteToMain(std::move(other_instances_info), main_succ_cb_, main_fail_cb_);
  if (promotion_succeeded) {
    spdlog::info("Instance {} promoted to main", instance_name);
    return SetInstanceToMainCoordinatorStatus::SUCCESS;
  }
  return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
}
|
||||
|
||||
// Registers a new instance described by `config`, wired with the replica callbacks.
//
// Returns NAME_EXISTS when an instance with the same name is already registered, END_POINT_EXISTS when one
// with the same socket address is (the name check takes precedence), RPC_FAILED when constructing the
// instance throws CoordinatorRegisterInstanceException, and SUCCESS otherwise.
// Holds coord_data_lock_ for the whole call.
auto CoordinatorData::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus {
  auto guard = std::lock_guard{coord_data_lock_};

  auto const same_name = [&config](const CoordinatorInstance &registered) {
    return registered.InstanceName() == config.instance_name;
  };
  auto const same_endpoint = [&config](const CoordinatorInstance &registered) {
    return registered.SocketAddress() == config.SocketAddress();
  };

  if (std::ranges::any_of(registered_instances_, same_name)) {
    return RegisterInstanceCoordinatorStatus::NAME_EXISTS;
  }
  if (std::ranges::any_of(registered_instances_, same_endpoint)) {
    return RegisterInstanceCoordinatorStatus::END_POINT_EXISTS;
  }

  try {
    registered_instances_.emplace_back(this, std::move(config), replica_succ_cb_, replica_fail_cb_);
  } catch (CoordinatorRegisterInstanceException const &) {
    return RegisterInstanceCoordinatorStatus::RPC_FAILED;
  }
  return RegisterInstanceCoordinatorStatus::SUCCESS;
}
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
|
||||
#endif
|
84
src/coordination/coordinator_instance.cpp
Normal file
84
src/coordination/coordinator_instance.cpp
Normal file
@ -0,0 +1,84 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_instance.hpp"
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
// Creates an instance in the REPLICA role, marked alive, whose client is built from `data`, `config` and
// the two health-check callbacks.
//
// The instance is demoted to replica over RPC first; the frequent check is started only if that succeeds.
// Throws CoordinatorRegisterInstanceException when the demotion fails.
CoordinatorInstance::CoordinatorInstance(CoordinatorData *data, CoordinatorClientConfig config,
                                         HealthCheckCallback succ_cb, HealthCheckCallback fail_cb)
    : client_(data, std::move(config), std::move(succ_cb), std::move(fail_cb)),
      replication_role_(replication_coordination_glue::ReplicationRole::REPLICA),
      is_alive_(true) {
  const bool demoted = client_.DemoteToReplica();
  if (demoted) {
    client_.StartFrequentCheck();
    return;
  }
  throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica", client_.InstanceName());
}
|
||||
|
||||
// Records a successful ping: stamps last_response_time_ with the current system time and marks the
// instance alive.
auto CoordinatorInstance::OnSuccessPing() -> void {
  const auto pinged_at = std::chrono::system_clock::now();
  last_response_time_ = pinged_at;
  is_alive_ = true;
}
|
||||
|
||||
// Records a failed ping. The instance stays alive only while the whole seconds elapsed since
// last_response_time_ are below CoordinatorClusterConfig::alive_response_time_difference_sec_.
// Returns the updated liveness.
auto CoordinatorInstance::OnFailPing() -> bool {
  const auto now = std::chrono::system_clock::now();
  const auto silent_for = std::chrono::duration_cast<std::chrono::seconds>(now - last_response_time_);
  is_alive_ = silent_for.count() < CoordinatorClusterConfig::alive_response_time_difference_sec_;
  return is_alive_;
}
|
||||
|
||||
// Returns the instance name as reported by the underlying client.
auto CoordinatorInstance::InstanceName() const -> std::string { return client_.InstanceName(); }
|
||||
// Returns the socket address as reported by the underlying client.
auto CoordinatorInstance::SocketAddress() const -> std::string { return client_.SocketAddress(); }
|
||||
// Returns the liveness flag last set by the constructor, OnSuccessPing() or OnFailPing().
auto CoordinatorInstance::IsAlive() const -> bool { return is_alive_; }
|
||||
|
||||
auto CoordinatorInstance::IsReplica() const -> bool {
|
||||
return replication_role_ == replication_coordination_glue::ReplicationRole::REPLICA;
|
||||
}
|
||||
auto CoordinatorInstance::IsMain() const -> bool {
|
||||
return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN;
|
||||
}
|
||||
|
||||
// Asks the instance over RPC to become main, handing it the replication info of its future replicas.
//
// On success the tracked role becomes MAIN and the client's health-check callbacks are swapped for the
// main ones. On failure nothing is changed. Returns whether the RPC succeeded.
auto CoordinatorInstance::PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
                                        HealthCheckCallback main_fail_cb) -> bool {
  const bool rpc_succeeded = client_.SendPromoteReplicaToMainRpc(std::move(repl_clients_info));
  if (rpc_succeeded) {
    replication_role_ = replication_coordination_glue::ReplicationRole::MAIN;
    client_.SetCallbacks(std::move(main_succ_cb), std::move(main_fail_cb));
  }
  return rpc_succeeded;
}
|
||||
|
||||
auto CoordinatorInstance::DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb)
|
||||
-> bool {
|
||||
if (!client_.DemoteToReplica()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
replication_role_ = replication_coordination_glue::ReplicationRole::REPLICA;
|
||||
client_.SetCallbacks(std::move(replica_succ_cb), std::move(replica_fail_cb));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pauses the client's periodic health check; forwards to client_.PauseFrequentCheck().
auto CoordinatorInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); }
|
||||
// Resumes the client's periodic health check; forwards to client_.ResumeFrequentCheck().
auto CoordinatorInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); }
|
||||
|
||||
// Returns, by value, the replication client info held by the underlying client.
auto CoordinatorInstance::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo {
|
||||
return client_.ReplicationClientInfo();
|
||||
}
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -36,6 +36,22 @@ void PromoteReplicaToMainRes::Load(PromoteReplicaToMainRes *self, memgraph::slk:
|
||||
memgraph::slk::Load(self, reader);
|
||||
}
|
||||
|
||||
// Serializes a SetMainToReplicaReq into `builder` by delegating to the memgraph::slk::Save overload.
void SetMainToReplicaReq::Save(const SetMainToReplicaReq &self, memgraph::slk::Builder *builder) {
|
||||
memgraph::slk::Save(self, builder);
|
||||
}
|
||||
|
||||
// Deserializes a SetMainToReplicaReq from `reader` into `self` by delegating to the memgraph::slk::Load overload.
void SetMainToReplicaReq::Load(SetMainToReplicaReq *self, memgraph::slk::Reader *reader) {
|
||||
memgraph::slk::Load(self, reader);
|
||||
}
|
||||
|
||||
// Serializes a SetMainToReplicaRes into `builder` by delegating to the memgraph::slk::Save overload.
void SetMainToReplicaRes::Save(const SetMainToReplicaRes &self, memgraph::slk::Builder *builder) {
|
||||
memgraph::slk::Save(self, builder);
|
||||
}
|
||||
|
||||
// Deserializes a SetMainToReplicaRes from `reader` into `self` by delegating to the memgraph::slk::Load overload.
void SetMainToReplicaRes::Load(SetMainToReplicaRes *self, memgraph::slk::Reader *reader) {
|
||||
memgraph::slk::Load(self, reader);
|
||||
}
|
||||
|
||||
} // namespace coordination
|
||||
|
||||
constexpr utils::TypeInfo coordination::PromoteReplicaToMainReq::kType{utils::TypeId::COORD_FAILOVER_REQ,
|
||||
@ -44,6 +60,12 @@ constexpr utils::TypeInfo coordination::PromoteReplicaToMainReq::kType{utils::Ty
|
||||
constexpr utils::TypeInfo coordination::PromoteReplicaToMainRes::kType{utils::TypeId::COORD_FAILOVER_RES,
|
||||
"CoordPromoteReplicaToMainRes", nullptr};
|
||||
|
||||
// Type descriptor for SetMainToReplicaReq: id COORD_SET_REPL_MAIN_REQ, name "CoordDemoteToReplicaReq".
// NOTE(review): the third initializer is nullptr, presumably "no parent type" - confirm against utils::TypeInfo.
constexpr utils::TypeInfo coordination::SetMainToReplicaReq::kType{utils::TypeId::COORD_SET_REPL_MAIN_REQ,
|
||||
"CoordDemoteToReplicaReq", nullptr};
|
||||
|
||||
// Type descriptor for SetMainToReplicaRes: id COORD_SET_REPL_MAIN_RES, name "CoordDemoteToReplicaRes".
// NOTE(review): the third initializer is nullptr, presumably "no parent type" - confirm against utils::TypeInfo.
constexpr utils::TypeInfo coordination::SetMainToReplicaRes::kType{utils::TypeId::COORD_SET_REPL_MAIN_RES,
|
||||
"CoordDemoteToReplicaRes", nullptr};
|
||||
|
||||
namespace slk {
|
||||
|
||||
void Save(const memgraph::coordination::PromoteReplicaToMainRes &self, memgraph::slk::Builder *builder) {
|
||||
@ -62,6 +84,22 @@ void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::
|
||||
memgraph::slk::Load(&self->replication_clients_info, reader);
|
||||
}
|
||||
|
||||
// Writes the request's only serialized field, replication_client_info, into `builder`.
void Save(const memgraph::coordination::SetMainToReplicaReq &self, memgraph::slk::Builder *builder) {
|
||||
memgraph::slk::Save(self.replication_client_info, builder);
|
||||
}
|
||||
|
||||
// Reads replication_client_info from `reader` into `self`; mirrors the matching Save overload.
void Load(memgraph::coordination::SetMainToReplicaReq *self, memgraph::slk::Reader *reader) {
|
||||
memgraph::slk::Load(&self->replication_client_info, reader);
|
||||
}
|
||||
|
||||
// Writes the response's only serialized field, the success flag, into `builder`.
void Save(const memgraph::coordination::SetMainToReplicaRes &self, memgraph::slk::Builder *builder) {
|
||||
memgraph::slk::Save(self.success, builder);
|
||||
}
|
||||
|
||||
// Reads the success flag from `reader` into `self`; mirrors the matching Save overload.
void Load(memgraph::coordination::SetMainToReplicaRes *self, memgraph::slk::Reader *reader) {
|
||||
memgraph::slk::Load(&self->success, reader);
|
||||
}
|
||||
|
||||
} // namespace slk
|
||||
|
||||
} // namespace memgraph
|
||||
|
@ -9,207 +9,75 @@
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#include "coordination/coordinator_state.hpp"
|
||||
#include <span>
|
||||
#include "coordination/coordinator_client.hpp"
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_state.hpp"
|
||||
|
||||
#include "coordination/coordinator_config.hpp"
|
||||
#include "coordination/coordinator_entity_info.hpp"
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
#include "flags/replication.hpp"
|
||||
#include "spdlog/spdlog.h"
|
||||
#include "utils/logging.hpp"
|
||||
#include "utils/variant_helpers.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <exception>
|
||||
#include <optional>
|
||||
#include <algorithm>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
namespace {
|
||||
|
||||
bool CheckName(const std::list<CoordinatorClient> &replicas, const CoordinatorClientConfig &config) {
|
||||
auto name_matches = [&instance_name = config.instance_name](auto const &replica) {
|
||||
return replica.InstanceName() == instance_name;
|
||||
};
|
||||
return std::any_of(replicas.begin(), replicas.end(), name_matches);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
CoordinatorState::CoordinatorState() {
|
||||
MG_ASSERT(!(FLAGS_coordinator && FLAGS_coordinator_server_port),
|
||||
"Instance cannot be a coordinator and have registered coordinator server.");
|
||||
|
||||
spdlog::info("Executing coordinator constructor");
|
||||
if (FLAGS_coordinator_server_port) {
|
||||
spdlog::info("Coordinator server port set");
|
||||
auto const config = CoordinatorServerConfig{
|
||||
.ip_address = kDefaultReplicationServerIp,
|
||||
.port = static_cast<uint16_t>(FLAGS_coordinator_server_port),
|
||||
};
|
||||
spdlog::info("Executing coordinator constructor main replica");
|
||||
|
||||
data_ = CoordinatorMainReplicaData{.coordinator_server_ = std::make_unique<CoordinatorServer>(config)};
|
||||
}
|
||||
}
|
||||
|
||||
auto CoordinatorState::RegisterReplica(const CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus, CoordinatorClient *> {
|
||||
const auto name_endpoint_status =
|
||||
std::visit(memgraph::utils::Overloaded{[](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) {
|
||||
return RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR;
|
||||
},
|
||||
[&config](const CoordinatorData &coordinator_data) {
|
||||
if (memgraph::coordination::CheckName(
|
||||
coordinator_data.registered_replicas_, config)) {
|
||||
return RegisterMainReplicaCoordinatorStatus::NAME_EXISTS;
|
||||
}
|
||||
return RegisterMainReplicaCoordinatorStatus::SUCCESS;
|
||||
}},
|
||||
data_);
|
||||
auto CoordinatorState::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_),
|
||||
"Coordinator cannot register replica since variant holds wrong alternative");
|
||||
|
||||
if (name_endpoint_status != RegisterMainReplicaCoordinatorStatus::SUCCESS) {
|
||||
return name_endpoint_status;
|
||||
}
|
||||
|
||||
// Maybe no need to return client if you can start replica client here
|
||||
return &std::get<CoordinatorData>(data_).registered_replicas_.emplace_back(config);
|
||||
}
|
||||
|
||||
auto CoordinatorState::RegisterMain(const CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus, CoordinatorClient *> {
|
||||
const auto endpoint_status = std::visit(
|
||||
return std::visit(
|
||||
memgraph::utils::Overloaded{
|
||||
[](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) {
|
||||
return RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR;
|
||||
return RegisterInstanceCoordinatorStatus::NOT_COORDINATOR;
|
||||
},
|
||||
[](const CoordinatorData & /*coordinator_data*/) { return RegisterMainReplicaCoordinatorStatus::SUCCESS; }},
|
||||
[config](CoordinatorData &coordinator_data) { return coordinator_data.RegisterInstance(config); }},
|
||||
data_);
|
||||
|
||||
if (endpoint_status != RegisterMainReplicaCoordinatorStatus::SUCCESS) {
|
||||
return endpoint_status;
|
||||
}
|
||||
|
||||
auto ®istered_main = std::get<CoordinatorData>(data_).registered_main_;
|
||||
registered_main = std::make_unique<CoordinatorClient>(config);
|
||||
return registered_main.get();
|
||||
}
|
||||
|
||||
auto CoordinatorState::ShowReplicas() const -> std::vector<CoordinatorEntityInfo> {
|
||||
auto CoordinatorState::SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_),
|
||||
"Can't call show replicas on data_, as variant holds wrong alternative");
|
||||
std::vector<CoordinatorEntityInfo> result;
|
||||
const auto ®istered_replicas = std::get<CoordinatorData>(data_).registered_replicas_;
|
||||
result.reserve(registered_replicas.size());
|
||||
std::ranges::transform(registered_replicas, std::back_inserter(result), [](const auto &replica) {
|
||||
return CoordinatorEntityInfo{replica.InstanceName(), replica.Endpoint()};
|
||||
});
|
||||
return result;
|
||||
"Coordinator cannot register replica since variant holds wrong alternative");
|
||||
|
||||
return std::visit(
|
||||
memgraph::utils::Overloaded{[](const CoordinatorMainReplicaData & /*coordinator_main_replica_data*/) {
|
||||
return SetInstanceToMainCoordinatorStatus::NOT_COORDINATOR;
|
||||
},
|
||||
[&instance_name](CoordinatorData &coordinator_data) {
|
||||
return coordinator_data.SetInstanceToMain(instance_name);
|
||||
}},
|
||||
data_);
|
||||
}
|
||||
|
||||
auto CoordinatorState::ShowMain() const -> std::optional<CoordinatorEntityInfo> {
|
||||
auto CoordinatorState::ShowInstances() const -> std::vector<CoordinatorInstanceStatus> {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_),
|
||||
"Can't call show main on data_, as variant holds wrong alternative");
|
||||
const auto ®istered_main = std::get<CoordinatorData>(data_).registered_main_;
|
||||
if (registered_main) {
|
||||
return CoordinatorEntityInfo{registered_main->InstanceName(), registered_main->Endpoint()};
|
||||
}
|
||||
return std::nullopt;
|
||||
"Can't call show instances on data_, as variant holds wrong alternative");
|
||||
return std::get<CoordinatorData>(data_).ShowInstances();
|
||||
}
|
||||
|
||||
auto CoordinatorState::PingReplicas() const -> std::unordered_map<std::string_view, bool> {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_),
|
||||
"Can't call ping replicas on data_, as variant holds wrong alternative");
|
||||
std::unordered_map<std::string_view, bool> result;
|
||||
const auto &registered_replicas = std::get<CoordinatorData>(data_).registered_replicas_;
|
||||
result.reserve(registered_replicas.size());
|
||||
for (const CoordinatorClient &replica_client : registered_replicas) {
|
||||
result.emplace(replica_client.InstanceName(), replica_client.DoHealthCheck());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto CoordinatorState::PingMain() const -> std::optional<CoordinatorEntityHealthInfo> {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_),
|
||||
"Can't call show main on data_, as variant holds wrong alternative");
|
||||
const auto &registered_main = std::get<CoordinatorData>(data_).registered_main_;
|
||||
if (registered_main) {
|
||||
return CoordinatorEntityHealthInfo{registered_main->InstanceName(), registered_main->DoHealthCheck()};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto CoordinatorState::DoFailover() -> DoFailoverStatus {
|
||||
// 1. MAIN is already down, stop sending frequent checks
|
||||
// 2. find new replica (coordinator)
|
||||
// 3. make copy replica's client as potential new main client (coordinator)
|
||||
// 4. send failover RPC to new main (coordinator and new main)
|
||||
// 5. exchange old main to new main (coordinator)
|
||||
// 6. remove replica which was promoted to main from all replicas -> this will shut down RPC frequent check client
|
||||
// (coordinator)
|
||||
// 7. for new main start frequent checks (coordinator)
|
||||
|
||||
[[nodiscard]] auto CoordinatorState::DoFailover() -> DoFailoverStatus {
|
||||
MG_ASSERT(std::holds_alternative<CoordinatorData>(data_), "Cannot do failover since variant holds wrong alternative");
|
||||
using ReplicationClientInfo = CoordinatorClientConfig::ReplicationClientInfo;
|
||||
|
||||
// 1.
|
||||
auto &current_main = std::get<CoordinatorData>(data_).registered_main_;
|
||||
|
||||
if (!current_main) {
|
||||
return DoFailoverStatus::CLUSTER_UNINITIALIZED;
|
||||
}
|
||||
|
||||
if (current_main->DoHealthCheck()) {
|
||||
return DoFailoverStatus::MAIN_ALIVE;
|
||||
}
|
||||
current_main->StopFrequentCheck();
|
||||
|
||||
// 2.
|
||||
// Get all replicas and find new main
|
||||
auto &registered_replicas = std::get<CoordinatorData>(data_).registered_replicas_;
|
||||
|
||||
const auto chosen_replica = std::ranges::find_if(
|
||||
registered_replicas, [](const CoordinatorClient &replica) { return replica.DoHealthCheck(); });
|
||||
if (chosen_replica == registered_replicas.end()) {
|
||||
return DoFailoverStatus::ALL_REPLICAS_DOWN;
|
||||
}
|
||||
|
||||
std::vector<ReplicationClientInfo> repl_clients_info;
|
||||
repl_clients_info.reserve(registered_replicas.size() - 1);
|
||||
std::ranges::for_each(registered_replicas, [&chosen_replica, &repl_clients_info](const CoordinatorClient &replica) {
|
||||
if (replica != *chosen_replica) {
|
||||
repl_clients_info.emplace_back(replica.ReplicationClientInfo());
|
||||
}
|
||||
});
|
||||
|
||||
// 3.
|
||||
// Set on coordinator data of new main
|
||||
// allocate resources for new main, clear replication info on this replica as main
|
||||
// set last response time
|
||||
auto potential_new_main = std::make_unique<CoordinatorClient>(chosen_replica->Config());
|
||||
potential_new_main->ReplicationClientInfo().reset();
|
||||
potential_new_main->UpdateTimeCheck(chosen_replica->GetLastTimeResponse());
|
||||
|
||||
// 4.
|
||||
if (!chosen_replica->SendPromoteReplicaToMainRpc(std::move(repl_clients_info))) {
|
||||
spdlog::error("Sent RPC message, but exception was caught, aborting Failover");
|
||||
// TODO: new status and rollback all changes that were done...
|
||||
MG_ASSERT(false, "RPC message failed");
|
||||
}
|
||||
|
||||
// 5.
|
||||
current_main = std::move(potential_new_main);
|
||||
|
||||
// 6. remove old replica
|
||||
// TODO: Stop pinging chosen_replica before failover.
|
||||
// Check that it doesn't fail when you call StopFrequentCheck if it is already stopped
|
||||
registered_replicas.erase(chosen_replica);
|
||||
|
||||
// 7.
|
||||
current_main->StartFrequentCheck();
|
||||
|
||||
return DoFailoverStatus::SUCCESS;
|
||||
auto &coord_state = std::get<CoordinatorData>(data_);
|
||||
return coord_state.DoFailover();
|
||||
}
|
||||
|
||||
auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & {
|
||||
|
@ -16,53 +16,56 @@
|
||||
#include "coordination/coordinator_config.hpp"
|
||||
#include "rpc/client.hpp"
|
||||
#include "utils/scheduler.hpp"
|
||||
#include "utils/thread_pool.hpp"
|
||||
|
||||
#include <string_view>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
class CoordinatorData;
|
||||
using HealthCheckCallback = std::function<void(CoordinatorData *, std::string_view)>;
|
||||
using ReplicationClientsInfo = std::vector<ReplClientInfo>;
|
||||
|
||||
class CoordinatorClient {
|
||||
public:
|
||||
explicit CoordinatorClient(const CoordinatorClientConfig &config);
|
||||
explicit CoordinatorClient(CoordinatorData *coord_data_, CoordinatorClientConfig config, HealthCheckCallback succ_cb,
|
||||
HealthCheckCallback fail_cb);
|
||||
|
||||
~CoordinatorClient();
|
||||
~CoordinatorClient() = default;
|
||||
|
||||
CoordinatorClient(CoordinatorClient &other) = delete;
|
||||
CoordinatorClient &operator=(CoordinatorClient const &other) = delete;
|
||||
CoordinatorClient(CoordinatorClient &) = delete;
|
||||
CoordinatorClient &operator=(CoordinatorClient const &) = delete;
|
||||
|
||||
CoordinatorClient(CoordinatorClient &&) noexcept = delete;
|
||||
CoordinatorClient &operator=(CoordinatorClient &&) noexcept = delete;
|
||||
|
||||
void StartFrequentCheck();
|
||||
void StopFrequentCheck();
|
||||
void PauseFrequentCheck();
|
||||
void ResumeFrequentCheck();
|
||||
|
||||
auto DoHealthCheck() const -> bool;
|
||||
auto SendPromoteReplicaToMainRpc(
|
||||
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info) const -> bool;
|
||||
auto InstanceName() const -> std::string;
|
||||
auto SocketAddress() const -> std::string;
|
||||
|
||||
auto InstanceName() const -> std::string_view;
|
||||
auto Endpoint() const -> io::network::Endpoint const &;
|
||||
auto Config() const -> CoordinatorClientConfig const &;
|
||||
auto ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo const &;
|
||||
auto ReplicationClientInfo() -> std::optional<CoordinatorClientConfig::ReplicationClientInfo> &;
|
||||
void UpdateTimeCheck(const std::chrono::system_clock::time_point &last_checked_time);
|
||||
auto GetLastTimeResponse() -> std::chrono::system_clock::time_point;
|
||||
[[nodiscard]] auto SendPromoteReplicaToMainRpc(ReplicationClientsInfo replication_clients_info) const -> bool;
|
||||
[[nodiscard]] auto DemoteToReplica() const -> bool;
|
||||
|
||||
auto ReplicationClientInfo() const -> ReplClientInfo;
|
||||
|
||||
auto SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void;
|
||||
|
||||
friend bool operator==(CoordinatorClient const &first, CoordinatorClient const &second) {
|
||||
return first.config_ == second.config_;
|
||||
}
|
||||
|
||||
private:
|
||||
utils::ThreadPool thread_pool_{1};
|
||||
utils::Scheduler replica_checker_;
|
||||
utils::Scheduler instance_checker_;
|
||||
|
||||
// TODO: (andi) Pimpl?
|
||||
communication::ClientContext rpc_context_;
|
||||
mutable rpc::Client rpc_client_;
|
||||
CoordinatorClientConfig config_;
|
||||
|
||||
std::atomic<std::chrono::system_clock::time_point> last_response_time_{};
|
||||
static constexpr int alive_response_time_difference_sec_{5};
|
||||
CoordinatorClientConfig config_;
|
||||
CoordinatorData *coord_data_;
|
||||
HealthCheckCallback succ_cb_;
|
||||
HealthCheckCallback fail_cb_;
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
|
@ -0,0 +1,22 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
namespace memgraph::coordination {
|
||||
|
||||
struct CoordinatorClusterConfig {
|
||||
static constexpr int alive_response_time_difference_sec_{5};
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -25,16 +25,14 @@ namespace memgraph::coordination {
|
||||
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
|
||||
|
||||
struct CoordinatorClientConfig {
|
||||
const std::string instance_name;
|
||||
const std::string ip_address;
|
||||
const uint16_t port{};
|
||||
std::string instance_name;
|
||||
std::string ip_address;
|
||||
uint16_t port{};
|
||||
std::chrono::seconds health_check_frequency_sec{1};
|
||||
|
||||
// Frequency with which coordinator pings main/replicas about it status
|
||||
const std::chrono::seconds health_check_frequency_sec{1};
|
||||
auto SocketAddress() const -> std::string { return ip_address + ":" + std::to_string(port); }
|
||||
|
||||
// Info which coordinator will send to new main when performing failover
|
||||
struct ReplicationClientInfo {
|
||||
// Should be the same as CoordinatorClientConfig's instance_name
|
||||
std::string instance_name;
|
||||
replication_coordination_glue::ReplicationMode replication_mode{};
|
||||
std::string replication_ip_address;
|
||||
@ -43,20 +41,22 @@ struct CoordinatorClientConfig {
|
||||
friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default;
|
||||
};
|
||||
|
||||
std::optional<ReplicationClientInfo> replication_client_info;
|
||||
ReplicationClientInfo replication_client_info;
|
||||
|
||||
struct SSL {
|
||||
const std::string key_file;
|
||||
const std::string cert_file;
|
||||
std::string key_file;
|
||||
std::string cert_file;
|
||||
|
||||
friend bool operator==(const SSL &, const SSL &) = default;
|
||||
};
|
||||
|
||||
const std::optional<SSL> ssl;
|
||||
std::optional<SSL> ssl;
|
||||
|
||||
friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default;
|
||||
};
|
||||
|
||||
using ReplClientInfo = CoordinatorClientConfig::ReplicationClientInfo;
|
||||
|
||||
struct CoordinatorServerConfig {
|
||||
std::string ip_address;
|
||||
uint16_t port{};
|
||||
|
53
src/coordination/include/coordination/coordinator_data.hpp
Normal file
53
src/coordination/include/coordination/coordinator_data.hpp
Normal file
@ -0,0 +1,53 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_instance.hpp"
|
||||
#include "coordination/coordinator_instance_status.hpp"
|
||||
#include "coordination/coordinator_server.hpp"
|
||||
#include "coordination/failover_status.hpp"
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
#include "utils/rw_lock.hpp"
|
||||
#include "utils/thread_pool.hpp"
|
||||
|
||||
#include <list>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
class CoordinatorData {
|
||||
public:
|
||||
CoordinatorData();
|
||||
|
||||
[[nodiscard]] auto DoFailover() -> DoFailoverStatus;
|
||||
|
||||
[[nodiscard]] auto RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus;
|
||||
[[nodiscard]] auto SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus;
|
||||
|
||||
auto ShowInstances() const -> std::vector<CoordinatorInstanceStatus>;
|
||||
|
||||
private:
|
||||
auto ClusterHasAliveMain() const -> bool;
|
||||
|
||||
mutable utils::RWLock coord_data_lock_{utils::RWLock::Priority::READ};
|
||||
HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_;
|
||||
// Must be std::list because we rely on pointer stability
|
||||
std::list<CoordinatorInstance> registered_instances_;
|
||||
utils::ThreadPool thread_pool_{1};
|
||||
};
|
||||
|
||||
struct CoordinatorMainReplicaData {
|
||||
std::unique_ptr<CoordinatorServer> coordinator_server_;
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -16,16 +16,16 @@
|
||||
#include "utils/exceptions.hpp"
|
||||
|
||||
namespace memgraph::coordination {
|
||||
class CoordinatorFailoverException final : public utils::BasicException {
|
||||
class CoordinatorRegisterInstanceException final : public utils::BasicException {
|
||||
public:
|
||||
explicit CoordinatorFailoverException(const std::string_view what) noexcept
|
||||
: BasicException("Failover didn't complete successfully: " + std::string(what)) {}
|
||||
explicit CoordinatorRegisterInstanceException(const std::string_view what) noexcept
|
||||
: BasicException("Failed to create instance: " + std::string(what)) {}
|
||||
|
||||
template <class... Args>
|
||||
explicit CoordinatorFailoverException(fmt::format_string<Args...> fmt, Args &&...args) noexcept
|
||||
: CoordinatorFailoverException(fmt::format(fmt, std::forward<Args>(args)...)) {}
|
||||
explicit CoordinatorRegisterInstanceException(fmt::format_string<Args...> fmt, Args &&...args) noexcept
|
||||
: CoordinatorRegisterInstanceException(fmt::format(fmt, std::forward<Args>(args)...)) {}
|
||||
|
||||
SPECIALIZE_GET_EXCEPTION_NAME(CoordinatorFailoverException)
|
||||
SPECIALIZE_GET_EXCEPTION_NAME(CoordinatorRegisterInstanceException)
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
|
@ -0,0 +1,68 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_client.hpp"
|
||||
#include "coordination/coordinator_cluster_config.hpp"
|
||||
#include "coordination/coordinator_exceptions.hpp"
|
||||
#include "replication_coordination_glue/role.hpp"
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
class CoordinatorData;
|
||||
|
||||
class CoordinatorInstance {
|
||||
public:
|
||||
CoordinatorInstance(CoordinatorData *data, CoordinatorClientConfig config, HealthCheckCallback succ_cb,
|
||||
HealthCheckCallback fail_cb);
|
||||
|
||||
CoordinatorInstance(CoordinatorInstance const &other) = delete;
|
||||
CoordinatorInstance &operator=(CoordinatorInstance const &other) = delete;
|
||||
CoordinatorInstance(CoordinatorInstance &&other) noexcept = delete;
|
||||
CoordinatorInstance &operator=(CoordinatorInstance &&other) noexcept = delete;
|
||||
~CoordinatorInstance() = default;
|
||||
|
||||
auto OnSuccessPing() -> void;
|
||||
auto OnFailPing() -> bool;
|
||||
|
||||
auto IsAlive() const -> bool;
|
||||
|
||||
auto InstanceName() const -> std::string;
|
||||
auto SocketAddress() const -> std::string;
|
||||
|
||||
auto IsReplica() const -> bool;
|
||||
auto IsMain() const -> bool;
|
||||
|
||||
auto PromoteToMain(ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb,
|
||||
HealthCheckCallback main_fail_cb) -> bool;
|
||||
auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool;
|
||||
|
||||
auto PauseFrequentCheck() -> void;
|
||||
auto ResumeFrequentCheck() -> void;
|
||||
|
||||
auto ReplicationClientInfo() const -> ReplClientInfo;
|
||||
|
||||
private:
|
||||
CoordinatorClient client_;
|
||||
replication_coordination_glue::ReplicationRole replication_role_;
|
||||
std::chrono::system_clock::time_point last_response_time_{};
|
||||
bool is_alive_{false};
|
||||
|
||||
friend bool operator==(CoordinatorInstance const &first, CoordinatorInstance const &second) {
|
||||
return first.client_ == second.client_ && first.replication_role_ == second.replication_role_;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -15,18 +15,15 @@
|
||||
|
||||
#include "io/network/endpoint.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
struct CoordinatorEntityInfo {
|
||||
std::string_view name;
|
||||
const io::network::Endpoint &endpoint;
|
||||
};
|
||||
|
||||
struct CoordinatorEntityHealthInfo {
|
||||
std::string_view name;
|
||||
bool alive;
|
||||
struct CoordinatorInstanceStatus {
|
||||
std::string instance_name;
|
||||
std::string socket_address;
|
||||
std::string replication_role;
|
||||
bool is_alive;
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
@ -48,6 +48,36 @@ struct PromoteReplicaToMainRes {
|
||||
|
||||
using PromoteReplicaToMainRpc = rpc::RequestResponse<PromoteReplicaToMainReq, PromoteReplicaToMainRes>;
|
||||
|
||||
struct SetMainToReplicaReq {
|
||||
static const utils::TypeInfo kType;
|
||||
static const utils::TypeInfo &GetTypeInfo() { return kType; }
|
||||
|
||||
static void Load(SetMainToReplicaReq *self, memgraph::slk::Reader *reader);
|
||||
static void Save(const SetMainToReplicaReq &self, memgraph::slk::Builder *builder);
|
||||
|
||||
explicit SetMainToReplicaReq(CoordinatorClientConfig::ReplicationClientInfo replication_client_info)
|
||||
: replication_client_info(std::move(replication_client_info)) {}
|
||||
|
||||
SetMainToReplicaReq() = default;
|
||||
|
||||
CoordinatorClientConfig::ReplicationClientInfo replication_client_info;
|
||||
};
|
||||
|
||||
struct SetMainToReplicaRes {
|
||||
static const utils::TypeInfo kType;
|
||||
static const utils::TypeInfo &GetTypeInfo() { return kType; }
|
||||
|
||||
static void Load(SetMainToReplicaRes *self, memgraph::slk::Reader *reader);
|
||||
static void Save(const SetMainToReplicaRes &self, memgraph::slk::Builder *builder);
|
||||
|
||||
explicit SetMainToReplicaRes(bool success) : success(success) {}
|
||||
SetMainToReplicaRes() = default;
|
||||
|
||||
bool success;
|
||||
};
|
||||
|
||||
using SetMainToReplicaRpc = rpc::RequestResponse<SetMainToReplicaReq, SetMainToReplicaRes>;
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
|
||||
// SLK serialization declarations
|
||||
@ -61,6 +91,14 @@ void Save(const memgraph::coordination::PromoteReplicaToMainReq &self, memgraph:
|
||||
|
||||
void Load(memgraph::coordination::PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
|
||||
|
||||
void Save(const memgraph::coordination::SetMainToReplicaRes &self, memgraph::slk::Builder *builder);
|
||||
|
||||
void Load(memgraph::coordination::SetMainToReplicaRes *self, memgraph::slk::Reader *reader);
|
||||
|
||||
void Save(const memgraph::coordination::SetMainToReplicaReq &self, memgraph::slk::Builder *builder);
|
||||
|
||||
void Load(memgraph::coordination::SetMainToReplicaReq *self, memgraph::slk::Reader *reader);
|
||||
|
||||
} // namespace memgraph::slk
|
||||
|
||||
#endif
|
||||
|
@ -13,29 +13,16 @@
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "coordination/coordinator_client.hpp"
|
||||
#include "coordination/coordinator_entity_info.hpp"
|
||||
#include "coordination/coordinator_data.hpp"
|
||||
#include "coordination/coordinator_instance_status.hpp"
|
||||
#include "coordination/coordinator_server.hpp"
|
||||
#include "rpc/server.hpp"
|
||||
#include "utils/result.hpp"
|
||||
#include "utils/rw_spin_lock.hpp"
|
||||
#include "utils/synchronized.hpp"
|
||||
#include "coordination/failover_status.hpp"
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
|
||||
#include <list>
|
||||
#include <variant>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
enum class RegisterMainReplicaCoordinatorStatus : uint8_t {
|
||||
NAME_EXISTS,
|
||||
END_POINT_EXISTS,
|
||||
COULD_NOT_BE_PERSISTED,
|
||||
NOT_COORDINATOR,
|
||||
SUCCESS
|
||||
};
|
||||
|
||||
enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, CLUSTER_UNINITIALIZED };
|
||||
|
||||
class CoordinatorState {
|
||||
public:
|
||||
CoordinatorState();
|
||||
@ -44,49 +31,21 @@ class CoordinatorState {
|
||||
CoordinatorState(const CoordinatorState &) = delete;
|
||||
CoordinatorState &operator=(const CoordinatorState &) = delete;
|
||||
|
||||
CoordinatorState(CoordinatorState &&other) noexcept : data_(std::move(other.data_)) {}
|
||||
CoordinatorState(CoordinatorState &&) noexcept = delete;
|
||||
CoordinatorState &operator=(CoordinatorState &&) noexcept = delete;
|
||||
|
||||
CoordinatorState &operator=(CoordinatorState &&other) noexcept {
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
data_ = std::move(other.data_);
|
||||
return *this;
|
||||
}
|
||||
[[nodiscard]] auto RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus;
|
||||
|
||||
auto RegisterReplica(const CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus, CoordinatorClient *>;
|
||||
[[nodiscard]] auto SetInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus;
|
||||
|
||||
auto RegisterMain(const CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus, CoordinatorClient *>;
|
||||
|
||||
auto ShowReplicas() const -> std::vector<CoordinatorEntityInfo>;
|
||||
|
||||
auto PingReplicas() const -> std::unordered_map<std::string_view, bool>;
|
||||
|
||||
auto ShowMain() const -> std::optional<CoordinatorEntityInfo>;
|
||||
|
||||
auto PingMain() const -> std::optional<CoordinatorEntityHealthInfo>;
|
||||
auto ShowInstances() const -> std::vector<CoordinatorInstanceStatus>;
|
||||
|
||||
// The client code must check that the server exists before calling this method.
|
||||
auto GetCoordinatorServer() const -> CoordinatorServer &;
|
||||
|
||||
auto DoFailover() -> DoFailoverStatus;
|
||||
[[nodiscard]] auto DoFailover() -> DoFailoverStatus;
|
||||
|
||||
private:
|
||||
// TODO: Data is not thread safe
|
||||
|
||||
// Coordinator stores registered replicas and main
|
||||
struct CoordinatorData {
|
||||
std::list<CoordinatorClient> registered_replicas_;
|
||||
std::unique_ptr<CoordinatorClient> registered_main_;
|
||||
};
|
||||
|
||||
// Data which each main and replica stores
|
||||
struct CoordinatorMainReplicaData {
|
||||
std::unique_ptr<CoordinatorServer> coordinator_server_;
|
||||
};
|
||||
|
||||
std::variant<CoordinatorData, CoordinatorMainReplicaData> data_;
|
||||
};
|
||||
|
||||
|
21
src/coordination/include/coordination/failover_status.hpp
Normal file
21
src/coordination/include/coordination/failover_status.hpp
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, RPC_FAILED };
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -0,0 +1,36 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
|
||||
enum class RegisterInstanceCoordinatorStatus : uint8_t {
|
||||
NAME_EXISTS,
|
||||
END_POINT_EXISTS,
|
||||
NOT_COORDINATOR,
|
||||
RPC_FAILED,
|
||||
SUCCESS
|
||||
};
|
||||
|
||||
enum class SetInstanceToMainCoordinatorStatus : uint8_t {
|
||||
NO_INSTANCE_WITH_NAME,
|
||||
NOT_COORDINATOR,
|
||||
SUCCESS,
|
||||
COULD_NOT_PROMOTE_TO_MAIN,
|
||||
};
|
||||
|
||||
} // namespace memgraph::coordination
|
||||
#endif
|
@ -9,6 +9,7 @@
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
#ifdef MG_ENTERPRISE
|
||||
|
||||
#include "dbms/coordinator_handler.hpp"
|
||||
@ -19,81 +20,19 @@ namespace memgraph::dbms {
|
||||
|
||||
CoordinatorHandler::CoordinatorHandler(DbmsHandler &dbms_handler) : dbms_handler_(dbms_handler) {}
|
||||
|
||||
auto CoordinatorHandler::RegisterReplicaOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus> {
|
||||
auto instance_client = dbms_handler_.CoordinatorState().RegisterReplica(config);
|
||||
using repl_status = memgraph::coordination::RegisterMainReplicaCoordinatorStatus;
|
||||
using dbms_status = memgraph::dbms::RegisterMainReplicaCoordinatorStatus;
|
||||
if (instance_client.HasError()) {
|
||||
switch (instance_client.GetError()) {
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR:
|
||||
MG_ASSERT(false, "Only coordinator instance can register main and replica!");
|
||||
return {};
|
||||
case repl_status::NAME_EXISTS:
|
||||
return dbms_status::NAME_EXISTS;
|
||||
case repl_status::END_POINT_EXISTS:
|
||||
return dbms_status::END_POINT_EXISTS;
|
||||
case repl_status::COULD_NOT_BE_PERSISTED:
|
||||
return dbms_status::COULD_NOT_BE_PERSISTED;
|
||||
case repl_status::SUCCESS:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
instance_client.GetValue()->StartFrequentCheck();
|
||||
return {};
|
||||
auto CoordinatorHandler::RegisterInstance(memgraph::coordination::CoordinatorClientConfig config)
|
||||
-> coordination::RegisterInstanceCoordinatorStatus {
|
||||
return dbms_handler_.CoordinatorState().RegisterInstance(config);
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::RegisterMainOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus> {
|
||||
auto instance_client = dbms_handler_.CoordinatorState().RegisterMain(config);
|
||||
if (instance_client.HasError()) switch (instance_client.GetError()) {
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NOT_COORDINATOR:
|
||||
MG_ASSERT(false, "Only coordinator instance can register main and replica!");
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::NAME_EXISTS:
|
||||
return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::NAME_EXISTS;
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::END_POINT_EXISTS:
|
||||
return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::END_POINT_EXISTS;
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::COULD_NOT_BE_PERSISTED:
|
||||
return memgraph::dbms::RegisterMainReplicaCoordinatorStatus::COULD_NOT_BE_PERSISTED;
|
||||
case memgraph::coordination::RegisterMainReplicaCoordinatorStatus::SUCCESS:
|
||||
break;
|
||||
}
|
||||
|
||||
instance_client.GetValue()->StartFrequentCheck();
|
||||
return {};
|
||||
auto CoordinatorHandler::SetInstanceToMain(std::string instance_name)
|
||||
-> coordination::SetInstanceToMainCoordinatorStatus {
|
||||
return dbms_handler_.CoordinatorState().SetInstanceToMain(std::move(instance_name));
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::ShowReplicasOnCoordinator() const -> std::vector<coordination::CoordinatorEntityInfo> {
|
||||
return dbms_handler_.CoordinatorState().ShowReplicas();
|
||||
auto CoordinatorHandler::ShowInstances() const -> std::vector<coordination::CoordinatorInstanceStatus> {
|
||||
return dbms_handler_.CoordinatorState().ShowInstances();
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::PingReplicasOnCoordinator() const -> std::unordered_map<std::string_view, bool> {
|
||||
return dbms_handler_.CoordinatorState().PingReplicas();
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::ShowMainOnCoordinator() const -> std::optional<coordination::CoordinatorEntityInfo> {
|
||||
return dbms_handler_.CoordinatorState().ShowMain();
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::PingMainOnCoordinator() const -> std::optional<coordination::CoordinatorEntityHealthInfo> {
|
||||
return dbms_handler_.CoordinatorState().PingMain();
|
||||
}
|
||||
|
||||
auto CoordinatorHandler::DoFailover() const -> DoFailoverStatus {
|
||||
auto status = dbms_handler_.CoordinatorState().DoFailover();
|
||||
switch (status) {
|
||||
case memgraph::coordination::DoFailoverStatus::ALL_REPLICAS_DOWN:
|
||||
return memgraph::dbms::DoFailoverStatus::ALL_REPLICAS_DOWN;
|
||||
case memgraph::coordination::DoFailoverStatus::SUCCESS:
|
||||
return memgraph::dbms::DoFailoverStatus::SUCCESS;
|
||||
case memgraph::coordination::DoFailoverStatus::MAIN_ALIVE:
|
||||
return memgraph::dbms::DoFailoverStatus::MAIN_ALIVE;
|
||||
case memgraph::coordination::DoFailoverStatus::CLUSTER_UNINITIALIZED:
|
||||
return memgraph::dbms::DoFailoverStatus::CLUSTER_UNINITIALIZED;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace memgraph::dbms
|
||||
|
||||
#endif
|
||||
|
@ -15,49 +15,29 @@
|
||||
|
||||
#include "utils/result.hpp"
|
||||
|
||||
#include "coordination/coordinator_config.hpp"
|
||||
#include "coordination/coordinator_instance_status.hpp"
|
||||
#include "coordination/failover_status.hpp"
|
||||
#include "coordination/register_main_replica_coordinator_status.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace memgraph::coordination {
|
||||
struct CoordinatorEntityInfo;
|
||||
struct CoordinatorEntityHealthInfo;
|
||||
struct CoordinatorClientConfig;
|
||||
} // namespace memgraph::coordination
|
||||
|
||||
namespace memgraph::dbms {
|
||||
|
||||
// Status codes for registering a main or replica instance on the coordinator.
// Used as the error type of the BasicResult returned by
// CoordinatorHandler::RegisterReplicaOnCoordinator and RegisterMainOnCoordinator.
enum class RegisterMainReplicaCoordinatorStatus : uint8_t {
|
||||
NAME_EXISTS,
|
||||
END_POINT_EXISTS,
|
||||
COULD_NOT_BE_PERSISTED,
|
||||
NOT_COORDINATOR,
|
||||
SUCCESS
|
||||
};
|
||||
|
||||
// dbms-layer result of CoordinatorHandler::DoFailover(); each enumerator has a
// same-named counterpart in memgraph::coordination::DoFailoverStatus.
enum class DoFailoverStatus : uint8_t { SUCCESS, ALL_REPLICAS_DOWN, MAIN_ALIVE, CLUSTER_UNINITIALIZED };
|
||||
|
||||
class DbmsHandler;
|
||||
|
||||
class CoordinatorHandler {
|
||||
public:
|
||||
explicit CoordinatorHandler(DbmsHandler &dbms_handler);
|
||||
|
||||
auto RegisterReplicaOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus>;
|
||||
auto RegisterInstance(coordination::CoordinatorClientConfig config)
|
||||
-> coordination::RegisterInstanceCoordinatorStatus;
|
||||
|
||||
auto RegisterMainOnCoordinator(const memgraph::coordination::CoordinatorClientConfig &config)
|
||||
-> utils::BasicResult<RegisterMainReplicaCoordinatorStatus>;
|
||||
auto SetInstanceToMain(std::string instance_name) -> coordination::SetInstanceToMainCoordinatorStatus;
|
||||
|
||||
auto ShowReplicasOnCoordinator() const -> std::vector<memgraph::coordination::CoordinatorEntityInfo>;
|
||||
|
||||
auto ShowMainOnCoordinator() const -> std::optional<memgraph::coordination::CoordinatorEntityInfo>;
|
||||
|
||||
auto PingReplicasOnCoordinator() const -> std::unordered_map<std::string_view, bool>;
|
||||
|
||||
auto PingMainOnCoordinator() const -> std::optional<memgraph::coordination::CoordinatorEntityHealthInfo>;
|
||||
|
||||
auto DoFailover() const -> DoFailoverStatus;
|
||||
auto ShowInstances() const -> std::vector<coordination::CoordinatorInstanceStatus>;
|
||||
|
||||
private:
|
||||
DbmsHandler &dbms_handler_;
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include "dbms/dbms_handler.hpp"
|
||||
#include "dbms/replication_client.hpp"
|
||||
|
||||
#include "range/v3/view.hpp"
|
||||
|
||||
namespace memgraph::dbms {
|
||||
|
||||
void CoordinatorHandlers::Register(DbmsHandler &dbms_handler) {
|
||||
@ -26,9 +28,42 @@ void CoordinatorHandlers::Register(DbmsHandler &dbms_handler) {
|
||||
|
||||
server.Register<coordination::PromoteReplicaToMainRpc>(
|
||||
[&dbms_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void {
|
||||
spdlog::info("Received PromoteReplicaToMainRpc from coordinator server");
|
||||
spdlog::info("Received PromoteReplicaToMainRpc");
|
||||
CoordinatorHandlers::PromoteReplicaToMainHandler(dbms_handler, req_reader, res_builder);
|
||||
});
|
||||
|
||||
server.Register<coordination::SetMainToReplicaRpc>(
|
||||
[&dbms_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void {
|
||||
spdlog::info("Received SetMainToReplicaRpc from coordinator server");
|
||||
CoordinatorHandlers::SetMainToReplicaHandler(dbms_handler, req_reader, res_builder);
|
||||
});
|
||||
}
|
||||
|
||||
// RPC handler for SetMainToReplicaRpc: switches this instance from MAIN to REPLICA.
//
// @param dbms_handler  handler whose replication state is switched.
// @param req_reader    stream the SetMainToReplicaReq is loaded from.
// @param res_builder   stream the SetMainToReplicaRes is saved to.
//
// Every exit path replies with a SetMainToReplicaRes carrying the success flag. The
// reply type has to match the RPC this handler is registered for, so the
// PromoteReplicaToMainRes that was sent on the later paths has been replaced.
void CoordinatorHandlers::SetMainToReplicaHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader,
                                                  slk::Builder *res_builder) {
  auto &repl_state = dbms_handler.ReplicationState();
  spdlog::info("Executing SetMainToReplicaHandler");

  // Only an instance that is not already a replica can be switched.
  if (repl_state.IsReplica()) {
    spdlog::error("Setting to replica must be performed on main.");
    slk::Save(coordination::SetMainToReplicaRes{false}, res_builder);
    return;
  }

  coordination::SetMainToReplicaReq req;
  slk::Load(&req, req_reader);

  // Endpoint on which the new replica's replication server is created.
  const replication::ReplicationServerConfig clients_config{
      .ip_address = req.replication_client_info.replication_ip_address,
      .port = req.replication_client_info.replication_port};

  if (bool success = memgraph::dbms::SetReplicationRoleReplica(dbms_handler, clients_config); !success) {
    spdlog::error("Setting main to replica failed!");
    slk::Save(coordination::SetMainToReplicaRes{false}, res_builder);
    return;
  }

  slk::Save(coordination::SetMainToReplicaRes{true}, res_builder);
}
|
||||
|
||||
void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader,
|
||||
@ -36,11 +71,15 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler,
|
||||
auto &repl_state = dbms_handler.ReplicationState();
|
||||
|
||||
if (!repl_state.IsReplica()) {
|
||||
spdlog::error("Failover must be performed on replica!");
|
||||
spdlog::error("Only replica can be promoted to main!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
return;
|
||||
}
|
||||
|
||||
auto repl_server_config = std::get<replication::RoleReplicaData>(repl_state.ReplicationData()).config;
|
||||
|
||||
// This can fail because of disk. If it does, the cluster state could get inconsistent.
|
||||
// We don't handle disk issues.
|
||||
if (bool success = memgraph::dbms::DoReplicaToMainPromotion(dbms_handler); !success) {
|
||||
spdlog::error("Promoting replica to main failed!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
@ -50,34 +89,40 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler,
|
||||
coordination::PromoteReplicaToMainReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
|
||||
std::vector<replication::ReplicationClientConfig> clients_config;
|
||||
clients_config.reserve(req.replication_clients_info.size());
|
||||
std::ranges::transform(req.replication_clients_info, std::back_inserter(clients_config),
|
||||
[](const auto &repl_info_config) {
|
||||
auto const converter = [](const auto &repl_info_config) {
|
||||
return replication::ReplicationClientConfig{
|
||||
.name = repl_info_config.instance_name,
|
||||
.mode = repl_info_config.replication_mode,
|
||||
.ip_address = repl_info_config.replication_ip_address,
|
||||
.port = repl_info_config.replication_port,
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
std::ranges::for_each(clients_config, [&dbms_handler, &repl_state, &res_builder](const auto &config) {
|
||||
MG_ASSERT(
|
||||
std::get<replication::RoleMainData>(repl_state.ReplicationData()).registered_replicas_.empty(),
|
||||
"No replicas should be registered after promoting replica to main and before registering replication clients!");
|
||||
|
||||
// registering replicas
|
||||
for (auto const &config : req.replication_clients_info | ranges::views::transform(converter)) {
|
||||
auto instance_client = repl_state.RegisterReplica(config);
|
||||
if (instance_client.HasError()) {
|
||||
switch (instance_client.GetError()) {
|
||||
// Can't happen, we are already replica
|
||||
case memgraph::replication::RegisterReplicaError::NOT_MAIN:
|
||||
spdlog::error("Failover must be performed to main!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
return;
|
||||
// Can't happen, checked on the coordinator side
|
||||
case memgraph::replication::RegisterReplicaError::NAME_EXISTS:
|
||||
spdlog::error("Replica with the same name already exists!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
return;
|
||||
case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS:
|
||||
// Can't happen, checked on the coordinator side
|
||||
case memgraph::replication::RegisterReplicaError::ENDPOINT_EXISTS:
|
||||
spdlog::error("Replica with the same endpoint already exists!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
return;
|
||||
// We don't handle disk issues
|
||||
case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED:
|
||||
spdlog::error("Registered replica could not be persisted!");
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
@ -86,18 +131,22 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(DbmsHandler &dbms_handler,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto &instance_client_ref = *instance_client.GetValue();
|
||||
const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler, instance_client_ref);
|
||||
|
||||
if (!all_clients_good) {
|
||||
spdlog::error("Failed to register all databases to the REPLICA \"{}\"", config.name);
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{false}, res_builder);
|
||||
return;
|
||||
if (!allow_mt_repl && dbms_handler.All().size() > 1) {
|
||||
spdlog::warn("Multi-tenant replication is currently not supported!");
|
||||
}
|
||||
|
||||
auto &instance_client_ref = *instance_client.GetValue();
|
||||
|
||||
// Update system before enabling individual storage <-> replica clients
|
||||
dbms_handler.SystemRestore(instance_client_ref);
|
||||
|
||||
// TODO: (andi) Policy for register all databases
|
||||
// Will be resolved after deciding about choosing new replica
|
||||
const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler, instance_client_ref);
|
||||
MG_ASSERT(all_clients_good, "Failed to register one or more databases to the REPLICA \"{}\".", config.name);
|
||||
|
||||
StartReplicaClient(dbms_handler, instance_client_ref);
|
||||
});
|
||||
}
|
||||
|
||||
slk::Save(coordination::PromoteReplicaToMainRes{true}, res_builder);
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ class CoordinatorHandlers {
|
||||
private:
|
||||
static void PromoteReplicaToMainHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader,
|
||||
slk::Builder *res_builder);
|
||||
static void SetMainToReplicaHandler(DbmsHandler &dbms_handler, slk::Reader *req_reader, slk::Builder *res_builder);
|
||||
};
|
||||
|
||||
} // namespace memgraph::dbms
|
||||
|
@ -110,7 +110,7 @@ class Database {
|
||||
* @param force_directory Use the configured directory, do not try to decipher the multi-db version
|
||||
* @return DatabaseInfo
|
||||
*/
|
||||
DatabaseInfo GetInfo(bool force_directory, replication::ReplicationRole replication_role) const {
|
||||
DatabaseInfo GetInfo(bool force_directory, replication_coordination_glue::ReplicationRole replication_role) const {
|
||||
DatabaseInfo info;
|
||||
info.storage_info = storage_->GetInfo(force_directory, replication_role);
|
||||
info.triggers = trigger_store_.GetTriggerInfo().size();
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "storage/v2/inmemory/unique_constraints.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
using memgraph::storage::Delta;
|
||||
using memgraph::storage::EdgeAccessor;
|
||||
using memgraph::storage::EdgeRef;
|
||||
|
@ -38,8 +38,8 @@ std::string RegisterReplicaErrorToString(RegisterReplicaError error) {
|
||||
using enum RegisterReplicaError;
|
||||
case NAME_EXISTS:
|
||||
return "NAME_EXISTS";
|
||||
case END_POINT_EXISTS:
|
||||
return "END_POINT_EXISTS";
|
||||
case ENDPOINT_EXISTS:
|
||||
return "ENDPOINT_EXISTS";
|
||||
case CONNECTION_FAILED:
|
||||
return "CONNECTION_FAILED";
|
||||
case COULD_NOT_BE_PERSISTED:
|
||||
@ -100,16 +100,16 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio
|
||||
-> memgraph::utils::BasicResult<RegisterReplicaError> {
|
||||
MG_ASSERT(dbms_handler_.ReplicationState().IsMain(), "Only main instance can register a replica!");
|
||||
|
||||
auto instance_client = dbms_handler_.ReplicationState().RegisterReplica(config);
|
||||
if (instance_client.HasError()) {
|
||||
switch (instance_client.GetError()) {
|
||||
auto maybe_client = dbms_handler_.ReplicationState().RegisterReplica(config);
|
||||
if (maybe_client.HasError()) {
|
||||
switch (maybe_client.GetError()) {
|
||||
case memgraph::replication::RegisterReplicaError::NOT_MAIN:
|
||||
MG_ASSERT(false, "Only main instance can register a replica!");
|
||||
return {};
|
||||
case memgraph::replication::RegisterReplicaError::NAME_EXISTS:
|
||||
return memgraph::dbms::RegisterReplicaError::NAME_EXISTS;
|
||||
case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS:
|
||||
return memgraph::dbms::RegisterReplicaError::END_POINT_EXISTS;
|
||||
case memgraph::replication::RegisterReplicaError::ENDPOINT_EXISTS:
|
||||
return memgraph::dbms::RegisterReplicaError::ENDPOINT_EXISTS;
|
||||
case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED:
|
||||
return memgraph::dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED;
|
||||
case memgraph::replication::RegisterReplicaError::SUCCESS:
|
||||
@ -123,14 +123,14 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
// Update system before enabling individual storage <-> replica clients
|
||||
dbms_handler_.SystemRestore(*instance_client.GetValue());
|
||||
dbms_handler_.SystemRestore(*maybe_client.GetValue());
|
||||
#endif
|
||||
|
||||
const auto dbms_error = memgraph::dbms::HandleErrorOnReplicaClient(instance_client);
|
||||
const auto dbms_error = memgraph::dbms::HandleRegisterReplicaStatus(maybe_client);
|
||||
if (dbms_error.has_value()) {
|
||||
return *dbms_error;
|
||||
}
|
||||
auto &instance_client_ptr = instance_client.GetValue();
|
||||
auto &instance_client_ptr = maybe_client.GetValue();
|
||||
const bool all_clients_good = memgraph::dbms::RegisterAllDatabasesClients(dbms_handler_, *instance_client_ptr);
|
||||
|
||||
// NOTE Currently if any databases fails, we revert back
|
||||
@ -141,7 +141,7 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio
|
||||
}
|
||||
|
||||
// No client error, start instance level client
|
||||
StartReplicaClient(dbms_handler_, *instance_client.GetValue());
|
||||
StartReplicaClient(dbms_handler_, *instance_client_ptr);
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -169,7 +169,7 @@ auto ReplicationHandler::UnregisterReplica(std::string_view name) -> UnregisterR
|
||||
dbms_handler_.ReplicationState().ReplicationData());
|
||||
}
|
||||
|
||||
auto ReplicationHandler::GetRole() const -> memgraph::replication::ReplicationRole {
|
||||
auto ReplicationHandler::GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole {
|
||||
return dbms_handler_.ReplicationState().GetRole();
|
||||
}
|
||||
|
||||
|
@ -11,8 +11,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "replication_coordination_glue/role.hpp"
|
||||
#include "dbms/database.hpp"
|
||||
#include "replication/role.hpp"
|
||||
#include "utils/result.hpp"
|
||||
|
||||
namespace memgraph::replication {
|
||||
@ -25,7 +25,7 @@ namespace memgraph::dbms {
|
||||
|
||||
class DbmsHandler;
|
||||
|
||||
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED };
|
||||
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, CONNECTION_FAILED, COULD_NOT_BE_PERSISTED };
|
||||
|
||||
enum class UnregisterReplicaResult : uint8_t {
|
||||
NOT_MAIN,
|
||||
@ -53,7 +53,7 @@ struct ReplicationHandler {
|
||||
auto UnregisterReplica(std::string_view name) -> UnregisterReplicaResult;
|
||||
|
||||
// Helper pass-through (TODO: remove)
|
||||
auto GetRole() const -> memgraph::replication::ReplicationRole;
|
||||
auto GetRole() const -> memgraph::replication_coordination_glue::ReplicationRole;
|
||||
bool IsMain() const;
|
||||
bool IsReplica() const;
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
namespace memgraph::dbms {
|
||||
|
||||
inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) {
|
||||
auto &repl_state = dbms_handler.ReplicationState();
|
||||
// STEP 1) bring down all REPLICA servers
|
||||
dbms_handler.ForEach([](DatabaseAccess db_acc) {
|
||||
auto *storage = db_acc->storage();
|
||||
@ -27,7 +28,7 @@ inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) {
|
||||
|
||||
// STEP 2) Change to MAIN
|
||||
// TODO: restore replication servers if false?
|
||||
if (!dbms_handler.ReplicationState().SetReplicationRoleMain()) {
|
||||
if (!repl_state.SetReplicationRoleMain()) {
|
||||
// TODO: Handle recovery on failure???
|
||||
return false;
|
||||
}
|
||||
@ -43,6 +44,38 @@ inline bool DoReplicaToMainPromotion(dbms::DbmsHandler &dbms_handler) {
|
||||
return true;
|
||||
};
|
||||
|
||||
// Switches this instance to the REPLICA role.
//
// Returns false straight away when the instance is already a replica. Otherwise it
// clears the per-database replication clients, clears the instance-level registered
// replicas, switches the replication state to replica with `config`, and starts the
// RPC server. The return value is the result of starting that server.
inline bool SetReplicationRoleReplica(dbms::DbmsHandler &dbms_handler,
                                      const memgraph::replication::ReplicationServerConfig &config) {
  auto &repl_state = dbms_handler.ReplicationState();
  if (repl_state.IsReplica()) {
    return false;
  }

  // TODO StorageState needs to be synched. Could have a dangling reference if someone adds a database as we are
  //      deleting the replica.
  // Remove database specific clients
  dbms_handler.ForEach([](DatabaseAccess db_acc) {
    auto &storage_clients = db_acc->storage()->repl_storage_state_.replication_clients_;
    storage_clients.WithLock([](auto &clients) { clients.clear(); });
  });

  // Remove instance level clients
  auto &main_data = std::get<replication::RoleMainData>(repl_state.ReplicationData());
  main_data.registered_replicas_.clear();

  // Creates the server
  repl_state.SetReplicationRoleReplica(config);

  // Start
  auto const on_main = [](replication::RoleMainData const &) {
    // ASSERT
    return false;
  };
  auto const on_replica = [&dbms_handler](replication::RoleReplicaData const &data) {
    return StartRpcServer(dbms_handler, data);
  };

  // TODO Handle error (restore to main?)
  return std::visit(utils::Overloaded{on_main, on_replica}, repl_state.ReplicationData());
}
|
||||
|
||||
inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler,
|
||||
replication::ReplicationClient &instance_client) {
|
||||
if (!allow_mt_repl && dbms_handler.All().size() > 1) {
|
||||
@ -69,7 +102,7 @@ inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler,
|
||||
// MAYBE_BEHIND isn't a statement of the current state, this is the default value
|
||||
// Failed to start due to an error like branching of MAIN and REPLICA
|
||||
if (client->State() == storage::replication::ReplicaState::MAYBE_BEHIND) {
|
||||
return false;
|
||||
return false; // TODO: sometimes we need to still add to storage_clients
|
||||
}
|
||||
storage_clients.push_back(std::move(client));
|
||||
return true;
|
||||
@ -79,7 +112,7 @@ inline bool RegisterAllDatabasesClients(dbms::DbmsHandler &dbms_handler,
|
||||
return all_clients_good;
|
||||
}
|
||||
|
||||
inline std::optional<RegisterReplicaError> HandleErrorOnReplicaClient(
|
||||
inline std::optional<RegisterReplicaError> HandleRegisterReplicaStatus(
|
||||
utils::BasicResult<replication::RegisterReplicaError, replication::ReplicationClient *> &instance_client) {
|
||||
if (instance_client.HasError()) switch (instance_client.GetError()) {
|
||||
case replication::RegisterReplicaError::NOT_MAIN:
|
||||
@ -87,8 +120,8 @@ inline std::optional<RegisterReplicaError> HandleErrorOnReplicaClient(
|
||||
return {};
|
||||
case replication::RegisterReplicaError::NAME_EXISTS:
|
||||
return dbms::RegisterReplicaError::NAME_EXISTS;
|
||||
case replication::RegisterReplicaError::END_POINT_EXISTS:
|
||||
return dbms::RegisterReplicaError::END_POINT_EXISTS;
|
||||
case replication::RegisterReplicaError::ENDPOINT_EXISTS:
|
||||
return dbms::RegisterReplicaError::ENDPOINT_EXISTS;
|
||||
case replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED:
|
||||
return dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED;
|
||||
case replication::RegisterReplicaError::SUCCESS:
|
||||
|
@ -32,7 +32,7 @@
|
||||
#include "utils/timer.hpp"
|
||||
#include "version.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
bool ValidateControlCharacter(const char *flagname, const std::string &value) {
|
||||
if (value.empty()) {
|
||||
|
@ -3072,14 +3072,11 @@ class CoordinatorQuery : public memgraph::query::Query {
|
||||
const utils::TypeInfo &GetTypeInfo() const override { return kType; }
|
||||
|
||||
enum class Action {
|
||||
REGISTER_MAIN_COORDINATOR_SERVER,
|
||||
REGISTER_REPLICA_COORDINATOR_SERVER,
|
||||
REGISTER_INSTANCE,
|
||||
SET_INSTANCE_TO_MAIN,
|
||||
SHOW_REPLICATION_CLUSTER,
|
||||
DO_FAILOVER
|
||||
};
|
||||
|
||||
enum class ReplicationRole { MAIN, REPLICA };
|
||||
|
||||
enum class SyncMode { SYNC, ASYNC };
|
||||
|
||||
CoordinatorQuery() = default;
|
||||
@ -3087,18 +3084,17 @@ class CoordinatorQuery : public memgraph::query::Query {
|
||||
DEFVISITABLE(QueryVisitor<void>);
|
||||
|
||||
memgraph::query::CoordinatorQuery::Action action_;
|
||||
memgraph::query::CoordinatorQuery::ReplicationRole role_;
|
||||
std::string instance_name_;
|
||||
memgraph::query::Expression *socket_address_{nullptr};
|
||||
memgraph::query::Expression *replication_socket_address_{nullptr};
|
||||
memgraph::query::Expression *coordinator_socket_address_{nullptr};
|
||||
memgraph::query::CoordinatorQuery::SyncMode sync_mode_;
|
||||
|
||||
CoordinatorQuery *Clone(AstStorage *storage) const override {
|
||||
auto *object = storage->Create<CoordinatorQuery>();
|
||||
object->action_ = action_;
|
||||
object->role_ = role_;
|
||||
object->instance_name_ = instance_name_;
|
||||
object->socket_address_ = socket_address_ ? socket_address_->Clone(storage) : nullptr;
|
||||
object->replication_socket_address_ =
|
||||
replication_socket_address_ ? replication_socket_address_->Clone(storage) : nullptr;
|
||||
object->sync_mode_ = sync_mode_;
|
||||
object->coordinator_socket_address_ =
|
||||
coordinator_socket_address_ ? coordinator_socket_address_->Clone(storage) : nullptr;
|
||||
|
@ -375,10 +375,28 @@ antlrcpp::Any CypherMainVisitor::visitRegisterReplica(MemgraphCypher::RegisterRe
|
||||
}
|
||||
|
||||
// License check is done in the interpreter.
|
||||
antlrcpp::Any CypherMainVisitor::visitRegisterCoordinatorServer(MemgraphCypher::RegisterCoordinatorServerContext *ctx) {
|
||||
MG_ASSERT(ctx->children.size() == 1, "RegisterCoordinatorServerQuery should have exactly one child!");
|
||||
auto *coordinator_query = std::any_cast<CoordinatorQuery *>(ctx->children[0]->accept(this));
|
||||
query_ = coordinator_query;
|
||||
// Builds a CoordinatorQuery with action REGISTER_INSTANCE from the parsed statement.
// Both socket addresses must be string literals, otherwise a SemanticException is
// thrown. The sync mode is ASYNC when the ASYNC token is present and SYNC otherwise.
antlrcpp::Any CypherMainVisitor::visitRegisterInstanceOnCoordinator(
    MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) {
  using SyncMode = memgraph::query::CoordinatorQuery::SyncMode;

  auto *query = storage_->Create<CoordinatorQuery>();

  auto *replication_address_ctx = ctx->replicationSocketAddress();
  if (!replication_address_ctx->literal()->StringLiteral()) {
    throw SemanticException("Replication socket address should be a string literal!");
  }

  auto *coordinator_address_ctx = ctx->coordinatorSocketAddress();
  if (!coordinator_address_ctx->literal()->StringLiteral()) {
    throw SemanticException("Coordinator socket address should be a string literal!");
  }

  query->action_ = CoordinatorQuery::Action::REGISTER_INSTANCE;
  query->replication_socket_address_ = std::any_cast<Expression *>(ctx->replicationSocketAddress()->accept(this));
  query->coordinator_socket_address_ = std::any_cast<Expression *>(ctx->coordinatorSocketAddress()->accept(this));
  query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this));
  query->sync_mode_ = ctx->ASYNC() ? SyncMode::ASYNC : SyncMode::SYNC;

  return query;
}
|
||||
|
||||
@ -389,48 +407,6 @@ antlrcpp::Any CypherMainVisitor::visitShowReplicationCluster(MemgraphCypher::Sho
|
||||
return coordinator_query;
|
||||
}
|
||||
|
||||
// License check is done in the interpreter
|
||||
// Builds a CoordinatorQuery with action REGISTER_REPLICA_COORDINATOR_SERVER and role
// REPLICA. Both socket addresses must be string literals, otherwise a
// SemanticException is thrown. sync_mode_ is written only when a SYNC or ASYNC token
// is present.
antlrcpp::Any CypherMainVisitor::visitRegisterReplicaCoordinatorServer(
    MemgraphCypher::RegisterReplicaCoordinatorServerContext *ctx) {
  using SyncMode = memgraph::query::CoordinatorQuery::SyncMode;

  auto *query = storage_->Create<CoordinatorQuery>();

  const bool socket_is_string = ctx->socketAddress()->literal()->StringLiteral();
  if (!socket_is_string) {
    throw SemanticException("Socket address should be a string literal!");
  }

  const bool coordinator_socket_is_string = ctx->coordinatorSocketAddress()->literal()->StringLiteral();
  if (!coordinator_socket_is_string) {
    throw SemanticException("Coordinator socket address should be a string literal!");
  }

  query->action_ = CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER;
  query->role_ = CoordinatorQuery::ReplicationRole::REPLICA;
  query->socket_address_ = std::any_cast<Expression *>(ctx->socketAddress()->accept(this));
  query->coordinator_socket_address_ = std::any_cast<Expression *>(ctx->coordinatorSocketAddress()->accept(this));
  query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this));

  if (ctx->SYNC()) {
    query->sync_mode_ = SyncMode::SYNC;
  } else if (ctx->ASYNC()) {
    query->sync_mode_ = SyncMode::ASYNC;
  }

  return query;
}
|
||||
|
||||
// License check is done in the interpreter
|
||||
// Builds a CoordinatorQuery with action REGISTER_MAIN_COORDINATOR_SERVER and role
// MAIN. The coordinator socket address is validated before the query node is
// created, and a SemanticException is thrown when it is not a string literal.
antlrcpp::Any CypherMainVisitor::visitRegisterMainCoordinatorServer(
    MemgraphCypher::RegisterMainCoordinatorServerContext *ctx) {
  const bool coordinator_socket_is_string = ctx->coordinatorSocketAddress()->literal()->StringLiteral();
  if (!coordinator_socket_is_string) {
    throw SemanticException("Coordinator socket address should be a string literal!");
  }

  auto *query = storage_->Create<CoordinatorQuery>();
  query->action_ = CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER;
  query->role_ = CoordinatorQuery::ReplicationRole::MAIN;
  query->coordinator_socket_address_ = std::any_cast<Expression *>(ctx->coordinatorSocketAddress()->accept(this));
  query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this));

  return query;
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitDropReplica(MemgraphCypher::DropReplicaContext *ctx) {
|
||||
auto *replication_query = storage_->Create<ReplicationQuery>();
|
||||
replication_query->action_ = ReplicationQuery::Action::DROP_REPLICA;
|
||||
@ -445,9 +421,10 @@ antlrcpp::Any CypherMainVisitor::visitShowReplicas(MemgraphCypher::ShowReplicasC
|
||||
}
|
||||
|
||||
// License check is done in the interpreter
|
||||
antlrcpp::Any CypherMainVisitor::visitDoFailover(MemgraphCypher::DoFailoverContext * /*ctx*/) {
|
||||
antlrcpp::Any CypherMainVisitor::visitSetInstanceToMain(MemgraphCypher::SetInstanceToMainContext *ctx) {
|
||||
auto *coordinator_query = storage_->Create<CoordinatorQuery>();
|
||||
coordinator_query->action_ = CoordinatorQuery::Action::DO_FAILOVER;
|
||||
coordinator_query->action_ = CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN;
|
||||
coordinator_query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this));
|
||||
query_ = coordinator_query;
|
||||
return coordinator_query;
|
||||
}
|
||||
|
@ -241,29 +241,18 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
|
||||
/**
|
||||
* @return CoordinatorQuery*
|
||||
*/
|
||||
antlrcpp::Any visitRegisterCoordinatorServer(MemgraphCypher::RegisterCoordinatorServerContext *ctx) override;
|
||||
antlrcpp::Any visitRegisterInstanceOnCoordinator(MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return CoordinatorQuery*
|
||||
*/
|
||||
antlrcpp::Any visitRegisterMainCoordinatorServer(MemgraphCypher::RegisterMainCoordinatorServerContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return CoordinatorQuery*
|
||||
*/
|
||||
antlrcpp::Any visitRegisterReplicaCoordinatorServer(
|
||||
MemgraphCypher::RegisterReplicaCoordinatorServerContext *ctx) override;
|
||||
antlrcpp::Any visitSetInstanceToMain(MemgraphCypher::SetInstanceToMainContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return CoordinatorQuery*
|
||||
*/
|
||||
antlrcpp::Any visitShowReplicationCluster(MemgraphCypher::ShowReplicationClusterContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return CoordinatorQuery*
|
||||
*/
|
||||
antlrcpp::Any visitDoFailover(MemgraphCypher::DoFailoverContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return LockPathQuery*
|
||||
*/
|
||||
|
@ -102,6 +102,7 @@ FILTER : F I L T E R ;
|
||||
IN : I N ;
|
||||
INDEX : I N D E X ;
|
||||
INFO : I N F O ;
|
||||
INSTANCE : I N S T A N C E ;
|
||||
IS : I S ;
|
||||
KB : K B ;
|
||||
KEY : K E Y ;
|
||||
@ -122,6 +123,7 @@ PROCEDURE : P R O C E D U R E ;
|
||||
PROFILE : P R O F I L E ;
|
||||
QUERY : Q U E R Y ;
|
||||
REDUCE : R E D U C E ;
|
||||
REGISTER : R E G I S T E R;
|
||||
REMOVE : R E M O V E ;
|
||||
RETURN : R E T U R N ;
|
||||
SET : S E T ;
|
||||
|
@ -63,6 +63,7 @@ memgraphCypherKeyword : cypherKeyword
|
||||
| GRANT
|
||||
| HEADER
|
||||
| IDENTIFIED
|
||||
| INSTANCE
|
||||
| NODE_LABELS
|
||||
| NULLIF
|
||||
| IMPORT
|
||||
@ -186,9 +187,9 @@ replicationQuery : setReplicationRole
|
||||
| showReplicas
|
||||
;
|
||||
|
||||
coordinatorQuery : registerCoordinatorServer
|
||||
coordinatorQuery : registerInstanceOnCoordinator
|
||||
| setInstanceToMain
|
||||
| showReplicationCluster
|
||||
| doFailover
|
||||
;
|
||||
|
||||
triggerQuery : createTrigger
|
||||
@ -252,8 +253,6 @@ transactionQueueQuery : showTransactions
|
||||
|
||||
showTransactions : SHOW TRANSACTIONS ;
|
||||
|
||||
doFailover : DO FAILOVER ;
|
||||
|
||||
terminateTransactions : TERMINATE TRANSACTIONS transactionIdList;
|
||||
|
||||
loadCsv : LOAD CSV FROM csvFile ( WITH | NO ) HEADER
|
||||
@ -382,15 +381,14 @@ instanceName : symbolicName ;
|
||||
socketAddress : literal ;
|
||||
|
||||
coordinatorSocketAddress : literal ;
|
||||
replicationSocketAddress : literal ;
|
||||
|
||||
registerReplica : REGISTER REPLICA instanceName ( SYNC | ASYNC )
|
||||
TO socketAddress ;
|
||||
|
||||
registerReplicaCoordinatorServer: REGISTER REPLICA instanceName ( ASYNC | SYNC ) TO socketAddress WITH COORDINATOR SERVER ON coordinatorSocketAddress ;
|
||||
registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ON coordinatorSocketAddress ( AS ASYNC ) ? WITH replicationSocketAddress ;
|
||||
|
||||
registerMainCoordinatorServer: REGISTER MAIN instanceName WITH COORDINATOR SERVER ON coordinatorSocketAddress ;
|
||||
|
||||
registerCoordinatorServer : registerMainCoordinatorServer | registerReplicaCoordinatorServer ;
|
||||
setInstanceToMain : SET INSTANCE instanceName TO MAIN ;
|
||||
|
||||
dropReplica : DROP REPLICA instanceName ;
|
||||
|
||||
|
@ -79,6 +79,7 @@ IMPORT : I M P O R T ;
|
||||
INACTIVE : I N A C T I V E ;
|
||||
IN_MEMORY_ANALYTICAL : I N UNDERSCORE M E M O R Y UNDERSCORE A N A L Y T I C A L ;
|
||||
IN_MEMORY_TRANSACTIONAL : I N UNDERSCORE M E M O R Y UNDERSCORE T R A N S A C T I O N A L ;
|
||||
INSTANCE : I N S T A N C E ;
|
||||
ISOLATION : I S O L A T I O N ;
|
||||
KAFKA : K A F K A ;
|
||||
LABELS : L A B E L S ;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -218,7 +218,8 @@ const trie::Trie kKeywords = {"union",
|
||||
"directory",
|
||||
"lock",
|
||||
"unlock",
|
||||
"build"};
|
||||
"build",
|
||||
"instance"};
|
||||
|
||||
// Unicode codepoints that are allowed at the start of the unescaped name.
|
||||
const std::bitset<kBitsetSize> kUnescapedNameAllowedStarts(
|
||||
|
@ -110,7 +110,6 @@
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
#include "coordination/constants.hpp"
|
||||
#include "coordination/coordinator_entity_info.hpp"
|
||||
#endif
|
||||
|
||||
namespace memgraph::metrics {
|
||||
@ -337,9 +336,9 @@ class ReplQueryHandler {
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
ReplicationQuery::ReplicationRole ShowReplicationRole() const {
|
||||
switch (handler_.GetRole()) {
|
||||
case memgraph::replication::ReplicationRole::MAIN:
|
||||
case memgraph::replication_coordination_glue::ReplicationRole::MAIN:
|
||||
return ReplicationQuery::ReplicationRole::MAIN;
|
||||
case memgraph::replication::ReplicationRole::REPLICA:
|
||||
case memgraph::replication_coordination_glue::ReplicationRole::REPLICA:
|
||||
return ReplicationQuery::ReplicationRole::REPLICA;
|
||||
}
|
||||
throw QueryRuntimeException("Couldn't show replication role - invalid role set!");
|
||||
@ -462,10 +461,8 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
void RegisterReplicaCoordinatorServer(const std::string &replication_socket_address,
|
||||
const std::string &coordinator_socket_address,
|
||||
const std::chrono::seconds instance_check_frequency,
|
||||
const std::string &instance_name,
|
||||
void RegisterInstance(const std::string &coordinator_socket_address, const std::string &replication_socket_address,
|
||||
const std::chrono::seconds instance_check_frequency, const std::string &instance_name,
|
||||
CoordinatorQuery::SyncMode sync_mode) override {
|
||||
const auto maybe_replication_ip_port =
|
||||
io::network::Endpoint::ParseSocketOrAddress(replication_socket_address, std::nullopt);
|
||||
@ -487,7 +484,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
|
||||
.replication_ip_address = replication_ip,
|
||||
.replication_port = replication_port};
|
||||
|
||||
const auto coordinator_client_config =
|
||||
auto coordinator_client_config =
|
||||
coordination::CoordinatorClientConfig{.instance_name = instance_name,
|
||||
.ip_address = coordinator_server_ip,
|
||||
.port = coordinator_server_port,
|
||||
@ -495,87 +492,49 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
|
||||
.replication_client_info = repl_config,
|
||||
.ssl = std::nullopt};
|
||||
|
||||
if (const auto ret = coordinator_handler_.RegisterReplicaOnCoordinator(coordinator_client_config); ret.HasError()) {
|
||||
throw QueryRuntimeException("Couldn't register replica on coordinator!");
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterMainCoordinatorServer(const std::string &coordinator_socket_address,
|
||||
const std::chrono::seconds instance_check_frequency,
|
||||
const std::string &instance_name) override {
|
||||
const auto maybe_ip_and_port =
|
||||
io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address, std::nullopt);
|
||||
if (!maybe_ip_and_port) {
|
||||
throw QueryRuntimeException("Invalid socket address!");
|
||||
}
|
||||
const auto [ip, port] = *maybe_ip_and_port;
|
||||
const auto config = coordination::CoordinatorClientConfig{.instance_name = instance_name,
|
||||
.ip_address = ip,
|
||||
.port = port,
|
||||
.health_check_frequency_sec = instance_check_frequency,
|
||||
.ssl = std::nullopt};
|
||||
|
||||
if (const auto ret = coordinator_handler_.RegisterMainOnCoordinator(config); ret.HasError()) {
|
||||
throw QueryRuntimeException("Couldn't register main on coordinator!");
|
||||
}
|
||||
}
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
void DoFailover() const override {
|
||||
if (!FLAGS_coordinator) {
|
||||
throw QueryRuntimeException("Only coordinator can register coordinator server!");
|
||||
}
|
||||
|
||||
auto status = coordinator_handler_.DoFailover();
|
||||
auto status = coordinator_handler_.RegisterInstance(coordinator_client_config);
|
||||
switch (status) {
|
||||
using enum memgraph::dbms::DoFailoverStatus;
|
||||
case ALL_REPLICAS_DOWN:
|
||||
throw QueryRuntimeException("Failover aborted since all replicas are down!");
|
||||
case MAIN_ALIVE:
|
||||
throw QueryRuntimeException("Failover aborted since main is alive!");
|
||||
case CLUSTER_UNINITIALIZED:
|
||||
throw QueryRuntimeException("Failover aborted since cluster is uninitialized!");
|
||||
using enum memgraph::coordination::RegisterInstanceCoordinatorStatus;
|
||||
case NAME_EXISTS:
|
||||
throw QueryRuntimeException("Couldn't register replica instance since instance with such name already exists!");
|
||||
case END_POINT_EXISTS:
|
||||
throw QueryRuntimeException(
|
||||
"Couldn't register replica instance since instance with such endpoint already exists!");
|
||||
case NOT_COORDINATOR:
|
||||
throw QueryRuntimeException("Couldn't register replica instance since this instance is not a coordinator!");
|
||||
case RPC_FAILED:
|
||||
throw QueryRuntimeException(
|
||||
"Couldn't register instance because setting instance to replica failed! Check logs on replica to find out "
|
||||
"more "
|
||||
"info!");
|
||||
case SUCCESS:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<MainReplicaStatus> ShowMainReplicaStatus(
|
||||
const std::vector<coordination::CoordinatorEntityInfo> &replicas,
|
||||
const std::unordered_map<std::string_view, bool> &health_check_replicas,
|
||||
const std::optional<coordination::CoordinatorEntityInfo> &main,
|
||||
const std::optional<coordination::CoordinatorEntityHealthInfo> &health_check_main) const override {
|
||||
std::vector<MainReplicaStatus> result{};
|
||||
result.reserve(replicas.size() + 1); // replicas + 1 main
|
||||
std::ranges::transform(
|
||||
replicas, std::back_inserter(result), [&health_check_replicas](const auto &replica) -> MainReplicaStatus {
|
||||
return {replica.name, replica.endpoint.SocketAddress(), health_check_replicas.at(replica.name), false};
|
||||
});
|
||||
if (main) {
|
||||
bool is_main_alive = health_check_main.has_value() ? health_check_main.value().alive : false;
|
||||
result.emplace_back(main->name, main->endpoint.SocketAddress(), is_main_alive, true);
|
||||
void SetInstanceToMain(const std::string &instance_name) override {
|
||||
auto status = coordinator_handler_.SetInstanceToMain(instance_name);
|
||||
switch (status) {
|
||||
using enum memgraph::coordination::SetInstanceToMainCoordinatorStatus;
|
||||
case NO_INSTANCE_WITH_NAME:
|
||||
throw QueryRuntimeException("No instance with such name!");
|
||||
case NOT_COORDINATOR:
|
||||
throw QueryRuntimeException("Couldn't set replica instance to main since this instance is not a coordinator!");
|
||||
case COULD_NOT_PROMOTE_TO_MAIN:
|
||||
throw QueryRuntimeException(
|
||||
"Couldn't set replica instance to main. Check coordinator and replica for more logs");
|
||||
case SUCCESS:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
std::vector<coordination::CoordinatorEntityInfo> ShowReplicasOnCoordinator() const override {
|
||||
return coordinator_handler_.ShowReplicasOnCoordinator();
|
||||
std::vector<coordination::CoordinatorInstanceStatus> ShowInstances() const override {
|
||||
return coordinator_handler_.ShowInstances();
|
||||
}
|
||||
|
||||
std::unordered_map<std::string_view, bool> PingReplicasOnCoordinator() const override {
|
||||
return coordinator_handler_.PingReplicasOnCoordinator();
|
||||
}
|
||||
|
||||
std::optional<coordination::CoordinatorEntityInfo> ShowMainOnCoordinator() const override {
|
||||
return coordinator_handler_.ShowMainOnCoordinator();
|
||||
}
|
||||
|
||||
std::optional<coordination::CoordinatorEntityHealthInfo> PingMainOnCoordinator() const override {
|
||||
return coordinator_handler_.PingMainOnCoordinator();
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
@ -890,10 +849,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
|
||||
case ReplicationQuery::Action::SET_REPLICATION_ROLE: {
|
||||
#ifdef MG_ENTERPRISE
|
||||
if (FLAGS_coordinator) {
|
||||
if (repl_query->role_ == ReplicationQuery::ReplicationRole::REPLICA) {
|
||||
throw QueryRuntimeException("Coordinator cannot become a replica!");
|
||||
throw QueryRuntimeException("Coordinator can't set roles!");
|
||||
}
|
||||
throw QueryRuntimeException("Coordinator cannot become main!");
|
||||
if (FLAGS_coordinator_server_port) {
|
||||
throw QueryRuntimeException("Can't set role manually on instance with coordinator server port.");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -938,6 +897,11 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
|
||||
return callback;
|
||||
}
|
||||
case ReplicationQuery::Action::REGISTER_REPLICA: {
|
||||
#ifdef MG_ENTERPRISE
|
||||
if (FLAGS_coordinator_server_port) {
|
||||
throw QueryRuntimeException("Can't register replica manually on instance with coordinator server port.");
|
||||
}
|
||||
#endif
|
||||
const auto &name = repl_query->instance_name_;
|
||||
const auto &sync_mode = repl_query->sync_mode_;
|
||||
auto socket_address = repl_query->socket_address_->Accept(evaluator);
|
||||
@ -954,6 +918,11 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
|
||||
}
|
||||
|
||||
case ReplicationQuery::Action::DROP_REPLICA: {
|
||||
#ifdef MG_ENTERPRISE
|
||||
if (FLAGS_coordinator_server_port) {
|
||||
throw QueryRuntimeException("Can't drop replica manually on instance with coordinator server port.");
|
||||
}
|
||||
#endif
|
||||
const auto &name = repl_query->instance_name_;
|
||||
callback.fn = [handler = ReplQueryHandler{dbms_handler}, name]() mutable {
|
||||
handler.DropReplica(name);
|
||||
@ -1026,7 +995,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
|
||||
std::vector<Notification> *notifications) {
|
||||
Callback callback;
|
||||
switch (coordinator_query->action_) {
|
||||
case CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER: {
|
||||
case CoordinatorQuery::Action::REGISTER_INSTANCE: {
|
||||
if (!license::global_license_checker.IsEnterpriseValidFast()) {
|
||||
throw QueryException("Trying to use enterprise feature without a valid license.");
|
||||
}
|
||||
@ -1045,11 +1014,14 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
|
||||
auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context};
|
||||
|
||||
auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator);
|
||||
auto replication_socket_address_tv = coordinator_query->replication_socket_address_->Accept(evaluator);
|
||||
callback.fn = [handler = CoordQueryHandler{dbms_handler}, coordinator_socket_address_tv,
|
||||
main_check_frequency = config.replication_replica_check_frequency,
|
||||
instance_name = coordinator_query->instance_name_]() mutable {
|
||||
handler.RegisterMainCoordinatorServer(std::string(coordinator_socket_address_tv.ValueString()),
|
||||
main_check_frequency, instance_name);
|
||||
replication_socket_address_tv, main_check_frequency = config.replication_replica_check_frequency,
|
||||
instance_name = coordinator_query->instance_name_,
|
||||
sync_mode = coordinator_query->sync_mode_]() mutable {
|
||||
handler.RegisterInstance(std::string(coordinator_socket_address_tv.ValueString()),
|
||||
std::string(replication_socket_address_tv.ValueString()), main_check_frequency,
|
||||
instance_name, sync_mode);
|
||||
return std::vector<std::vector<TypedValue>>();
|
||||
};
|
||||
|
||||
@ -1060,7 +1032,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
|
||||
return callback;
|
||||
#endif
|
||||
}
|
||||
case CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER: {
|
||||
case CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN: {
|
||||
if (!license::global_license_checker.IsEnterpriseValidFast()) {
|
||||
throw QueryException("Trying to use enterprise feature without a valid license.");
|
||||
}
|
||||
@ -1077,22 +1049,13 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
|
||||
// the argument to Callback.
|
||||
EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters};
|
||||
auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context};
|
||||
auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator);
|
||||
auto replication_socket_address_tv = coordinator_query->socket_address_->Accept(evaluator);
|
||||
callback.fn = [handler = CoordQueryHandler{dbms_handler}, coordinator_socket_address_tv,
|
||||
replication_socket_address_tv, main_check_frequency = config.replication_replica_check_frequency,
|
||||
instance_name = coordinator_query->instance_name_,
|
||||
sync_mode = coordinator_query->sync_mode_]() mutable {
|
||||
handler.RegisterReplicaCoordinatorServer(std::string(replication_socket_address_tv.ValueString()),
|
||||
std::string(coordinator_socket_address_tv.ValueString()),
|
||||
main_check_frequency, instance_name, sync_mode);
|
||||
|
||||
callback.fn = [handler = CoordQueryHandler{dbms_handler},
|
||||
instance_name = coordinator_query->instance_name_]() mutable {
|
||||
handler.SetInstanceToMain(instance_name);
|
||||
return std::vector<std::vector<TypedValue>>();
|
||||
};
|
||||
|
||||
notifications->emplace_back(
|
||||
SeverityLevel::INFO, NotificationCode::REGISTER_COORDINATOR_SERVER,
|
||||
fmt::format("Coordinator has registered coordinator server on {} for instance {}.",
|
||||
coordinator_socket_address_tv.ValueString(), coordinator_query->instance_name_));
|
||||
return callback;
|
||||
#endif
|
||||
}
|
||||
@ -1112,57 +1075,19 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
|
||||
|
||||
callback.header = {"name", "socket_address", "alive", "role"};
|
||||
callback.fn = [handler = CoordQueryHandler{dbms_handler}, replica_nfields = callback.header.size()]() mutable {
|
||||
const auto main = handler.ShowMainOnCoordinator();
|
||||
const auto health_check_main = main ? handler.PingMainOnCoordinator() : std::nullopt;
|
||||
const auto result_status = handler.ShowMainReplicaStatus(
|
||||
handler.ShowReplicasOnCoordinator(), handler.PingReplicasOnCoordinator(), main, health_check_main);
|
||||
auto const instances = handler.ShowInstances();
|
||||
std::vector<std::vector<TypedValue>> result{};
|
||||
result.reserve(result_status.size());
|
||||
result.reserve(result.size());
|
||||
|
||||
std::ranges::transform(result_status, std::back_inserter(result),
|
||||
std::ranges::transform(instances, std::back_inserter(result),
|
||||
[](const auto &status) -> std::vector<TypedValue> {
|
||||
return {TypedValue{status.name}, TypedValue{status.socket_address},
|
||||
TypedValue{status.alive}, TypedValue{status.is_main ? "main" : "replica"}};
|
||||
return {TypedValue{status.instance_name}, TypedValue{status.socket_address},
|
||||
TypedValue{status.is_alive}, TypedValue{status.replication_role}};
|
||||
});
|
||||
|
||||
return result;
|
||||
};
|
||||
return callback;
|
||||
#endif
|
||||
}
|
||||
case CoordinatorQuery::Action::DO_FAILOVER: {
|
||||
if (!license::global_license_checker.IsEnterpriseValidFast()) {
|
||||
throw QueryException("Trying to use enterprise feature without a valid license.");
|
||||
}
|
||||
#ifdef MG_ENTERPRISE
|
||||
if constexpr (!coordination::allow_ha) {
|
||||
throw QueryRuntimeException(
|
||||
"High availability is experimental feature. Please set MG_EXPERIMENTAL_HIGH_AVAILABILITY compile flag to "
|
||||
"be able to use this functionality.");
|
||||
}
|
||||
if (!FLAGS_coordinator) {
|
||||
throw QueryRuntimeException("Only coordinator can run DO FAILOVER!");
|
||||
}
|
||||
|
||||
callback.header = {"name", "socket_address", "alive", "role"};
|
||||
callback.fn = [handler = CoordQueryHandler{dbms_handler}]() mutable {
|
||||
handler.DoFailover();
|
||||
const auto main = handler.ShowMainOnCoordinator();
|
||||
const auto health_check_main = main ? handler.PingMainOnCoordinator() : std::nullopt;
|
||||
const auto result_status = handler.ShowMainReplicaStatus(
|
||||
handler.ShowReplicasOnCoordinator(), handler.PingReplicasOnCoordinator(), main, health_check_main);
|
||||
std::vector<std::vector<TypedValue>> result{};
|
||||
result.reserve(result_status.size());
|
||||
|
||||
std::ranges::transform(result_status, std::back_inserter(result),
|
||||
[](const auto &status) -> std::vector<TypedValue> {
|
||||
return {TypedValue{status.name}, TypedValue{status.socket_address},
|
||||
TypedValue{status.alive}, TypedValue{status.is_main ? "main" : "replica"}};
|
||||
});
|
||||
return result;
|
||||
};
|
||||
notifications->emplace_back(SeverityLevel::INFO, NotificationCode::DO_FAILOVER,
|
||||
"DO FAILOVER called on coordinator.");
|
||||
return callback;
|
||||
#endif
|
||||
}
|
||||
return callback;
|
||||
@ -3157,7 +3082,7 @@ PreparedQuery PrepareEdgeImportModeQuery(ParsedQuery parsed_query, CurrentDB &cu
|
||||
}
|
||||
|
||||
PreparedQuery PrepareCreateSnapshotQuery(ParsedQuery parsed_query, bool in_explicit_transaction, CurrentDB ¤t_db,
|
||||
replication::ReplicationRole replication_role) {
|
||||
replication_coordination_glue::ReplicationRole replication_role) {
|
||||
if (in_explicit_transaction) {
|
||||
throw CreateSnapshotInMulticommandTxException();
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include "coordination/coordinator_entity_info.hpp"
|
||||
#include "dbms/database.hpp"
|
||||
#include "dbms/dbms_handler.hpp"
|
||||
#include "memory/query_memory_control.hpp"
|
||||
@ -53,6 +52,10 @@
|
||||
#include "utils/timer.hpp"
|
||||
#include "utils/tsc.hpp"
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
#include "coordination/coordinator_instance_status.hpp"
|
||||
#endif
|
||||
|
||||
namespace memgraph::metrics {
|
||||
extern const Event FailedQuery;
|
||||
extern const Event FailedPrepare;
|
||||
@ -93,47 +96,27 @@ class CoordinatorQueryHandler {
|
||||
#ifdef MG_ENTERPRISE
|
||||
struct MainReplicaStatus {
|
||||
std::string_view name;
|
||||
std::string socket_address;
|
||||
std::string_view socket_address;
|
||||
bool alive;
|
||||
bool is_main;
|
||||
|
||||
MainReplicaStatus(std::string_view name, std::string socket_address, bool alive, bool is_main)
|
||||
: name{name}, socket_address{std::move(socket_address)}, alive{alive}, is_main{is_main} {}
|
||||
MainReplicaStatus(std::string_view name, std::string_view socket_address, bool alive, bool is_main)
|
||||
: name{name}, socket_address{socket_address}, alive{alive}, is_main{is_main} {}
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual void RegisterReplicaCoordinatorServer(const std::string &replication_socket_address,
|
||||
const std::string &coordinator_socket_address,
|
||||
const std::chrono::seconds instance_check_frequency,
|
||||
const std::string &instance_name,
|
||||
virtual void RegisterInstance(const std::string &coordinator_socket_address,
|
||||
const std::string &replication_socket_address,
|
||||
const std::chrono::seconds instance_check_frequency, const std::string &instance_name,
|
||||
CoordinatorQuery::SyncMode sync_mode) = 0;
|
||||
virtual void RegisterMainCoordinatorServer(const std::string &socket_address,
|
||||
const std::chrono::seconds instance_check_frequency,
|
||||
const std::string &instance_name) = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual std::vector<coordination::CoordinatorEntityInfo> ShowReplicasOnCoordinator() const = 0;
|
||||
virtual void SetInstanceToMain(const std::string &instance_name) = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual std::optional<coordination::CoordinatorEntityInfo> ShowMainOnCoordinator() const = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual std::unordered_map<std::string_view, bool> PingReplicasOnCoordinator() const = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual std::optional<coordination::CoordinatorEntityHealthInfo> PingMainOnCoordinator() const = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual void DoFailover() const = 0;
|
||||
|
||||
/// @throw QueryRuntimeException if an error ocurred.
|
||||
virtual std::vector<MainReplicaStatus> ShowMainReplicaStatus(
|
||||
const std::vector<coordination::CoordinatorEntityInfo> &replicas,
|
||||
const std::unordered_map<std::string_view, bool> &health_check_replicas,
|
||||
const std::optional<coordination::CoordinatorEntityInfo> &main,
|
||||
const std::optional<coordination::CoordinatorEntityHealthInfo> &health_check_main) const = 0;
|
||||
virtual std::vector<coordination::CoordinatorInstanceStatus> ShowInstances() const = 0;
|
||||
|
||||
#endif
|
||||
};
|
||||
|
@ -69,8 +69,6 @@ constexpr std::string_view GetCodeString(const NotificationCode code) {
|
||||
#ifdef MG_ENTERPRISE
|
||||
case NotificationCode::REGISTER_COORDINATOR_SERVER:
|
||||
return "RegisterCoordinatorServer"sv;
|
||||
case NotificationCode::DO_FAILOVER:
|
||||
return "DoFailover"sv;
|
||||
#endif
|
||||
case NotificationCode::REPLICA_PORT_WARNING:
|
||||
return "ReplicaPortWarning"sv;
|
||||
|
@ -44,7 +44,6 @@ enum class NotificationCode : uint8_t {
|
||||
REGISTER_REPLICA,
|
||||
#ifdef MG_ENTERPRISE
|
||||
REGISTER_COORDINATOR_SERVER,
|
||||
DO_FAILOVER,
|
||||
#endif
|
||||
SET_REPLICA,
|
||||
START_STREAM,
|
||||
|
@ -5,7 +5,6 @@ target_sources(mg-replication
|
||||
include/replication/state.hpp
|
||||
include/replication/epoch.hpp
|
||||
include/replication/config.hpp
|
||||
include/replication/role.hpp
|
||||
include/replication/status.hpp
|
||||
include/replication/messages.hpp
|
||||
include/replication/replication_client.hpp
|
||||
|
@ -40,7 +40,7 @@ struct ReplicationClientConfig {
|
||||
friend bool operator==(const SSL &, const SSL &) = default;
|
||||
};
|
||||
|
||||
std::optional<SSL> ssl;
|
||||
std::optional<SSL> ssl{};
|
||||
|
||||
friend bool operator==(ReplicationClientConfig const &, ReplicationClientConfig const &) = default;
|
||||
};
|
||||
|
@ -15,8 +15,8 @@
|
||||
#include "replication/config.hpp"
|
||||
#include "replication/epoch.hpp"
|
||||
#include "replication/replication_client.hpp"
|
||||
#include "replication/role.hpp"
|
||||
#include "replication_coordination_glue/mode.hpp"
|
||||
#include "replication_coordination_glue/role.hpp"
|
||||
#include "replication_server.hpp"
|
||||
#include "status.hpp"
|
||||
#include "utils/result.hpp"
|
||||
@ -32,7 +32,8 @@ namespace memgraph::replication {
|
||||
|
||||
enum class RolePersisted : uint8_t { UNKNOWN_OR_NO, YES };
|
||||
|
||||
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, COULD_NOT_BE_PERSISTED, NOT_MAIN, SUCCESS };
|
||||
// TODO: (andi) Rename Error to Status
|
||||
enum class RegisterReplicaError : uint8_t { NAME_EXISTS, ENDPOINT_EXISTS, COULD_NOT_BE_PERSISTED, NOT_MAIN, SUCCESS };
|
||||
|
||||
struct RoleMainData {
|
||||
RoleMainData() = default;
|
||||
@ -72,12 +73,13 @@ struct ReplicationState {
|
||||
using FetchReplicationResult_t = utils::BasicResult<FetchReplicationError, ReplicationData_t>;
|
||||
auto FetchReplicationData() -> FetchReplicationResult_t;
|
||||
|
||||
auto GetRole() const -> ReplicationRole {
|
||||
return std::holds_alternative<RoleReplicaData>(replication_data_) ? ReplicationRole::REPLICA
|
||||
: ReplicationRole::MAIN;
|
||||
auto GetRole() const -> replication_coordination_glue::ReplicationRole {
|
||||
return std::holds_alternative<RoleReplicaData>(replication_data_)
|
||||
? replication_coordination_glue::ReplicationRole::REPLICA
|
||||
: replication_coordination_glue::ReplicationRole::MAIN;
|
||||
}
|
||||
bool IsMain() const { return GetRole() == ReplicationRole::MAIN; }
|
||||
bool IsReplica() const { return GetRole() == ReplicationRole::REPLICA; }
|
||||
bool IsMain() const { return GetRole() == replication_coordination_glue::ReplicationRole::MAIN; }
|
||||
bool IsReplica() const { return GetRole() == replication_coordination_glue::ReplicationRole::REPLICA; }
|
||||
|
||||
bool HasDurability() const { return nullptr != durability_; }
|
||||
|
||||
@ -92,7 +94,6 @@ struct ReplicationState {
|
||||
utils::BasicResult<RegisterReplicaError, ReplicationClient *> RegisterReplica(const ReplicationClientConfig &config);
|
||||
|
||||
bool SetReplicationRoleMain();
|
||||
|
||||
bool SetReplicationRoleReplica(const ReplicationServerConfig &config);
|
||||
|
||||
private:
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
#include "replication/config.hpp"
|
||||
#include "replication/epoch.hpp"
|
||||
#include "replication/role.hpp"
|
||||
#include "replication_coordination_glue/role.hpp"
|
||||
|
||||
namespace memgraph::replication::durability {
|
||||
|
||||
|
@ -260,7 +260,7 @@ utils::BasicResult<RegisterReplicaError, ReplicationClient *> ReplicationState::
|
||||
return std::any_of(replicas.begin(), replicas.end(), endpoint_matches);
|
||||
};
|
||||
if (endpoint_check(mainData.registered_replicas_)) {
|
||||
return RegisterReplicaError::END_POINT_EXISTS;
|
||||
return RegisterReplicaError::ENDPOINT_EXISTS;
|
||||
}
|
||||
|
||||
// Durability
|
||||
@ -279,4 +279,5 @@ utils::BasicResult<RegisterReplicaError, ReplicationClient *> ReplicationState::
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace memgraph::replication
|
||||
|
@ -29,12 +29,14 @@ constexpr auto *kVersion = "durability_version";
|
||||
|
||||
void to_json(nlohmann::json &j, const ReplicationRoleEntry &p) {
|
||||
auto processMAIN = [&](MainRole const &main) {
|
||||
j = nlohmann::json{{kVersion, p.version}, {kReplicationRole, ReplicationRole::MAIN}, {kEpoch, main.epoch.id()}};
|
||||
j = nlohmann::json{{kVersion, p.version},
|
||||
{kReplicationRole, replication_coordination_glue::ReplicationRole::MAIN},
|
||||
{kEpoch, main.epoch.id()}};
|
||||
};
|
||||
auto processREPLICA = [&](ReplicaRole const &replica) {
|
||||
j = nlohmann::json{
|
||||
{kVersion, p.version},
|
||||
{kReplicationRole, ReplicationRole::REPLICA},
|
||||
{kReplicationRole, replication_coordination_glue::ReplicationRole::REPLICA},
|
||||
{kIpAddress, replica.config.ip_address},
|
||||
{kPort, replica.config.port}
|
||||
// TODO: SSL
|
||||
@ -47,17 +49,17 @@ void from_json(const nlohmann::json &j, ReplicationRoleEntry &p) {
|
||||
// This value did not exist in V1, hence default DurabilityVersion::V1
|
||||
DurabilityVersion version = j.value(kVersion, DurabilityVersion::V1);
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
||||
ReplicationRole role;
|
||||
replication_coordination_glue::ReplicationRole role;
|
||||
j.at(kReplicationRole).get_to(role);
|
||||
switch (role) {
|
||||
case ReplicationRole::MAIN: {
|
||||
case replication_coordination_glue::ReplicationRole::MAIN: {
|
||||
auto json_epoch = j.value(kEpoch, std::string{});
|
||||
auto epoch = ReplicationEpoch{};
|
||||
if (!json_epoch.empty()) epoch.SetEpoch(json_epoch);
|
||||
p = ReplicationRoleEntry{.version = version, .role = MainRole{.epoch = std::move(epoch)}};
|
||||
break;
|
||||
}
|
||||
case ReplicationRole::REPLICA: {
|
||||
case memgraph::replication_coordination_glue::ReplicationRole::REPLICA: {
|
||||
std::string ip_address;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
||||
uint16_t port;
|
||||
|
@ -5,6 +5,7 @@ target_sources(mg-repl_coord_glue
|
||||
PUBLIC
|
||||
messages.hpp
|
||||
mode.hpp
|
||||
role.hpp
|
||||
|
||||
PRIVATE
|
||||
messages.cpp
|
||||
|
@ -12,8 +12,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
namespace memgraph::replication {
|
||||
namespace memgraph::replication_coordination_glue {
|
||||
|
||||
// TODO: figure out a way of ensuring that usage of this type is never uninitialed/defaulted incorrectly to MAIN
|
||||
enum class ReplicationRole : uint8_t { MAIN, REPLICA };
|
||||
} // namespace memgraph::replication
|
||||
} // namespace memgraph::replication_coordination_glue
|
@ -288,7 +288,8 @@ DiskStorage::~DiskStorage() {
|
||||
|
||||
DiskStorage::DiskAccessor::DiskAccessor(auto tag, DiskStorage *storage, IsolationLevel isolation_level,
|
||||
StorageMode storage_mode)
|
||||
: Accessor(tag, storage, isolation_level, storage_mode, memgraph::replication::ReplicationRole::MAIN) {
|
||||
: Accessor(tag, storage, isolation_level, storage_mode,
|
||||
memgraph::replication_coordination_glue::ReplicationRole::MAIN) {
|
||||
rocksdb::WriteOptions write_options;
|
||||
auto txOptions = rocksdb::TransactionOptions{.set_snapshot = true};
|
||||
transaction_.disk_transaction_ = storage->kvstore_->db_->BeginTransaction(write_options, txOptions);
|
||||
@ -837,7 +838,8 @@ StorageInfo DiskStorage::GetBaseInfo(bool /* unused */) {
|
||||
return info;
|
||||
}
|
||||
|
||||
StorageInfo DiskStorage::GetInfo(bool force_dir, memgraph::replication::ReplicationRole replication_role) {
|
||||
StorageInfo DiskStorage::GetInfo(bool force_dir,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
StorageInfo info = GetBaseInfo(force_dir);
|
||||
{
|
||||
auto access = Access(replication_role);
|
||||
@ -2007,7 +2009,7 @@ UniqueConstraints::DeletionStatus DiskStorage::DiskAccessor::DropUniqueConstrain
|
||||
}
|
||||
|
||||
Transaction DiskStorage::CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole /*is_main*/) {
|
||||
memgraph::replication_coordination_glue::ReplicationRole /*is_main*/) {
|
||||
/// We acquire the transaction engine lock here because we access (and
|
||||
/// modify) the transaction engine variables (`transaction_id` and
|
||||
/// `timestamp`) below.
|
||||
@ -2032,7 +2034,8 @@ uint64_t DiskStorage::CommitTimestamp(const std::optional<uint64_t> desired_comm
|
||||
return *desired_commit_timestamp;
|
||||
}
|
||||
|
||||
std::unique_ptr<Storage::Accessor> DiskStorage::Access(memgraph::replication::ReplicationRole /*replication_role*/,
|
||||
std::unique_ptr<Storage::Accessor> DiskStorage::Access(
|
||||
memgraph::replication_coordination_glue::ReplicationRole /*replication_role*/,
|
||||
std::optional<IsolationLevel> override_isolation_level) {
|
||||
auto isolation_level = override_isolation_level.value_or(isolation_level_);
|
||||
if (isolation_level != IsolationLevel::SNAPSHOT_ISOLATION) {
|
||||
@ -2042,7 +2045,7 @@ std::unique_ptr<Storage::Accessor> DiskStorage::Access(memgraph::replication::Re
|
||||
new DiskAccessor{Storage::Accessor::shared_access, this, isolation_level, storage_mode_});
|
||||
}
|
||||
std::unique_ptr<Storage::Accessor> DiskStorage::UniqueAccess(
|
||||
memgraph::replication::ReplicationRole /*replication_role*/,
|
||||
memgraph::replication_coordination_glue::ReplicationRole /*replication_role*/,
|
||||
std::optional<IsolationLevel> override_isolation_level) {
|
||||
auto isolation_level = override_isolation_level.value_or(isolation_level_);
|
||||
if (isolation_level != IsolationLevel::SNAPSHOT_ISOLATION) {
|
||||
|
@ -176,11 +176,11 @@ class DiskStorage final : public Storage {
|
||||
};
|
||||
|
||||
using Storage::Access;
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication::ReplicationRole replication_role,
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) override;
|
||||
|
||||
using Storage::UniqueAccess;
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication::ReplicationRole replication_role,
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) override;
|
||||
|
||||
/// Flushing methods
|
||||
@ -285,7 +285,7 @@ class DiskStorage final : public Storage {
|
||||
RocksDBStorage *GetRocksDBStorage() const { return kvstore_.get(); }
|
||||
|
||||
Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role) override;
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
|
||||
|
||||
void SetEdgeImportMode(EdgeImportMode edge_import_status);
|
||||
|
||||
@ -308,7 +308,8 @@ class DiskStorage final : public Storage {
|
||||
PropertyId property);
|
||||
|
||||
StorageInfo GetBaseInfo(bool force_directory) override;
|
||||
StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) override;
|
||||
StorageInfo GetInfo(bool force_directory,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
|
||||
|
||||
void FreeMemory(std::unique_lock<utils::ResourceLock> /*lock*/) override {}
|
||||
|
||||
|
@ -178,7 +178,7 @@ InMemoryStorage::~InMemoryStorage() {
|
||||
|
||||
InMemoryStorage::InMemoryAccessor::InMemoryAccessor(auto tag, InMemoryStorage *storage, IsolationLevel isolation_level,
|
||||
StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role)
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role)
|
||||
: Accessor(tag, storage, isolation_level, storage_mode, replication_role),
|
||||
config_(storage->config_.salient.items) {}
|
||||
InMemoryStorage::InMemoryAccessor::InMemoryAccessor(InMemoryAccessor &&other) noexcept
|
||||
@ -1280,8 +1280,9 @@ VerticesIterable InMemoryStorage::InMemoryAccessor::Vertices(
|
||||
mem_label_property_index->Vertices(label, property, lower_bound, upper_bound, view, storage_, &transaction_));
|
||||
}
|
||||
|
||||
Transaction InMemoryStorage::CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role) {
|
||||
Transaction InMemoryStorage::CreateTransaction(
|
||||
IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
// We acquire the transaction engine lock here because we access (and
|
||||
// modify) the transaction engine variables (`transaction_id` and
|
||||
// `timestamp`) below.
|
||||
@ -1296,7 +1297,7 @@ Transaction InMemoryStorage::CreateTransaction(IsolationLevel isolation_level, S
|
||||
// of any query on replica to the last committed transaction
|
||||
// which is timestamp_ as only commit of transaction with writes
|
||||
// can change the value of it.
|
||||
if (replication_role == memgraph::replication::ReplicationRole::MAIN) {
|
||||
if (replication_role == memgraph::replication_coordination_glue::ReplicationRole::MAIN) {
|
||||
start_timestamp = timestamp_++;
|
||||
} else {
|
||||
start_timestamp = timestamp_;
|
||||
@ -1683,7 +1684,8 @@ StorageInfo InMemoryStorage::GetBaseInfo(bool force_directory) {
|
||||
return info;
|
||||
}
|
||||
|
||||
StorageInfo InMemoryStorage::GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) {
|
||||
StorageInfo InMemoryStorage::GetInfo(bool force_directory,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
StorageInfo info = GetBaseInfo(force_directory);
|
||||
{
|
||||
auto access = Access(replication_role); // TODO: override isolation level?
|
||||
@ -2004,15 +2006,15 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
|
||||
}
|
||||
|
||||
utils::BasicResult<InMemoryStorage::CreateSnapshotError> InMemoryStorage::CreateSnapshot(
|
||||
memgraph::replication::ReplicationRole replication_role) {
|
||||
if (replication_role == memgraph::replication::ReplicationRole::REPLICA) {
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
if (replication_role == memgraph::replication_coordination_glue::ReplicationRole::REPLICA) {
|
||||
return InMemoryStorage::CreateSnapshotError::DisabledForReplica;
|
||||
}
|
||||
auto const &epoch = repl_storage_state_.epoch_;
|
||||
auto snapshot_creator = [this, &epoch]() {
|
||||
utils::Timer timer;
|
||||
auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION, storage_mode_,
|
||||
memgraph::replication::ReplicationRole::MAIN);
|
||||
memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
durability::CreateSnapshot(this, &transaction, recovery_.snapshot_directory_, recovery_.wal_directory_, &vertices_,
|
||||
&edges_, uuid_, epoch, repl_storage_state_.history, &file_retainer_);
|
||||
// Finalize snapshot transaction.
|
||||
@ -2100,14 +2102,16 @@ utils::FileRetainer::FileLockerAccessor::ret_type InMemoryStorage::UnlockPath()
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<Storage::Accessor> InMemoryStorage::Access(memgraph::replication::ReplicationRole replication_role,
|
||||
std::unique_ptr<Storage::Accessor> InMemoryStorage::Access(
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) {
|
||||
return std::unique_ptr<InMemoryAccessor>(new InMemoryAccessor{Storage::Accessor::shared_access, this,
|
||||
override_isolation_level.value_or(isolation_level_),
|
||||
storage_mode_, replication_role});
|
||||
}
|
||||
std::unique_ptr<Storage::Accessor> InMemoryStorage::UniqueAccess(
|
||||
memgraph::replication::ReplicationRole replication_role, std::optional<IsolationLevel> override_isolation_level) {
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) {
|
||||
return std::unique_ptr<InMemoryAccessor>(new InMemoryAccessor{Storage::Accessor::unique_access, this,
|
||||
override_isolation_level.value_or(isolation_level_),
|
||||
storage_mode_, replication_role});
|
||||
|
@ -73,7 +73,8 @@ class InMemoryStorage final : public Storage {
|
||||
friend class InMemoryStorage;
|
||||
|
||||
explicit InMemoryAccessor(auto tag, InMemoryStorage *storage, IsolationLevel isolation_level,
|
||||
StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role);
|
||||
StorageMode storage_mode,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role);
|
||||
|
||||
public:
|
||||
InMemoryAccessor(const InMemoryAccessor &) = delete;
|
||||
@ -322,10 +323,10 @@ class InMemoryStorage final : public Storage {
|
||||
};
|
||||
|
||||
using Storage::Access;
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication::ReplicationRole replication_role,
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) override;
|
||||
using Storage::UniqueAccess;
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication::ReplicationRole replication_role,
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) override;
|
||||
|
||||
void FreeMemory(std::unique_lock<utils::ResourceLock> main_guard) override;
|
||||
@ -335,12 +336,12 @@ class InMemoryStorage final : public Storage {
|
||||
utils::FileRetainer::FileLockerAccessor::ret_type UnlockPath();
|
||||
|
||||
utils::BasicResult<InMemoryStorage::CreateSnapshotError> CreateSnapshot(
|
||||
memgraph::replication::ReplicationRole replication_role);
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role);
|
||||
|
||||
void CreateSnapshotHandler(std::function<utils::BasicResult<InMemoryStorage::CreateSnapshotError>()> cb);
|
||||
|
||||
Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role) override;
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
|
||||
|
||||
void SetStorageMode(StorageMode storage_mode);
|
||||
|
||||
@ -365,7 +366,8 @@ class InMemoryStorage final : public Storage {
|
||||
void FinalizeWalFile();
|
||||
|
||||
StorageInfo GetBaseInfo(bool force_directory) override;
|
||||
StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) override;
|
||||
StorageInfo GetInfo(bool force_directory,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
|
||||
|
||||
/// Returns true in all cases except when any sync replica has not sent confirmation.
|
||||
[[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp,
|
||||
|
@ -67,6 +67,7 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce
|
||||
"now hold unique data. Please resolve data conflicts and start the "
|
||||
"replication on a clean instance.",
|
||||
client_.name_, client_.name_, client_.name_);
|
||||
// TODO: (andi) Talk about renaming MAYBE_BEHIND to branching
|
||||
// State not updated, hence in MAYBE_BEHIND state
|
||||
return;
|
||||
}
|
||||
|
@ -49,7 +49,8 @@ Storage::Storage(Config config, StorageMode storage_mode)
|
||||
}
|
||||
|
||||
Storage::Accessor::Accessor(SharedAccess /* tag */, Storage *storage, IsolationLevel isolation_level,
|
||||
StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role)
|
||||
StorageMode storage_mode,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role)
|
||||
: storage_(storage),
|
||||
// The lock must be acquired before creating the transaction object to
|
||||
// prevent freshly created transactions from dangling in an active state
|
||||
@ -61,7 +62,8 @@ Storage::Accessor::Accessor(SharedAccess /* tag */, Storage *storage, IsolationL
|
||||
creation_storage_mode_(storage_mode) {}
|
||||
|
||||
Storage::Accessor::Accessor(UniqueAccess /* tag */, Storage *storage, IsolationLevel isolation_level,
|
||||
StorageMode storage_mode, memgraph::replication::ReplicationRole replication_role)
|
||||
StorageMode storage_mode,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role)
|
||||
: storage_(storage),
|
||||
// The lock must be acquired before creating the transaction object to
|
||||
// prevent freshly created transactions from dangling in an active state
|
||||
|
@ -145,9 +145,9 @@ class Storage {
|
||||
} unique_access;
|
||||
|
||||
Accessor(SharedAccess /* tag */, Storage *storage, IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role);
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role);
|
||||
Accessor(UniqueAccess /* tag */, Storage *storage, IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role);
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role);
|
||||
Accessor(const Accessor &) = delete;
|
||||
Accessor &operator=(const Accessor &) = delete;
|
||||
Accessor &operator=(Accessor &&other) = delete;
|
||||
@ -328,16 +328,17 @@ class Storage {
|
||||
|
||||
void FreeMemory() { FreeMemory({}); }
|
||||
|
||||
virtual std::unique_ptr<Accessor> Access(memgraph::replication::ReplicationRole replication_role,
|
||||
virtual std::unique_ptr<Accessor> Access(memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) = 0;
|
||||
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication::ReplicationRole replication_role) {
|
||||
std::unique_ptr<Accessor> Access(memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
return Access(replication_role, {});
|
||||
}
|
||||
|
||||
virtual std::unique_ptr<Accessor> UniqueAccess(memgraph::replication::ReplicationRole replication_role,
|
||||
virtual std::unique_ptr<Accessor> UniqueAccess(
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role,
|
||||
std::optional<IsolationLevel> override_isolation_level) = 0;
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication::ReplicationRole replication_role) {
|
||||
std::unique_ptr<Accessor> UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
return UniqueAccess(replication_role, {});
|
||||
}
|
||||
|
||||
@ -356,10 +357,11 @@ class Storage {
|
||||
return GetBaseInfo(force_dir);
|
||||
}
|
||||
|
||||
virtual StorageInfo GetInfo(bool force_directory, memgraph::replication::ReplicationRole replication_role) = 0;
|
||||
virtual StorageInfo GetInfo(bool force_directory,
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) = 0;
|
||||
|
||||
virtual Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode,
|
||||
memgraph::replication::ReplicationRole replication_role) = 0;
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) = 0;
|
||||
|
||||
virtual void PrepareForNewEpoch() = 0;
|
||||
|
||||
|
@ -32,7 +32,7 @@ namespace memgraph::utils {
|
||||
* void long_function() {
|
||||
* resource.enable();
|
||||
* OnScopeExit on_exit([&resource] { resource.disable(); });
|
||||
* // long block of code, might trow an exception
|
||||
* // long block of code, might throw an exception
|
||||
* }
|
||||
*/
|
||||
template <typename Callable>
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -58,28 +58,40 @@ class Scheduler {
|
||||
// the start of the program. Since Server will log some messages on
|
||||
// the program start we let him log first and we make sure by first
|
||||
// waiting that function f will not log before it.
|
||||
// Check for pause also.
|
||||
std::unique_lock<std::mutex> lk(mutex_);
|
||||
auto now = std::chrono::system_clock::now();
|
||||
start_time += pause;
|
||||
if (start_time > now) {
|
||||
condition_variable_.wait_until(lk, start_time, [&] { return is_working_.load() == false; });
|
||||
condition_variable_.wait_until(lk, start_time, [&] { return !is_working_.load(); });
|
||||
} else {
|
||||
start_time = now;
|
||||
}
|
||||
|
||||
pause_cv_.wait(lk, [&] { return !is_paused_.load(); });
|
||||
|
||||
if (!is_working_) break;
|
||||
f();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * @brief Clears the pause flag and wakes one thread waiting on pause_cv_.
 *
 * NOTE(review): the flag is flipped without holding mutex_, while the
 * scheduler thread evaluates the pause predicate under that mutex. Looks like
 * a notification sent between the predicate check and the actual block could
 * be missed -- confirm whether that window matters for callers.
 */
void Resume() {
  is_paused_ = false;
  pause_cv_.notify_one();
}
|
||||
|
||||
/**
 * @brief Raises the pause flag; the scheduler loop checks it before invoking
 * the scheduled function and blocks on pause_cv_ while it is set.
 */
void Pause() { is_paused_ = true; }
|
||||
|
||||
/**
|
||||
* @brief Stops the thread execution. This is a blocking call and may take as
|
||||
* much time as one call to the function given previously to Run takes.
|
||||
* @throw std::system_error
|
||||
*/
|
||||
void Stop() {
|
||||
is_paused_.store(false);
|
||||
is_working_.store(false);
|
||||
pause_cv_.notify_one();
|
||||
condition_variable_.notify_one();
|
||||
if (thread_.joinable()) thread_.join();
|
||||
}
|
||||
@ -97,6 +109,16 @@ class Scheduler {
|
||||
*/
|
||||
std::atomic<bool> is_working_{false};
|
||||
|
||||
/**
|
||||
* Variable is true when thread is paused.
|
||||
*/
|
||||
std::atomic<bool> is_paused_{false};
|
||||
|
||||
/**
|
||||
* Wait until the thread is resumed.
|
||||
*/
|
||||
std::condition_variable pause_cv_;
|
||||
|
||||
/**
|
||||
* Mutex used to synchronize threads using condition variable.
|
||||
*/
|
||||
|
@ -97,6 +97,8 @@ enum class TypeId : uint64_t {
|
||||
// Coordinator
|
||||
COORD_FAILOVER_REQ,
|
||||
COORD_FAILOVER_RES,
|
||||
COORD_SET_REPL_MAIN_REQ,
|
||||
COORD_SET_REPL_MAIN_RES,
|
||||
|
||||
// AST
|
||||
AST_LABELIX = 3000,
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "storage/v2/storage.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
// The following classes are wrappers for memgraph::utils::MemoryResource, so that we can
|
||||
// use BENCHMARK_TEMPLATE
|
||||
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include "query/interpreter.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
// The following classes are wrappers for memgraph::utils::MemoryResource, so that we can
|
||||
// use BENCHMARK_TEMPLATE
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "query/plan/vertex_count_cache.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
// Add chained MATCH (node1) -- (node2), MATCH (node2) -- (node3) ... clauses.
|
||||
static memgraph::query::CypherQuery *AddChainedMatches(int num_matches, memgraph::query::AstStorage &storage) {
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include "storage/v2/storage.hpp"
|
||||
#include "utils/timer.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
// This benchmark should be run for a fixed amount of time that is
|
||||
// large compared to GC interval to make the output relevant.
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include "storage/v2/storage.hpp"
|
||||
#include "utils/timer.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
// This benchmark should be run for a fixed amount of time that is
|
||||
// large compared to GC interval to make the output relevant.
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "storage/v2/storage_error.hpp"
|
||||
#include "utils/thread.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
const uint64_t kNumVerifiers = 5;
|
||||
const uint64_t kNumMutators = 1;
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "storage/v2/constraints/constraints.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
const int kNumThreads = 8;
|
||||
|
||||
|
@ -1,10 +1,8 @@
|
||||
find_package(gflags REQUIRED)
|
||||
|
||||
copy_e2e_python_files(ha_experimental coordinator.py)
|
||||
copy_e2e_python_files(ha_experimental client_initiated_failover.py)
|
||||
copy_e2e_python_files(ha_experimental uninitialized_cluster.py)
|
||||
copy_e2e_python_files(ha_experimental automatic_failover.py)
|
||||
copy_e2e_python_files(ha_experimental common.py)
|
||||
copy_e2e_python_files(ha_experimental conftest.py)
|
||||
copy_e2e_python_files(ha_experimental workloads.yaml)
|
||||
|
||||
copy_e2e_python_files_from_parent_folder(ha_experimental ".." memgraph.py)
|
||||
|
337
tests/e2e/high_availability_experimental/automatic_failover.py
Normal file
337
tests/e2e/high_availability_experimental/automatic_failover.py
Normal file
@ -0,0 +1,337 @@
|
||||
# Copyright 2022 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import interactive_mg_runner
|
||||
import pytest
|
||||
from common import connect, execute_and_fetch_all, safe_execute
|
||||
from mg_utils import mg_sleep_and_assert
|
||||
|
||||
# Tell the shared runner where this test lives and where the memgraph binary is built.
# The project root is four directory levels above this file.
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
interactive_mg_runner.PROJECT_DIR = os.path.normpath(
    os.path.join(interactive_mg_runner.SCRIPT_DIR, os.pardir, os.pardir, os.pardir, os.pardir)
)
interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build"))
interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph"))

# Only the generated path is kept; the TemporaryDirectory object itself is not retained.
# Every test below removes this path (ignoring failures) before starting the cluster.
TEMP_DIR = tempfile.TemporaryDirectory().name
|
||||
|
||||
def _data_instance_description(name, bolt_port, coordinator_server_port):
    """Return the runner description for one data instance.

    All data instances share the same flags; they differ only in their Bolt port,
    their coordinator-server port and the per-instance log file / data directory.
    """
    return {
        "args": [
            "--bolt-port",
            bolt_port,
            "--log-level",
            "TRACE",
            "--coordinator-server-port",
            coordinator_server_port,
            "--replication-restore-state-on-startup",
        ],
        "log_file": f"{name}.log",
        "data_directory": f"{TEMP_DIR}/{name}",
        "setup_queries": [],
    }


# Three data instances plus one coordinator. The coordinator registers every data
# instance on startup and then sets instance_3 to MAIN.
MEMGRAPH_INSTANCES_DESCRIPTION = {
    "instance_1": _data_instance_description("instance_1", "7688", "10011"),
    "instance_2": _data_instance_description("instance_2", "7689", "10012"),
    "instance_3": _data_instance_description("instance_3", "7687", "10013"),
    "coordinator": {
        "args": ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"],
        "log_file": "coordinator.log",
        "setup_queries": [
            "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';",
            "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';",
            "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';",
            "SET INSTANCE instance_3 TO MAIN",
        ],
    },
}
|
||||
|
||||
|
||||
def test_show_replication_cluster():
    """SHOW REPLICATION CLUSTER on the coordinator follows replicas being killed.

    Starts the whole cluster, checks the rows reported by the coordinator and the
    role reported by each data instance, then kills instance_1 and instance_2 one
    after the other and checks the coordinator rows after each kill.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    cursor_instance_1 = connect(host="localhost", port=7688).cursor()
    cursor_instance_2 = connect(host="localhost", port=7689).cursor()
    cursor_instance_3 = connect(host="localhost", port=7687).cursor()
    cursor_coordinator = connect(host="localhost", port=7690).cursor()

    def cluster_rows():
        return sorted(execute_and_fetch_all(cursor_coordinator, "SHOW REPLICATION CLUSTER;"))

    def role_fetcher(cursor):
        # Builds a zero-argument callable, as expected by mg_sleep_and_assert.
        def fetch_role():
            return sorted(execute_and_fetch_all(cursor, "SHOW REPLICATION ROLE;"))

        return fetch_role

    everything_up = [
        ("instance_1", "127.0.0.1:10011", True, "replica"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    mg_sleep_and_assert(everything_up, cluster_rows)

    mg_sleep_and_assert([("replica",)], role_fetcher(cursor_instance_1))
    mg_sleep_and_assert([("replica",)], role_fetcher(cursor_instance_2))
    mg_sleep_and_assert([("main",)], role_fetcher(cursor_instance_3))

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")

    first_replica_down = [
        ("instance_1", "127.0.0.1:10011", False, "unknown"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    mg_sleep_and_assert(first_replica_down, cluster_rows)

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")

    both_replicas_down = [
        ("instance_1", "127.0.0.1:10011", False, "unknown"),
        ("instance_2", "127.0.0.1:10012", False, "unknown"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    mg_sleep_and_assert(both_replicas_down, cluster_rows)
|
||||
|
||||
|
||||
def test_simple_automatic_failover():
    """Killing the main (instance_3) makes the coordinator report instance_1 as main.

    Before the kill, the main must list both other instances in SHOW REPLICAS.
    After the kill, the coordinator rows and SHOW REPLICAS on instance_1 are polled
    until they match the expected post-failover state.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    cursor_main = connect(host="localhost", port=7687).cursor()
    replicas_seen_by_main = sorted(execute_and_fetch_all(cursor_main, "SHOW REPLICAS;"))
    replicas_expected_on_main = [
        ("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"),
        ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
    ]
    # Checked once, without polling.
    assert replicas_seen_by_main == replicas_expected_on_main

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    cursor_coordinator = connect(host="localhost", port=7690).cursor()

    def cluster_rows():
        return sorted(execute_and_fetch_all(cursor_coordinator, "SHOW REPLICATION CLUSTER;"))

    cluster_after_failover = [
        ("instance_1", "127.0.0.1:10011", True, "main"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", False, "unknown"),
    ]
    mg_sleep_and_assert(cluster_after_failover, cluster_rows)

    cursor_new_main = connect(host="localhost", port=7688).cursor()

    def replicas_of_new_main():
        return sorted(execute_and_fetch_all(cursor_new_main, "SHOW REPLICAS;"))

    replicas_expected_on_new_main = [
        ("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
    ]
    mg_sleep_and_assert(replicas_expected_on_new_main, replicas_of_new_main)
|
||||
|
||||
|
||||
def test_registering_replica_fails_name_exists():
    """Registering a second instance under the already used name instance_1 must fail."""
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    cursor_coordinator = connect(host="localhost", port=7690).cursor()
    duplicate_name_query = "REGISTER INSTANCE instance_1 ON '127.0.0.1:10051' WITH '127.0.0.1:10111';"
    with pytest.raises(Exception) as raised:
        execute_and_fetch_all(cursor_coordinator, duplicate_name_query)

    assert str(raised.value) == "Couldn't register replica instance since instance with such name already exists!"
    # Unlike the removal at the top of the test, this one is not wrapped in safe_execute.
    shutil.rmtree(TEMP_DIR)
|
||||
|
||||
|
||||
def test_registering_replica_fails_endpoint_exists():
    """Registering a new name on the endpoint already used by instance_1 must fail."""
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    cursor_coordinator = connect(host="localhost", port=7690).cursor()
    duplicate_endpoint_query = "REGISTER INSTANCE instance_5 ON '127.0.0.1:10011' WITH '127.0.0.1:10005';"
    with pytest.raises(Exception) as raised:
        execute_and_fetch_all(cursor_coordinator, duplicate_endpoint_query)

    assert str(raised.value) == "Couldn't register replica instance since instance with such endpoint already exists!"
|
||||
|
||||
|
||||
def test_replica_instance_restarts():
    """A killed replica (instance_1) is reported as a replica again once restarted.

    The coordinator rows are polled before the kill, after the kill and after the
    restart; finally instance_1 itself is asked for its replication role.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    cursor_coordinator = connect(host="localhost", port=7690).cursor()

    def cluster_rows():
        return sorted(execute_and_fetch_all(cursor_coordinator, "SHOW REPLICATION CLUSTER;"))

    cluster_all_up = [
        ("instance_1", "127.0.0.1:10011", True, "replica"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    cluster_replica_down = [
        ("instance_1", "127.0.0.1:10011", False, "unknown"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]

    mg_sleep_and_assert(cluster_all_up, cluster_rows)

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    mg_sleep_and_assert(cluster_replica_down, cluster_rows)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    mg_sleep_and_assert(cluster_all_up, cluster_rows)

    cursor_instance_1 = connect(host="localhost", port=7688).cursor()

    def role_of_instance_1():
        return sorted(execute_and_fetch_all(cursor_instance_1, "SHOW REPLICATION ROLE;"))

    mg_sleep_and_assert([("replica",)], role_of_instance_1)
|
||||
|
||||
|
||||
def test_automatic_failover_main_back_as_replica():
    """After a failover, the old main (instance_3) comes back in the replica role.

    instance_3 is killed, the coordinator is expected to report instance_1 as main,
    then instance_3 is restarted and expected to show up, and to report itself, as
    a replica.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    cursor_coordinator = connect(host="localhost", port=7690).cursor()

    def cluster_rows():
        return sorted(execute_and_fetch_all(cursor_coordinator, "SHOW REPLICATION CLUSTER;"))

    cluster_after_failover = [
        ("instance_1", "127.0.0.1:10011", True, "main"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", False, "unknown"),
    ]
    mg_sleep_and_assert(cluster_after_failover, cluster_rows)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    cluster_after_old_main_returns = [
        ("instance_1", "127.0.0.1:10011", True, "main"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "replica"),
    ]
    mg_sleep_and_assert(cluster_after_old_main_returns, cluster_rows)

    cursor_instance_3 = connect(host="localhost", port=7687).cursor()

    def role_of_instance_3():
        return sorted(execute_and_fetch_all(cursor_instance_3, "SHOW REPLICATION ROLE;"))

    mg_sleep_and_assert([("replica",)], role_of_instance_3)
|
||||
|
||||
|
||||
def test_automatic_failover_main_back_as_main():
    """With every data instance killed, the old main (instance_3) returns as main.

    All three data instances are killed, instance_3 is restarted first and must be
    reported as main; then instance_1 and instance_2 are restarted and must be
    reported as replicas. Roles are also checked on the instances themselves.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    for instance_name in ("instance_1", "instance_2", "instance_3"):
        interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, instance_name)

    cursor_coordinator = connect(host="localhost", port=7690).cursor()

    def cluster_rows():
        return sorted(execute_and_fetch_all(cursor_coordinator, "SHOW REPLICATION CLUSTER;"))

    def role_fetcher(cursor):
        # Builds a zero-argument callable, as expected by mg_sleep_and_assert.
        def fetch_role():
            return sorted(execute_and_fetch_all(cursor, "SHOW REPLICATION ROLE;"))

        return fetch_role

    cluster_all_down = [
        ("instance_1", "127.0.0.1:10011", False, "unknown"),
        ("instance_2", "127.0.0.1:10012", False, "unknown"),
        ("instance_3", "127.0.0.1:10013", False, "unknown"),
    ]
    mg_sleep_and_assert(cluster_all_down, cluster_rows)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
    cluster_main_back = [
        ("instance_1", "127.0.0.1:10011", False, "unknown"),
        ("instance_2", "127.0.0.1:10012", False, "unknown"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    mg_sleep_and_assert(cluster_main_back, cluster_rows)

    cursor_instance_3 = connect(host="localhost", port=7687).cursor()
    role_of_instance_3 = role_fetcher(cursor_instance_3)
    mg_sleep_and_assert([("main",)], role_of_instance_3)

    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")

    cluster_replicas_back = [
        ("instance_1", "127.0.0.1:10011", True, "replica"),
        ("instance_2", "127.0.0.1:10012", True, "replica"),
        ("instance_3", "127.0.0.1:10013", True, "main"),
    ]
    mg_sleep_and_assert(cluster_replicas_back, cluster_rows)

    cursor_instance_1 = connect(host="localhost", port=7688).cursor()
    cursor_instance_2 = connect(host="localhost", port=7689).cursor()

    mg_sleep_and_assert([("replica",)], role_fetcher(cursor_instance_1))
    mg_sleep_and_assert([("replica",)], role_fetcher(cursor_instance_2))
    mg_sleep_and_assert([("main",)], role_of_instance_3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
@ -1,224 +0,0 @@
|
||||
# Copyright 2022 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import interactive_mg_runner
|
||||
import pytest
|
||||
from common import execute_and_fetch_all
|
||||
from mg_utils import mg_sleep_and_assert
|
||||
|
||||
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
interactive_mg_runner.PROJECT_DIR = os.path.normpath(
|
||||
os.path.join(interactive_mg_runner.SCRIPT_DIR, "..", "..", "..", "..")
|
||||
)
|
||||
interactive_mg_runner.BUILD_DIR = os.path.normpath(os.path.join(interactive_mg_runner.PROJECT_DIR, "build"))
|
||||
interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactive_mg_runner.BUILD_DIR, "memgraph"))
|
||||
|
||||
MEMGRAPH_INSTANCES_DESCRIPTION = {
|
||||
"instance_1": {
|
||||
"args": ["--bolt-port", "7688", "--log-level", "TRACE", "--coordinator-server-port", "10011"],
|
||||
"log_file": "replica1.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
|
||||
},
|
||||
"instance_2": {
|
||||
"args": ["--bolt-port", "7689", "--log-level", "TRACE", "--coordinator-server-port", "10012"],
|
||||
"log_file": "replica2.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
|
||||
},
|
||||
"instance_3": {
|
||||
"args": ["--bolt-port", "7687", "--log-level", "TRACE", "--coordinator-server-port", "10013"],
|
||||
"log_file": "main.log",
|
||||
"setup_queries": [
|
||||
"REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001'",
|
||||
"REGISTER REPLICA instance_2 SYNC TO '127.0.0.1:10002'",
|
||||
],
|
||||
},
|
||||
"coordinator": {
|
||||
"args": ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"],
|
||||
"log_file": "replica3.log",
|
||||
"setup_queries": [
|
||||
"REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011';",
|
||||
"REGISTER REPLICA instance_2 SYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012';",
|
||||
"REGISTER MAIN instance_3 WITH COORDINATOR SERVER ON '127.0.0.1:10013';",
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_show_replication_cluster(connection):
|
||||
# Goal of this test is to check the SHOW REPLICATION CLUSTER command.
|
||||
# 1. We start all replicas, main and coordinator manually: we want to be able to kill them ourselves without relying on external tooling to kill processes.
|
||||
# 2. We check that all replicas and main have the correct state: they should all be alive.
|
||||
# 3. We kill one replica. It should not appear anymore in the SHOW REPLICATION CLUSTER command.
|
||||
# 4. We kill main. It should not appear anymore in the SHOW REPLICATION CLUSTER command.
|
||||
|
||||
# 1.
|
||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
|
||||
# 2.
|
||||
|
||||
# We leave some time for the coordinator to realise the replicas are down.
|
||||
def retrieve_data():
|
||||
return set(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;"))
|
||||
|
||||
expected_data = {
|
||||
("instance_1", "127.0.0.1:10011", True, "replica"),
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", True, "main"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data, retrieve_data)
|
||||
|
||||
# 3.
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
|
||||
|
||||
expected_data = {
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", True, "main"),
|
||||
("instance_1", "127.0.0.1:10011", False, "replica"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data, retrieve_data)
|
||||
|
||||
# 4.
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
|
||||
|
||||
expected_data = {
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_1", "127.0.0.1:10011", False, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", False, "main"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data, retrieve_data)
|
||||
|
||||
|
||||
def test_simple_client_initiated_failover(connection):
|
||||
# 1. Start all instances
|
||||
# 2. Kill main
|
||||
# 3. Run DO FAILOVER on COORDINATOR
|
||||
# 4. Assert new config on coordinator by running show replication cluster
|
||||
# 5. Assert replicas on new main
|
||||
|
||||
# 1.
|
||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||
|
||||
# 2.
|
||||
main_cursor = connection(7687, "instance_3").cursor()
|
||||
expected_data_on_main = {
|
||||
("instance_1", "127.0.0.1:10001", "sync", 0, 0, "ready"),
|
||||
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
|
||||
}
|
||||
actual_data_on_main = set(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))
|
||||
assert actual_data_on_main == expected_data_on_main
|
||||
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
|
||||
coord_cursor = connection(7690, "coordinator").cursor()
|
||||
|
||||
def retrieve_data_show_repl_cluster():
|
||||
return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;"))
|
||||
|
||||
expected_data_on_coord = {
|
||||
("instance_1", "127.0.0.1:10011", True, "replica"),
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", False, "main"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)
|
||||
|
||||
# 3.
|
||||
execute_and_fetch_all(coord_cursor, "DO FAILOVER")
|
||||
|
||||
expected_data_on_coord = {
|
||||
("instance_1", "127.0.0.1:10011", True, "main"),
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)
|
||||
|
||||
# 4.
|
||||
new_main_cursor = connection(7688, "instance_1").cursor()
|
||||
|
||||
def retrieve_data_show_replicas():
|
||||
return set(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))
|
||||
|
||||
expected_data_on_new_main = {
|
||||
("instance_2", "127.0.0.1:10002", "sync", 0, 0, "ready"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas)
|
||||
|
||||
|
||||
def test_failover_fails_all_replicas_down(connection):
|
||||
# 1. Start all instances
|
||||
# 2. Kill all replicas
|
||||
# 3. Kill main
|
||||
# 4. Run DO FAILOVER on COORDINATOR. Assert exception is being thrown due to all replicas being down
|
||||
# 5. Assert cluster status didn't change
|
||||
|
||||
# 1.
|
||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||
|
||||
# 2.
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
|
||||
|
||||
# 3.
|
||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
|
||||
|
||||
coord_cursor = connection(7690, "coordinator").cursor()
|
||||
# 4.
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(coord_cursor, "DO FAILOVER;")
|
||||
assert str(e.value) == "Failover aborted since all replicas are down!"
|
||||
|
||||
# 5.
|
||||
|
||||
def retrieve_data():
|
||||
return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;"))
|
||||
|
||||
expected_data_on_coord = {
|
||||
("instance_1", "127.0.0.1:10011", False, "replica"),
|
||||
("instance_2", "127.0.0.1:10012", False, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", False, "main"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data_on_coord, retrieve_data)
|
||||
|
||||
|
||||
def test_failover_fails_main_is_alive(connection):
|
||||
# 1. Start all instances
|
||||
# 2. Run DO FAILOVER on COORDINATOR. Assert exception is being thrown due to main is still live.
|
||||
# 3. Assert cluster status didn't change
|
||||
|
||||
# 1.
|
||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||
|
||||
# 2.
|
||||
coord_cursor = connection(7690, "coordinator").cursor()
|
||||
|
||||
def retrieve_data():
|
||||
return set(execute_and_fetch_all(coord_cursor, "SHOW REPLICATION CLUSTER;"))
|
||||
|
||||
expected_data_on_coord = {
|
||||
("instance_1", "127.0.0.1:10011", True, "replica"),
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", True, "main"),
|
||||
}
|
||||
mg_sleep_and_assert(expected_data_on_coord, retrieve_data)
|
||||
|
||||
# 4.
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(coord_cursor, "DO FAILOVER;")
|
||||
assert str(e.value) == "Failover aborted since main is alive!"
|
||||
|
||||
# 5.
|
||||
mg_sleep_and_assert(expected_data_on_coord, retrieve_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
@ -23,3 +23,10 @@ def connect(**kwargs) -> mgclient.Connection:
|
||||
connection = mgclient.connect(**kwargs)
|
||||
connection.autocommit = True
|
||||
return connection
|
||||
|
||||
|
||||
def safe_execute(function, *args):
|
||||
try:
|
||||
function(*args)
|
||||
except:
|
||||
pass
|
||||
|
@ -1,43 +0,0 @@
|
||||
# Copyright 2022 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import pytest
|
||||
from common import connect, execute_and_fetch_all
|
||||
|
||||
|
||||
# The fixture here is more complex because the connection has to be
|
||||
# parameterized based on the test parameters (info has to be available on both
|
||||
# sides).
|
||||
#
|
||||
# https://docs.pytest.org/en/latest/example/parametrize.html#indirect-parametrization
|
||||
# is not an elegant/feasible solution here.
|
||||
#
|
||||
# The solution was independently developed and then I stumbled upon the same
|
||||
# approach here https://stackoverflow.com/a/68286553/4888809 which I think is
|
||||
# optimal.
|
||||
@pytest.fixture(scope="function")
|
||||
def connection():
|
||||
connection_holder = None
|
||||
role_holder = None
|
||||
|
||||
def inner_connection(port, role):
|
||||
nonlocal connection_holder, role_holder
|
||||
connection_holder = connect(host="localhost", port=port)
|
||||
role_holder = role
|
||||
return connection_holder
|
||||
|
||||
yield inner_connection
|
||||
|
||||
# Only main instance can be cleaned up because replicas do NOT accept
|
||||
# writes.
|
||||
if role_holder == "main":
|
||||
cursor = connection_holder.cursor()
|
||||
execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n;")
|
@ -12,87 +12,76 @@
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from common import execute_and_fetch_all
|
||||
from common import connect, execute_and_fetch_all
|
||||
from mg_utils import mg_sleep_and_assert
|
||||
|
||||
|
||||
def test_disable_cypher_queries(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
def test_disable_cypher_queries():
|
||||
cursor = connect(host="localhost", port=7690).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "CREATE (n:TestNode {prop: 'test'})")
|
||||
assert str(e.value) == "Coordinator can run only coordinator queries!"
|
||||
|
||||
|
||||
def test_coordinator_cannot_be_replica_role(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
cursor = connect(host="localhost", port=7690).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "SET REPLICATION ROLE TO REPLICA WITH PORT 10001;")
|
||||
assert str(e.value) == "Coordinator can run only coordinator queries!"
|
||||
|
||||
|
||||
def test_coordinator_cannot_run_show_repl_role(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
def test_coordinator_cannot_run_show_repl_role():
|
||||
cursor = connect(host="localhost", port=7690).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "SHOW REPLICATION ROLE;")
|
||||
assert str(e.value) == "Coordinator can run only coordinator queries!"
|
||||
|
||||
|
||||
def test_coordinator_show_replication_cluster(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
def test_coordinator_show_replication_cluster():
|
||||
cursor = connect(host="localhost", port=7690).cursor()
|
||||
|
||||
def retrieve_data():
|
||||
return set(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;"))
|
||||
return sorted(list(execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")))
|
||||
|
||||
expected_data = {
|
||||
("main", "127.0.0.1:10013", True, "main"),
|
||||
("replica_1", "127.0.0.1:10011", True, "replica"),
|
||||
("replica_2", "127.0.0.1:10012", True, "replica"),
|
||||
}
|
||||
expected_data = [
|
||||
("instance_1", "127.0.0.1:10011", True, "replica"),
|
||||
("instance_2", "127.0.0.1:10012", True, "replica"),
|
||||
("instance_3", "127.0.0.1:10013", True, "main"),
|
||||
]
|
||||
mg_sleep_and_assert(expected_data, retrieve_data)
|
||||
|
||||
|
||||
def test_coordinator_cannot_call_show_replicas(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
cursor = connect(host="localhost", port=7690).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "SHOW REPLICAS;")
|
||||
assert str(e.value) == "Coordinator can run only coordinator queries!"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"port, role",
|
||||
[(7687, "main"), (7688, "replica"), (7689, "replica")],
|
||||
"port",
|
||||
[7687, 7688, 7689],
|
||||
)
|
||||
def test_main_and_replicas_cannot_call_show_repl_cluster(port, role, connection):
|
||||
cursor = connection(port, role).cursor()
|
||||
def test_main_and_replicas_cannot_call_show_repl_cluster(port):
|
||||
cursor = connect(host="localhost", port=port).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "SHOW REPLICATION CLUSTER;")
|
||||
assert str(e.value) == "Only coordinator can run SHOW REPLICATION CLUSTER."
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"port, role",
|
||||
[(7687, "main"), (7688, "replica"), (7689, "replica")],
|
||||
"port",
|
||||
[7687, 7688, 7689],
|
||||
)
|
||||
def test_main_and_replicas_cannot_register_coord_server(port, role, connection):
|
||||
cursor = connection(port, role).cursor()
|
||||
def test_main_and_replicas_cannot_register_coord_server(port):
|
||||
cursor = connect(host="localhost", port=port).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(
|
||||
cursor,
|
||||
"REGISTER REPLICA instance_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011';",
|
||||
"REGISTER INSTANCE instance_1 ON '127.0.0.1:10001' WITH '127.0.0.1:10011';",
|
||||
)
|
||||
assert str(e.value) == "Only coordinator can register coordinator server!"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"port, role",
|
||||
[(7687, "main"), (7688, "replica"), (7689, "replica")],
|
||||
)
|
||||
def test_main_and_replicas_cannot_run_do_failover(port, role, connection):
|
||||
cursor = connection(port, role).cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "DO FAILOVER;")
|
||||
assert str(e.value) == "Only coordinator can run DO FAILOVER!"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
||||
|
@ -1,26 +0,0 @@
|
||||
# Copyright 2022 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from common import execute_and_fetch_all
|
||||
|
||||
|
||||
def test_failover_on_non_setup_cluster(connection):
|
||||
cursor = connection(7690, "coordinator").cursor()
|
||||
with pytest.raises(Exception) as e:
|
||||
execute_and_fetch_all(cursor, "DO FAILOVER;")
|
||||
assert str(e.value) == "Failover aborted since cluster is uninitialized!"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
@ -3,49 +3,24 @@ ha_cluster: &ha_cluster
|
||||
replica_1:
|
||||
args: ["--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"]
|
||||
log_file: "replication-e2e-replica1.log"
|
||||
setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"]
|
||||
replica_2:
|
||||
args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"]
|
||||
log_file: "replication-e2e-replica2.log"
|
||||
setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"]
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"]
|
||||
log_file: "replication-e2e-main.log"
|
||||
setup_queries: [
|
||||
"REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001'",
|
||||
"REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002'",
|
||||
]
|
||||
coordinator:
|
||||
args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"]
|
||||
log_file: "replication-e2e-coordinator.log"
|
||||
setup_queries: [
|
||||
"REGISTER MAIN main WITH COORDINATOR SERVER ON '127.0.0.1:10013'",
|
||||
"REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001' WITH COORDINATOR SERVER ON '127.0.0.1:10011'",
|
||||
"REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012'",
|
||||
]
|
||||
|
||||
noninitialized_cluster: &noninitialized_cluster
|
||||
cluster:
|
||||
replica_1:
|
||||
args: ["--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"]
|
||||
log_file: "replication-e2e-replica1.log"
|
||||
setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"]
|
||||
replica_2:
|
||||
args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"]
|
||||
log_file: "replication-e2e-replica2.log"
|
||||
setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"]
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"]
|
||||
log_file: "replication-e2e-main.log"
|
||||
setup_queries: [
|
||||
"REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001'",
|
||||
"REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002'",
|
||||
]
|
||||
coordinator:
|
||||
args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"]
|
||||
log_file: "replication-e2e-coordinator.log"
|
||||
setup_queries: []
|
||||
|
||||
replica_2:
|
||||
args: ["--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"]
|
||||
log_file: "replication-e2e-replica2.log"
|
||||
setup_queries: []
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"]
|
||||
log_file: "replication-e2e-main.log"
|
||||
setup_queries: []
|
||||
coordinator:
|
||||
args: ["--bolt-port", "7690", "--log-level=TRACE", "--coordinator"]
|
||||
log_file: "replication-e2e-coordinator.log"
|
||||
setup_queries: [
|
||||
"REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';",
|
||||
"REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';",
|
||||
"REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';",
|
||||
"SET INSTANCE instance_3 TO MAIN;"
|
||||
]
|
||||
|
||||
workloads:
|
||||
- name: "Coordinator"
|
||||
@ -53,11 +28,6 @@ workloads:
|
||||
args: ["high_availability_experimental/coordinator.py"]
|
||||
<<: *ha_cluster
|
||||
|
||||
- name: "Uninitialized cluster"
|
||||
- name: "Automatic failover"
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
args: ["high_availability_experimental/uninitialized_cluster.py"]
|
||||
<<: *noninitialized_cluster
|
||||
|
||||
- name: "Client initiated failover"
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
args: ["high_availability_experimental/client_initiated_failover.py"]
|
||||
args: ["high_availability_experimental/automatic_failover.py"]
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
DECLARE_int32(min_log_level);
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "storage/v2/storage.hpp"
|
||||
#include "storage/v2/vertex_accessor.hpp"
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
/**
|
||||
* It is possible to run test with custom seed with:
|
||||
* RC_PARAMS="seed=1" ./random_graph
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "storage/v2/disk/storage.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "storage/v2/view.hpp"
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
#ifdef MG_ENTERPRISE
|
||||
template <typename StorageType>
|
||||
class FineGrainedAuthCheckerFixture : public testing::Test {
|
||||
|
@ -43,7 +43,7 @@ class VertexDb : public Database {
|
||||
}
|
||||
|
||||
std::unique_ptr<memgraph::storage::Storage::Accessor> Access() override {
|
||||
return db_->Access(memgraph::replication::ReplicationRole::MAIN);
|
||||
return db_->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
}
|
||||
|
||||
std::unique_ptr<LogicalOperator> MakeBfsOperator(Symbol source_sym, Symbol sink_sym, Symbol edge_sym,
|
||||
|
@ -32,7 +32,7 @@ class SingleNodeDb : public Database {
|
||||
}
|
||||
|
||||
std::unique_ptr<memgraph::storage::Storage::Accessor> Access() override {
|
||||
return db_->Access(memgraph::replication::ReplicationRole::MAIN);
|
||||
return db_->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
}
|
||||
|
||||
std::unique_ptr<LogicalOperator> MakeBfsOperator(Symbol source_sym, Symbol sink_sym, Symbol edge_sym,
|
||||
|
@ -182,7 +182,7 @@ void TestVertexAndEdgeWithDifferentStorages(std::unique_ptr<memgraph::storage::S
|
||||
output.clear();
|
||||
|
||||
// create vertex
|
||||
auto dba = db->Access(memgraph::replication::ReplicationRole::MAIN);
|
||||
auto dba = db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
auto va1 = dba->CreateVertex();
|
||||
auto va2 = dba->CreateVertex();
|
||||
auto l1 = dba->NameToLabel("label1");
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "storage/v2/property_value.hpp"
|
||||
#include "storage/v2/view.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
class ClearingOldDiskDataTest : public ::testing::Test {
|
||||
public:
|
||||
|
@ -43,7 +43,8 @@ struct CppApiTestFixture : public ::testing::Test {
|
||||
}
|
||||
|
||||
memgraph::query::DbAccessor &CreateDbAccessor(const memgraph::storage::IsolationLevel isolationLevel) {
|
||||
accessors_.push_back(storage->Access(memgraph::replication::ReplicationRole::MAIN, isolationLevel));
|
||||
accessors_.push_back(
|
||||
storage->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN, isolationLevel));
|
||||
db_accessors_.emplace_back(accessors_.back().get());
|
||||
return db_accessors_.back();
|
||||
}
|
||||
|
@ -2632,77 +2632,6 @@ TEST_P(CypherMainVisitorTest, TestRegisterReplicationQuery) {
|
||||
ReplicationQuery::SyncMode::SYNC);
|
||||
}
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
TEST_P(CypherMainVisitorTest, TestRegisterCoordinatorServer) {
|
||||
auto &ast_generator = *GetParam();
|
||||
|
||||
{
|
||||
const std::string faulty_query_1 = "REGISTER MAIN COORDINATOR SERVER TO";
|
||||
ASSERT_THROW(ast_generator.ParseQuery(faulty_query_1), SyntaxException);
|
||||
}
|
||||
|
||||
{
|
||||
const std::string faulty_query_2 = "REGISTER MAIN COORDINATOR SERVER TO MAIN";
|
||||
ASSERT_THROW(ast_generator.ParseQuery(faulty_query_2), SyntaxException);
|
||||
}
|
||||
|
||||
{
|
||||
std::string full_query = "REGISTER MAIN main WITH COORDINATOR SERVER ON '127.0.0.1:10011';";
|
||||
|
||||
auto *full_query_parsed = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(full_query));
|
||||
|
||||
ASSERT_TRUE(full_query_parsed);
|
||||
EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_MAIN_COORDINATOR_SERVER);
|
||||
EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::MAIN);
|
||||
EXPECT_EQ(full_query_parsed->instance_name_, "main");
|
||||
ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10011");
|
||||
ASSERT_EQ(full_query_parsed->socket_address_, nullptr);
|
||||
}
|
||||
|
||||
{
|
||||
std::string full_query =
|
||||
R"(REGISTER REPLICA replica_1 SYNC TO "127.0.0.1:10002" WITH COORDINATOR SERVER ON "127.0.0.1:10012")";
|
||||
auto *full_query_parsed = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(full_query));
|
||||
ASSERT_TRUE(full_query_parsed);
|
||||
EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER);
|
||||
EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::REPLICA);
|
||||
ast_generator.CheckLiteral(full_query_parsed->socket_address_, "127.0.0.1:10002");
|
||||
ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10012");
|
||||
EXPECT_EQ(full_query_parsed->instance_name_, "replica_1");
|
||||
EXPECT_EQ(full_query_parsed->sync_mode_, CoordinatorQuery::SyncMode::SYNC);
|
||||
}
|
||||
|
||||
{
|
||||
std::string full_query =
|
||||
R"(REGISTER REPLICA replica_1 ASYNC TO '127.0.0.1:10002' WITH COORDINATOR SERVER ON '127.0.0.1:10012')";
|
||||
auto *full_query_parsed = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(full_query));
|
||||
ASSERT_TRUE(full_query_parsed);
|
||||
EXPECT_EQ(full_query_parsed->action_, CoordinatorQuery::Action::REGISTER_REPLICA_COORDINATOR_SERVER);
|
||||
EXPECT_EQ(full_query_parsed->role_, CoordinatorQuery::ReplicationRole::REPLICA);
|
||||
ast_generator.CheckLiteral(full_query_parsed->socket_address_, "127.0.0.1:10002");
|
||||
ast_generator.CheckLiteral(full_query_parsed->coordinator_socket_address_, "127.0.0.1:10012");
|
||||
EXPECT_EQ(full_query_parsed->instance_name_, "replica_1");
|
||||
EXPECT_EQ(full_query_parsed->sync_mode_, CoordinatorQuery::SyncMode::ASYNC);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(CypherMainVisitorTest, TestDoFailover) {
|
||||
auto &ast_generator = *GetParam();
|
||||
|
||||
{
|
||||
std::string invalid_query = "DO FAILO";
|
||||
ASSERT_THROW(ast_generator.ParseQuery(invalid_query), SyntaxException);
|
||||
}
|
||||
|
||||
{
|
||||
std::string correct_query = "DO FAILOVER";
|
||||
auto *correct_query_parsed = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(correct_query));
|
||||
ASSERT_TRUE(correct_query_parsed);
|
||||
EXPECT_EQ(correct_query_parsed->action_, CoordinatorQuery::Action::DO_FAILOVER);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST_P(CypherMainVisitorTest, TestDeleteReplica) {
|
||||
auto &ast_generator = *GetParam();
|
||||
|
||||
|
@ -165,8 +165,8 @@ TYPED_TEST(InfoTest, InfoCheck) {
|
||||
ASSERT_FALSE(unique_acc->Commit().HasError());
|
||||
}
|
||||
|
||||
const auto &info =
|
||||
db_acc->GetInfo(true, memgraph::replication::ReplicationRole::MAIN); // force to use configured directory
|
||||
const auto &info = db_acc->GetInfo(
|
||||
true, memgraph::replication_coordination_glue::ReplicationRole::MAIN); // force to use configured directory
|
||||
|
||||
ASSERT_EQ(info.storage_info.vertex_count, 5);
|
||||
ASSERT_EQ(info.storage_info.edge_count, 2);
|
||||
|
@ -43,7 +43,7 @@ class PrintToJsonTest : public ::testing::Test {
|
||||
PrintToJsonTest()
|
||||
: config(disk_test_utils::GenerateOnDiskConfig(testSuite)),
|
||||
db(new StorageType(config)),
|
||||
dba_storage(db->Access(memgraph::replication::ReplicationRole::MAIN)),
|
||||
dba_storage(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)),
|
||||
dba(dba_storage.get()) {}
|
||||
|
||||
~PrintToJsonTest() override {
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
using namespace memgraph::query;
|
||||
using namespace memgraph::query::plan;
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
using CardParam = CostEstimator<memgraph::query::DbAccessor>::CardParam;
|
||||
using CostParam = CostEstimator<memgraph::query::DbAccessor>::CostParam;
|
||||
using MiscParam = CostEstimator<memgraph::query::DbAccessor>::MiscParam;
|
||||
|
@ -141,7 +141,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
|
||||
// Capture all vertices
|
||||
std::map<memgraph::storage::Gid, int64_t> gid_mapping;
|
||||
std::set<DatabaseState::Vertex> vertices;
|
||||
auto dba = db->Access(memgraph::replication::ReplicationRole::MAIN);
|
||||
auto dba = db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
for (const auto &vertex : dba->Vertices(memgraph::storage::View::NEW)) {
|
||||
std::set<std::string, std::less<>> labels;
|
||||
auto maybe_labels = vertex.Labels(memgraph::storage::View::NEW);
|
||||
@ -1105,7 +1105,7 @@ TYPED_TEST(DumpTest, MultiplePartialPulls) {
|
||||
}
|
||||
|
||||
TYPED_TEST(DumpTest, DumpDatabaseWithTriggers) {
|
||||
auto acc = this->db->storage()->Access(memgraph::replication::ReplicationRole::MAIN);
|
||||
auto acc = this->db->storage()->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN);
|
||||
memgraph::query::DbAccessor dba(acc.get());
|
||||
{
|
||||
auto trigger_store = this->db.get()->trigger_store();
|
||||
|
@ -67,7 +67,7 @@ class ExpressionEvaluatorTest : public ::testing::Test {
|
||||
ExpressionEvaluatorTest()
|
||||
: config(disk_test_utils::GenerateOnDiskConfig(testSuite)),
|
||||
db(new StorageType(config)),
|
||||
storage_dba(db->Access(memgraph::replication::ReplicationRole::MAIN)),
|
||||
storage_dba(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)),
|
||||
dba(storage_dba.get()) {}
|
||||
|
||||
~ExpressionEvaluatorTest() override {
|
||||
|
@ -39,7 +39,7 @@ class HintProviderSuite : public ::testing::Test {
|
||||
int symbol_count = 0;
|
||||
|
||||
void SetUp() {
|
||||
storage_dba.emplace(db->Access(memgraph::replication::ReplicationRole::MAIN));
|
||||
storage_dba.emplace(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN));
|
||||
dba.emplace(storage_dba->get());
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include "storage/v2/disk/storage.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
using namespace memgraph::query;
|
||||
using namespace memgraph::query::plan;
|
||||
|
@ -31,7 +31,7 @@
|
||||
|
||||
#include "query_plan_common.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
using namespace memgraph::query;
|
||||
using namespace memgraph::query::plan;
|
||||
|
||||
|
@ -38,7 +38,7 @@
|
||||
|
||||
using namespace memgraph::query;
|
||||
using namespace memgraph::query::plan;
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
template <typename StorageType>
|
||||
class QueryPlanTest : public testing::Test {
|
||||
|
@ -42,7 +42,7 @@
|
||||
|
||||
using namespace memgraph::query;
|
||||
using namespace memgraph::query::plan;
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
const std::string testSuite = "query_plan_match_filter_return";
|
||||
|
||||
|
@ -37,7 +37,7 @@ class OperatorToStringTest : public ::testing::Test {
|
||||
OperatorToStringTest()
|
||||
: config(disk_test_utils::GenerateOnDiskConfig(testSuite)),
|
||||
db(new StorageType(config)),
|
||||
dba_storage(db->Access(memgraph::replication::ReplicationRole::MAIN)),
|
||||
dba_storage(db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)),
|
||||
dba(dba_storage.get()) {}
|
||||
|
||||
~OperatorToStringTest() override {
|
||||
|
@ -37,7 +37,7 @@ class ReadWriteTypeCheckTest : public ::testing::Test {
|
||||
memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite);
|
||||
std::unique_ptr<memgraph::storage::Storage> db{new StorageType(config)};
|
||||
std::unique_ptr<memgraph::storage::Storage::Accessor> dba_storage{
|
||||
db->Access(memgraph::replication::ReplicationRole::MAIN)};
|
||||
db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)};
|
||||
memgraph::query::DbAccessor dba{dba_storage.get()};
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "storage/v2/disk/storage.hpp"
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
template <typename StorageType>
|
||||
class QueryPlan : public testing::Test {
|
||||
|
@ -37,7 +37,7 @@ class ExpressionPrettyPrinterTest : public ::testing::Test {
|
||||
memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite);
|
||||
std::unique_ptr<memgraph::storage::Storage> db{new StorageType(config)};
|
||||
std::unique_ptr<memgraph::storage::Storage::Accessor> storage_dba{
|
||||
db->Access(memgraph::replication::ReplicationRole::MAIN)};
|
||||
db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)};
|
||||
memgraph::query::DbAccessor dba{storage_dba.get()};
|
||||
AstStorage storage;
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "disk_test_utils.hpp"
|
||||
#include "test_utils.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
template <typename StorageType>
|
||||
class CypherType : public testing::Test {
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "storage/v2/inmemory/storage.hpp"
|
||||
#include "test_utils.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
template <typename StorageType>
|
||||
class PyModule : public testing::Test {
|
||||
|
@ -34,7 +34,7 @@
|
||||
#include "utils/memory.hpp"
|
||||
#include "utils/variant_helpers.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
#define EXPECT_SUCCESS(...) EXPECT_EQ(__VA_ARGS__, mgp_error::MGP_ERROR_NO_ERROR)
|
||||
|
||||
|
@ -35,7 +35,7 @@ class TestSymbolGenerator : public ::testing::Test {
|
||||
memgraph::storage::Config config = disk_test_utils::GenerateOnDiskConfig(testSuite);
|
||||
std::unique_ptr<memgraph::storage::Storage> db{new StorageType(config)};
|
||||
std::unique_ptr<memgraph::storage::Storage::Accessor> storage_dba{
|
||||
db->Access(memgraph::replication::ReplicationRole::MAIN)};
|
||||
db->Access(memgraph::replication_coordination_glue::ReplicationRole::MAIN)};
|
||||
memgraph::query::DbAccessor dba{storage_dba.get()};
|
||||
AstStorage storage;
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include "utils/exceptions.hpp"
|
||||
#include "utils/memory.hpp"
|
||||
|
||||
using memgraph::replication::ReplicationRole;
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
|
||||
namespace {
|
||||
const std::unordered_set<memgraph::query::TriggerEventType> kAllEventTypes{
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user