Merge branch 'master' into 1310-pattern-comprehension-executor

This commit is contained in:
Aidar Samerkhanov 2024-03-22 20:46:33 +04:00 committed by GitHub
commit 2895f604c3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
99 changed files with 3311 additions and 1302 deletions

View File

@ -257,6 +257,17 @@ jobs:
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph drivers
- name: Run HA driver tests
run: |
./release/package/mgbuild.sh \
--toolchain $TOOLCHAIN \
--os $OS \
--arch $ARCH \
--threads $THREADS \
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph drivers-high-availability
- name: Run integration tests
run: |
./release/package/mgbuild.sh \
@ -278,7 +289,7 @@ jobs:
--enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \
--organization-name $MEMGRAPH_ORGANIZATION_NAME \
test-memgraph cppcheck-and-clang-format
- name: Save cppcheck and clang-format errors
uses: actions/upload-artifact@v4
with:

View File

@ -303,6 +303,8 @@ ExternalProject_Add(mgcxx-proj
"-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
"-DENABLE_TESTS=OFF"
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
INSTALL_DIR "${PROJECT_BINARY_DIR}/mgcxx"
)
ExternalProject_Get_Property(mgcxx-proj install_dir)

View File

@ -48,9 +48,9 @@ SUPPORTED_ARCHS=(
)
SUPPORTED_TESTS=(
clang-tidy cppcheck-and-clang-format code-analysis
code-coverage drivers durability e2e gql-behave
code-coverage drivers drivers-high-availability durability e2e gql-behave
integration leftover-CTest macro-benchmark
mgbench stress-plain stress-ssl
mgbench stress-plain stress-ssl
unit unit-coverage upload-to-bench-graph
)
@ -116,7 +116,7 @@ print_help () {
echo -e "\nToolchain v5 supported OSs:"
echo -e " \"${SUPPORTED_OS_V5[*]}\""
echo -e "\nExample usage:"
echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run"
echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community"
@ -296,7 +296,7 @@ build_memgraph () {
docker cp "$PROJECT_ROOT/." "$build_container:$MGBUILD_ROOT_DIR/"
fi
# Change ownership of copied files so the mg user inside container can access them
docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR"
docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR"
echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..."
docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS"
@ -318,10 +318,9 @@ build_memgraph () {
# Define cmake command
local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .."
docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && $cmake_cmd"
# ' is used instead of " because we need to run make within the allowed
# container resources.
# Default value for $threads is 0 instead of $(nproc) because macos
# Default value for $threads is 0 instead of $(nproc) because macos
# doesn't support the nproc command.
# 0 is set for default value and checked here because mgbuild containers
# support nproc
@ -363,7 +362,7 @@ copy_memgraph() {
local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph"
local host_output_path="$PROJECT_ROOT/build/memgraph"
mkdir -p "$PROJECT_ROOT/build"
docker cp -L $build_container:$container_output_path $host_output_path
docker cp -L $build_container:$container_output_path $host_output_path
echo "Binary saved to $host_output_path"
;;
--build-logs)
@ -371,7 +370,7 @@ copy_memgraph() {
local container_output_path="$MGBUILD_ROOT_DIR/build/logs"
local host_output_path="$PROJECT_ROOT/build/logs"
mkdir -p "$PROJECT_ROOT/build"
docker cp -L $build_container:$container_output_path $host_output_path
docker cp -L $build_container:$container_output_path $host_output_path
echo "Build logs saved to $host_output_path"
;;
--package)
@ -418,6 +417,9 @@ test_memgraph() {
drivers)
docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh'
;;
drivers-high-availability)
docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run_cluster.sh'
;;
integration)
docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh'
;;
@ -664,4 +666,4 @@ case $command in
echo "Error: Unknown command '$command'"
exit 1
;;
esac
esac

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -88,6 +88,12 @@ class Session {
virtual void Configure(const std::map<std::string, memgraph::communication::bolt::Value> &run_time_info) = 0;
#ifdef MG_ENTERPRISE
virtual auto Route(std::map<std::string, Value> const &routing,
std::vector<memgraph::communication::bolt::Value> const &bookmarks,
std::map<std::string, Value> const &extra) -> std::map<std::string, Value> = 0;
#endif
/**
* Put results of the processed query in the `encoder`.
*

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -79,9 +79,9 @@ State RunHandlerV4(Signature signature, TSession &session, State state, Marker m
}
case Signature::Route: {
if constexpr (bolt_minor >= 3) {
if (signature == Signature::Route) return HandleRoute<TSession>(session, marker);
return HandleRoute<TSession>(session, marker);
} else {
spdlog::trace("Supported only in bolt v4.3");
spdlog::trace("Supported only in bolt versions >= 4.3");
return State::Close;
}
}

View File

@ -478,9 +478,6 @@ State HandleGoodbye() {
template <typename TSession>
State HandleRoute(TSession &session, const Marker marker) {
// Route message is not implemented since it is Neo4j specific, therefore we will receive it and inform user that
// there is no implementation. Before that, we have to read out the fields from the buffer to leave it in a clean
// state.
if (marker != Marker::TinyStruct3) {
spdlog::trace("Expected TinyStruct3 marker, but received 0x{:02x}!", utils::UnderlyingCast(marker));
return State::Close;
@ -496,11 +493,27 @@ State HandleRoute(TSession &session, const Marker marker) {
spdlog::trace("Couldn't read bookmarks field!");
return State::Close;
}
// TODO: (andi) Fix Bolt versions
Value db;
if (!session.decoder_.ReadValue(&db)) {
spdlog::trace("Couldn't read db field!");
return State::Close;
}
#ifdef MG_ENTERPRISE
try {
auto res = session.Route(routing.ValueMap(), bookmarks.ValueList(), {});
if (!session.encoder_.MessageSuccess(std::move(res))) {
spdlog::trace("Couldn't send result of routing!");
return State::Close;
}
return State::Idle;
} catch (const std::exception &e) {
return HandleFailure(session, e);
}
#else
session.encoder_buffer_.Clear();
bool fail_sent =
session.encoder_.MessageFailure({{"code", "66"}, {"message", "Route message is not supported in Memgraph!"}});
@ -509,6 +522,7 @@ State HandleRoute(TSession &session, const Marker marker) {
return State::Close;
}
return State::Error;
#endif
}
template <typename TSession>

View File

@ -6,7 +6,7 @@ target_sources(mg-coordination
include/coordination/coordinator_state.hpp
include/coordination/coordinator_rpc.hpp
include/coordination/coordinator_server.hpp
include/coordination/coordinator_config.hpp
include/coordination/coordinator_communication_config.hpp
include/coordination/coordinator_exceptions.hpp
include/coordination/coordinator_slk.hpp
include/coordination/coordinator_instance.hpp
@ -23,7 +23,7 @@ target_sources(mg-coordination
include/nuraft/coordinator_state_manager.hpp
PRIVATE
coordinator_config.cpp
coordinator_communication_config.cpp
coordinator_client.cpp
coordinator_state.cpp
coordinator_rpc.cpp

View File

@ -14,7 +14,7 @@
#include "coordination/coordinator_client.hpp"
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "coordination/coordinator_rpc.hpp"
#include "replication_coordination_glue/common.hpp"
#include "replication_coordination_glue/messages.hpp"
@ -23,18 +23,17 @@
namespace memgraph::coordination {
namespace {
auto CreateClientContext(memgraph::coordination::CoordinatorClientConfig const &config)
// Builds the RPC client context for talking to a replication instance.
// When the config carries SSL settings, the context is created from its key and
// certificate files; otherwise a default-constructed context is returned.
auto CreateClientContext(memgraph::coordination::CoordinatorToReplicaConfig const &config)
    -> communication::ClientContext {
  if (config.ssl) {
    return communication::ClientContext{config.ssl->key_file, config.ssl->cert_file};
  }
  return communication::ClientContext{};
}
} // namespace
CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config,
CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config,
HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb)
: rpc_context_{CreateClientContext(config)},
rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port),
&rpc_context_},
rpc_client_{config.mgt_server, &rpc_context_},
config_{std::move(config)},
coord_instance_{coord_instance},
succ_cb_{std::move(succ_cb)},
@ -86,7 +85,9 @@ void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); }
void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); }
void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); }
auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; }
// Returns, by value, the replication client info stored in this client's config.
auto CoordinatorClient::ReplicationClientInfo() const -> coordination::ReplicationClientInfo {
return config_.replication_client_info;
}
auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid,
ReplicationClientsInfo replication_clients_info) const -> bool {

View File

@ -18,101 +18,178 @@
namespace memgraph::coordination {
void to_json(nlohmann::json &j, InstanceState const &instance_state) {
j = nlohmann::json{{"config", instance_state.config}, {"status", instance_state.status}};
// Serializes a replication instance's state into `j` as a JSON object with the
// keys "config", "status" and "uuid".
void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state) {
  auto serialized = nlohmann::json::object();
  serialized["config"] = instance_state.config;
  serialized["status"] = instance_state.status;
  serialized["uuid"] = instance_state.instance_uuid;
  j = std::move(serialized);
}
void from_json(nlohmann::json const &j, InstanceState &instance_state) {
// Fills `instance_state` from the "config", "status" and "uuid" members of `j`.
// Fields are read in that order, so a failure on a later key leaves the earlier
// fields already overwritten. Lookups go through at(), which (per nlohmann::json's
// documented contract) throws when the key is absent.
void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state) {
j.at("config").get_to(instance_state.config);
j.at("status").get_to(instance_state.status);
j.at("uuid").get_to(instance_state.instance_uuid);
}
CoordinatorClusterState::CoordinatorClusterState(std::map<std::string, InstanceState, std::less<>> instances)
: instances_{std::move(instances)} {}
// Constructs a cluster state from its three parts: the map of replication instances
// (taken by value and moved into the member), the UUID of the current main and the
// flag telling whether a lock is currently opened.
CoordinatorClusterState::CoordinatorClusterState(std::map<std::string, ReplicationInstanceState, std::less<>> instances,
utils::UUID const &current_main_uuid, bool is_lock_opened)
: repl_instances_{std::move(instances)}, current_main_uuid_(current_main_uuid), is_lock_opened_(is_lock_opened) {}
CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) : instances_{other.instances_} {}
// Copy constructor: copies the replication instances, the current main UUID and the
// lock flag from `other`.
// NOTE(review): `coordinators_` (filled by ADD_COORDINATOR_INSTANCE in DoAction) is not
// copied here, and `other`'s members are read without taking its log_lock_ — confirm
// both are intended.
CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other)
: repl_instances_{other.repl_instances_},
current_main_uuid_(other.current_main_uuid_),
is_lock_opened_(other.is_lock_opened_) {}
CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState const &other) {
if (this == &other) {
return *this;
}
instances_ = other.instances_;
repl_instances_ = other.repl_instances_;
current_main_uuid_ = other.current_main_uuid_;
is_lock_opened_ = other.is_lock_opened_;
return *this;
}
CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState &&other) noexcept
: instances_{std::move(other.instances_)} {}
: repl_instances_{std::move(other.repl_instances_)},
current_main_uuid_(other.current_main_uuid_),
is_lock_opened_(other.is_lock_opened_) {}
CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState &&other) noexcept {
if (this == &other) {
return *this;
}
instances_ = std::move(other.instances_);
repl_instances_ = std::move(other.repl_instances_);
current_main_uuid_ = other.current_main_uuid_;
is_lock_opened_ = other.is_lock_opened_;
return *this;
}
auto CoordinatorClusterState::MainExists() const -> bool {
auto lock = std::shared_lock{log_lock_};
return std::ranges::any_of(instances_,
return std::ranges::any_of(repl_instances_,
[](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; });
}
auto CoordinatorClusterState::IsMain(std::string_view instance_name) const -> bool {
auto CoordinatorClusterState::HasMainState(std::string_view instance_name) const -> bool {
auto lock = std::shared_lock{log_lock_};
auto const it = instances_.find(instance_name);
return it != instances_.end() && it->second.status == ReplicationRole::MAIN;
auto const it = repl_instances_.find(instance_name);
return it != repl_instances_.end() && it->second.status == ReplicationRole::MAIN;
}
auto CoordinatorClusterState::IsReplica(std::string_view instance_name) const -> bool {
auto CoordinatorClusterState::HasReplicaState(std::string_view instance_name) const -> bool {
auto lock = std::shared_lock{log_lock_};
auto const it = instances_.find(instance_name);
return it != instances_.end() && it->second.status == ReplicationRole::REPLICA;
auto const it = repl_instances_.find(instance_name);
return it != repl_instances_.end() && it->second.status == ReplicationRole::REPLICA;
}
auto CoordinatorClusterState::InsertInstance(std::string instance_name, InstanceState instance_state) -> void {
auto lock = std::lock_guard{log_lock_};
instances_.insert_or_assign(std::move(instance_name), std::move(instance_state));
// Tells whether `instance_name` is the current main: the instance must be registered,
// be recorded with the MAIN role, and its stored UUID must equal the current main UUID.
// The lookup runs under a shared (reader) lock on log_lock_.
auto CoordinatorClusterState::IsCurrentMain(std::string_view instance_name) const -> bool {
  auto lock = std::shared_lock{log_lock_};
  auto const instance_it = repl_instances_.find(instance_name);
  if (instance_it == repl_instances_.end()) {
    return false;
  }
  auto const &state = instance_it->second;
  return state.status == ReplicationRole::MAIN && state.instance_uuid == current_main_uuid_;
}
auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void {
auto lock = std::lock_guard{log_lock_};
switch (log_action) {
// end of OPEN_LOCK_REGISTER_REPLICATION_INSTANCE
case RaftLogAction::REGISTER_REPLICATION_INSTANCE: {
auto const &config = std::get<CoordinatorClientConfig>(log_entry);
instances_[config.instance_name] = InstanceState{config, ReplicationRole::REPLICA};
auto const &config = std::get<CoordinatorToReplicaConfig>(log_entry);
spdlog::trace("DoAction: register replication instance {}", config.instance_name);
// Setting instance uuid to random, if registration fails, we are still in random state
repl_instances_.emplace(config.instance_name,
ReplicationInstanceState{config, ReplicationRole::REPLICA, utils::UUID{}});
is_lock_opened_ = false;
break;
}
// end of OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: {
auto const instance_name = std::get<std::string>(log_entry);
instances_.erase(instance_name);
spdlog::trace("DoAction: unregister replication instance {}", instance_name);
repl_instances_.erase(instance_name);
is_lock_opened_ = false;
break;
}
// end of OPEN_LOCK_SET_INSTANCE_AS_MAIN and OPEN_LOCK_FAILOVER
case RaftLogAction::SET_INSTANCE_AS_MAIN: {
auto const instance_name = std::get<std::string>(log_entry);
auto it = instances_.find(instance_name);
MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!");
auto const instance_uuid_change = std::get<InstanceUUIDUpdate>(log_entry);
auto it = repl_instances_.find(instance_uuid_change.instance_name);
MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part of raft state!");
it->second.status = ReplicationRole::MAIN;
it->second.instance_uuid = instance_uuid_change.uuid;
is_lock_opened_ = false;
spdlog::trace("DoAction: set replication instance {} as main with uuid {}", instance_uuid_change.instance_name,
std::string{instance_uuid_change.uuid});
break;
}
// end of OPEN_LOCK_SET_INSTANCE_AS_REPLICA
case RaftLogAction::SET_INSTANCE_AS_REPLICA: {
auto const instance_name = std::get<std::string>(log_entry);
auto it = instances_.find(instance_name);
MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!");
auto it = repl_instances_.find(instance_name);
MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part of raft state!");
it->second.status = ReplicationRole::REPLICA;
is_lock_opened_ = false;
spdlog::trace("DoAction: set replication instance {} as replica", instance_name);
break;
}
case RaftLogAction::UPDATE_UUID: {
uuid_ = std::get<utils::UUID>(log_entry);
case RaftLogAction::UPDATE_UUID_OF_NEW_MAIN: {
current_main_uuid_ = std::get<utils::UUID>(log_entry);
spdlog::trace("DoAction: update uuid of new main {}", std::string{current_main_uuid_});
break;
}
case RaftLogAction::UPDATE_UUID_FOR_INSTANCE: {
auto const instance_uuid_change = std::get<InstanceUUIDUpdate>(log_entry);
auto it = repl_instances_.find(instance_uuid_change.instance_name);
MG_ASSERT(it != repl_instances_.end(), "Instance doesn't exist as part of RAFT state");
it->second.instance_uuid = instance_uuid_change.uuid;
spdlog::trace("DoAction: update uuid for instance {} to {}", instance_uuid_change.instance_name,
std::string{instance_uuid_change.uuid});
break;
}
case RaftLogAction::ADD_COORDINATOR_INSTANCE: {
auto const &config = std::get<CoordinatorToCoordinatorConfig>(log_entry);
coordinators_.emplace_back(CoordinatorInstanceState{config});
spdlog::trace("DoAction: add coordinator instance {}", config.coordinator_server_id);
break;
}
case RaftLogAction::OPEN_LOCK_REGISTER_REPLICATION_INSTANCE: {
is_lock_opened_ = true;
spdlog::trace("DoAction: open lock register");
break;
// TODO(antoniofilipovic) save what we are doing to be able to undo....
}
case RaftLogAction::OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE: {
is_lock_opened_ = true;
spdlog::trace("DoAction: open lock unregister");
break;
// TODO(antoniofilipovic) save what we are doing
}
case RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_MAIN: {
is_lock_opened_ = true;
spdlog::trace("DoAction: open lock set instance as main");
break;
// TODO(antoniofilipovic) save what we are doing
}
case RaftLogAction::OPEN_LOCK_FAILOVER: {
is_lock_opened_ = true;
spdlog::trace("DoAction: open lock failover");
break;
// TODO(antoniofilipovic) save what we are doing
}
case RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_REPLICA: {
is_lock_opened_ = true;
spdlog::trace("DoAction: open lock set instance as replica");
break;
// TODO(antoniofilipovic) save what we need to undo
}
}
}
auto CoordinatorClusterState::Serialize(ptr<buffer> &data) -> void {
auto lock = std::shared_lock{log_lock_};
auto const log = nlohmann::json(instances_).dump();
nlohmann::json j = {{"repl_instances", repl_instances_},
{"is_lock_opened", is_lock_opened_},
{"current_main_uuid", current_main_uuid_}};
auto const log = j.dump();
data = buffer::alloc(sizeof(uint32_t) + log.size());
buffer_serializer bs(data);
bs.put_str(log);
@ -121,26 +198,34 @@ auto CoordinatorClusterState::Serialize(ptr<buffer> &data) -> void {
auto CoordinatorClusterState::Deserialize(buffer &data) -> CoordinatorClusterState {
buffer_serializer bs(data);
auto const j = nlohmann::json::parse(bs.get_str());
auto instances = j.get<std::map<std::string, InstanceState, std::less<>>>();
return CoordinatorClusterState{std::move(instances)};
auto instances = j["repl_instances"].get<std::map<std::string, ReplicationInstanceState, std::less<>>>();
auto current_main_uuid = j["current_main_uuid"].get<utils::UUID>();
bool is_lock_opened = j["is_lock_opened"].get<int>();
return CoordinatorClusterState{std::move(instances), current_main_uuid, is_lock_opened};
}
auto CoordinatorClusterState::GetInstances() const -> std::vector<InstanceState> {
auto CoordinatorClusterState::GetReplicationInstances() const -> std::vector<ReplicationInstanceState> {
auto lock = std::shared_lock{log_lock_};
return instances_ | ranges::views::values | ranges::to<std::vector<InstanceState>>;
return repl_instances_ | ranges::views::values | ranges::to<std::vector<ReplicationInstanceState>>;
}
auto CoordinatorClusterState::GetUUID() const -> utils::UUID { return uuid_; }
// Returns a copy of the UUID recorded for the current main.
// The read is done under a shared lock, like the other read accessors of this class:
// DoAction() overwrites current_main_uuid_ while holding log_lock_ exclusively, so an
// unlocked read could race with that write.
auto CoordinatorClusterState::GetCurrentMainUUID() const -> utils::UUID {
  auto lock = std::shared_lock{log_lock_};
  return current_main_uuid_;
}
auto CoordinatorClusterState::FindCurrentMainInstanceName() const -> std::optional<std::string> {
auto CoordinatorClusterState::GetInstanceUUID(std::string_view instance_name) const -> utils::UUID {
auto lock = std::shared_lock{log_lock_};
auto const it =
std::ranges::find_if(instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; });
if (it == instances_.end()) {
return {};
}
return it->first;
auto const it = repl_instances_.find(instance_name);
MG_ASSERT(it != repl_instances_.end(), "Instance with that name doesn't exist.");
return it->second.instance_uuid;
}
// Returns a copy of the stored coordinator instance states; the copy is taken while
// holding a shared lock on log_lock_.
auto CoordinatorClusterState::GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState> {
  std::shared_lock guard{log_lock_};
  std::vector<CoordinatorInstanceState> snapshot = coordinators_;
  return snapshot;
}
// Reports the current value of the is_lock_opened_ flag, read under a shared lock
// on log_lock_.
auto CoordinatorClusterState::IsLockOpened() const -> bool {
  std::shared_lock guard{log_lock_};
  bool const opened = is_lock_opened_;
  return opened;
}
} // namespace memgraph::coordination

View File

@ -11,43 +11,62 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
namespace memgraph::coordination {
void to_json(nlohmann::json &j, ReplClientInfo const &config) {
// Serializes a coordinator-to-coordinator config into `j` as a JSON object with the
// keys "coordinator_server_id", "coordinator_server" and "bolt_server".
void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config) {
  auto serialized = nlohmann::json::object();
  serialized["coordinator_server_id"] = config.coordinator_server_id;
  serialized["coordinator_server"] = config.coordinator_server;
  serialized["bolt_server"] = config.bolt_server;
  j = std::move(serialized);
}
// Reads a coordinator-to-coordinator config back from `j`. Fields are looked up and
// assigned one after another in the order id, coordinator endpoint, bolt endpoint.
void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config) {
  auto const &server_id_json = j.at("coordinator_server_id");
  config.coordinator_server_id = server_id_json.get<uint32_t>();

  auto const &coordinator_server_json = j.at("coordinator_server");
  config.coordinator_server = coordinator_server_json.get<io::network::Endpoint>();

  auto const &bolt_server_json = j.at("bolt_server");
  config.bolt_server = bolt_server_json.get<io::network::Endpoint>();
}
void to_json(nlohmann::json &j, ReplicationClientInfo const &config) {
j = nlohmann::json{{"instance_name", config.instance_name},
{"replication_mode", config.replication_mode},
{"replication_ip_address", config.replication_ip_address},
{"replication_port", config.replication_port}};
{"replication_server", config.replication_server}};
}
void from_json(nlohmann::json const &j, ReplClientInfo &config) {
void from_json(nlohmann::json const &j, ReplicationClientInfo &config) {
config.instance_name = j.at("instance_name").get<std::string>();
config.replication_mode = j.at("replication_mode").get<replication_coordination_glue::ReplicationMode>();
config.replication_ip_address = j.at("replication_ip_address").get<std::string>();
config.replication_port = j.at("replication_port").get<uint16_t>();
config.replication_server = j.at("replication_server").get<io::network::Endpoint>();
}
void to_json(nlohmann::json &j, CoordinatorClientConfig const &config) {
void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config) {
j = nlohmann::json{{"instance_name", config.instance_name},
{"ip_address", config.ip_address},
{"port", config.port},
{"mgt_server", config.mgt_server},
{"bolt_server", config.bolt_server},
{"instance_health_check_frequency_sec", config.instance_health_check_frequency_sec.count()},
{"instance_down_timeout_sec", config.instance_down_timeout_sec.count()},
{"instance_get_uuid_frequency_sec", config.instance_get_uuid_frequency_sec.count()},
{"replication_client_info", config.replication_client_info}};
}
void from_json(nlohmann::json const &j, CoordinatorClientConfig &config) {
void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config) {
config.instance_name = j.at("instance_name").get<std::string>();
config.ip_address = j.at("ip_address").get<std::string>();
config.port = j.at("port").get<uint16_t>();
config.mgt_server = j.at("mgt_server").get<io::network::Endpoint>();
config.bolt_server = j.at("bolt_server").get<io::network::Endpoint>();
config.instance_health_check_frequency_sec =
std::chrono::seconds{j.at("instance_health_check_frequency_sec").get<int>()};
config.instance_down_timeout_sec = std::chrono::seconds{j.at("instance_down_timeout_sec").get<int>()};
config.instance_get_uuid_frequency_sec = std::chrono::seconds{j.at("instance_get_uuid_frequency_sec").get<int>()};
config.replication_client_info = j.at("replication_client_info").get<ReplClientInfo>();
config.replication_client_info = j.at("replication_client_info").get<ReplicationClientInfo>();
}
// Reads an instance UUID update from `j`: first the "uuid" member, then "instance_name".
void from_json(nlohmann::json const &j, InstanceUUIDUpdate &instance_uuid_change) {
  auto const &uuid_json = j.at("uuid");
  instance_uuid_change.uuid = uuid_json.get<utils::UUID>();

  auto const &name_json = j.at("instance_name");
  instance_uuid_change.instance_name = name_json.get<std::string>();
}
// Serializes an instance UUID update into `j` as a JSON object with the keys
// "instance_name" and "uuid".
void to_json(nlohmann::json &j, InstanceUUIDUpdate const &instance_uuid_change) {
  auto serialized = nlohmann::json::object();
  serialized["instance_name"] = instance_uuid_change.instance_name;
  serialized["uuid"] = instance_uuid_change.uuid;
  j = std::move(serialized);
}
} // namespace memgraph::coordination

View File

@ -95,8 +95,8 @@ void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHan
slk::Load(&req, req_reader);
const replication::ReplicationServerConfig clients_config{
.ip_address = req.replication_client_info.replication_ip_address,
.port = req.replication_client_info.replication_port};
.ip_address = req.replication_client_info.replication_server.address,
.port = req.replication_client_info.replication_server.port};
if (!replication_handler.SetReplicationRoleReplica(clients_config, std::nullopt)) {
spdlog::error("Demoting main to replica failed!");
@ -136,8 +136,8 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa
return replication::ReplicationClientConfig{
.name = repl_info_config.instance_name,
.mode = repl_info_config.replication_mode,
.ip_address = repl_info_config.replication_ip_address,
.port = repl_info_config.replication_port,
.ip_address = repl_info_config.replication_server.address,
.port = repl_info_config.replication_server.port,
};
};

View File

@ -14,7 +14,6 @@
#include "coordination/coordinator_instance.hpp"
#include "coordination/coordinator_exceptions.hpp"
#include "coordination/fmt.hpp"
#include "dbms/constants.hpp"
#include "nuraft/coordinator_state_machine.hpp"
#include "nuraft/coordinator_state_manager.hpp"
@ -31,10 +30,11 @@ using nuraft::ptr;
using nuraft::srv_config;
CoordinatorInstance::CoordinatorInstance()
: raft_state_(RaftState::MakeRaftState(
: thread_pool_{1},
raft_state_(RaftState::MakeRaftState(
[this]() {
spdlog::info("Leader changed, starting all replication instances!");
auto const instances = raft_state_.GetInstances();
auto const instances = raft_state_.GetReplicationInstances();
auto replicas = instances | ranges::views::filter([](auto const &instance) {
return instance.status == ReplicationRole::REPLICA;
});
@ -56,23 +56,34 @@ CoordinatorInstance::CoordinatorInstance()
&CoordinatorInstance::MainFailCallback);
});
std::ranges::for_each(repl_instances_, [this](auto &instance) {
instance.SetNewMainUUID(raft_state_.GetUUID());
instance.StartFrequentCheck();
});
std::ranges::for_each(repl_instances_, [](auto &instance) { instance.StartFrequentCheck(); });
},
[this]() {
spdlog::info("Leader changed, stopping all replication instances!");
repl_instances_.clear();
thread_pool_.AddTask([this]() {
spdlog::info("Leader changed, trying to stop all replication instances frequent checks!");
// The frequent checks must be stopped before taking the lock. Otherwise a deadlock can
// occur: an instance's frequent check waits to take the lock while this thread already
// holds it and waits for that frequent check to finish.
for (auto &repl_instance : repl_instances_) {
repl_instance.StopFrequentCheck();
}
auto lock = std::unique_lock{coord_instance_lock_};
repl_instances_.clear();
spdlog::info("Stopped all replication instance frequent checks.");
});
})) {
client_succ_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void {
auto lock = std::lock_guard{self->coord_instance_lock_};
auto lock = std::unique_lock{self->coord_instance_lock_};
// when coordinator is becoming follower it will want to stop all threads doing frequent checks
// Thread can get stuck here waiting for lock so we need to frequently check if we are in shutdown state
auto &repl_instance = self->FindReplicationInstance(repl_instance_name);
std::invoke(repl_instance.GetSuccessCallback(), self, repl_instance_name);
};
client_fail_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void {
auto lock = std::lock_guard{self->coord_instance_lock_};
auto lock = std::unique_lock{self->coord_instance_lock_};
auto &repl_instance = self->FindReplicationInstance(repl_instance_name);
std::invoke(repl_instance.GetFailCallback(), self, repl_instance_name);
};
@ -101,7 +112,7 @@ auto CoordinatorInstance::ShowInstances() const -> std::vector<InstanceStatus> {
if (raft_state_.IsLeader()) {
auto const stringify_repl_role = [this](ReplicationInstance const &instance) -> std::string {
if (!instance.IsAlive()) return "unknown";
if (raft_state_.IsMain(instance.InstanceName())) return "main";
if (raft_state_.IsCurrentMain(instance.InstanceName())) return "main";
return "replica";
};
@ -122,26 +133,36 @@ auto CoordinatorInstance::ShowInstances() const -> std::vector<InstanceStatus> {
std::ranges::transform(repl_instances_, std::back_inserter(instances_status), process_repl_instance_as_leader);
}
} else {
auto const stringify_inst_status = [](ReplicationRole status) -> std::string {
return status == ReplicationRole::MAIN ? "main" : "replica";
auto const stringify_inst_status = [raft_state_ptr = &raft_state_](
utils::UUID const &main_uuid,
ReplicationInstanceState const &instance) -> std::string {
if (raft_state_ptr->IsCurrentMain(instance.config.instance_name)) {
return "main";
}
if (raft_state_ptr->HasMainState(instance.config.instance_name)) {
return "unknown";
}
return "replica";
};
// TODO: (andi) Add capability that followers can also return socket addresses
auto process_repl_instance_as_follower = [&stringify_inst_status](auto const &instance) -> InstanceStatus {
auto process_repl_instance_as_follower =
[this, &stringify_inst_status](ReplicationInstanceState const &instance) -> InstanceStatus {
return {.instance_name = instance.config.instance_name,
.cluster_role = stringify_inst_status(instance.status),
.cluster_role = stringify_inst_status(raft_state_.GetCurrentMainUUID(), instance),
.health = "unknown"};
};
std::ranges::transform(raft_state_.GetInstances(), std::back_inserter(instances_status),
std::ranges::transform(raft_state_.GetReplicationInstances(), std::back_inserter(instances_status),
process_repl_instance_as_follower);
}
return instances_status;
}
auto CoordinatorInstance::TryFailover() -> void {
auto const is_replica = [this](ReplicationInstance const &instance) { return IsReplica(instance.InstanceName()); };
auto const is_replica = [this](ReplicationInstance const &instance) {
return HasReplicaState(instance.InstanceName());
};
auto alive_replicas =
repl_instances_ | ranges::views::filter(is_replica) | ranges::views::filter(&ReplicationInstance::IsAlive);
@ -151,11 +172,6 @@ auto CoordinatorInstance::TryFailover() -> void {
return;
}
if (!raft_state_.RequestLeadership()) {
spdlog::error("Failover failed since the instance is not the leader!");
return;
}
auto const get_ts = [](ReplicationInstance &replica) { return replica.GetClient().SendGetInstanceTimestampsRpc(); };
auto maybe_instance_db_histories = alive_replicas | ranges::views::transform(get_ts) | ranges::to<std::vector>();
@ -183,6 +199,10 @@ auto CoordinatorInstance::TryFailover() -> void {
auto *new_main = &FindReplicationInstance(most_up_to_date_instance);
if (!raft_state_.AppendOpenLockFailover(most_up_to_date_instance)) {
spdlog::error("Aborting failover as instance is not anymore leader.");
return;
}
new_main->PauseFrequentCheck();
utils::OnScopeExit scope_exit{[&new_main] { new_main->ResumeFrequentCheck(); }};
@ -192,16 +212,18 @@ auto CoordinatorInstance::TryFailover() -> void {
auto const new_main_uuid = utils::UUID{};
auto const failed_to_swap = [&new_main_uuid](ReplicationInstance &instance) {
return !instance.SendSwapAndUpdateUUID(new_main_uuid);
auto const failed_to_swap = [this, &new_main_uuid](ReplicationInstance &instance) {
return !instance.SendSwapAndUpdateUUID(new_main_uuid) ||
!raft_state_.AppendUpdateUUIDForInstanceLog(instance.InstanceName(), new_main_uuid);
};
// If for some replicas swap fails, for others on successful ping we will revert back on next change
// or we will do failover first again and then it will be consistent again
if (std::ranges::any_of(alive_replicas | ranges::views::filter(is_not_new_main), failed_to_swap)) {
spdlog::error("Failed to swap uuid for all instances");
spdlog::error("Aborting failover. Failed to swap uuid for all alive instances.");
return;
}
auto repl_clients_info = repl_instances_ | ranges::views::filter(is_not_new_main) |
ranges::views::transform(&ReplicationInstance::ReplicationClientInfo) |
ranges::to<ReplicationClientsInfo>();
@ -212,27 +234,36 @@ auto CoordinatorInstance::TryFailover() -> void {
return;
}
if (!raft_state_.AppendUpdateUUIDLog(new_main_uuid)) {
if (!raft_state_.AppendUpdateUUIDForNewMainLog(new_main_uuid)) {
return;
}
auto const new_main_instance_name = new_main->InstanceName();
if (!raft_state_.AppendSetInstanceAsMainLog(new_main_instance_name)) {
if (!raft_state_.AppendSetInstanceAsMainLog(new_main_instance_name, new_main_uuid)) {
return;
}
if (!new_main->EnableWritingOnMain()) {
spdlog::error("Failover successful but couldn't enable writing on instance.");
}
spdlog::info("Failover successful! Instance {} promoted to main.", new_main->InstanceName());
}
auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance_name)
-> SetInstanceToMainCoordinatorStatus {
auto lock = std::lock_guard{coord_instance_lock_};
if (raft_state_.IsLockOpened()) {
return SetInstanceToMainCoordinatorStatus::LOCK_OPENED;
}
if (raft_state_.MainExists()) {
return SetInstanceToMainCoordinatorStatus::MAIN_ALREADY_EXISTS;
}
// TODO(antoniofilipovic) Check if request leadership can cause problems due to changing of leadership while other
// doing failover
if (!raft_state_.RequestLeadership()) {
return SetInstanceToMainCoordinatorStatus::NOT_LEADER;
}
@ -249,6 +280,10 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance
return SetInstanceToMainCoordinatorStatus::NO_INSTANCE_WITH_NAME;
}
if (!raft_state_.AppendOpenLockSetInstanceToMain(instance_name)) {
return SetInstanceToMainCoordinatorStatus::OPEN_LOCK;
}
new_main->PauseFrequentCheck();
utils::OnScopeExit scope_exit{[&new_main] { new_main->ResumeFrequentCheck(); }};
@ -258,12 +293,13 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance
auto const new_main_uuid = utils::UUID{};
auto const failed_to_swap = [&new_main_uuid](ReplicationInstance &instance) {
return !instance.SendSwapAndUpdateUUID(new_main_uuid);
auto const failed_to_swap = [this, &new_main_uuid](ReplicationInstance &instance) {
return !instance.SendSwapAndUpdateUUID(new_main_uuid) ||
!raft_state_.AppendUpdateUUIDForInstanceLog(instance.InstanceName(), new_main_uuid);
};
if (std::ranges::any_of(repl_instances_ | ranges::views::filter(is_not_new_main), failed_to_swap)) {
spdlog::error("Failed to swap uuid for all instances");
spdlog::error("Failed to swap uuid for all currently alive instances.");
return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED;
}
@ -275,22 +311,28 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance
&CoordinatorInstance::MainFailCallback)) {
return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN;
}
if (!raft_state_.AppendUpdateUUIDLog(new_main_uuid)) {
if (!raft_state_.AppendUpdateUUIDForNewMainLog(new_main_uuid)) {
return SetInstanceToMainCoordinatorStatus::RAFT_LOG_ERROR;
}
if (!raft_state_.AppendSetInstanceAsMainLog(instance_name)) {
if (!raft_state_.AppendSetInstanceAsMainLog(instance_name, new_main_uuid)) {
return SetInstanceToMainCoordinatorStatus::RAFT_LOG_ERROR;
}
spdlog::info("Instance {} promoted to main on leader", instance_name);
if (!new_main->EnableWritingOnMain()) {
return SetInstanceToMainCoordinatorStatus::ENABLE_WRITING_FAILED;
}
return SetInstanceToMainCoordinatorStatus::SUCCESS;
}
auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig const &config)
auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config)
-> RegisterInstanceCoordinatorStatus {
auto lock = std::lock_guard{coord_instance_lock_};
if (raft_state_.IsLockOpened()) {
return RegisterInstanceCoordinatorStatus::LOCK_OPENED;
}
if (std::ranges::any_of(repl_instances_, [instance_name = config.instance_name](ReplicationInstance const &instance) {
return instance.InstanceName() == instance_name;
@ -310,11 +352,14 @@ auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig co
return RegisterInstanceCoordinatorStatus::REPL_ENDPOINT_EXISTS;
}
// TODO(antoniofilipovic) Check if this is an issue
if (!raft_state_.RequestLeadership()) {
return RegisterInstanceCoordinatorStatus::NOT_LEADER;
}
auto const undo_action_ = [this]() { repl_instances_.pop_back(); };
if (!raft_state_.AppendOpenLockRegister(config)) {
return RegisterInstanceCoordinatorStatus::OPEN_LOCK;
}
auto *new_instance = &repl_instances_.emplace_back(this, config, client_succ_cb_, client_fail_cb_,
&CoordinatorInstance::ReplicaSuccessCallback,
@ -322,15 +367,12 @@ auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig co
if (!new_instance->SendDemoteToReplicaRpc()) {
spdlog::error("Failed to send demote to replica rpc for instance {}", config.instance_name);
undo_action_();
return RegisterInstanceCoordinatorStatus::RPC_FAILED;
}
if (!raft_state_.AppendRegisterReplicationInstanceLog(config)) {
undo_action_();
return RegisterInstanceCoordinatorStatus::RAFT_LOG_ERROR;
}
new_instance->StartFrequentCheck();
spdlog::info("Instance {} registered", config.instance_name);
@ -341,6 +383,11 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instanc
-> UnregisterInstanceCoordinatorStatus {
auto lock = std::lock_guard{coord_instance_lock_};
if (raft_state_.IsLockOpened()) {
return UnregisterInstanceCoordinatorStatus::LOCK_OPENED;
}
// TODO(antoniofilipovic) Check if this is an issue
if (!raft_state_.RequestLeadership()) {
return UnregisterInstanceCoordinatorStatus::NOT_LEADER;
}
@ -354,19 +401,23 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instanc
return UnregisterInstanceCoordinatorStatus::NO_INSTANCE_WITH_NAME;
}
auto const is_main = [this](ReplicationInstance const &instance) {
return IsMain(instance.InstanceName()) && instance.GetMainUUID() == raft_state_.GetUUID() && instance.IsAlive();
auto const is_current_main = [this](ReplicationInstance const &instance) {
return raft_state_.IsCurrentMain(instance.InstanceName()) && instance.IsAlive();
};
if (is_main(*inst_to_remove)) {
if (is_current_main(*inst_to_remove)) {
return UnregisterInstanceCoordinatorStatus::IS_MAIN;
}
if (!raft_state_.AppendOpenLockUnregister(instance_name)) {
return UnregisterInstanceCoordinatorStatus::OPEN_LOCK;
}
inst_to_remove->StopFrequentCheck();
auto curr_main = std::ranges::find_if(repl_instances_, is_main);
auto curr_main = std::ranges::find_if(repl_instances_, is_current_main);
if (curr_main != repl_instances_.end() && curr_main->IsAlive()) {
if (curr_main != repl_instances_.end()) {
if (!curr_main->SendUnregisterReplicaRpc(instance_name)) {
inst_to_remove->StartFrequentCheck();
return UnregisterInstanceCoordinatorStatus::RPC_FAILED;
@ -382,20 +433,25 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instanc
return UnregisterInstanceCoordinatorStatus::SUCCESS;
}
auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port,
std::string_view raft_address) -> void {
raft_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address);
auto CoordinatorInstance::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void {
raft_state_.AddCoordinatorInstance(config);
// NOTE: A failed append is only logged. At this point the coordinator has already been added to the
// networking layer, so it may be present there without being recorded in the raft log.
if (!raft_state_.AppendAddCoordinatorInstanceLog(config)) {
spdlog::error("Failed to append add coordinator instance log");
}
}
void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name) {
spdlog::trace("Instance {} performing main fail callback", repl_instance_name);
// While the raft lock is open the previous cluster action has not finished, so this callback
// must stop here: continuing would mark the ping as failed and could start a failover on top
// of an unfinished action. The log message already announced a return; the statement was missing.
if (raft_state_.IsLockOpened()) {
  spdlog::error("Returning from main fail callback as the last action didn't successfully finish");
  return;
}
auto &repl_instance = FindReplicationInstance(repl_instance_name);
repl_instance.OnFailPing();
const auto &repl_instance_uuid = repl_instance.GetMainUUID();
MG_ASSERT(repl_instance_uuid.has_value(), "Replication instance must have uuid set");
// NOLINTNEXTLINE
if (!repl_instance.IsAlive() && raft_state_.GetUUID() == repl_instance_uuid.value()) {
if (!repl_instance.IsAlive() && raft_state_.IsCurrentMain(repl_instance_name)) {
spdlog::info("Cluster without main instance, trying automatic failover");
TryFailover();
}
@ -403,6 +459,12 @@ void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name)
void CoordinatorInstance::MainSuccessCallback(std::string_view repl_instance_name) {
spdlog::trace("Instance {} performing main successful callback", repl_instance_name);
if (raft_state_.IsLockOpened()) {
spdlog::error("Stopping main successful callback as the last action didn't successfully finish");
return;
}
auto &repl_instance = FindReplicationInstance(repl_instance_name);
if (repl_instance.IsAlive()) {
@ -410,11 +472,8 @@ void CoordinatorInstance::MainSuccessCallback(std::string_view repl_instance_nam
return;
}
const auto &repl_instance_uuid = repl_instance.GetMainUUID();
MG_ASSERT(repl_instance_uuid.has_value(), "Instance must have uuid set.");
// NOLINTNEXTLINE
if (raft_state_.GetUUID() == repl_instance_uuid.value()) {
if (raft_state_.IsCurrentMain(repl_instance.InstanceName())) {
if (!repl_instance.EnableWritingOnMain()) {
spdlog::error("Failed to enable writing on main instance {}", repl_instance_name);
return;
@ -424,9 +483,8 @@ void CoordinatorInstance::MainSuccessCallback(std::string_view repl_instance_nam
return;
}
if (!raft_state_.RequestLeadership()) {
spdlog::error("Demoting main instance {} to replica failed since the instance is not the leader!",
repl_instance_name);
if (!raft_state_.AppendOpenLockSetInstanceToReplica(repl_instance.InstanceName())) {
spdlog::error("Failed to open lock for demoting OLD MAIN {} to REPLICA", repl_instance_name);
return;
}
@ -439,29 +497,38 @@ void CoordinatorInstance::MainSuccessCallback(std::string_view repl_instance_nam
return;
}
if (!repl_instance.SendSwapAndUpdateUUID(raft_state_.GetUUID())) {
if (!repl_instance.SendSwapAndUpdateUUID(raft_state_.GetCurrentMainUUID())) {
spdlog::error("Failed to swap uuid for demoted main instance {}", repl_instance_name);
return;
}
if (!raft_state_.AppendUpdateUUIDForInstanceLog(repl_instance_name, raft_state_.GetCurrentMainUUID())) {
spdlog::error("Failed to update log of changing instance uuid {} to {}", repl_instance_name,
std::string{raft_state_.GetCurrentMainUUID()});
return;
}
if (!raft_state_.AppendSetInstanceAsReplicaLog(repl_instance_name)) {
spdlog::error("Failed to append log that OLD MAIN was demoted to REPLICA {}", repl_instance_name);
return;
}
}
void CoordinatorInstance::ReplicaSuccessCallback(std::string_view repl_instance_name) {
spdlog::trace("Instance {} performing replica successful callback", repl_instance_name);
auto &repl_instance = FindReplicationInstance(repl_instance_name);
if (!IsReplica(repl_instance_name)) {
spdlog::error("Aborting replica callback since instance {} is not replica anymore", repl_instance_name);
// Skip this callback while the raft lock is open, i.e. the previous cluster action is unfinished.
// The message names the replica callback (it previously named the main callback by mistake).
if (raft_state_.IsLockOpened()) {
  spdlog::error("Stopping replica successful callback as the last action didn't successfully finish");
  return;
}
auto &repl_instance = FindReplicationInstance(repl_instance_name);
// We need to get replicas UUID from time to time to ensure replica is listening to correct main
// and that it didn't go down for less time than we could notice
// We need to get id of main replica is listening to
// and swap if necessary
if (!repl_instance.EnsureReplicaHasCorrectMainUUID(raft_state_.GetUUID())) {
if (!repl_instance.EnsureReplicaHasCorrectMainUUID(raft_state_.GetCurrentMainUUID())) {
spdlog::error("Failed to swap uuid for replica instance {} which is alive", repl_instance.InstanceName());
return;
}
@ -471,13 +538,14 @@ void CoordinatorInstance::ReplicaSuccessCallback(std::string_view repl_instance_
void CoordinatorInstance::ReplicaFailCallback(std::string_view repl_instance_name) {
spdlog::trace("Instance {} performing replica failure callback", repl_instance_name);
auto &repl_instance = FindReplicationInstance(repl_instance_name);
if (!IsReplica(repl_instance_name)) {
spdlog::error("Aborting replica fail callback since instance {} is not replica anymore", repl_instance_name);
// Skip this callback while the raft lock is open, i.e. the previous cluster action is unfinished.
// The message names the replica failure callback (it previously named the main callback by mistake).
if (raft_state_.IsLockOpened()) {
  spdlog::error("Stopping replica failure callback as the last action didn't successfully finish.");
  return;
}
auto &repl_instance = FindReplicationInstance(repl_instance_name);
repl_instance.OnFailPing();
}
@ -549,12 +617,63 @@ auto CoordinatorInstance::ChooseMostUpToDateInstance(std::span<InstanceNameDbHis
return std::move(*new_main_res);
}
auto CoordinatorInstance::IsMain(std::string_view instance_name) const -> bool {
return raft_state_.IsMain(instance_name);
auto CoordinatorInstance::HasMainState(std::string_view instance_name) const -> bool {
return raft_state_.HasMainState(instance_name);
}
auto CoordinatorInstance::IsReplica(std::string_view instance_name) const -> bool {
return raft_state_.IsReplica(instance_name);
auto CoordinatorInstance::HasReplicaState(std::string_view instance_name) const -> bool {
return raft_state_.HasReplicaState(instance_name);
}
/// Builds the routing table from the cluster state stored in the raft log.
///
/// Entries are appended in this order, each as (bolt addresses, role):
///   - "WRITE": instances whose logged status is MAIN (at most one; skipped when empty),
///   - "READ":  instances whose logged status is REPLICA (skipped when empty),
///   - "ROUTE": coordinators known to the raft log followed by the address found under
///              the "address" key of `routing`.
///
/// @param routing routing context; must contain the key "address".
/// @throws InvalidRoutingTableException when `routing` has no "address" entry.
auto CoordinatorInstance::GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable {
  auto const &raft_log_repl_instances = raft_state_.GetReplicationInstances();

  // Bolt addresses of all replication instances whose logged status equals `wanted_status`.
  // TODO: (andi) This is wrong check, Fico will correct in #1819.
  auto const bolt_addresses_with_status = [&raft_log_repl_instances](ReplicationRole wanted_status) {
    return raft_log_repl_instances |
           ranges::views::filter(
               [wanted_status](ReplicationInstanceState const &instance) { return instance.status == wanted_status; }) |
           ranges::views::transform(
               [](ReplicationInstanceState const &instance) { return instance.config.BoltSocketAddress(); }) |
           ranges::to<std::vector>();
  };

  auto routing_table = RoutingTable{};

  auto bolt_mains = bolt_addresses_with_status(ReplicationRole::MAIN);
  MG_ASSERT(bolt_mains.size() <= 1, "There can be at most one main instance active!");
  if (!bolt_mains.empty()) {
    routing_table.emplace_back(std::move(bolt_mains), "WRITE");
  }

  auto bolt_replicas = bolt_addresses_with_status(ReplicationRole::REPLICA);
  if (!bolt_replicas.empty()) {
    routing_table.emplace_back(std::move(bolt_replicas), "READ");
  }

  auto const &raft_log_coord_instances = raft_state_.GetCoordinatorInstances();
  auto bolt_coords = raft_log_coord_instances |
                     ranges::views::transform([](CoordinatorInstanceState const &instance) {
                       return instance.config.bolt_server.SocketAddress();
                     }) |
                     ranges::to<std::vector>();

  // The current coordinator is not taken from the raft log; its address comes from the caller.
  auto const own_address_it = routing.find("address");
  if (own_address_it == routing.end()) {
    throw InvalidRoutingTableException("No bolt address found in routing table for the current coordinator!");
  }
  bolt_coords.push_back(own_address_it->second);

  routing_table.emplace_back(std::move(bolt_coords), "ROUTE");
  return routing_table;
}
} // namespace memgraph::coordination

View File

@ -18,8 +18,7 @@ namespace memgraph::coordination {
namespace {
auto CreateServerContext(const memgraph::coordination::CoordinatorServerConfig &config)
-> communication::ServerContext {
auto CreateServerContext(const memgraph::coordination::ManagementServerConfig &config) -> communication::ServerContext {
return (config.ssl) ? communication::ServerContext{config.ssl->key_file, config.ssl->cert_file, config.ssl->ca_file,
config.ssl->verify_peer}
: communication::ServerContext{};
@ -32,7 +31,7 @@ constexpr auto kCoordinatorServerThreads = 1;
} // namespace
CoordinatorServer::CoordinatorServer(const CoordinatorServerConfig &config)
CoordinatorServer::CoordinatorServer(const ManagementServerConfig &config)
: rpc_server_context_{CreateServerContext(config)},
rpc_server_{io::network::Endpoint{config.ip_address, config.port}, &rpc_server_context_,
kCoordinatorServerThreads} {

View File

@ -13,7 +13,7 @@
#include "coordination/coordinator_state.hpp"
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp"
#include "flags/replication.hpp"
#include "spdlog/spdlog.h"
@ -25,15 +25,15 @@
namespace memgraph::coordination {
CoordinatorState::CoordinatorState() {
MG_ASSERT(!(FLAGS_raft_server_id && FLAGS_coordinator_server_port),
MG_ASSERT(!(FLAGS_coordinator_id && FLAGS_management_port),
"Instance cannot be a coordinator and have registered coordinator server.");
spdlog::info("Executing coordinator constructor");
if (FLAGS_coordinator_server_port) {
if (FLAGS_management_port) {
spdlog::info("Coordinator server port set");
auto const config = CoordinatorServerConfig{
auto const config = ManagementServerConfig{
.ip_address = kDefaultReplicationServerIp,
.port = static_cast<uint16_t>(FLAGS_coordinator_server_port),
.port = static_cast<uint16_t>(FLAGS_management_port),
};
spdlog::info("Executing coordinator constructor main replica");
@ -41,7 +41,7 @@ CoordinatorState::CoordinatorState() {
}
}
auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig const &config)
auto CoordinatorState::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config)
-> RegisterInstanceCoordinatorStatus {
MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_),
"Coordinator cannot register replica since variant holds wrong alternative");
@ -98,11 +98,16 @@ auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & {
return *std::get<CoordinatorMainReplicaData>(data_).coordinator_server_;
}
auto CoordinatorState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port,
std::string_view raft_address) -> void {
auto CoordinatorState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void {
MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_),
"Coordinator cannot register replica since variant holds wrong alternative");
return std::get<CoordinatorInstance>(data_).AddCoordinatorInstance(raft_server_id, raft_port, raft_address);
return std::get<CoordinatorInstance>(data_).AddCoordinatorInstance(config);
}
/// Forwards the routing-table request to the held CoordinatorInstance.
/// Asserts that `data_` currently holds the CoordinatorInstance alternative.
auto CoordinatorState::GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable {
  MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_),
            "Coordinator cannot get routing table since variant holds wrong alternative");

  auto &coordinator_instance = std::get<CoordinatorInstance>(data_);
  return coordinator_instance.GetRoutingTable(routing);
}
} // namespace memgraph::coordination

View File

@ -20,18 +20,14 @@ constexpr int MAX_SNAPSHOTS = 3;
namespace memgraph::coordination {
auto CoordinatorStateMachine::FindCurrentMainInstanceName() const -> std::optional<std::string> {
return cluster_state_.FindCurrentMainInstanceName();
}
// Returns the result of cluster_state_.MainExists().
auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); }
auto CoordinatorStateMachine::IsMain(std::string_view instance_name) const -> bool {
return cluster_state_.IsMain(instance_name);
auto CoordinatorStateMachine::HasMainState(std::string_view instance_name) const -> bool {
return cluster_state_.HasMainState(instance_name);
}
auto CoordinatorStateMachine::IsReplica(std::string_view instance_name) const -> bool {
return cluster_state_.IsReplica(instance_name);
auto CoordinatorStateMachine::HasReplicaState(std::string_view instance_name) const -> bool {
return cluster_state_.HasReplicaState(instance_name);
}
auto CoordinatorStateMachine::CreateLog(nlohmann::json &&log) -> ptr<buffer> {
@ -42,7 +38,24 @@ auto CoordinatorStateMachine::CreateLog(nlohmann::json &&log) -> ptr<buffer> {
return log_buf;
}
auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer> {
/// Creates the raft log buffer for an OPEN_LOCK_REGISTER_REPLICATION_INSTANCE action,
/// with `config` stored under "info".
auto CoordinatorStateMachine::SerializeOpenLockRegister(CoordinatorToReplicaConfig const &config) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::OPEN_LOCK_REGISTER_REPLICATION_INSTANCE}, {"info", config}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE action,
/// with the instance name (copied into a std::string) stored under "info".
auto CoordinatorStateMachine::SerializeOpenLockUnregister(std::string_view instance_name) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE},
                              {"info", std::string{instance_name}}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an OPEN_LOCK_FAILOVER action,
/// with the instance name (copied into a std::string) stored under "info".
auto CoordinatorStateMachine::SerializeOpenLockFailover(std::string_view instance_name) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::OPEN_LOCK_FAILOVER}, {"info", std::string(instance_name)}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an OPEN_LOCK_SET_INSTANCE_AS_MAIN action,
/// with the instance name (copied into a std::string) stored under "info".
auto CoordinatorStateMachine::SerializeOpenLockSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_MAIN},
                              {"info", std::string(instance_name)}};
  return CreateLog(std::move(log_entry));
}
auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr<buffer> {
return CreateLog({{"action", RaftLogAction::REGISTER_REPLICATION_INSTANCE}, {"info", config}});
}
@ -50,35 +63,65 @@ auto CoordinatorStateMachine::SerializeUnregisterInstance(std::string_view insta
return CreateLog({{"action", RaftLogAction::UNREGISTER_REPLICATION_INSTANCE}, {"info", instance_name}});
}
auto CoordinatorStateMachine::SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer> {
return CreateLog({{"action", RaftLogAction::SET_INSTANCE_AS_MAIN}, {"info", instance_name}});
/// Creates the raft log buffer for a SET_INSTANCE_AS_MAIN action,
/// with the instance-name/UUID pair stored under "info".
auto CoordinatorStateMachine::SerializeSetInstanceAsMain(InstanceUUIDUpdate const &instance_uuid_change)
    -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::SET_INSTANCE_AS_MAIN}, {"info", instance_uuid_change}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for a SET_INSTANCE_AS_REPLICA action,
/// with the instance name stored under "info".
auto CoordinatorStateMachine::SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::SET_INSTANCE_AS_REPLICA}, {"info", instance_name}};
  return CreateLog(std::move(log_entry));
}
auto CoordinatorStateMachine::SerializeUpdateUUID(utils::UUID const &uuid) -> ptr<buffer> {
return CreateLog({{"action", RaftLogAction::UPDATE_UUID}, {"info", uuid}});
/// Creates the raft log buffer for an UPDATE_UUID_OF_NEW_MAIN action,
/// with `uuid` stored under "info".
auto CoordinatorStateMachine::SerializeUpdateUUIDForNewMain(utils::UUID const &uuid) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::UPDATE_UUID_OF_NEW_MAIN}, {"info", uuid}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an UPDATE_UUID_FOR_INSTANCE action,
/// with the instance-name/UUID pair stored under "info".
auto CoordinatorStateMachine::SerializeUpdateUUIDForInstance(InstanceUUIDUpdate const &instance_uuid_change)
    -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::UPDATE_UUID_FOR_INSTANCE}, {"info", instance_uuid_change}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an ADD_COORDINATOR_INSTANCE action,
/// with the coordinator `config` stored under "info".
auto CoordinatorStateMachine::SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config)
    -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::ADD_COORDINATOR_INSTANCE}, {"info", config}};
  return CreateLog(std::move(log_entry));
}
/// Creates the raft log buffer for an OPEN_LOCK_SET_INSTANCE_AS_REPLICA action,
/// with the instance name stored under "info".
auto CoordinatorStateMachine::SerializeOpenLockSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer> {
  nlohmann::json log_entry = {{"action", RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_REPLICA}, {"info", instance_name}};
  return CreateLog(std::move(log_entry));
}
auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction> {
buffer_serializer bs(data);
auto const json = nlohmann::json::parse(bs.get_str());
auto const action = json["action"].get<RaftLogAction>();
auto const &info = json["info"];
switch (action) {
case RaftLogAction::OPEN_LOCK_REGISTER_REPLICATION_INSTANCE: {
return {info.get<CoordinatorToReplicaConfig>(), action};
}
case RaftLogAction::OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE:
[[fallthrough]];
case RaftLogAction::OPEN_LOCK_FAILOVER:
[[fallthrough]];
case RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_MAIN:
[[fallthrough]];
case RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_REPLICA: {
return {info.get<std::string>(), action};
}
case RaftLogAction::REGISTER_REPLICATION_INSTANCE:
return {info.get<CoordinatorClientConfig>(), action};
case RaftLogAction::UPDATE_UUID:
return {info.get<CoordinatorToReplicaConfig>(), action};
case RaftLogAction::UPDATE_UUID_OF_NEW_MAIN:
return {info.get<utils::UUID>(), action};
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE:
case RaftLogAction::UPDATE_UUID_FOR_INSTANCE:
case RaftLogAction::SET_INSTANCE_AS_MAIN:
return {info.get<InstanceUUIDUpdate>(), action};
case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE:
[[fallthrough]];
case RaftLogAction::SET_INSTANCE_AS_REPLICA:
return {info.get<std::string>(), action};
case RaftLogAction::ADD_COORDINATOR_INSTANCE:
return {info.get<CoordinatorToCoordinatorConfig>(), action};
}
throw std::runtime_error("Unknown action");
}
@ -133,6 +176,7 @@ auto CoordinatorStateMachine::read_logical_snp_obj(snapshot &snapshot, void *& /
} else {
// Object ID > 0: second object, put actual value.
ctx->cluster_state_.Serialize(data_out);
is_last_obj = true;
}
return 0;
@ -155,6 +199,7 @@ auto CoordinatorStateMachine::save_logical_snp_obj(snapshot &snapshot, ulong &ob
DMG_ASSERT(entry != snapshots_.end());
entry->second->cluster_state_ = cluster_state;
}
obj_id++;
}
auto CoordinatorStateMachine::apply_snapshot(snapshot &s) -> bool {
@ -205,11 +250,24 @@ auto CoordinatorStateMachine::create_snapshot_internal(ptr<snapshot> snapshot) -
}
}
auto CoordinatorStateMachine::GetInstances() const -> std::vector<InstanceState> {
return cluster_state_.GetInstances();
auto CoordinatorStateMachine::GetReplicationInstances() const -> std::vector<ReplicationInstanceState> {
return cluster_state_.GetReplicationInstances();
}
auto CoordinatorStateMachine::GetUUID() const -> utils::UUID { return cluster_state_.GetUUID(); }
auto CoordinatorStateMachine::GetCurrentMainUUID() const -> utils::UUID { return cluster_state_.GetCurrentMainUUID(); }
// Returns the result of cluster_state_.IsCurrentMain(instance_name).
auto CoordinatorStateMachine::IsCurrentMain(std::string_view instance_name) const -> bool {
return cluster_state_.IsCurrentMain(instance_name);
}
// Returns, by value, the coordinator instances held by cluster_state_.
auto CoordinatorStateMachine::GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState> {
return cluster_state_.GetCoordinatorInstances();
}
// Returns the UUID that cluster_state_ reports for `instance_name`.
// NOTE(review): behavior for an unknown instance name is decided by cluster_state_ and is not visible here.
auto CoordinatorStateMachine::GetInstanceUUID(std::string_view instance_name) const -> utils::UUID {
return cluster_state_.GetInstanceUUID(instance_name);
}
// Returns the result of cluster_state_.IsLockOpened().
auto CoordinatorStateMachine::IsLockOpened() const -> bool { return cluster_state_.IsLockOpened(); }
} // namespace memgraph::coordination
#endif

View File

@ -33,6 +33,7 @@ CoordinatorStateManager::CoordinatorStateManager(int srv_id, std::string const &
// Returns the cluster configuration currently held in memory (cluster_config_) and logs the access at trace level.
auto CoordinatorStateManager::load_config() -> ptr<cluster_config> {
// Just return in-memory data in this example.
// May require reading from disk here, if it has been written to disk.
spdlog::trace("Loading cluster config");
return cluster_config_;
}
@ -41,6 +42,11 @@ auto CoordinatorStateManager::save_config(cluster_config const &config) -> void
// Need to write to disk here, if want to make it durable.
ptr<buffer> buf = config.serialize();
cluster_config_ = cluster_config::deserialize(*buf);
spdlog::info("Saving cluster config.");
auto servers = cluster_config_->get_servers();
for (auto const &server : servers) {
spdlog::trace("Server id: {}, endpoint: {}", server->get_id(), server->get_endpoint());
}
}
auto CoordinatorStateManager::save_state(srv_state const &state) -> void {

View File

@ -13,7 +13,7 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "replication_coordination_glue/common.hpp"
#include "rpc/client.hpp"
#include "rpc_errors.hpp"
@ -25,11 +25,11 @@ namespace memgraph::coordination {
class CoordinatorInstance;
using HealthCheckClientCallback = std::function<void(CoordinatorInstance *, std::string_view)>;
using ReplicationClientsInfo = std::vector<ReplClientInfo>;
using ReplicationClientsInfo = std::vector<ReplicationClientInfo>;
class CoordinatorClient {
public:
explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config,
explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config,
HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb);
~CoordinatorClient() = default;
@ -62,7 +62,7 @@ class CoordinatorClient {
auto SendGetInstanceUUIDRpc() const -> memgraph::utils::BasicResult<GetInstanceUUIDError, std::optional<utils::UUID>>;
auto ReplicationClientInfo() const -> ReplClientInfo;
auto ReplicationClientInfo() const -> ReplicationClientInfo;
auto SendGetInstanceTimestampsRpc() const
-> utils::BasicResult<GetInstanceUUIDError, replication_coordination_glue::DatabaseHistories>;
@ -83,7 +83,7 @@ class CoordinatorClient {
communication::ClientContext rpc_context_;
mutable rpc::Client rpc_client_;
CoordinatorClientConfig config_;
CoordinatorToReplicaConfig config_;
CoordinatorInstance *coord_instance_;
HealthCheckClientCallback succ_cb_;
HealthCheckClientCallback fail_cb_;

View File

@ -0,0 +1,110 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#ifdef MG_ENTERPRISE
#include "io/network/endpoint.hpp"
#include "replication_coordination_glue/mode.hpp"
#include "utils/string.hpp"
#include <chrono>
#include <cstdint>
#include <optional>
#include <string>
#include <fmt/format.h>
#include "json/json.hpp"
#include "utils/uuid.hpp"
namespace memgraph::coordination {
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
// Replication-related data of one instance: its name, replication mode and replication server endpoint.
// Compared member-wise via the defaulted operator==.
struct ReplicationClientInfo {
std::string instance_name{};
replication_coordination_glue::ReplicationMode replication_mode{};
io::network::Endpoint replication_server;
friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default;
};
// Configuration of one replication instance: its name, endpoints, timing settings and optional SSL files.
// Compared member-wise via the defaulted operator==.
struct CoordinatorToReplicaConfig {
// Socket address of `bolt_server`.
auto BoltSocketAddress() const -> std::string { return bolt_server.SocketAddress(); }
// Socket address of `mgt_server` (the management endpoint).
auto CoordinatorSocketAddress() const -> std::string { return mgt_server.SocketAddress(); }
// Socket address of the replication server stored in `replication_client_info`.
auto ReplicationSocketAddress() const -> std::string {
return replication_client_info.replication_server.SocketAddress();
}
std::string instance_name{};
io::network::Endpoint mgt_server;
io::network::Endpoint bolt_server;
ReplicationClientInfo replication_client_info;
// Timing settings; defaults are 1 s, 5 s and 10 s respectively.
// NOTE(review): exact use of each interval is defined by the health-check code, not shown here — confirm.
std::chrono::seconds instance_health_check_frequency_sec{1};
std::chrono::seconds instance_down_timeout_sec{5};
std::chrono::seconds instance_get_uuid_frequency_sec{10};
// Key and certificate file paths; `ssl` below is empty when no SSL files are configured.
struct SSL {
std::string key_file;
std::string cert_file;
friend bool operator==(const SSL &, const SSL &) = default;
};
std::optional<SSL> ssl;
friend bool operator==(CoordinatorToReplicaConfig const &, CoordinatorToReplicaConfig const &) = default;
};
// Configuration describing one coordinator: its numeric server id (default 0), its Bolt endpoint and its
// coordinator endpoint. Equality is member-wise (defaulted operator==).
struct CoordinatorToCoordinatorConfig {
uint32_t coordinator_server_id{0};
io::network::Endpoint bolt_server;
io::network::Endpoint coordinator_server;
friend bool operator==(CoordinatorToCoordinatorConfig const &, CoordinatorToCoordinatorConfig const &) = default;
};
// Address, port and optional SSL settings for a management server.
// Equality is member-wise (defaulted operator==).
struct ManagementServerConfig {
std::string ip_address;
uint16_t port{};
// SSL settings: key, certificate and CA file paths plus a verify_peer flag (value-initialized to false).
struct SSL {
std::string key_file;
std::string cert_file;
std::string ca_file;
bool verify_peer{};
friend bool operator==(SSL const &, SSL const &) = default;
};
// Empty when no SSL settings are supplied.
std::optional<SSL> ssl;
friend bool operator==(ManagementServerConfig const &, ManagementServerConfig const &) = default;
};
// Pairs an instance name with a UUID.
// NOTE(review): presumably the payload of a "set UUID for instance" operation — confirm against its users.
struct InstanceUUIDUpdate {
std::string instance_name;
memgraph::utils::UUID uuid;
};
// JSON conversion functions for the configuration types above. They follow nlohmann::json's
// to_json/from_json naming convention; the definitions are not part of this header.
void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config);
void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config);
void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config);
void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config);
void to_json(nlohmann::json &j, ReplicationClientInfo const &config);
void from_json(nlohmann::json const &j, ReplicationClientInfo &config);
void to_json(nlohmann::json &j, InstanceUUIDUpdate const &config);
void from_json(nlohmann::json const &j, InstanceUUIDUpdate &config);
} // namespace memgraph::coordination
#endif

View File

@ -1,93 +0,0 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#ifdef MG_ENTERPRISE
#include "replication_coordination_glue/mode.hpp"
#include "utils/string.hpp"
#include <chrono>
#include <cstdint>
#include <optional>
#include <string>
#include <fmt/format.h>
#include "json/json.hpp"
namespace memgraph::coordination {
// Default IP address literal ("0.0.0.0") for a replication server.
// NOTE(review): no use of this constant is visible in this header — confirm where it is consumed.
inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0";
// Configuration describing one instance: name, IP address and port, three timing values, replication client
// info and optional SSL files. Equality is member-wise (defaulted operator==).
struct CoordinatorClientConfig {
std::string instance_name;
std::string ip_address;
uint16_t port{};
// Timing settings with defaults of 1 s, 5 s and 10 s respectively.
// NOTE(review): exact semantics are implied by the names only — confirm against the consuming code.
std::chrono::seconds instance_health_check_frequency_sec{1};
std::chrono::seconds instance_down_timeout_sec{5};
std::chrono::seconds instance_get_uuid_frequency_sec{10};
// Returns "<ip_address>:<port>".
auto CoordinatorSocketAddress() const -> std::string { return fmt::format("{}:{}", ip_address, port); }
// Returns "<replication_ip_address>:<replication_port>" taken from replication_client_info.
auto ReplicationSocketAddress() const -> std::string {
return fmt::format("{}:{}", replication_client_info.replication_ip_address,
replication_client_info.replication_port);
}
// Instance name, replication mode and the replication server's IP address and port.
struct ReplicationClientInfo {
std::string instance_name;
replication_coordination_glue::ReplicationMode replication_mode{};
std::string replication_ip_address;
uint16_t replication_port{};
friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default;
};
ReplicationClientInfo replication_client_info;
// Paths of the key and certificate files; `ssl` below is empty when no SSL settings are supplied.
struct SSL {
std::string key_file;
std::string cert_file;
friend bool operator==(const SSL &, const SSL &) = default;
};
std::optional<SSL> ssl;
friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default;
};
// Short alias for the nested CoordinatorClientConfig::ReplicationClientInfo type.
using ReplClientInfo = CoordinatorClientConfig::ReplicationClientInfo;
// Address, port and optional SSL settings for a coordinator server.
// Equality is member-wise (defaulted operator==).
struct CoordinatorServerConfig {
std::string ip_address;
uint16_t port{};
// SSL settings: key, certificate and CA file paths plus a verify_peer flag (value-initialized to false).
struct SSL {
std::string key_file;
std::string cert_file;
std::string ca_file;
bool verify_peer{};
friend bool operator==(SSL const &, SSL const &) = default;
};
// Empty when no SSL settings are supplied.
std::optional<SSL> ssl;
friend bool operator==(CoordinatorServerConfig const &, CoordinatorServerConfig const &) = default;
};
// JSON conversion functions for the configuration types above. They follow nlohmann::json's
// to_json/from_json naming convention; the definitions are not part of this header.
void to_json(nlohmann::json &j, CoordinatorClientConfig const &config);
void from_json(nlohmann::json const &j, CoordinatorClientConfig &config);
void to_json(nlohmann::json &j, ReplClientInfo const &config);
void from_json(nlohmann::json const &j, ReplClientInfo &config);
} // namespace memgraph::coordination
#endif

View File

@ -94,5 +94,16 @@ class InvalidRaftLogActionException final : public utils::BasicException {
SPECIALIZE_GET_EXCEPTION_NAME(InvalidRaftLogActionException)
};
// Exception type derived from utils::BasicException.
// NOTE(review): presumably thrown when a routing table cannot be built or is malformed — confirm at throw sites.
class InvalidRoutingTableException final : public utils::BasicException {
public:
// Constructs the exception from a ready-made message.
explicit InvalidRoutingTableException(std::string_view what) noexcept : BasicException(what) {}
// Constructs the exception from a fmt format string plus arguments; the formatted text is passed on to the
// string_view constructor above.
// NOTE(review): this constructor is noexcept, yet fmt::format allocates and may throw, which would terminate
// the program — confirm this is acceptable.
template <class... Args>
explicit InvalidRoutingTableException(fmt::format_string<Args...> fmt, Args &&...args) noexcept
: InvalidRoutingTableException(fmt::format(fmt, std::forward<Args>(args)...)) {}
SPECIALIZE_GET_EXCEPTION_NAME(InvalidRoutingTableException)
};
} // namespace memgraph::coordination
#endif

View File

@ -26,6 +26,8 @@
namespace memgraph::coordination {
// A list of (vector of strings, string) pairs.
// NOTE(review): presumably (server addresses, role name) per entry — confirm against GetRoutingTable's implementation.
using RoutingTable = std::vector<std::pair<std::vector<std::string>, std::string>>;
struct NewMainRes {
std::string most_up_to_date_instance;
std::string latest_epoch;
@ -36,8 +38,14 @@ using InstanceNameDbHistories = std::pair<std::string, replication_coordination_
class CoordinatorInstance {
public:
CoordinatorInstance();
CoordinatorInstance(CoordinatorInstance const &) = delete;
CoordinatorInstance &operator=(CoordinatorInstance const &) = delete;
CoordinatorInstance(CoordinatorInstance &&) noexcept = delete;
CoordinatorInstance &operator=(CoordinatorInstance &&) noexcept = delete;
[[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config)
~CoordinatorInstance() = default;
[[nodiscard]] auto RegisterReplicationInstance(CoordinatorToReplicaConfig const &config)
-> RegisterInstanceCoordinatorStatus;
[[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name)
-> UnregisterInstanceCoordinatorStatus;
@ -48,15 +56,17 @@ class CoordinatorInstance {
auto TryFailover() -> void;
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void;
auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void;
auto GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable;
static auto ChooseMostUpToDateInstance(std::span<InstanceNameDbHistories> histories) -> NewMainRes;
auto HasMainState(std::string_view instance_name) const -> bool;
auto HasReplicaState(std::string_view instance_name) const -> bool;
private:
HealthCheckClientCallback client_succ_cb_, client_fail_cb_;
auto OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void;
auto FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &;
void MainFailCallback(std::string_view);
@ -67,14 +77,14 @@ class CoordinatorInstance {
void ReplicaFailCallback(std::string_view);
auto IsMain(std::string_view instance_name) const -> bool;
auto IsReplica(std::string_view instance_name) const -> bool;
HealthCheckClientCallback client_succ_cb_, client_fail_cb_;
// NOTE: Must be std::list because we rely on pointer stability.
// Leader and followers should both have same view on repl_instances_
std::list<ReplicationInstance> repl_instances_;
mutable utils::ResourceLock coord_instance_lock_{};
// Thread pool needs to be constructed before raft state as raft state can call thread pool
utils::ThreadPool thread_pool_;
RaftState raft_state_;
};

View File

@ -14,7 +14,7 @@
#include "utils/uuid.hpp"
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "replication_coordination_glue/common.hpp"
#include "rpc/messages.hpp"
#include "slk/serialization.hpp"
@ -28,14 +28,13 @@ struct PromoteReplicaToMainReq {
static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader);
static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder);
explicit PromoteReplicaToMainReq(const utils::UUID &uuid,
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info)
explicit PromoteReplicaToMainReq(const utils::UUID &uuid, std::vector<ReplicationClientInfo> replication_clients_info)
: main_uuid_(uuid), replication_clients_info(std::move(replication_clients_info)) {}
PromoteReplicaToMainReq() = default;
// get uuid here
utils::UUID main_uuid_;
std::vector<CoordinatorClientConfig::ReplicationClientInfo> replication_clients_info;
std::vector<ReplicationClientInfo> replication_clients_info;
};
struct PromoteReplicaToMainRes {
@ -60,12 +59,12 @@ struct DemoteMainToReplicaReq {
static void Load(DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader);
static void Save(const DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder);
explicit DemoteMainToReplicaReq(CoordinatorClientConfig::ReplicationClientInfo replication_client_info)
explicit DemoteMainToReplicaReq(ReplicationClientInfo replication_client_info)
: replication_client_info(std::move(replication_client_info)) {}
DemoteMainToReplicaReq() = default;
CoordinatorClientConfig::ReplicationClientInfo replication_client_info;
ReplicationClientInfo replication_client_info;
};
struct DemoteMainToReplicaRes {

View File

@ -13,14 +13,14 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "rpc/server.hpp"
namespace memgraph::coordination {
class CoordinatorServer {
public:
explicit CoordinatorServer(const CoordinatorServerConfig &config);
explicit CoordinatorServer(const ManagementServerConfig &config);
CoordinatorServer(const CoordinatorServer &) = delete;
CoordinatorServer(CoordinatorServer &&) = delete;
CoordinatorServer &operator=(const CoordinatorServer &) = delete;

View File

@ -13,27 +13,37 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "replication_coordination_glue/common.hpp"
#include "slk/serialization.hpp"
#include "slk/streams.hpp"
namespace memgraph::slk {
using ReplicationClientInfo = coordination::CoordinatorClientConfig::ReplicationClientInfo;
using ReplicationClientInfo = coordination::ReplicationClientInfo;
inline void Save(const ReplicationClientInfo &obj, Builder *builder) {
// Serializes an endpoint by writing its address, port and family, in that order.
// The order must stay in sync with Load(io::network::Endpoint *, Reader *), which reads the same sequence.
inline void Save(io::network::Endpoint const &obj, Builder *builder) {
Save(obj.address, builder);
Save(obj.port, builder);
Save(obj.family, builder);
}
// Deserializes an endpoint by reading its address, port and family, in that order, into *obj.
// The order must stay in sync with Save(io::network::Endpoint const &, Builder *).
inline void Load(io::network::Endpoint *obj, Reader *reader) {
Load(&obj->address, reader);
Load(&obj->port, reader);
Load(&obj->family, reader);
}
inline void Save(ReplicationClientInfo const &obj, Builder *builder) {
Save(obj.instance_name, builder);
Save(obj.replication_mode, builder);
Save(obj.replication_ip_address, builder);
Save(obj.replication_port, builder);
Save(obj.replication_server, builder);
}
inline void Load(ReplicationClientInfo *obj, Reader *reader) {
Load(&obj->instance_name, reader);
Load(&obj->replication_mode, reader);
Load(&obj->replication_ip_address, reader);
Load(&obj->replication_port, reader);
Load(&obj->replication_server, reader);
}
inline void Save(const replication_coordination_glue::DatabaseHistory &obj, Builder *builder) {

View File

@ -33,7 +33,7 @@ class CoordinatorState {
CoordinatorState(CoordinatorState &&) noexcept = delete;
CoordinatorState &operator=(CoordinatorState &&) noexcept = delete;
[[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config)
[[nodiscard]] auto RegisterReplicationInstance(CoordinatorToReplicaConfig const &config)
-> RegisterInstanceCoordinatorStatus;
[[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name)
-> UnregisterInstanceCoordinatorStatus;
@ -42,11 +42,13 @@ class CoordinatorState {
auto ShowInstances() const -> std::vector<InstanceStatus>;
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void;
auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void;
// NOTE: The client code must check that the server exists before calling this method.
auto GetCoordinatorServer() const -> CoordinatorServer &;
auto GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable;
private:
struct CoordinatorMainReplicaData {
std::unique_ptr<CoordinatorServer> coordinator_server_;

View File

@ -23,7 +23,7 @@
namespace memgraph::coordination {
class CoordinatorInstance;
struct CoordinatorClientConfig;
struct CoordinatorToReplicaConfig;
using BecomeLeaderCb = std::function<void()>;
using BecomeFollowerCb = std::function<void()>;
@ -40,7 +40,7 @@ using raft_result = nuraft::cmd_result<ptr<buffer>>;
class RaftState {
private:
explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id,
uint32_t raft_port, std::string raft_address);
auto InitRaftServer() -> void;
@ -58,30 +58,43 @@ class RaftState {
auto InstanceName() const -> std::string;
auto RaftSocketAddress() const -> std::string;
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void;
auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void;
auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>;
auto RequestLeadership() -> bool;
auto IsLeader() const -> bool;
auto FindCurrentMainInstanceName() const -> std::optional<std::string>;
auto MainExists() const -> bool;
auto IsMain(std::string_view instance_name) const -> bool;
auto IsReplica(std::string_view instance_name) const -> bool;
auto AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool;
auto AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool;
auto AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool;
auto AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool;
auto AppendSetInstanceAsMainLog(std::string_view instance_name, utils::UUID const &uuid) -> bool;
auto AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool;
auto AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool;
auto AppendUpdateUUIDForNewMainLog(utils::UUID const &uuid) -> bool;
auto AppendUpdateUUIDForInstanceLog(std::string_view instance_name, utils::UUID const &uuid) -> bool;
auto AppendOpenLockRegister(CoordinatorToReplicaConfig const &) -> bool;
auto AppendOpenLockUnregister(std::string_view) -> bool;
auto AppendOpenLockFailover(std::string_view instance_name) -> bool;
auto AppendOpenLockSetInstanceToMain(std::string_view instance_name) -> bool;
auto AppendOpenLockSetInstanceToReplica(std::string_view instance_name) -> bool;
auto AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool;
auto GetInstances() const -> std::vector<InstanceState>;
auto GetUUID() const -> utils::UUID;
auto GetReplicationInstances() const -> std::vector<ReplicationInstanceState>;
// TODO: (andi) Do we need then GetAllCoordinators?
auto GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState>;
auto MainExists() const -> bool;
auto HasMainState(std::string_view instance_name) const -> bool;
auto HasReplicaState(std::string_view instance_name) const -> bool;
auto IsCurrentMain(std::string_view instance_name) const -> bool;
auto GetCurrentMainUUID() const -> utils::UUID;
auto GetInstanceUUID(std::string_view) const -> utils::UUID;
auto IsLockOpened() const -> bool;
private:
// TODO: (andi) The variables below could probably be abstracted or cleaned up.
io::network::Endpoint raft_endpoint_;
uint32_t raft_server_id_;
uint32_t coordinator_id_;
ptr<CoordinatorStateMachine> state_machine_;
ptr<CoordinatorStateManager> state_manager_;

View File

@ -25,7 +25,9 @@ enum class RegisterInstanceCoordinatorStatus : uint8_t {
NOT_LEADER,
RPC_FAILED,
RAFT_LOG_ERROR,
SUCCESS
SUCCESS,
LOCK_OPENED,
OPEN_LOCK
};
enum class UnregisterInstanceCoordinatorStatus : uint8_t {
@ -36,6 +38,8 @@ enum class UnregisterInstanceCoordinatorStatus : uint8_t {
NOT_LEADER,
RAFT_LOG_ERROR,
SUCCESS,
LOCK_OPENED,
OPEN_LOCK
};
enum class SetInstanceToMainCoordinatorStatus : uint8_t {
@ -47,6 +51,9 @@ enum class SetInstanceToMainCoordinatorStatus : uint8_t {
COULD_NOT_PROMOTE_TO_MAIN,
SWAP_UUID_FAILED,
SUCCESS,
LOCK_OPENED,
OPEN_LOCK,
ENABLE_WRITING_FAILED
};
} // namespace memgraph::coordination

View File

@ -32,7 +32,7 @@ using HealthCheckInstanceCallback = void (CoordinatorInstance::*)(std::string_vi
class ReplicationInstance {
public:
ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, HealthCheckClientCallback succ_cb,
ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb,
HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb,
HealthCheckInstanceCallback fail_instance_cb);
@ -67,7 +67,7 @@ class ReplicationInstance {
auto PauseFrequentCheck() -> void;
auto ResumeFrequentCheck() -> void;
auto ReplicationClientInfo() const -> ReplClientInfo;
auto ReplicationClientInfo() const -> ReplicationClientInfo;
auto EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool;
@ -79,10 +79,6 @@ class ReplicationInstance {
auto EnableWritingOnMain() -> bool;
auto SetNewMainUUID(utils::UUID const &main_uuid) -> void;
auto ResetMainUUID() -> void;
auto GetMainUUID() const -> std::optional<utils::UUID> const &;
auto GetSuccessCallback() -> HealthCheckInstanceCallback &;
auto GetFailCallback() -> HealthCheckInstanceCallback &;
@ -92,19 +88,12 @@ class ReplicationInstance {
bool is_alive_{false};
std::chrono::system_clock::time_point last_check_of_uuid_{};
// for replica this is main uuid of current main
// for "main" main this same as in CoordinatorData
// it is set to nullopt when replica is down
// TLDR; when replica is down and comes back up we reset uuid of main replica is listening to
// so we need to send swap uuid again
std::optional<utils::UUID> main_uuid_;
HealthCheckInstanceCallback succ_cb_;
HealthCheckInstanceCallback fail_cb_;
friend bool operator==(ReplicationInstance const &first, ReplicationInstance const &second) {
return first.client_ == second.client_ && first.last_response_time_ == second.last_response_time_ &&
first.is_alive_ == second.is_alive_ && first.main_uuid_ == second.main_uuid_;
first.is_alive_ == second.is_alive_;
}
};

View File

@ -13,7 +13,7 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "nuraft/raft_log_action.hpp"
#include "replication_coordination_glue/role.hpp"
#include "utils/resource_lock.hpp"
@ -32,19 +32,37 @@ namespace memgraph::coordination {
using replication_coordination_glue::ReplicationRole;
struct InstanceState {
CoordinatorClientConfig config;
struct ReplicationInstanceState {
CoordinatorToReplicaConfig config;
ReplicationRole status;
friend auto operator==(InstanceState const &lhs, InstanceState const &rhs) -> bool {
return lhs.config == rhs.config && lhs.status == rhs.status;
// For a replica, this is the UUID of the current main.
// For the current main, this is the same as current_main_uuid_.
// When a replica goes down and comes back up, we reset the UUID of the main the replica is listening to,
// so we need to send the swap UUID request again.
// For MAIN we don't enable writing until the cluster is in a healthy state.
utils::UUID instance_uuid;
friend auto operator==(ReplicationInstanceState const &lhs, ReplicationInstanceState const &rhs) -> bool {
return lhs.config == rhs.config && lhs.status == rhs.status && lhs.instance_uuid == rhs.instance_uuid;
}
};
void to_json(nlohmann::json &j, InstanceState const &instance_state);
void from_json(nlohmann::json const &j, InstanceState &instance_state);
// NOTE: Currently instance of coordinator doesn't change from the registration. Hence, just wrap
// CoordinatorToCoordinatorConfig.
struct CoordinatorInstanceState {
CoordinatorToCoordinatorConfig config;
using TRaftLog = std::variant<CoordinatorClientConfig, std::string, utils::UUID>;
friend auto operator==(CoordinatorInstanceState const &lhs, CoordinatorInstanceState const &rhs) -> bool {
return lhs.config == rhs.config;
}
};
void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state);
void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state);
using TRaftLog = std::variant<CoordinatorToReplicaConfig, std::string, utils::UUID, CoordinatorToCoordinatorConfig,
InstanceUUIDUpdate>;
using nuraft::buffer;
using nuraft::buffer_serializer;
@ -53,7 +71,8 @@ using nuraft::ptr;
class CoordinatorClusterState {
public:
CoordinatorClusterState() = default;
explicit CoordinatorClusterState(std::map<std::string, InstanceState, std::less<>> instances);
explicit CoordinatorClusterState(std::map<std::string, ReplicationInstanceState, std::less<>> instances,
utils::UUID const &current_main_uuid, bool is_lock_opened);
CoordinatorClusterState(CoordinatorClusterState const &);
CoordinatorClusterState &operator=(CoordinatorClusterState const &);
@ -62,15 +81,13 @@ class CoordinatorClusterState {
CoordinatorClusterState &operator=(CoordinatorClusterState &&other) noexcept;
~CoordinatorClusterState() = default;
auto FindCurrentMainInstanceName() const -> std::optional<std::string>;
auto MainExists() const -> bool;
auto IsMain(std::string_view instance_name) const -> bool;
auto HasMainState(std::string_view instance_name) const -> bool;
auto IsReplica(std::string_view instance_name) const -> bool;
auto HasReplicaState(std::string_view instance_name) const -> bool;
auto InsertInstance(std::string instance_name, InstanceState instance_state) -> void;
auto IsCurrentMain(std::string_view instance_name) const -> bool;
auto DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void;
@ -78,14 +95,22 @@ class CoordinatorClusterState {
static auto Deserialize(buffer &data) -> CoordinatorClusterState;
auto GetInstances() const -> std::vector<InstanceState>;
auto GetReplicationInstances() const -> std::vector<ReplicationInstanceState>;
auto GetUUID() const -> utils::UUID;
auto GetCurrentMainUUID() const -> utils::UUID;
auto GetInstanceUUID(std::string_view) const -> utils::UUID;
auto IsLockOpened() const -> bool;
auto GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState>;
private:
std::map<std::string, InstanceState, std::less<>> instances_{};
utils::UUID uuid_{};
std::vector<CoordinatorInstanceState> coordinators_{};
std::map<std::string, ReplicationInstanceState, std::less<>> repl_instances_{};
utils::UUID current_main_uuid_{};
mutable utils::ResourceLock log_lock_{};
bool is_lock_opened_{false};
};
} // namespace memgraph::coordination

View File

@ -13,7 +13,7 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "nuraft/coordinator_cluster_state.hpp"
#include "nuraft/raft_log_action.hpp"
@ -40,19 +40,21 @@ class CoordinatorStateMachine : public state_machine {
CoordinatorStateMachine &operator=(CoordinatorStateMachine const &) = delete;
CoordinatorStateMachine(CoordinatorStateMachine &&) = delete;
CoordinatorStateMachine &operator=(CoordinatorStateMachine &&) = delete;
~CoordinatorStateMachine() override {}
auto FindCurrentMainInstanceName() const -> std::optional<std::string>;
auto MainExists() const -> bool;
auto IsMain(std::string_view instance_name) const -> bool;
auto IsReplica(std::string_view instance_name) const -> bool;
~CoordinatorStateMachine() override = default;
static auto CreateLog(nlohmann::json &&log) -> ptr<buffer>;
static auto SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer>;
static auto SerializeOpenLockRegister(CoordinatorToReplicaConfig const &config) -> ptr<buffer>;
static auto SerializeOpenLockUnregister(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeOpenLockSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeOpenLockFailover(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr<buffer>;
static auto SerializeUnregisterInstance(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeSetInstanceAsMain(InstanceUUIDUpdate const &instance_uuid_change) -> ptr<buffer>;
static auto SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer>;
static auto SerializeUpdateUUID(utils::UUID const &uuid) -> ptr<buffer>;
static auto SerializeUpdateUUIDForNewMain(utils::UUID const &uuid) -> ptr<buffer>;
static auto SerializeUpdateUUIDForInstance(InstanceUUIDUpdate const &instance_uuid_change) -> ptr<buffer>;
static auto SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> ptr<buffer>;
static auto SerializeOpenLockSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer>;
static auto DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction>;
@ -80,8 +82,19 @@ class CoordinatorStateMachine : public state_machine {
auto create_snapshot(snapshot &s, async_result<bool>::handler_type &when_done) -> void override;
auto GetInstances() const -> std::vector<InstanceState>;
auto GetUUID() const -> utils::UUID;
auto GetReplicationInstances() const -> std::vector<ReplicationInstanceState>;
auto GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState>;
// Getters
auto MainExists() const -> bool;
auto HasMainState(std::string_view instance_name) const -> bool;
auto HasReplicaState(std::string_view instance_name) const -> bool;
auto IsCurrentMain(std::string_view instance_name) const -> bool;
auto GetCurrentMainUUID() const -> utils::UUID;
auto GetInstanceUUID(std::string_view instance_name) const -> utils::UUID;
auto IsLockOpened() const -> bool;
private:
struct SnapshotCtx {

View File

@ -23,20 +23,34 @@
namespace memgraph::coordination {
enum class RaftLogAction : uint8_t {
OPEN_LOCK_REGISTER_REPLICATION_INSTANCE,
OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE,
OPEN_LOCK_FAILOVER,
OPEN_LOCK_SET_INSTANCE_AS_MAIN,
OPEN_LOCK_SET_INSTANCE_AS_REPLICA,
REGISTER_REPLICATION_INSTANCE,
UNREGISTER_REPLICATION_INSTANCE,
SET_INSTANCE_AS_MAIN,
SET_INSTANCE_AS_REPLICA,
UPDATE_UUID
UPDATE_UUID_OF_NEW_MAIN,
ADD_COORDINATOR_INSTANCE,
UPDATE_UUID_FOR_INSTANCE,
};
NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, {
{RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"},
{RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"},
{RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"},
{RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"},
{RaftLogAction::UPDATE_UUID, "update_uuid"},
})
NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction,
{{RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"},
{RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"},
{RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"},
{RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"},
{RaftLogAction::UPDATE_UUID_OF_NEW_MAIN, "update_uuid_of_new_main"},
{RaftLogAction::ADD_COORDINATOR_INSTANCE, "add_coordinator_instance"},
{RaftLogAction::UPDATE_UUID_FOR_INSTANCE, "update_uuid_for_instance"},
{RaftLogAction::OPEN_LOCK_REGISTER_REPLICATION_INSTANCE, "open_lock_register_instance"},
{RaftLogAction::OPEN_LOCK_UNREGISTER_REPLICATION_INSTANCE,
"open_lock_unregister_instance"},
{RaftLogAction::OPEN_LOCK_FAILOVER, "open_lock_failover"},
{RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_MAIN, "open_lock_set_instance_as_main"},
{RaftLogAction::OPEN_LOCK_SET_INSTANCE_AS_REPLICA, "open_lock_set_instance_as_replica"}})
} // namespace memgraph::coordination
#endif

View File

@ -12,8 +12,7 @@
#ifdef MG_ENTERPRISE
#include <chrono>
#include <spdlog/spdlog.h>
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "coordination/coordinator_exceptions.hpp"
#include "coordination/raft_state.hpp"
#include "utils/counter.hpp"
@ -31,12 +30,12 @@ using nuraft::raft_server;
using nuraft::srv_config;
using raft_result = cmd_result<ptr<buffer>>;
RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id,
RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id,
uint32_t raft_port, std::string raft_address)
: raft_endpoint_(raft_address, raft_port),
raft_server_id_(raft_server_id),
coordinator_id_(coordinator_id),
state_machine_(cs_new<CoordinatorStateMachine>()),
state_manager_(cs_new<CoordinatorStateManager>(raft_server_id_, raft_endpoint_.SocketAddress())),
state_manager_(cs_new<CoordinatorStateManager>(coordinator_id_, raft_endpoint_.SocketAddress())),
logger_(nullptr),
become_leader_cb_(std::move(become_leader_cb)),
become_follower_cb_(std::move(become_follower_cb)) {}
@ -63,13 +62,18 @@ auto RaftState::InitRaftServer() -> void {
params.leadership_expiry_ = 200;
raft_server::init_options init_opts;
init_opts.raft_callback_ = [this](cb_func::Type event_type, cb_func::Param *param) -> nuraft::CbReturnCode {
if (event_type == cb_func::BecomeLeader) {
spdlog::info("Node {} became leader", param->leaderId);
become_leader_cb_();
} else if (event_type == cb_func::BecomeFollower) {
spdlog::info("Node {} became follower", param->myId);
// TODO(antoniofilipovic) Check what happens when becoming follower while doing failover
// There is no way to stop becoming a follower:
// https://github.com/eBay/NuRaft/blob/188947bcc73ce38ab1c3cf9d01015ca8a29decd9/src/raft_server.cxx#L1334-L1335
spdlog::trace("Got request to become follower");
become_follower_cb_();
spdlog::trace("Node {} became follower", param->myId);
}
return CbReturnCode::Ok;
};
@ -82,7 +86,6 @@ auto RaftState::InitRaftServer() -> void {
if (!raft_server_) {
throw RaftServerStartException("Failed to launch raft server on {}", raft_endpoint_.SocketAddress());
}
auto maybe_stop = utils::ResettableCounter<20>();
do {
if (raft_server_->is_initialized()) {
@ -95,11 +98,11 @@ auto RaftState::InitRaftServer() -> void {
}
auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState {
uint32_t raft_server_id = FLAGS_raft_server_id;
uint32_t raft_port = FLAGS_raft_server_port;
uint32_t coordinator_id = FLAGS_coordinator_id;
uint32_t raft_port = FLAGS_coordinator_port;
auto raft_state =
RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_server_id, raft_port, "127.0.0.1");
RaftState(std::move(become_leader_cb), std::move(become_follower_cb), coordinator_id, raft_port, "127.0.0.1");
raft_state.InitRaftServer();
return raft_state;
@ -108,15 +111,14 @@ auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerC
RaftState::~RaftState() { launcher_.shutdown(); }
auto RaftState::InstanceName() const -> std::string {
return fmt::format("coordinator_{}", std::to_string(raft_server_id_));
return fmt::format("coordinator_{}", std::to_string(coordinator_id_));
}
auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); }
auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address)
-> void {
auto const endpoint = fmt::format("{}:{}", raft_address, raft_port);
srv_config const srv_config_to_add(static_cast<int>(raft_server_id), endpoint);
auto RaftState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void {
auto const endpoint = config.coordinator_server.SocketAddress();
srv_config const srv_config_to_add(static_cast<int>(config.coordinator_server_id), endpoint);
auto cmd_result = raft_server_->add_srv(srv_config_to_add);
@ -134,9 +136,9 @@ auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_po
bool added{false};
while (!maybe_stop()) {
std::this_thread::sleep_for(std::chrono::milliseconds(waiting_period));
const auto server_config = raft_server_->get_srv_config(static_cast<nuraft::int32>(raft_server_id));
const auto server_config = raft_server_->get_srv_config(static_cast<nuraft::int32>(config.coordinator_server_id));
if (server_config) {
spdlog::trace("Server with id {} added to cluster", raft_server_id);
spdlog::trace("Server with id {} added to cluster", config.coordinator_server_id);
added = true;
break;
}
@ -158,7 +160,79 @@ auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); }
auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); }
auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool {
// Serializes an "open lock for register" entry for `config` and appends it through raft_server_.
// Returns true only if the append was accepted and the result code is OK; each failure is logged.
auto RaftState::AppendOpenLockRegister(CoordinatorToReplicaConfig const &config) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeOpenLockRegister(config);
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error("Failed to accept request to open lock to register instance {}", config.instance_name);
    return false;
  }

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to open lock for registering instance {} with error code {}", config.instance_name,
                static_cast<int>(result_code));
  return false;
}
// Serializes an "open lock for unregister" entry for `instance_name` and appends it through raft_server_.
// Returns true only if the append was accepted and the result code is OK; each failure is logged.
auto RaftState::AppendOpenLockUnregister(std::string_view instance_name) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeOpenLockUnregister(instance_name);
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error("Failed to accept request to open lock to unregister instance {}.", instance_name);
    return false;
  }

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to open lock for unregistering instance {} with error code {}", instance_name,
                static_cast<int>(result_code));
  return false;
}
// Serializes an "open lock for failover" entry for `instance_name` and appends it through raft_server_.
// Returns true only if the append was accepted and the result code is OK; each failure is logged.
auto RaftState::AppendOpenLockFailover(std::string_view instance_name) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeOpenLockFailover(instance_name);
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error("Failed to accept request to open lock for failover {}", instance_name);
    return false;
  }

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to open lock for failover to instance {} with error code {}", instance_name,
                static_cast<int>(result_code));
  return false;
}
// Serializes an "open lock, set instance as main" entry for `instance_name` and appends it through
// raft_server_. Returns true only if the append was accepted and the result code is OK; failures are logged.
auto RaftState::AppendOpenLockSetInstanceToMain(std::string_view instance_name) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeOpenLockSetInstanceAsMain(instance_name);
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error("Failed to accept request to open lock and set instance {} to MAIN", instance_name);
    return false;
  }

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to open lock to set instance {} to MAIN with error code {}", instance_name,
                static_cast<int>(result_code));
  return false;
}
auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool {
auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config);
auto const res = raft_server_->append_entries({new_log});
@ -202,8 +276,9 @@ auto RaftState::AppendUnregisterReplicationInstanceLog(std::string_view instance
return true;
}
auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool {
auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsMain(instance_name);
auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name, utils::UUID const &uuid) -> bool {
auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsMain(
InstanceUUIDUpdate{.instance_name = std::string{instance_name}, .uuid = uuid});
auto const res = raft_server_->append_entries({new_log});
if (!res->get_accepted()) {
spdlog::error(
@ -242,8 +317,28 @@ auto RaftState::AppendSetInstanceAsReplicaLog(std::string_view instance_name) ->
return true;
}
auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool {
auto new_log = CoordinatorStateMachine::SerializeUpdateUUID(uuid);
auto RaftState::AppendOpenLockSetInstanceToReplica(std::string_view instance_name) -> bool {
auto new_log = CoordinatorStateMachine::SerializeOpenLockSetInstanceAsReplica(instance_name);
auto const res = raft_server_->append_entries({new_log});
if (!res->get_accepted()) {
spdlog::error(
"Failed to accept request for demoting instance {}. Most likely the reason is that the instance is not "
"the leader.",
instance_name);
return false;
}
spdlog::info("Request for demoting instance {} accepted", instance_name);
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
spdlog::error("Failed to promote instance {} with error code {}", instance_name, int(res->get_result_code()));
return false;
}
return true;
}
auto RaftState::AppendUpdateUUIDForNewMainLog(utils::UUID const &uuid) -> bool {
auto new_log = CoordinatorStateMachine::SerializeUpdateUUIDForNewMain(uuid);
auto const res = raft_server_->append_entries({new_log});
if (!res->get_accepted()) {
spdlog::error(
@ -251,7 +346,7 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool {
"the leader.");
return false;
}
spdlog::info("Request for updating UUID accepted");
spdlog::trace("Request for updating UUID accepted");
if (res->get_result_code() != nuraft::cmd_result_code::OK) {
spdlog::error("Failed to update UUID with error code {}", int(res->get_result_code()));
@ -261,21 +356,75 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool {
return true;
}
auto RaftState::FindCurrentMainInstanceName() const -> std::optional<std::string> {
return state_machine_->FindCurrentMainInstanceName();
// Serializes an "add coordinator instance" entry for `config` and appends it through raft_server_.
// Returns true only if the append was accepted and the result code is OK; acceptance is logged at
// info level and each failure at error level.
auto RaftState::AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config);
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error(
        "Failed to accept request for adding coordinator instance {}. Most likely the reason is that the instance is "
        "not the leader.",
        config.coordinator_server_id);
    return false;
  }
  spdlog::info("Request for adding coordinator instance {} accepted", config.coordinator_server_id);

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to add coordinator instance {} with error code {}", config.coordinator_server_id,
                static_cast<int>(result_code));
  return false;
}
// Serializes an "update UUID for instance" entry pairing `instance_name` with `uuid` and appends it
// through raft_server_. Returns true only if the append was accepted and the result code is OK.
auto RaftState::AppendUpdateUUIDForInstanceLog(std::string_view instance_name, const utils::UUID &uuid) -> bool {
  auto const serialized_entry = CoordinatorStateMachine::SerializeUpdateUUIDForInstance(
      {.instance_name = std::string{instance_name}, .uuid = uuid});
  auto const append_result = raft_server_->append_entries({serialized_entry});

  if (!append_result->get_accepted()) {
    spdlog::error("Failed to accept request for updating UUID of instance.");
    return false;
  }
  spdlog::trace("Request for updating UUID of instance accepted");

  auto const result_code = append_result->get_result_code();
  if (result_code == nuraft::cmd_result_code::OK) {
    return true;
  }

  spdlog::error("Failed to update UUID of instance with error code {}", static_cast<int>(result_code));
  return false;
}
auto RaftState::MainExists() const -> bool { return state_machine_->MainExists(); }
auto RaftState::IsMain(std::string_view instance_name) const -> bool { return state_machine_->IsMain(instance_name); }
auto RaftState::IsReplica(std::string_view instance_name) const -> bool {
return state_machine_->IsReplica(instance_name);
auto RaftState::HasMainState(std::string_view instance_name) const -> bool {
return state_machine_->HasMainState(instance_name);
}
auto RaftState::GetInstances() const -> std::vector<InstanceState> { return state_machine_->GetInstances(); }
// Forwards to state_machine_->HasReplicaState(instance_name) and returns its answer unchanged.
auto RaftState::HasReplicaState(std::string_view instance_name) const -> bool {
return state_machine_->HasReplicaState(instance_name);
}
auto RaftState::GetUUID() const -> utils::UUID { return state_machine_->GetUUID(); }
// Returns the replication instance states reported by state_machine_->GetReplicationInstances().
auto RaftState::GetReplicationInstances() const -> std::vector<ReplicationInstanceState> {
return state_machine_->GetReplicationInstances();
}
auto RaftState::GetCurrentMainUUID() const -> utils::UUID { return state_machine_->GetCurrentMainUUID(); }
// Forwards to state_machine_->IsCurrentMain(instance_name) and returns its answer unchanged.
auto RaftState::IsCurrentMain(std::string_view instance_name) const -> bool {
return state_machine_->IsCurrentMain(instance_name);
}
auto RaftState::IsLockOpened() const -> bool { return state_machine_->IsLockOpened(); }
// Returns the UUID that state_machine_->GetInstanceUUID() reports for `instance_name`.
auto RaftState::GetInstanceUUID(std::string_view instance_name) const -> utils::UUID {
return state_machine_->GetInstanceUUID(instance_name);
}
// Returns the coordinator instance states reported by state_machine_->GetCoordinatorInstances().
auto RaftState::GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState> {
return state_machine_->GetCoordinatorInstances();
}
} // namespace memgraph::coordination
#endif

View File

@ -20,7 +20,7 @@
namespace memgraph::coordination {
ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config,
ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config,
HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb,
HealthCheckInstanceCallback succ_instance_cb,
HealthCheckInstanceCallback fail_instance_cb)
@ -56,7 +56,6 @@ auto ReplicationInstance::PromoteToMain(utils::UUID const &new_uuid, Replication
return false;
}
main_uuid_ = new_uuid;
succ_cb_ = main_succ_cb;
fail_cb_ = main_fail_cb;
@ -82,7 +81,7 @@ auto ReplicationInstance::StopFrequentCheck() -> void { client_.StopFrequentChec
auto ReplicationInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); }
auto ReplicationInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); }
auto ReplicationInstance::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo {
auto ReplicationInstance::ReplicationClientInfo() const -> coordination::ReplicationClientInfo {
return client_.ReplicationClientInfo();
}
@ -91,9 +90,6 @@ auto ReplicationInstance::GetFailCallback() -> HealthCheckInstanceCallback & { r
auto ReplicationInstance::GetClient() -> CoordinatorClient & { return client_; }
auto ReplicationInstance::SetNewMainUUID(utils::UUID const &main_uuid) -> void { main_uuid_ = main_uuid; }
auto ReplicationInstance::GetMainUUID() const -> std::optional<utils::UUID> const & { return main_uuid_; }
auto ReplicationInstance::EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool {
if (!IsReadyForUUIDPing()) {
return true;
@ -116,7 +112,6 @@ auto ReplicationInstance::SendSwapAndUpdateUUID(utils::UUID const &new_main_uuid
if (!replication_coordination_glue::SendSwapMainUUIDRpc(client_.RpcClient(), new_main_uuid)) {
return false;
}
SetNewMainUUID(new_main_uuid);
return true;
}

View File

@ -20,7 +20,7 @@ namespace memgraph::dbms {
CoordinatorHandler::CoordinatorHandler(coordination::CoordinatorState &coordinator_state)
: coordinator_state_(coordinator_state) {}
auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config)
auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config)
-> coordination::RegisterInstanceCoordinatorStatus {
return coordinator_state_.RegisterReplicationInstance(config);
}
@ -39,9 +39,8 @@ auto CoordinatorHandler::ShowInstances() const -> std::vector<coordination::Inst
return coordinator_state_.ShowInstances();
}
auto CoordinatorHandler::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port,
std::string_view raft_address) -> void {
coordinator_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address);
auto CoordinatorHandler::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void {
coordinator_state_.AddCoordinatorInstance(config);
}
} // namespace memgraph::dbms

View File

@ -13,7 +13,7 @@
#ifdef MG_ENTERPRISE
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "coordination/coordinator_state.hpp"
#include "coordination/instance_status.hpp"
#include "coordination/register_main_replica_coordinator_status.hpp"
@ -30,7 +30,7 @@ class CoordinatorHandler {
// TODO: (andi) When moving coordinator state on same instances, rename from RegisterReplicationInstance to
// RegisterInstance
auto RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config)
auto RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config)
-> coordination::RegisterInstanceCoordinatorStatus;
auto UnregisterReplicationInstance(std::string_view instance_name)
@ -40,7 +40,7 @@ class CoordinatorHandler {
auto ShowInstances() const -> std::vector<coordination::InstanceStatus>;
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void;
auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void;
private:
coordination::CoordinatorState &coordinator_state_;

View File

@ -13,11 +13,11 @@
#ifdef MG_ENTERPRISE
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_uint32(coordinator_server_port, 0, "Port on which coordinator servers will be started.");
DEFINE_uint32(management_port, 0, "Port on which coordinator servers will be started.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_uint32(raft_server_port, 0, "Port on which raft servers will be started.");
DEFINE_uint32(coordinator_port, 0, "Port on which raft servers will be started.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_uint32(raft_server_id, 0, "Unique ID of the raft server.");
DEFINE_uint32(coordinator_id, 0, "Unique ID of the raft server.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_uint32(instance_down_timeout_sec, 5, "Time duration after which an instance is considered down.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)

View File

@ -15,11 +15,11 @@
#ifdef MG_ENTERPRISE
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_uint32(coordinator_server_port);
DECLARE_uint32(management_port);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_uint32(raft_server_port);
DECLARE_uint32(coordinator_port);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_uint32(raft_server_id);
DECLARE_uint32(coordinator_id);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_uint32(instance_down_timeout_sec);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)

View File

@ -249,6 +249,40 @@ std::pair<std::vector<std::string>, std::optional<int>> SessionHL::Interpret(
}
}
using memgraph::communication::bolt::Value;
#ifdef MG_ENTERPRISE
// Builds the reply for a routing request.
//
// Every entry of `routing` is converted to a string (via Value::ValueString) and handed to
// interpreter_.Route(). The returned table is repackaged as a map holding "ttl", "db" (a
// default-constructed Value) and "servers" (one {"addresses", "role"} map per server), and that
// map is returned under the single key "rt". `bookmarks` and `extra` are unused.
auto SessionHL::Route(std::map<std::string, Value> const &routing,
                      std::vector<memgraph::communication::bolt::Value> const & /*bookmarks*/,
                      std::map<std::string, Value> const & /*extra*/) -> std::map<std::string, Value> {
  std::map<std::string, std::string> routing_as_strings;
  for (auto const &[key, value] : routing) {
    routing_as_strings.emplace(key, value.ValueString());
  }

  auto routing_table = interpreter_.Route(routing_as_strings);

  // Converts one (addresses, role) entry of the routing table into a bolt map value.
  auto const to_bolt_server = [](auto const &server_info) -> Value {
    auto const &[addresses, role] = server_info;

    std::vector<Value> bolt_addresses;
    for (auto const &addr : addresses) {
      bolt_addresses.push_back(Value{addr});
    }

    std::map<std::string, Value> server_map;
    server_map["addresses"] = std::move(bolt_addresses);
    server_map["role"] = Value{role};
    return Value{std::move(server_map)};
  };

  std::vector<Value> bolt_servers;
  for (auto const &server_info : routing_table.servers) {
    bolt_servers.push_back(to_bolt_server(server_info));
  }

  std::map<std::string, Value> reply;
  reply["ttl"] = Value{routing_table.ttl};
  reply["db"] = Value{};
  reply["servers"] = Value{std::move(bolt_servers)};

  return {{"rt", Value{std::move(reply)}}};
}
#endif
void SessionHL::RollbackTransaction() {
try {
interpreter_.RollbackTransaction();

View File

@ -55,6 +55,13 @@ class SessionHL final : public memgraph::communication::bolt::Session<memgraph::
const std::string &query, const std::map<std::string, memgraph::communication::bolt::Value> &params,
const std::map<std::string, memgraph::communication::bolt::Value> &extra) override;
#ifdef MG_ENTERPRISE
/// Overrides the base session's Route; only declared when MG_ENTERPRISE is defined.
/// @param routing   routing context received with the request
/// @param bookmarks bookmarks received with the request
/// @param extra     extra fields received with the request
/// @return map of bolt values that forms the reply
auto Route(std::map<std::string, memgraph::communication::bolt::Value> const &routing,
std::vector<memgraph::communication::bolt::Value> const &bookmarks,
std::map<std::string, memgraph::communication::bolt::Value> const &extra)
-> std::map<std::string, memgraph::communication::bolt::Value> override;
#endif
std::map<std::string, memgraph::communication::bolt::Value> Pull(TEncoder *encoder, std::optional<int> n,
std::optional<int> qid) override;

View File

@ -82,8 +82,7 @@ bool Endpoint::IsResolvableAddress(std::string_view address, uint16_t port) {
return status == 0;
}
std::optional<ParsedAddress> Endpoint::ParseSocketOrAddress(std::string_view address,
std::optional<uint16_t> default_port) {
std::optional<Endpoint> Endpoint::ParseSocketOrAddress(std::string_view address, std::optional<uint16_t> default_port) {
auto const parts = utils::SplitView(address, delimiter);
if (parts.size() > 2) {
@ -109,13 +108,13 @@ std::optional<ParsedAddress> Endpoint::ParseSocketOrAddress(std::string_view add
}();
if (GetIpFamily(addr) == IpFamily::NONE) {
if (IsResolvableAddress(addr, *port)) { // NOLINT
return std::pair{addr, *port}; // NOLINT
if (IsResolvableAddress(addr, *port)) { // NOLINT
return Endpoint{std::string(addr), *port}; // NOLINT
}
return std::nullopt;
}
return std::pair{addr, *port}; // NOLINT
return Endpoint{std::string(addr), *port}; // NOLINT
}
auto Endpoint::ValidatePort(std::optional<uint16_t> port) -> bool {
@ -138,4 +137,14 @@ auto Endpoint::ValidatePort(std::optional<uint16_t> port) -> bool {
return true;
}
// Serializes an Endpoint into a JSON object with the keys "address", "port" and "family".
// Any previous content of `j` is replaced.
void to_json(nlohmann::json &j, Endpoint const &config) {
  auto serialized = nlohmann::json::object();
  serialized["address"] = config.address;
  serialized["port"] = config.port;
  serialized["family"] = config.family;
  j = std::move(serialized);
}
// Fills `config` from a JSON object by reading the keys "address", "port" and "family", in that
// order. `at()` is used for every key, so a missing key raises the JSON library's exception.
void from_json(nlohmann::json const &j, Endpoint &config) {
  j.at("address").get_to(config.address);
  j.at("port").get_to(config.port);
  j.at("family").get_to(config.family);
}
} // namespace memgraph::io::network

View File

@ -17,9 +17,9 @@
#include <optional>
#include <string>
namespace memgraph::io::network {
#include "json/json.hpp"
using ParsedAddress = std::pair<std::string_view, uint16_t>;
namespace memgraph::io::network {
struct Endpoint {
static const struct needs_resolving_t {
@ -39,8 +39,8 @@ struct Endpoint {
enum class IpFamily : std::uint8_t { NONE, IP4, IP6 };
static std::optional<ParsedAddress> ParseSocketOrAddress(std::string_view address,
std::optional<uint16_t> default_port = {});
static std::optional<Endpoint> ParseSocketOrAddress(std::string_view address,
std::optional<uint16_t> default_port = {});
std::string SocketAddress() const;
@ -59,4 +59,7 @@ struct Endpoint {
static auto ValidatePort(std::optional<uint16_t> port) -> bool;
};
void to_json(nlohmann::json &j, Endpoint const &config);
void from_json(nlohmann::json const &j, Endpoint &config);
} // namespace memgraph::io::network

View File

@ -429,7 +429,7 @@ int main(int argc, char **argv) {
#ifdef MG_ENTERPRISE
// MAIN or REPLICA instance
if (FLAGS_coordinator_server_port) {
if (FLAGS_management_port) {
memgraph::dbms::CoordinatorHandlers::Register(coordinator_state.GetCoordinatorServer(), replication_handler);
MG_ASSERT(coordinator_state.GetCoordinatorServer().Start(), "Failed to start coordinator server!");
}

View File

@ -328,15 +328,14 @@ class ReplQueryHandler {
const auto repl_mode = convertToReplicationMode(sync_mode);
const auto maybe_ip_and_port =
auto maybe_endpoint =
io::network::Endpoint::ParseSocketOrAddress(socket_address, memgraph::replication::kDefaultReplicationPort);
if (maybe_ip_and_port) {
const auto [ip, port] = *maybe_ip_and_port;
if (maybe_endpoint) {
const auto replication_config =
replication::ReplicationClientConfig{.name = name,
.mode = repl_mode,
.ip_address = std::string(ip),
.port = port,
.ip_address = std::move(maybe_endpoint->address),
.port = maybe_endpoint->port,
.replica_check_frequency = replica_check_frequency,
.ssl = std::nullopt};
@ -408,44 +407,51 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
case RPC_FAILED:
throw QueryRuntimeException(
"Couldn't unregister replica instance because current main instance couldn't unregister replica!");
case LOCK_OPENED:
throw QueryRuntimeException("Couldn't unregister replica because the last action didn't finish successfully!");
case OPEN_LOCK:
throw QueryRuntimeException(
"Couldn't register instance as cluster didn't accept entering unregistration state!");
case SUCCESS:
break;
}
}
void RegisterReplicationInstance(std::string_view coordinator_socket_address,
std::string_view replication_socket_address,
void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server,
std::string_view replication_server,
std::chrono::seconds const &instance_check_frequency,
std::chrono::seconds const &instance_down_timeout,
std::chrono::seconds const &instance_get_uuid_frequency,
std::string_view instance_name, CoordinatorQuery::SyncMode sync_mode) override {
const auto maybe_replication_ip_port = io::network::Endpoint::ParseSocketOrAddress(replication_socket_address);
if (!maybe_replication_ip_port) {
auto const maybe_bolt_server = io::network::Endpoint::ParseSocketOrAddress(bolt_server);
if (!maybe_bolt_server) {
throw QueryRuntimeException("Invalid bolt socket address!");
}
auto const maybe_management_server = io::network::Endpoint::ParseSocketOrAddress(management_server);
if (!maybe_management_server) {
throw QueryRuntimeException("Invalid management socket address!");
}
auto const maybe_replication_server = io::network::Endpoint::ParseSocketOrAddress(replication_server);
if (!maybe_replication_server) {
throw QueryRuntimeException("Invalid replication socket address!");
}
const auto maybe_coordinator_ip_port = io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address);
if (!maybe_replication_ip_port) {
throw QueryRuntimeException("Invalid replication socket address!");
}
const auto [replication_ip, replication_port] = *maybe_replication_ip_port;
const auto [coordinator_server_ip, coordinator_server_port] = *maybe_coordinator_ip_port;
const auto repl_config = coordination::CoordinatorClientConfig::ReplicationClientInfo{
.instance_name = std::string(instance_name),
.replication_mode = convertFromCoordinatorToReplicationMode(sync_mode),
.replication_ip_address = std::string(replication_ip),
.replication_port = replication_port};
auto const repl_config =
coordination::ReplicationClientInfo{.instance_name = std::string(instance_name),
.replication_mode = convertFromCoordinatorToReplicationMode(sync_mode),
.replication_server = *maybe_replication_server};
auto coordinator_client_config =
coordination::CoordinatorClientConfig{.instance_name = std::string(instance_name),
.ip_address = std::string(coordinator_server_ip),
.port = coordinator_server_port,
.instance_health_check_frequency_sec = instance_check_frequency,
.instance_down_timeout_sec = instance_down_timeout,
.instance_get_uuid_frequency_sec = instance_get_uuid_frequency,
.replication_client_info = repl_config,
.ssl = std::nullopt};
coordination::CoordinatorToReplicaConfig{.instance_name = std::string(instance_name),
.mgt_server = *maybe_management_server,
.bolt_server = *maybe_bolt_server,
.replication_client_info = repl_config,
.instance_health_check_frequency_sec = instance_check_frequency,
.instance_down_timeout_sec = instance_down_timeout,
.instance_get_uuid_frequency_sec = instance_get_uuid_frequency,
.ssl = std::nullopt};
auto status = coordinator_handler_.RegisterReplicationInstance(coordinator_client_config);
switch (status) {
@ -468,20 +474,36 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
throw QueryRuntimeException(
"Couldn't register replica instance because setting instance to replica failed! Check logs on replica to "
"find out more info!");
case LOCK_OPENED:
throw QueryRuntimeException(
"Couldn't register replica instance because because the last action didn't finish successfully!");
case OPEN_LOCK:
throw QueryRuntimeException(
"Couldn't register replica instance because cluster didn't accept registration query!");
case SUCCESS:
break;
}
}
auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view raft_socket_address) -> void override {
auto const maybe_ip_and_port = io::network::Endpoint::ParseSocketOrAddress(raft_socket_address);
if (maybe_ip_and_port) {
auto const [ip, port] = *maybe_ip_and_port;
spdlog::info("Adding instance {} with raft socket address {}:{}.", raft_server_id, ip, port);
coordinator_handler_.AddCoordinatorInstance(raft_server_id, port, ip);
} else {
spdlog::error("Invalid raft socket address {}.", raft_socket_address);
/// Parses the coordinator and bolt socket addresses, builds a CoordinatorToCoordinatorConfig
/// from them and passes it to coordinator_handler_.
/// @throw QueryRuntimeException if either address cannot be parsed (the coordinator address is
///        checked first).
auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server,
                            std::string_view coordinator_server) -> void override {
  auto const parsed_coordinator = io::network::Endpoint::ParseSocketOrAddress(coordinator_server);
  if (!parsed_coordinator.has_value()) {
    throw QueryRuntimeException("Invalid coordinator socket address!");
  }

  auto const parsed_bolt = io::network::Endpoint::ParseSocketOrAddress(bolt_server);
  if (!parsed_bolt.has_value()) {
    throw QueryRuntimeException("Invalid bolt socket address!");
  }

  coordinator_handler_.AddCoordinatorInstance(
      coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = coordinator_id,
                                                   .bolt_server = *parsed_bolt,
                                                   .coordinator_server = *parsed_coordinator});
  spdlog::info("Added instance on coordinator server {}", parsed_coordinator->SocketAddress());
}
void SetReplicationInstanceToMain(std::string_view instance_name) override {
@ -503,6 +525,14 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
"Couldn't set replica instance to main! Check coordinator and replica for more logs");
case SWAP_UUID_FAILED:
throw QueryRuntimeException("Couldn't set replica instance to main. Replicas didn't swap uuid of new main.");
case OPEN_LOCK:
throw QueryRuntimeException(
"Couldn't set replica instance to main as cluster didn't accept setting instance state.");
case LOCK_OPENED:
throw QueryRuntimeException(
"Couldn't register replica instance because because the last action didn't finish successfully!");
case ENABLE_WRITING_FAILED:
throw QueryRuntimeException("Instance promoted to MAIN, but couldn't enable writing to instance.");
case SUCCESS:
break;
}
@ -518,7 +548,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler {
#endif
/// returns false if the replication role can't be set
/// @throw QueryRuntimeException if an error ocurred.
/// @throw QueryRuntimeException if an error occurred.
Callback HandleAuthQuery(AuthQuery *auth_query, InterpreterContext *interpreter_context, const Parameters &parameters,
Interpreter &interpreter) {
@ -931,10 +961,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
switch (repl_query->action_) {
case ReplicationQuery::Action::SET_REPLICATION_ROLE: {
#ifdef MG_ENTERPRISE
if (FLAGS_raft_server_id) {
if (FLAGS_coordinator_id) {
throw QueryRuntimeException("Coordinator can't set roles!");
}
if (FLAGS_coordinator_server_port) {
if (FLAGS_management_port) {
throw QueryRuntimeException("Can't set role manually on instance with coordinator server port.");
}
#endif
@ -961,7 +991,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
}
case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: {
#ifdef MG_ENTERPRISE
if (FLAGS_raft_server_id) {
if (FLAGS_coordinator_id) {
throw QueryRuntimeException("Coordinator doesn't have a replication role!");
}
#endif
@ -982,7 +1012,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
}
case ReplicationQuery::Action::REGISTER_REPLICA: {
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port) {
if (FLAGS_management_port) {
throw QueryRuntimeException("Can't register replica manually on instance with coordinator server port.");
}
#endif
@ -1003,7 +1033,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
case ReplicationQuery::Action::DROP_REPLICA: {
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port) {
if (FLAGS_management_port) {
throw QueryRuntimeException("Can't drop replica manually on instance with coordinator server port.");
}
#endif
@ -1018,7 +1048,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
}
case ReplicationQuery::Action::SHOW_REPLICAS: {
#ifdef MG_ENTERPRISE
if (FLAGS_raft_server_id) {
if (FLAGS_coordinator_id) {
throw QueryRuntimeException("Coordinator cannot call SHOW REPLICAS! Use SHOW INSTANCES instead.");
}
#endif
@ -1165,7 +1195,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
Callback callback;
switch (coordinator_query->action_) {
case CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE: {
if (!FLAGS_raft_server_id) {
if (!FLAGS_coordinator_id) {
throw QueryRuntimeException("Only coordinator can add coordinator instance!");
}
@ -1197,8 +1227,9 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
auto coord_server_id = coordinator_query->coordinator_server_id_->Accept(evaluator).ValueInt();
callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coord_server_id,
bolt_server = bolt_server_it->second,
coordinator_server = coordinator_server_it->second]() mutable {
handler.AddCoordinatorInstance(coord_server_id, coordinator_server);
handler.AddCoordinatorInstance(coord_server_id, bolt_server, coordinator_server);
return std::vector<std::vector<TypedValue>>();
};
@ -1208,7 +1239,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
return callback;
}
case CoordinatorQuery::Action::REGISTER_INSTANCE: {
if (!FLAGS_raft_server_id) {
if (!FLAGS_coordinator_id) {
throw QueryRuntimeException("Only coordinator can register coordinator server!");
}
// TODO: MemoryResource for EvaluationContext, it should probably be passed as
@ -1243,15 +1274,15 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
callback.fn = [handler = CoordQueryHandler{*coordinator_state},
instance_health_check_frequency_sec = config.instance_health_check_frequency_sec,
management_server = management_server_it->second,
replication_server = replication_server_it->second, bolt_server = bolt_server_it->second,
bolt_server = bolt_server_it->second, management_server = management_server_it->second,
replication_server = replication_server_it->second,
instance_name = coordinator_query->instance_name_,
instance_down_timeout_sec = config.instance_down_timeout_sec,
instance_get_uuid_frequency_sec = config.instance_get_uuid_frequency_sec,
sync_mode = coordinator_query->sync_mode_]() mutable {
handler.RegisterReplicationInstance(management_server, replication_server, instance_health_check_frequency_sec,
instance_down_timeout_sec, instance_get_uuid_frequency_sec, instance_name,
sync_mode);
handler.RegisterReplicationInstance(bolt_server, management_server, replication_server,
instance_health_check_frequency_sec, instance_down_timeout_sec,
instance_get_uuid_frequency_sec, instance_name, sync_mode);
return std::vector<std::vector<TypedValue>>();
};
@ -1261,7 +1292,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
return callback;
}
case CoordinatorQuery::Action::UNREGISTER_INSTANCE:
if (!FLAGS_raft_server_id) {
if (!FLAGS_coordinator_id) {
throw QueryRuntimeException("Only coordinator can register coordinator server!");
}
callback.fn = [handler = CoordQueryHandler{*coordinator_state},
@ -1276,7 +1307,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
return callback;
case CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN: {
if (!FLAGS_raft_server_id) {
if (!FLAGS_coordinator_id) {
throw QueryRuntimeException("Only coordinator can register coordinator server!");
}
// TODO: MemoryResource for EvaluationContext, it should probably be passed as
@ -1293,7 +1324,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
return callback;
}
case CoordinatorQuery::Action::SHOW_INSTANCES: {
if (!FLAGS_raft_server_id) {
if (!FLAGS_coordinator_id) {
throw QueryRuntimeException("Only coordinator can run SHOW INSTANCES.");
}
@ -4266,6 +4297,28 @@ void Interpreter::RollbackTransaction() {
ResetInterpreter();
}
#ifdef MG_ENTERPRISE
/// Builds the reply to a client routing request.
///
/// When this instance runs as a coordinator (FLAGS_coordinator_id is set) the
/// routing table is obtained from the coordinator state. Otherwise the reply
/// contains a single entry made of the "address" value sent by the client,
/// tagged "WRITE" if this instance is currently MAIN and "READ" if it is not.
///
/// @param routing routing context received from the client.
/// @throw QueryException if this is not a coordinator and `routing` has no "address" key.
auto Interpreter::Route(std::map<std::string, std::string> const &routing) -> RouteResult {
  // TODO: (andi) Test
  if (FLAGS_coordinator_id) {
    return RouteResult{.servers = interpreter_context_->coordinator_state_->GetRoutingTable(routing)};
  }

  auto const address_it = routing.find("address");
  if (address_it == routing.end()) {
    throw QueryException("Routing table must contain address field.");
  }

  // A data instance only ever advertises itself, under the role it currently holds.
  auto const *role = interpreter_context_->repl_state->IsMain() ? "WRITE" : "READ";
  auto response = RouteResult{};
  response.servers.emplace_back(std::vector<std::string>{address_it->second}, role);
  return response;
}
#endif
#if MG_ENTERPRISE
// Before Prepare or during Prepare, but single-threaded.
// TODO: Is there any cleanup?
@ -4383,7 +4436,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
}
#ifdef MG_ENTERPRISE
if (FLAGS_raft_server_id && !utils::Downcast<CoordinatorQuery>(parsed_query.query) &&
if (FLAGS_coordinator_id && !utils::Downcast<CoordinatorQuery>(parsed_query.query) &&
!utils::Downcast<SettingQuery>(parsed_query.query)) {
throw QueryRuntimeException("Coordinator can run only coordinator queries!");
}
@ -4514,7 +4567,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
throw QueryException("Write query forbidden on the replica!");
}
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port && !interpreter_context_->repl_state->IsMainWriteable()) {
if (FLAGS_management_port && !interpreter_context_->repl_state->IsMainWriteable()) {
query_execution = nullptr;
throw QueryException(
"Write query forbidden on the main! Coordinator needs to enable writing on main by sending RPC message.");

View File

@ -143,8 +143,8 @@ class CoordinatorQueryHandler {
};
/// @throw QueryRuntimeException if an error occurred.
virtual void RegisterReplicationInstance(std::string_view coordinator_socket_address,
std::string_view replication_socket_address,
virtual void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server,
std::string_view replication_server,
std::chrono::seconds const &instance_health_check_frequency,
std::chrono::seconds const &instance_down_timeout,
std::chrono::seconds const &instance_get_uuid_frequency,
@ -160,7 +160,8 @@ class CoordinatorQueryHandler {
virtual std::vector<coordination::InstanceStatus> ShowInstances() const = 0;
/// @throw QueryRuntimeException if an error occurred.
virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view coordinator_socket_address) -> void = 0;
virtual auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server,
std::string_view coordinator_server) -> void = 0;
};
#endif
@ -247,6 +248,14 @@ class Interpreter final {
std::optional<std::string> db;
};
#ifdef MG_ENTERPRISE
/// Value returned by Interpreter::Route.
struct RouteResult {
// NOTE(review): presumably the routing table time-to-live in seconds — confirm against the code that serializes it.
int ttl{300};
std::string db{}; // Currently not used since we don't have any specific replication groups etc.
// Routing table entries; filled by Route either from the coordinator state or with this instance's own address.
coordination::RoutingTable servers{};
};
#endif
std::shared_ptr<QueryUserOrRole> user_or_role_{};
bool in_explicit_transaction_{false};
CurrentDB current_db_;
@ -272,6 +281,10 @@ class Interpreter final {
const std::map<std::string, storage::PropertyValue> &params,
QueryExtras const &extras);
#ifdef MG_ENTERPRISE
auto Route(std::map<std::string, std::string> const &routing) -> RouteResult;
#endif
/**
* Execute the last prepared query and stream *all* of the results into the
* given stream.

View File

@ -56,7 +56,7 @@ ReplicationState::ReplicationState(std::optional<std::filesystem::path> durabili
}
auto replication_data = std::move(fetched_replication_data).GetValue();
#ifdef MG_ENTERPRISE
if (FLAGS_coordinator_server_port && std::holds_alternative<RoleReplicaData>(replication_data)) {
if (FLAGS_management_port && std::holds_alternative<RoleReplicaData>(replication_data)) {
spdlog::trace("Restarted replication uuid for replica");
std::get<RoleReplicaData>(replication_data).uuid_.reset();
}
@ -254,7 +254,8 @@ bool ReplicationState::SetReplicationRoleMain(const utils::UUID &main_uuid) {
return false;
}
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}, true, main_uuid};
// By default, writing on MAIN is disabled until cluster is in healthy state
replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}, /*is_writing enabled*/ false, main_uuid};
return true;
}

View File

@ -213,7 +213,7 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler {
// We force sync replicas in other situation
if (state == storage::replication::ReplicaState::DIVERGED_FROM_MAIN) {
#ifdef MG_ENTERPRISE
return FLAGS_coordinator_server_port != 0;
return FLAGS_management_port != 0;
#else
return false;
#endif

View File

@ -132,7 +132,7 @@ struct Config {
inline auto ReplicationStateRootPath(memgraph::storage::Config const &config) -> std::optional<std::filesystem::path> {
if (!config.durability.restore_replication_state_on_startup
#ifdef MG_ENTERPRISE
&& !FLAGS_coordinator_server_port
&& !FLAGS_management_port
#endif
) {
spdlog::warn(

View File

@ -42,7 +42,7 @@ DurableMetadata::DurableMetadata(const Config &config)
DurableMetadata::DurableMetadata(DurableMetadata &&other) noexcept
: durability_kvstore_(std::move(other.durability_kvstore_)), config_(std::move(other.config_)) {}
void DurableMetadata::SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) {
void DurableMetadata::UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) {
durability_kvstore_.Put(kLastTransactionStartTimeStamp, std::to_string(timestamp));
durability_kvstore_.Put(kVertexCountDescr, std::to_string(vertex_count));
durability_kvstore_.Put(kEdgeDountDescr, std::to_string(edge_count));

View File

@ -41,7 +41,7 @@ class DurableMetadata {
std::optional<std::vector<std::string>> LoadExistenceConstraintInfoIfExists() const;
std::optional<std::vector<std::string>> LoadUniqueConstraintInfoIfExists() const;
void SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count);
void UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count);
bool PersistLabelIndexCreation(LabelId label);

View File

@ -274,8 +274,8 @@ DiskStorage::DiskStorage(Config config)
}
DiskStorage::~DiskStorage() {
durable_metadata_.SaveBeforeClosingDB(timestamp_, vertex_count_.load(std::memory_order_acquire),
edge_count_.load(std::memory_order_acquire));
durable_metadata_.UpdateMetaData(timestamp_, vertex_count_.load(std::memory_order_acquire),
edge_count_.load(std::memory_order_acquire));
logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->vertex_chandle));
logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->edge_chandle));
logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->out_edges_chandle));
@ -1786,7 +1786,8 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
disk_storage->indices_.text_index_.Commit();
}
disk_storage->durable_metadata_.UpdateMetaData(disk_storage->timestamp_, disk_storage->vertex_count_,
disk_storage->edge_count_);
is_transaction_active_ = false;
return {};

View File

@ -301,6 +301,8 @@ class DiskStorage final : public Storage {
EdgeImportMode GetEdgeImportMode() const;
DurableMetadata *GetDurableMetadata() { return &durable_metadata_; }
private:
void LoadPersistingMetadataInfo();

View File

@ -92,7 +92,7 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce
client_name, client_name, client_name);
};
#ifdef MG_ENTERPRISE
if (!FLAGS_coordinator_server_port) {
if (!FLAGS_management_port) {
log_error();
return;
}

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -10,7 +10,6 @@
// licenses/APL.txt.
#include "utils/thread_pool.hpp"
namespace memgraph::utils {
ThreadPool::ThreadPool(const size_t pool_size) {

View File

@ -13,12 +13,13 @@ func handle_if_error(err error) {
}
func main() {
dbUri := "bolt://localhost:7687"
driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", ""))
if err != nil {
log.Fatal("An error occurred opening conn: %s", err)
}
defer driver.Close()
fmt.Println("Started running docs_quick_start.go test")
dbUri := "bolt://localhost:7687"
driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", ""))
if err != nil {
log.Fatal("An error occurred opening conn: %s", err)
}
defer driver.Close()
session := driver.NewSession(neo4j.SessionConfig{})
defer session.Close()
@ -33,7 +34,7 @@ func main() {
_,err = session.WriteTransaction(testAll)
handle_if_error(err)
fmt.Println("All ok!")
fmt.Println("doc_quick_start.go test finished successfully.")
}
func clearDatabase(tx neo4j.Transaction) (interface{}, error) {
@ -75,15 +76,14 @@ func testAll(tx neo4j.Transaction) (interface{}, error) {
handle_if_error(err)
age, err := neo4j.GetProperty[int64](node_value, "age")
handle_if_error(err)
if label != "Person" && name != "Alice" && age != 22 {
return nil, fmt.Errorf("Data doesn't match.")
}
fmt.Println("Label", label)
fmt.Println("name", name)
fmt.Println("age", age)
return result.Consume()
}

View File

@ -3,6 +3,6 @@ module bolt-test
go 1.18
require (
github.com/neo4j/neo4j-go-driver/v5 v5.13.0 // indirect
github.com/neo4j/neo4j-go-driver/v5 v5.18.0 // indirect
golang.org/dl v0.0.0-20230502172222-5216546bad51 // indirect
)

View File

@ -8,5 +8,7 @@ github.com/neo4j/neo4j-go-driver/v5 v5.9.0 h1:TYxT0RSiwnvVFia90V7TLnRXv8HkdQQ6rT
github.com/neo4j/neo4j-go-driver/v5 v5.9.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k=
github.com/neo4j/neo4j-go-driver/v5 v5.13.0 h1:NmyUxh4LYTdcJdI6EnazHyUKu1f0/BPiHCYUZUZIGQw=
github.com/neo4j/neo4j-go-driver/v5 v5.13.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k=
github.com/neo4j/neo4j-go-driver/v5 v5.18.0 h1:3dmYsCYt/Fc/bPeSyGRGGfn/T6h06/OmHm72OFQKa3c=
github.com/neo4j/neo4j-go-driver/v5 v5.18.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k=
golang.org/dl v0.0.0-20230502172222-5216546bad51 h1:Bmo/kmR2hzyhGt3jjtl1ghkCqa5LINbB9D3QTkiLJIY=
golang.org/dl v0.0.0-20230502172222-5216546bad51/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ=

View File

@ -0,0 +1,51 @@
package main
import (
"fmt"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
)
// read_messages connects to uri with empty credentials, runs one read
// transaction that fetches at most one Greeting message, and prints the value
// returned by the transaction. Any driver or query error panics.
func read_messages(uri string) {
	const username, password = "", ""

	// Connect to Memgraph
	driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, ""))
	if err != nil {
		panic(err)
	}
	defer driver.Close()

	// Use AccessModeRead for read transactions
	session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead})
	defer session.Close()

	// Transaction body: first record's first column, or nil plus the
	// result's error when no record is available.
	fetchGreeting := func(tx neo4j.Transaction) (interface{}, error) {
		records, runErr := tx.Run("MATCH (n:Greeting) RETURN n.message AS message LIMIT 1", nil)
		if runErr != nil {
			return nil, runErr
		}
		if !records.Next() {
			return nil, records.Err()
		}
		return records.Record().Values[0], nil
	}

	greeting, err := session.ReadTransaction(fetchGreeting)
	if err != nil {
		panic(err)
	}

	fmt.Println(greeting)
}
// Test checks that you can use bolt+routing for connecting to main and coordinators for reading.
// Each coordinator is contacted in turn; read_messages panics on the first failure.
func main() {
	// The start/finish messages name this file so CI logs can be matched to the test.
	fmt.Println("Started running read_routing.go test")
	read_messages("neo4j://localhost:7690") // coordinator_1
	read_messages("neo4j://localhost:7691") // coordinator_2
	read_messages("neo4j://localhost:7692") // coordinator_3
	fmt.Println("Successfully finished running read_routing.go test")
}

View File

@ -18,4 +18,3 @@ done
go get github.com/neo4j/neo4j-go-driver/v5
go run docs_quick_start.go
# go run parallel_edge_import.go

View File

@ -0,0 +1,21 @@
#!/bin/bash -e

# Runs the Go routing tests (write first, then read).

GO_VERSION="1.18.9"
GO_VERSION_DIR="/opt/go$GO_VERSION"

# Use the toolchain under /opt when it is present; otherwise rely on PATH.
if [ -f "$GO_VERSION_DIR/go/bin/go" ]; then
    export GOROOT="$GO_VERSION_DIR/go"
    export GOPATH="$HOME/go$GO_VERSION"
    export PATH="$GO_VERSION_DIR/go/bin:$PATH"
fi

# check if go is installed
which go >/dev/null || { echo "Please install go!"; exit 1; }

go get github.com/neo4j/neo4j-go-driver/v5
go run write_routing.go
go run read_routing.go

View File

@ -0,0 +1,51 @@
package main
import (
"fmt"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
)
// create_message connects to uri with empty credentials, runs one write
// transaction that creates a Greeting node, and prints the message returned by
// the query. Any driver or query error panics.
func create_message(uri string) {
	const username, password = "", ""

	// Connect to Memgraph
	driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, ""))
	if err != nil {
		panic(err)
	}
	defer driver.Close()

	session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeWrite})
	defer session.Close()

	// Transaction body: first record's first column, or nil plus the
	// result's error when no record is available.
	storeGreeting := func(tx neo4j.Transaction) (interface{}, error) {
		params := map[string]interface{}{
			"message": "Hello, World!",
		}
		records, runErr := tx.Run("CREATE (n:Greeting) SET n.message = $message RETURN n.message", params)
		if runErr != nil {
			return nil, runErr
		}
		if !records.Next() {
			return nil, records.Err()
		}
		return records.Record().Values[0], nil
	}

	greeting, err := session.WriteTransaction(storeGreeting)
	if err != nil {
		panic(err)
	}

	fmt.Println(greeting)
}
// Test checks that you can use bolt+routing for connecting to main and coordinators for writing.
// Each coordinator is contacted in turn; create_message panics on the first failure.
func main() {
	// The start/finish messages name this file so CI logs can be matched to the test.
	fmt.Println("Started running write_routing.go test")
	create_message("neo4j://localhost:7690") // coordinator_1
	create_message("neo4j://localhost:7691") // coordinator_2
	create_message("neo4j://localhost:7692") // coordinator_3
	fmt.Println("Successfully finished running write_routing.go test")
}

View File

@ -104,6 +104,45 @@
<goal>single</goal>
</goals>
</execution>
<execution>
<id>build-e</id>
<configuration>
<archive>
<manifest>
<mainClass>memgraph.WriteRouting</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<appendAssemblyId>false</appendAssemblyId>
<finalName>WriteRouting</finalName>
</configuration>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
<execution>
<id>build-f</id>
<configuration>
<archive>
<manifest>
<mainClass>memgraph.ReadRouting</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<appendAssemblyId>false</appendAssemblyId>
<finalName>ReadRouting</finalName>
</configuration>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>

View File

@ -36,4 +36,3 @@ mvn clean package
java -jar target/DocsHowToQuery.jar
java -jar target/MaxQueryLength.jar
java -jar target/Transactions.jar
# java -jar target/ParallelEdgeImport.jar

View File

@ -0,0 +1,37 @@
#!/bin/bash -e

# Builds the Java driver tests with Maven and runs the routing tests
# (write first, then read).

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"

# Pick a Java 17 installation when one is found; the later entry wins if both exist.
for jdk_home in "/usr/lib/jvm/java-17-oracle" "/usr/lib/jvm/java-17-openjdk-amd64"; do
    if [ -d "$jdk_home" ]; then
        export JAVA_HOME="$jdk_home"
    fi
done

if [ -d "/opt/apache-maven-3.9.3" ]; then
    export M2_HOME="/opt/apache-maven-3.9.3"
fi
export PATH="$JAVA_HOME/bin:$M2_HOME/bin:$PATH"

# Both tools must be resolvable before anything is built.
for i in java mvn; do
    which $i >/dev/null || { echo "Please install $i!"; exit 1; }
done

# Major version of the `java` found on PATH.
JAVA_VER=$(java -version 2>&1 >/dev/null | grep 'version' | cut -d "\"" -f2 | cut -d "." -f1)
if [ $JAVA_VER -ne 17 ]
then
    echo "neo4j-java-driver v5.8 requires Java 17. Please install it!"
    exit 1
fi

# CentOS 7 doesn't have Java version that supports var keyword
# NOTE(review): nothing sourced from util.sh is referenced below in this script — confirm it is still needed.
source ../../../../environment/util.sh

mvn clean package

java -jar target/WriteRouting.jar
java -jar target/ReadRouting.jar

View File

@ -0,0 +1,35 @@
package memgraph;
import static org.neo4j.driver.Values.parameters;
import java.util.*;
import java.util.concurrent.TimeUnit;
import org.neo4j.driver.AuthTokens;
import org.neo4j.driver.Driver;
import org.neo4j.driver.GraphDatabase;
import org.neo4j.driver.Session;
import org.neo4j.driver.Transaction;
/**
 * Driver test: runs a read transaction through each coordinator using the
 * {@code neo4j://} (routing) scheme.
 */
public class ReadRouting {
    /**
     * Opens a driver for {@code uri}, runs one read transaction and closes
     * both the session and the driver afterwards.
     *
     * @param uri routing URI of the instance to connect to
     */
    private void readMessage(String uri) {
        // try-with-resources closes the session and then the driver even when the
        // transaction throws; previously every call leaked a Driver.
        try (Driver driver = GraphDatabase.driver(uri, AuthTokens.basic("", ""));
             Session session = driver.session()) {
            session.readTransaction(tx -> {
                tx.run("MATCH (n:Greeting) RETURN n.message AS message");
                System.out.println("Read txn passed!");
                return "OK";
            });
        }
    }

    /** Runs the read test against the three coordinators in order. */
    public static void main(String... args) {
        System.out.println("Started running ReadRoutingTest...");
        ReadRouting greeter = new ReadRouting();
        greeter.readMessage("neo4j://localhost:7690"); // coordinator_1
        greeter.readMessage("neo4j://localhost:7691"); // coordinator_2
        greeter.readMessage("neo4j://localhost:7692"); // coordinator_3
        System.out.println("All good!");
    }
}

View File

@ -0,0 +1,44 @@
package memgraph;
import static org.neo4j.driver.Values.parameters;
import java.util.*;
import java.util.concurrent.TimeUnit;
import org.neo4j.driver.AuthTokens;
import org.neo4j.driver.Config;
import org.neo4j.driver.Driver;
import org.neo4j.driver.GraphDatabase;
import org.neo4j.driver.Result;
import org.neo4j.driver.Session;
import org.neo4j.driver.Transaction;
import org.neo4j.driver.TransactionWork;
import org.neo4j.driver.exceptions.ClientException;
import org.neo4j.driver.exceptions.TransientException;
/**
 * Driver test: runs a write transaction through each coordinator using the
 * {@code neo4j://} (routing) scheme.
 */
public class WriteRouting {
    /**
     * Opens a driver for {@code uri}, creates one Greeting node in a write
     * transaction, prints the returned message and closes both the session
     * and the driver afterwards.
     *
     * @param uri routing URI of the instance to connect to
     * @throws RuntimeException if the query returns no record
     */
    private void createMessage(String uri) {
        // try-with-resources closes the session and then the driver even when the
        // transaction throws; previously every call leaked a Driver.
        try (Driver driver = GraphDatabase.driver(uri, AuthTokens.basic("", ""));
             Session session = driver.session()) {
            String greeting = session.writeTransaction(tx -> {
                var result = tx.run("CREATE (n:Greeting) SET n.message = $message RETURN n.message",
                                    parameters("message", "Hello, World!"));
                if (result.hasNext()) {
                    return result.single().get(0).asString();
                }
                throw new RuntimeException("No result found.");
            });
            System.out.println(greeting);
        }
    }

    /** Runs the write test against the three coordinators in order. */
    public static void main(String... args) {
        System.out.println("Started running WriteRoutingTest...");
        WriteRouting greeter = new WriteRouting();
        greeter.createMessage("neo4j://localhost:7690"); // coordinator_1
        greeter.createMessage("neo4j://localhost:7691"); // coordinator_2
        greeter.createMessage("neo4j://localhost:7692"); // coordinator_3
        System.out.println("All good!");
    }
}

View File

@ -0,0 +1,59 @@
const neo4j = require('neo4j-driver');
/**
 * Terminates the process with a failing exit status.
 *
 * No `session` or `driver` binding exists at module scope (both are local to
 * Neo4jService), so touching them here would throw a ReferenceError before
 * the exit call is reached and the failure would be lost.
 */
function die() {
  process.exit(1);
}
/**
 * Creates a driver for `uri` (empty credentials) and returns the two
 * operations the test needs.
 *
 * @param {string} uri connection URI
 * @returns {{readGreeting: function(): Promise<void>, close: function(): Promise<void>}}
 */
function Neo4jService(uri) {
  const driver = neo4j.driver(uri, neo4j.auth.basic("", ""));

  // Runs one read transaction in a READ session; the session is always closed.
  const readGreeting = async () => {
    const session = driver.session({ defaultAccessMode: neo4j.session.READ });
    try {
      await session.readTransaction(tx =>
        tx.run('MATCH (n:Greeting) RETURN n.message AS message')
      );
      console.log("Read txn finished");
    } finally {
      await session.close();
    }
  };

  // Closes the underlying driver.
  const close = async () => {
    await driver.close();
  };

  return { readGreeting, close };
}
/**
 * Runs the read test against a single URI.
 *
 * @param {string} uri connection URI
 * @returns {Promise<void>} rejects with whatever the read transaction rejects with
 */
async function readGreetingsFromUri(uri) {
  const service = Neo4jService(uri);
  try {
    await service.readGreeting();
  } finally {
    // Close the driver on failure too, so a failed read does not leak it.
    await service.close();
  }
}
/**
 * Runs the read test against every coordinator in order and stops at the
 * first failure.
 */
async function main() {
  console.log("Started reading route");
  const uris = [
    'neo4j://localhost:7690',
    'neo4j://localhost:7691',
    'neo4j://localhost:7692'
  ];

  try {
    for (const uri of uris) {
      await readGreetingsFromUri(uri);
    }
  } catch (error) {
    console.error('An error occurred:', error);
    die();
  }
  console.log("Finished reading route");
}

// Any rejection that escapes main() must fail the script: logging alone would
// let the process finish with exit status 0 and the test runner would see success.
main().catch(error => {
  console.error(error);
  process.exit(1);
});

View File

@ -15,4 +15,3 @@ fi
node docs_how_to_query.js
node max_query_length.js
# node parallel_edge_import.js

View File

@ -0,0 +1,17 @@
#!/bin/bash -e

# Runs the Node.js routing tests (write first, then read).

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"

which node >/dev/null || { echo "Please install nodejs!"; exit 1; }

# Install the driver only when no node_modules directory exists yet.
if [ ! -d node_modules ]; then
    # Driver generated with: `npm install neo4j-driver`
    npm install --no-package-lock --no-save neo4j-driver@5.8.0
fi

node write_routing.js
node read_routing.js

View File

@ -0,0 +1,59 @@
const neo4j = require('neo4j-driver');
/**
 * Terminates the process with a failing exit status.
 *
 * No `session` or `driver` binding exists at module scope (both are local to
 * Neo4jService), so touching them here would throw a ReferenceError before
 * the exit call is reached and the failure would be lost.
 */
function die() {
  process.exit(1);
}
/**
 * Creates a driver for `uri` (empty credentials) and returns the two
 * operations the test needs.
 *
 * @param {string} uri connection URI
 * @returns {{createGreeting: function(): Promise<void>, close: function(): Promise<void>}}
 */
function Neo4jService(uri) {
  const driver = neo4j.driver(uri, neo4j.auth.basic("", ""));

  // Creates one Greeting node in a WRITE session; the session is always closed.
  const createGreeting = async () => {
    const session = driver.session({ defaultAccessMode: neo4j.session.WRITE });
    try {
      await session.writeTransaction(tx =>
        tx.run('CREATE (n:Greeting {message: "Hello NodeJs"}) RETURN n.message AS message')
      );
      console.log("Write txn finished");
    } finally {
      await session.close();
    }
  };

  // Closes the underlying driver.
  const close = async () => {
    await driver.close();
  };

  return { createGreeting, close };
}
/**
 * Runs the write test against a single URI.
 *
 * @param {string} uri connection URI
 * @returns {Promise<void>} rejects with whatever the write transaction rejects with
 */
async function createGreetingsFromUri(uri) {
  const service = Neo4jService(uri);
  try {
    await service.createGreeting();
  } finally {
    // Close the driver on failure too, so a failed write does not leak it.
    await service.close();
  }
}
/**
 * Runs the write test against every coordinator in order and stops at the
 * first failure.
 */
async function main() {
  console.log("Started writing route");
  const uris = [
    'neo4j://localhost:7690',
    'neo4j://localhost:7691',
    'neo4j://localhost:7692'
  ];

  try {
    for (const uri of uris) {
      await createGreetingsFromUri(uri);
    }
  } catch (error) {
    console.error('An error occurred:', error);
    die();
  }
  console.log("Finished writing route");
}

// Any rejection that escapes main() must fail the script: logging alone would
// let the process finish with exit status 0 and the test runner would see success.
main().catch(error => {
  console.error(error);
  process.exit(1);
});

View File

@ -0,0 +1,41 @@
from neo4j import GraphDatabase
class Neo4jService:
    """Small wrapper around a driver, used by the read-routing test."""

    def __init__(self, uri, user="", password=""):
        """Create a driver for ``uri`` authenticated with ``(user, password)``."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def read_greeting(self):
        """Run the Greeting query in one managed read transaction and report success."""
        with self.driver.session() as read_session:
            read_session.execute_read(self._match_greetings)
            print("Read txn passed!")

    @staticmethod
    def _match_greetings(tx):
        # Transaction function: only issues the query, the result is not inspected.
        tx.run("MATCH (n:Greeting) RETURN n.message AS message")
def read_greetings_from_uri(uri):
    """Run the read test against a single URI.

    The driver is closed even when the read raises, so a failing instance does
    not leak the connection; the original exception still propagates.
    """
    service = Neo4jService(uri)
    try:
        service.read_greeting()
    finally:
        service.close()
def main():
    """Run the read test against every coordinator; exit with -1 on the first error."""
    print("Started reading route")
    coordinator_uris = ("neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692")
    try:
        for coordinator_uri in coordinator_uris:
            read_greetings_from_uri(coordinator_uri)
    except Exception as error:
        print(f"An error occurred: {error}")
        exit(-1)
    else:
        print("Finished reading route")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,25 @@
#!/bin/bash -e

# Runs the Python routing tests (write first, then read) inside a virtualenv.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"

# system check
which virtualenv >/dev/null || { echo "Please install virtualenv!"; exit 1; }

# setup virtual environment (only when the ve3 directory does not exist yet)
if [ ! -d "ve3" ]; then
    virtualenv -p python3 ve3 || exit 1
    source ve3/bin/activate
    python3 -m pip install neo4j==5.8.0 || exit 1
    deactivate
fi

# activate virtualenv
source ve3/bin/activate

# execute test
for test_script in write_routing.py read_routing.py; do
    python3 $test_script || exit 1
done

View File

@ -0,0 +1,41 @@
from neo4j import GraphDatabase
class Neo4jService:
    """Small wrapper around a driver, used by the write-routing test."""

    def __init__(self, uri, user="", password=""):
        """Create a driver for ``uri`` authenticated with ``(user, password)``."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def create_greeting(self):
        """Create one Greeting node in a managed write transaction and report success."""
        with self.driver.session() as write_session:
            write_session.execute_write(self._create_and_return_greeting)
            print("Write txn passed!")

    @staticmethod
    def _create_and_return_greeting(tx):
        # Transaction function: only issues the query, the result is not inspected.
        tx.run("CREATE (n:Greeting {message: 'Hello from Python'}) RETURN n.message AS message")
def create_greetings_from_uri(uri):
    """Run the write test against a single URI.

    The driver is closed even when the write raises, so a failing instance does
    not leak the connection; the original exception still propagates.
    """
    service = Neo4jService(uri)
    try:
        service.create_greeting()
    finally:
        service.close()
def main():
    """Run the write test against every coordinator; exit with -1 on the first error."""
    print("Started writing route")
    coordinator_uris = ("neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692")
    try:
        for coordinator_uri in coordinator_uris:
            create_greetings_from_uri(coordinator_uri)
    except Exception as error:
        print(f"An error occurred: {error}")
        exit(-1)
    else:
        print("Finished writing route")


if __name__ == "__main__":
    main()

203
tests/drivers/run_cluster.sh Executable file
View File

@ -0,0 +1,203 @@
#!/bin/bash
# Wrappers around the pushd/popd builtins that discard their standard output.
pushd () {
    command pushd "$@" > /dev/null
}

popd () {
    command popd "$@" > /dev/null
}
# Blocks until something accepts TCP connections on 127.0.0.1:$1, polling every
# 0.1 s, then waits one more second. There is no upper bound on the wait.
function wait_for_server {
    port=$1
    until nc -z -w 1 127.0.0.1 $port; do
        sleep 0.1
    done
    sleep 1
}
# Work from the directory that contains this script.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"

# create a temporary directory (any directory left by a previous run is removed first).
tmpdir=/tmp/memgraph_drivers
[ -d $tmpdir ] && rm -rf $tmpdir
mkdir -p $tmpdir

# find memgraph binaries.
binary_dir="$DIR/../../build"
# Launches one memgraph process in the background and blocks until its Bolt
# port accepts connections.
#   $1     Bolt port
#   $2     data directory
#   $3     log file
#   $4...  role-specific flags, placed between --also-log-to-stderr and
#          --experimental-enabled exactly as in a hand-written invocation
# The PID of the launched process is left in $started_pid.
start_memgraph() {
    local bolt_port=$1
    local data_dir=$2
    local log_file=$3
    shift 3
    $binary_dir/memgraph \
        --bolt-port=$bolt_port \
        --data-directory=$data_dir \
        --query-execution-timeout-sec=5 \
        --bolt-session-inactivity-timeout=10 \
        --bolt-server-name-for-init="Neo4j/1.1" \
        --bolt-cert-file="" \
        --log-file=$log_file \
        --also-log-to-stderr \
        "$@" \
        --experimental-enabled=high-availability \
        --log-level ERROR &
    started_pid=$!
    wait_for_server $bolt_port
}

# Start instance_1
start_memgraph 7687 $tmpdir/instance_1/ $tmpdir/logs/instance1.log --management-port=10011
pid_instance_1=$started_pid

# Start instance_2
start_memgraph 7688 $tmpdir/instance_2 $tmpdir/logs/instance2.log --management-port=10012
pid_instance_2=$started_pid

# Start instance_3
start_memgraph 7689 $tmpdir/instance_3 $tmpdir/logs/instance3.log --management-port=10013
pid_instance_3=$started_pid

# Start coordinator_1
start_memgraph 7690 $tmpdir/coordinator_1 $tmpdir/logs/coordinator1.log --coordinator-id=1 --coordinator-port=10111
pid_coordinator_1=$started_pid

# Start coordinator_2
start_memgraph 7691 $tmpdir/coordinator_2 $tmpdir/logs/coordinator2.log --coordinator-id=2 --coordinator-port=10112
pid_coordinator_2=$started_pid

# Start coordinator_3
start_memgraph 7692 $tmpdir/coordinator_3 $tmpdir/logs/coordinator3.log --coordinator-id=3 --coordinator-port=10113
pid_coordinator_3=$started_pid
# Sends one query to the coordinator listening on Bolt port 7690.
run_on_coordinator_1() {
    echo "$1" | $binary_dir/bin/mgconsole --port 7690
}

sleep 5

# Join the other two coordinators, register the three data instances and
# promote instance_1 to MAIN.
run_on_coordinator_1 'ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "127.0.0.1:7691", "coordinator_server": "127.0.0.1:10112"};'
run_on_coordinator_1 'ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "127.0.0.1:7692", "coordinator_server": "127.0.0.1:10113"};'
run_on_coordinator_1 'REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "127.0.0.1:7687", "management_server": "127.0.0.1:10011", "replication_server": "127.0.0.1:10001"};'
run_on_coordinator_1 'REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "127.0.0.1:7688", "management_server": "127.0.0.1:10012", "replication_server": "127.0.0.1:10002"};'
run_on_coordinator_1 'REGISTER INSTANCE instance_3 WITH CONFIG {"bolt_server": "127.0.0.1:7689", "management_server": "127.0.0.1:10013", "replication_server": "127.0.0.1:10003"};'
run_on_coordinator_1 'SET INSTANCE instance_1 TO MAIN;'
# Run run_cluster_tests.sh in every <language>/<version> directory that has one.
# Stops at the first failing suite; its exit status is kept in $code_test so the
# end of the script can report it after the cluster has been shut down.
code_test=0
for lang in *; do
    if [ ! -d "$lang" ]; then continue; fi
    pushd "$lang"
    echo "Running tests for language: $lang"
    for version in *; do
        if [ ! -d "$version" ]; then continue; fi
        pushd "$version"
        if [ -f "run_cluster_tests.sh" ]; then
            echo "Running version: $version"
            ./run_cluster_tests.sh
            code_test=$?
            if [ $code_test -ne 0 ]; then
                echo "FAILED: $lang-$version"
                # Leave the version directory before breaking so the directory
                # stack stays balanced.
                popd
                break
            fi
        fi
        popd
    done;
    popd
    # `break` above only leaves the inner loop; stop the outer one as well so a
    # later suite cannot overwrite the failing status.
    if [ $code_test -ne 0 ]; then break; fi
done
# Sends the default kill signal to the process with PID $1, waits for it and
# checks the status reported by `wait`. A non-zero status terminates the whole
# script with that status.
stop_process() {
    local pid=$1

    kill $pid
    wait $pid
    local exit_code=$?

    if [ $exit_code -eq 0 ]; then
        echo "Process with PID $pid terminated successfully."
        return
    fi

    echo "The process with PID $pid didn't terminate properly!"
    exit $exit_code
}
# Shut the cluster down: coordinators first, then the data instances.
for target in \
    "coordinator1:$pid_coordinator_1" \
    "coordinator2:$pid_coordinator_2" \
    "coordinator3:$pid_coordinator_3" \
    "instance1:$pid_instance_1" \
    "instance2:$pid_instance_2" \
    "instance3:$pid_instance_3"; do
    echo "Stopping ${target%%:*}"
    stop_process ${target##*:}
done

# Check test exit code.
if [ $code_test -ne 0 ]; then
    echo "One of the tests failed!"
    exit $code_test
fi

# Temporary directory cleanup.
[ -d $tmpdir ] && rm -rf $tmpdir

View File

@ -59,9 +59,9 @@ startup_config_dict = {
"Time in seconds after which inactive Bolt sessions will be closed.",
),
"cartesian_product_enabled": ("true", "true", "Enable cartesian product expansion."),
"coordinator_server_port": ("0", "0", "Port on which coordinator servers will be started."),
"raft_server_port": ("0", "0", "Port on which raft servers will be started."),
"raft_server_id": ("0", "0", "Unique ID of the raft server."),
"management_port": ("0", "0", "Port on which coordinator servers will be started."),
"coordinator_port": ("0", "0", "Port on which raft servers will be started."),
"coordinator_id": ("0", "0", "Unique ID of the raft server."),
"instance_down_timeout_sec": ("5", "5", "Time duration after which an instance is considered down."),
"instance_health_check_frequency_sec": ("1", "1", "The time duration between two health checks/pings."),
"instance_get_uuid_frequency_sec": ("10", "10", "The time duration between two instance uuid checks."),

View File

@ -30,14 +30,3 @@ def safe_execute(function, *args):
function(*args)
except:
pass
# NOTE: Repeated execution because it can fail if Raft server is not up
def add_coordinator(cursor, query):
for _ in range(10):
try:
execute_and_fetch_all(cursor, query)
return True
except Exception:
pass
return False

View File

@ -16,7 +16,7 @@ import tempfile
import interactive_mg_runner
import pytest
from common import add_coordinator, connect, execute_and_fetch_all, safe_execute
from common import connect, execute_and_fetch_all, safe_execute
from mg_utils import mg_sleep_and_assert
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -50,7 +50,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -64,7 +64,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -77,8 +77,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"setup_queries": [],
@ -89,8 +89,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"setup_queries": [],
@ -101,8 +101,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
],
"log_file": "coordinator3.log",
"setup_queries": [],
@ -110,147 +110,150 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
}
def test_register_repl_instances_then_coordinators():
    """Register the data instances first, then add coordinators 1 and 2.

    All statements are issued through the coordinator listening on bolt port
    7692; afterwards SHOW INSTANCES is checked on all three coordinators.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    coord3 = connect(host="localhost", port=7692).cursor()

    instance_statements = (
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
        "SET INSTANCE instance_3 TO MAIN",
    )
    for statement in instance_statements:
        execute_and_fetch_all(coord3, statement)

    coordinator_statements = (
        "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
        "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
    )
    for statement in coordinator_statements:
        assert add_coordinator(coord3, statement)

    def instances_seen_by(cursor):
        """Build a zero-argument fetcher of the sorted SHOW INSTANCES rows."""

        def fetch():
            return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES")))

        return fetch

    # The coordinator rows are identical in every expected view.
    coordinator_rows = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
    ]

    # Coordinator 3 is expected to report management addresses and "up" health.
    coord3_view = coordinator_rows + [
        ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "up", "main"),
    ]
    mg_sleep_and_assert(coord3_view, instances_seen_by(coord3))

    # Coordinators 1 and 2 are expected to report only the roles.
    other_coords_view = coordinator_rows + [
        ("instance_1", "", "", "unknown", "replica"),
        ("instance_2", "", "", "unknown", "replica"),
        ("instance_3", "", "", "unknown", "main"),
    ]

    coord1 = connect(host="localhost", port=7690).cursor()
    mg_sleep_and_assert(other_coords_view, instances_seen_by(coord1))

    coord2 = connect(host="localhost", port=7691).cursor()
    mg_sleep_and_assert(other_coords_view, instances_seen_by(coord2))
def test_register_coordinator_then_repl_instances():
    """Add coordinators 1 and 2 first, then register the data instances.

    All statements are issued through the coordinator listening on bolt port
    7692; afterwards SHOW INSTANCES is checked on all three coordinators.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    coord3 = connect(host="localhost", port=7692).cursor()

    coordinator_statements = (
        "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
        "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
    )
    for statement in coordinator_statements:
        assert add_coordinator(coord3, statement)

    instance_statements = (
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
        "SET INSTANCE instance_3 TO MAIN",
    )
    for statement in instance_statements:
        execute_and_fetch_all(coord3, statement)

    def instances_seen_by(cursor):
        """Build a zero-argument fetcher of the sorted SHOW INSTANCES rows."""

        def fetch():
            return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES")))

        return fetch

    # The coordinator rows are identical in every expected view.
    coordinator_rows = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
    ]

    # Coordinator 3 is expected to report management addresses and "up" health.
    coord3_view = coordinator_rows + [
        ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "up", "main"),
    ]
    mg_sleep_and_assert(coord3_view, instances_seen_by(coord3))

    # Coordinators 1 and 2 are expected to report only the roles.
    other_coords_view = coordinator_rows + [
        ("instance_1", "", "", "unknown", "replica"),
        ("instance_2", "", "", "unknown", "replica"),
        ("instance_3", "", "", "unknown", "main"),
    ]

    coord1 = connect(host="localhost", port=7690).cursor()
    mg_sleep_and_assert(other_coords_view, instances_seen_by(coord1))

    coord2 = connect(host="localhost", port=7691).cursor()
    mg_sleep_and_assert(other_coords_view, instances_seen_by(coord2))
# def test_register_repl_instances_then_coordinators():
# safe_execute(shutil.rmtree, TEMP_DIR)
# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
#
# coordinator3_cursor = connect(host="localhost", port=7692).cursor()
#
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
# )
# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
# )
#
# def check_coordinator3():
# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
#
# expected_cluster_coord3 = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3)
#
# coordinator1_cursor = connect(host="localhost", port=7690).cursor()
#
# def check_coordinator1():
# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "replica"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
#
# coordinator2_cursor = connect(host="localhost", port=7691).cursor()
#
# def check_coordinator2():
# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
#
#
# def test_register_coordinator_then_repl_instances():
# safe_execute(shutil.rmtree, TEMP_DIR)
# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
#
# coordinator3_cursor = connect(host="localhost", port=7692).cursor()
#
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
# )
# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")
#
# def check_coordinator3():
# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
#
# expected_cluster_coord3 = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3)
#
# coordinator1_cursor = connect(host="localhost", port=7690).cursor()
#
# def check_coordinator1():
# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "replica"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
#
# coordinator2_cursor = connect(host="localhost", port=7691).cursor()
#
# def check_coordinator2():
# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
def test_coordinators_communication_with_restarts():
# 1 Start all instances
safe_execute(shutil.rmtree, TEMP_DIR)
# 1
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
coordinator3_cursor = connect(host="localhost", port=7692).cursor()
assert add_coordinator(
execute_and_fetch_all(
coordinator3_cursor,
"ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
)
assert add_coordinator(
execute_and_fetch_all(
coordinator3_cursor,
"ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
)
@ -310,284 +313,284 @@ def test_coordinators_communication_with_restarts():
# # TODO: (andi) Test when dealing with distributed coordinators that you can register on one coordinator and unregister from any other coordinator
@pytest.mark.parametrize("kill_instance", [True, False])
def test_unregister_replicas(kill_instance):
    """Unregister both replicas one after another and verify every view.

    With `kill_instance` set, each replica is killed right before it is
    unregistered; otherwise it is unregistered while still running. After
    each step SHOW INSTANCES is checked on all three coordinators and
    SHOW REPLICAS on the main (bolt port 7689).
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    coord1 = connect(host="localhost", port=7690).cursor()
    coord2 = connect(host="localhost", port=7691).cursor()
    coord3 = connect(host="localhost", port=7692).cursor()

    for statement in (
        "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
        "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
    ):
        assert add_coordinator(coord3, statement)

    for statement in (
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
        "SET INSTANCE instance_3 TO MAIN",
    ):
        execute_and_fetch_all(coord3, statement)

    main_cursor = connect(host="localhost", port=7689).cursor()

    def sorted_rows(cursor, query):
        """Build a zero-argument fetcher returning the sorted rows of `query`."""

        def fetch():
            return sorted(list(execute_and_fetch_all(cursor, query)))

        return fetch

    # The coordinator rows are identical in every expected view.
    coordinator_rows = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
    ]

    # Each data instance is described as (name, management address, role).
    instance_1 = ("instance_1", "127.0.0.1:10011", "replica")
    instance_2 = ("instance_2", "127.0.0.1:10012", "replica")
    instance_3 = ("instance_3", "127.0.0.1:10013", "main")

    def replica_row(name, replication_address):
        """Expected SHOW REPLICAS row for a ready sync replica at timestamp 0."""
        return (
            name,
            replication_address,
            "sync",
            {"ts": 0, "behind": None, "status": "ready"},
            {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
        )

    def assert_views(instances, replicas):
        """Check coordinators 1 and 2, then coordinator 3, then the main."""
        others_view = coordinator_rows + [(name, "", "", "unknown", role) for name, _, role in instances]
        coord3_view = coordinator_rows + [(name, "", management, "up", role) for name, management, role in instances]
        mg_sleep_and_assert(others_view, sorted_rows(coord1, "SHOW INSTANCES"))
        mg_sleep_and_assert(others_view, sorted_rows(coord2, "SHOW INSTANCES"))
        mg_sleep_and_assert(coord3_view, sorted_rows(coord3, "SHOW INSTANCES"))
        mg_sleep_and_assert(replicas, sorted_rows(main_cursor, "SHOW REPLICAS"))

    # Initial state: all three instances registered, both replicas attached.
    assert_views(
        [instance_1, instance_2, instance_3],
        [replica_row("instance_1", "127.0.0.1:10001"), replica_row("instance_2", "127.0.0.1:10002")],
    )

    if kill_instance:
        interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
    execute_and_fetch_all(coord3, "UNREGISTER INSTANCE instance_1")

    assert_views([instance_2, instance_3], [replica_row("instance_2", "127.0.0.1:10002")])

    if kill_instance:
        interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
    execute_and_fetch_all(coord3, "UNREGISTER INSTANCE instance_2")

    # Only the main is left, so it has no replicas.
    assert_views([instance_3], [])
def test_unregister_main():
    """Unregistering the main is rejected while it is alive and works once it is down.

    Flow: build the cluster with instance_3 as main, verify that
    UNREGISTER INSTANCE instance_3 is refused, kill instance_3, wait until
    coordinator 3 reports instance_1 as main, then unregister instance_3 and
    verify the remaining cluster and the replicas of instance_1.
    """
    safe_execute(shutil.rmtree, TEMP_DIR)
    interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)

    coordinator1_cursor = connect(host="localhost", port=7690).cursor()
    coordinator2_cursor = connect(host="localhost", port=7691).cursor()
    coordinator3_cursor = connect(host="localhost", port=7692).cursor()

    assert add_coordinator(
        coordinator3_cursor,
        "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
    )
    assert add_coordinator(
        coordinator3_cursor,
        "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
    )
    execute_and_fetch_all(
        coordinator3_cursor,
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
    )
    execute_and_fetch_all(
        coordinator3_cursor,
        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
    )
    execute_and_fetch_all(
        coordinator3_cursor,
        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
    )
    execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")

    def check_coordinator1():
        return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))

    def check_coordinator2():
        return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))

    def check_coordinator3():
        return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))

    # Initial state, as reported by coordinator 3.
    expected_cluster = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "up", "main"),
    ]

    # Initial state, as reported by coordinators 1 and 2.
    expected_cluster_shared = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "", "unknown", "replica"),
        ("instance_2", "", "", "unknown", "replica"),
        ("instance_3", "", "", "unknown", "main"),
    ]

    mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
    mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
    mg_sleep_and_assert(expected_cluster, check_coordinator3)

    # The alive main must not be unregistrable. pytest.raises fails the test
    # if the query unexpectedly succeeds; a bare try/except would let that
    # case pass silently.
    with pytest.raises(Exception) as exc_info:
        execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3")
    assert (
        str(exc_info.value)
        == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!"
    )

    interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")

    # After the kill, coordinator 3 should report instance_1 as the new main
    # and instance_3 as down.
    expected_cluster = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "127.0.0.1:10011", "up", "main"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "down", "unknown"),
    ]

    expected_cluster_shared = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "", "unknown", "main"),
        ("instance_2", "", "", "unknown", "replica"),
        ("instance_3", "", "", "unknown", "main"),
    ]

    mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
    mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
    mg_sleep_and_assert(expected_cluster, check_coordinator3)

    # With instance_3 down, unregistering it is expected to succeed.
    execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3")

    expected_cluster = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "127.0.0.1:10011", "up", "main"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
    ]

    expected_cluster_shared = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "", "unknown", "main"),
        ("instance_2", "", "", "unknown", "replica"),
    ]

    expected_replicas = [
        (
            "instance_2",
            "127.0.0.1:10002",
            "sync",
            {"ts": 0, "behind": None, "status": "ready"},
            {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
        ),
    ]

    # instance_1 (bolt port 7687) is the main now, so replicas are read from it.
    main_cursor = connect(host="localhost", port=7687).cursor()

    def check_main():
        return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS")))

    mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
    mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
    mg_sleep_and_assert(expected_cluster, check_coordinator3)
    mg_sleep_and_assert(expected_replicas, check_main)
# @pytest.mark.parametrize(
# "kill_instance",
# [True, False],
# )
# def test_unregister_replicas(kill_instance):
# safe_execute(shutil.rmtree, TEMP_DIR)
# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
#
# coordinator1_cursor = connect(host="localhost", port=7690).cursor()
# coordinator2_cursor = connect(host="localhost", port=7691).cursor()
# coordinator3_cursor = connect(host="localhost", port=7692).cursor()
#
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
# )
# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")
#
# def check_coordinator1():
# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
#
# def check_coordinator2():
# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
#
# def check_coordinator3():
# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
#
# main_cursor = connect(host="localhost", port=7689).cursor()
#
# def check_main():
# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS")))
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "replica"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# expected_replicas = [
# (
# "instance_1",
# "127.0.0.1:10001",
# "sync",
# {"ts": 0, "behind": None, "status": "ready"},
# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
# ),
# (
# "instance_2",
# "127.0.0.1:10002",
# "sync",
# {"ts": 0, "behind": None, "status": "ready"},
# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
# ),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
# mg_sleep_and_assert(expected_replicas, check_main)
#
# if kill_instance:
# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1")
# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1")
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# expected_replicas = [
# (
# "instance_2",
# "127.0.0.1:10002",
# "sync",
# {"ts": 0, "behind": None, "status": "ready"},
# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
# ),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
# mg_sleep_and_assert(expected_replicas, check_main)
#
# if kill_instance:
# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2")
# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2")
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_3", "", "", "unknown", "main"),
# ]
# expected_replicas = []
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
# mg_sleep_and_assert(expected_replicas, check_main)
#
#
# def test_unregister_main():
# safe_execute(shutil.rmtree, TEMP_DIR)
# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
#
# coordinator1_cursor = connect(host="localhost", port=7690).cursor()
# coordinator2_cursor = connect(host="localhost", port=7691).cursor()
# coordinator3_cursor = connect(host="localhost", port=7692).cursor()
#
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
# )
# execute_and_fetch_all(
# coordinator3_cursor,
# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
# )
# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")
#
# def check_coordinator1():
# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
#
# def check_coordinator2():
# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
#
# def check_coordinator3():
# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "up", "main"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "replica"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
#
# try:
# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3")
# except Exception as e:
# assert (
# str(e)
# == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!"
# )
#
# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3")
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "main"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ("instance_3", "", "127.0.0.1:10013", "down", "unknown"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "main"),
# ("instance_2", "", "", "unknown", "replica"),
# ("instance_3", "", "", "unknown", "main"),
# ]
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
#
# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3")
#
# expected_cluster = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "127.0.0.1:10011", "up", "main"),
# ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
# ]
#
# expected_cluster_shared = [
# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
# ("instance_1", "", "", "unknown", "main"),
# ("instance_2", "", "", "unknown", "replica"),
# ]
#
# expected_replicas = [
# (
# "instance_2",
# "127.0.0.1:10002",
# "sync",
# {"ts": 0, "behind": None, "status": "ready"},
# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}},
# ),
# ]
#
# main_cursor = connect(host="localhost", port=7687).cursor()
#
# def check_main():
# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS")))
#
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1)
# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2)
# mg_sleep_and_assert(expected_cluster, check_coordinator3)
# mg_sleep_and_assert(expected_replicas, check_main)
if __name__ == "__main__":

View File

@ -16,7 +16,7 @@ import tempfile
import interactive_mg_runner
import pytest
from common import add_coordinator, connect, execute_and_fetch_all, safe_execute
from common import connect, execute_and_fetch_all, safe_execute
from mg_utils import mg_sleep_and_assert
interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--also-log-to-stderr",
"--instance-health-check-frequency-sec",
@ -55,7 +55,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--also-log-to-stderr",
"--instance-health-check-frequency-sec",
@ -74,7 +74,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--also-log-to-stderr",
"--instance-health-check-frequency-sec",
@ -92,8 +92,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"setup_queries": [],
@ -104,8 +104,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"setup_queries": [],
@ -116,8 +116,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
"--also-log-to-stderr",
],
"log_file": "coordinator3.log",
@ -137,11 +137,11 @@ def test_writing_disabled_on_main_restart():
"REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
)
execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN")
assert add_coordinator(
execute_and_fetch_all(
coordinator3_cursor,
"ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
)
assert add_coordinator(
execute_and_fetch_all(
coordinator3_cursor,
"ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
)

View File

@ -13,6 +13,7 @@ import os
import shutil
import sys
import tempfile
import time
import interactive_mg_runner
import pytest
@ -40,7 +41,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -54,7 +55,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -68,7 +69,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -81,8 +82,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"setup_queries": [],
@ -93,8 +94,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"setup_queries": [],
@ -105,8 +106,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
],
"log_file": "coordinator3.log",
"setup_queries": [
@ -130,7 +131,7 @@ def get_instances_description_no_setup():
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -144,7 +145,7 @@ def get_instances_description_no_setup():
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -158,7 +159,7 @@ def get_instances_description_no_setup():
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -171,8 +172,8 @@ def get_instances_description_no_setup():
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"data_directory": f"{TEMP_DIR}/coordinator_1",
@ -184,8 +185,8 @@ def get_instances_description_no_setup():
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"data_directory": f"{TEMP_DIR}/coordinator_2",
@ -197,8 +198,8 @@ def get_instances_description_no_setup():
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
],
"log_file": "coordinator3.log",
"data_directory": f"{TEMP_DIR}/coordinator_3",
@ -261,7 +262,7 @@ def test_old_main_comes_back_on_new_leader_as_replica():
("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
("instance_1", "", "", "unknown", "main"),
("instance_2", "", "", "unknown", "replica"),
("instance_3", "", "", "unknown", "main"), # TODO: (andi) Will become unknown.
("instance_3", "", "", "unknown", "unknown"),
]
mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2])
mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2])
@ -456,7 +457,7 @@ def test_distributed_automatic_failover_with_leadership_change():
("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
("instance_1", "", "", "unknown", "main"),
("instance_2", "", "", "unknown", "replica"),
("instance_3", "", "", "unknown", "main"), # TODO: (andi) Will become unknown.
("instance_3", "", "", "unknown", "unknown"),
]
mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2])
mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2])
@ -640,7 +641,7 @@ def test_registering_4_coords():
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -654,7 +655,7 @@ def test_registering_4_coords():
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -668,7 +669,7 @@ def test_registering_4_coords():
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -681,8 +682,8 @@ def test_registering_4_coords():
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"setup_queries": [],
@ -693,8 +694,8 @@ def test_registering_4_coords():
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"setup_queries": [],
@ -705,8 +706,8 @@ def test_registering_4_coords():
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
],
"log_file": "coordinator3.log",
"setup_queries": [],
@ -717,8 +718,8 @@ def test_registering_4_coords():
"--bolt-port",
"7693",
"--log-level=TRACE",
"--raft-server-id=4",
"--raft-server-port=10114",
"--coordinator-id=4",
"--coordinator-port=10114",
],
"log_file": "coordinator4.log",
"setup_queries": [
@ -775,7 +776,7 @@ def test_registering_coord_log_store():
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -789,7 +790,7 @@ def test_registering_coord_log_store():
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -803,7 +804,7 @@ def test_registering_coord_log_store():
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -816,8 +817,8 @@ def test_registering_coord_log_store():
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator1.log",
"setup_queries": [],
@ -828,8 +829,8 @@ def test_registering_coord_log_store():
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=2",
"--raft-server-port=10112",
"--coordinator-id=2",
"--coordinator-port=10112",
],
"log_file": "coordinator2.log",
"setup_queries": [],
@ -840,8 +841,8 @@ def test_registering_coord_log_store():
"--bolt-port",
"7692",
"--log-level=TRACE",
"--raft-server-id=3",
"--raft-server-port=10113",
"--coordinator-id=3",
"--coordinator-port=10113",
],
"log_file": "coordinator3.log",
"setup_queries": [],
@ -852,8 +853,8 @@ def test_registering_coord_log_store():
"--bolt-port",
"7693",
"--log-level=TRACE",
"--raft-server-id=4",
"--raft-server-port=10114",
"--coordinator-id=4",
"--coordinator-port=10114",
],
"log_file": "coordinator4.log",
"setup_queries": [
@ -911,7 +912,7 @@ def test_registering_coord_log_store():
bolt_port = f"--bolt-port={bolt_port_id}"
manag_server_port = f"--coordinator-server-port={manag_port_id}"
manag_server_port = f"--management-port={manag_port_id}"
args_desc.append(bolt_port)
args_desc.append(manag_server_port)
@ -1092,8 +1093,8 @@ def test_multiple_failovers_in_row_no_leadership_change():
"",
"",
"unknown",
"main",
), # TODO(antoniofilipovic) change to unknown after PR with transitions
"unknown",
),
]
)
@ -1119,9 +1120,9 @@ def test_multiple_failovers_in_row_no_leadership_change():
follower_data.extend(coordinator_data)
follower_data.extend(
[
("instance_1", "", "", "unknown", "main"),
("instance_2", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown
("instance_3", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown
("instance_1", "", "", "unknown", "unknown"),
("instance_2", "", "", "unknown", "main"),
("instance_3", "", "", "unknown", "unknown"),
]
)
@ -1149,7 +1150,7 @@ def test_multiple_failovers_in_row_no_leadership_change():
follower_data.extend(coordinator_data)
follower_data.extend(
[
("instance_1", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown
("instance_1", "", "", "unknown", "unknown"),
("instance_2", "", "", "unknown", "main"),
("instance_3", "", "", "unknown", "replica"),
]
@ -1177,8 +1178,8 @@ def test_multiple_failovers_in_row_no_leadership_change():
follower_data.extend(coordinator_data)
follower_data.extend(
[
("instance_1", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown
("instance_2", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown
("instance_1", "", "", "unknown", "unknown"),
("instance_2", "", "", "unknown", "unknown"),
("instance_3", "", "", "unknown", "main"),
]
)
@ -1258,5 +1259,166 @@ def test_multiple_failovers_in_row_no_leadership_change():
mg_sleep_and_assert(1, get_vertex_count_func(connect(port=7688, host="localhost").cursor()))
def test_multiple_old_mains_single_failover():
    """Check the cluster after two MAINs and the leader coordinator have been killed.

    Goal of this test is to check that, when leadership changes and we have an old
    MAIN down, we don't start a failover.

    Steps:
      1. Start all instances and register the cluster through coordinator 3.
      2. Kill the MAIN instance (instance_3).
      3. Wait for the failover (instance_1 is reported as MAIN).
      4. Kill the other MAIN (instance_1).
      5. Kill the leader (coordinator_3).
      6. Leave the first MAIN down and start the second MAIN (instance_1) again.
      7. The second MAIN should keep writing data to its replica the whole time.
    """

    def show_instances_on(cursor):
        # Builds a zero-argument poller returning the sorted SHOW INSTANCES rows of one coordinator.
        def show_instances():
            return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES;")))

        return show_instances

    # 1
    safe_execute(shutil.rmtree, TEMP_DIR)
    inner_instances_description = get_instances_description_no_setup()

    interactive_mg_runner.start_all(inner_instances_description)

    setup_queries = [
        "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
        "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
        "SET INSTANCE instance_3 TO MAIN",
    ]
    coord_cursor_3 = connect(host="localhost", port=7692).cursor()
    for query in setup_queries:
        execute_and_fetch_all(coord_cursor_3, query)

    retrieve_data_show_repl_cluster = show_instances_on(coord_cursor_3)

    coordinators = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
    ]

    basic_instances = [
        ("instance_1", "", "127.0.0.1:10011", "up", "replica"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "up", "main"),
    ]

    expected_data_on_coord = []
    expected_data_on_coord.extend(coordinators)
    expected_data_on_coord.extend(basic_instances)

    mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)

    # 2
    interactive_mg_runner.kill(inner_instances_description, "instance_3")

    # 3
    basic_instances = [
        ("instance_1", "", "127.0.0.1:10011", "up", "main"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "down", "unknown"),
    ]

    expected_data_on_coord = []
    expected_data_on_coord.extend(coordinators)
    expected_data_on_coord.extend(basic_instances)

    mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster)

    # 4
    interactive_mg_runner.kill(inner_instances_description, "instance_1")

    # 5
    interactive_mg_runner.kill(inner_instances_description, "coordinator_3")

    # 6
    interactive_mg_runner.start(inner_instances_description, "instance_1")

    # 7
    coord_cursor_1 = connect(host="localhost", port=7690).cursor()
    show_instances_coord1 = show_instances_on(coord_cursor_1)

    coord_cursor_2 = connect(host="localhost", port=7691).cursor()
    show_instances_coord2 = show_instances_on(coord_cursor_2)

    leader_data = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "127.0.0.1:10011", "up", "main"),
        ("instance_2", "", "127.0.0.1:10012", "up", "replica"),
        ("instance_3", "", "127.0.0.1:10013", "down", "unknown"),
    ]

    follower_data = [
        ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"),
        ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"),
        ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"),
        ("instance_1", "", "", "unknown", "main"),
        ("instance_2", "", "", "unknown", "replica"),
        ("instance_3", "", "", "unknown", "unknown"),
    ]

    # One of the two surviving coordinators must report the leader view and one the
    # follower view; which is which is not fixed, hence the "any function" asserts.
    mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2])
    mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2])

    instance_1_cursor = connect(host="localhost", port=7687).cursor()
    instance_2_cursor = connect(port=7688, host="localhost").cursor()

    def show_replicas():
        return sorted(list(execute_and_fetch_all(instance_1_cursor, "SHOW REPLICAS;")))

    replicas = [
        (
            "instance_2",
            "127.0.0.1:10002",
            "sync",
            {"behind": None, "status": "ready", "ts": 0},
            {"memgraph": {"behind": 0, "status": "ready", "ts": 0}},
        ),
        (
            "instance_3",
            "127.0.0.1:10003",
            "sync",
            {"behind": None, "status": "invalid", "ts": 0},
            {"memgraph": {"behind": 0, "status": "invalid", "ts": 0}},
        ),
    ]
    mg_sleep_and_assert_collection(replicas, show_replicas)

    def get_vertex_count_func(cursor):
        def get_vertex_count():
            return execute_and_fetch_all(cursor, "MATCH (n) RETURN count(n)")[0][0]

        return get_vertex_count

    vertex_count = 0

    mg_sleep_and_assert(vertex_count, get_vertex_count_func(instance_1_cursor))
    mg_sleep_and_assert(vertex_count, get_vertex_count_func(instance_2_cursor))

    time_slept = 0
    failover_time = 5
    while time_slept < failover_time:
        # NOTE(review): indentation was reconstructed here; the increment is assumed to sit
        # outside the `with` block. The write is expected to raise (presumably because the
        # SYNC replica instance_3 is down -- confirm), while the count assertions below
        # require it to be applied on instance_1 and replicated to instance_2 anyway.
        with pytest.raises(Exception):
            execute_and_fetch_all(instance_1_cursor, "CREATE ();")
        vertex_count += 1

        assert vertex_count == execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0]
        assert vertex_count == execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0]
        time.sleep(0.1)
        time_slept += 0.1
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -31,7 +31,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "main.log",

View File

@ -153,7 +153,7 @@ def test_not_replicate_old_main_register_new_cluster():
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
],
"log_file": "instance_1.log",
@ -167,7 +167,7 @@ def test_not_replicate_old_main_register_new_cluster():
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
],
"log_file": "instance_2.log",
@ -180,8 +180,8 @@ def test_not_replicate_old_main_register_new_cluster():
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [
@ -220,7 +220,7 @@ def test_not_replicate_old_main_register_new_cluster():
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
],
"log_file": "instance_3.log",
@ -233,8 +233,8 @@ def test_not_replicate_old_main_register_new_cluster():
"--bolt-port",
"7691",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10112",
"--coordinator-id=1",
"--coordinator-port=10112",
],
"log_file": "coordinator.log",
"setup_queries": [],

View File

@ -35,7 +35,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--replication-restore-state-on-startup=true",
"--storage-recover-on-startup=false",
@ -52,7 +52,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--replication-restore-state-on-startup=true",
"--storage-recover-on-startup=false",
@ -69,7 +69,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--replication-restore-state-on-startup=true",
"--storage-recover-on-startup=false",
@ -85,8 +85,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [
@ -126,7 +126,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--replication-restore-state-on-startup",
"true",
@ -144,7 +144,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--replication-restore-state-on-startup",
"true",
@ -162,7 +162,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--replication-restore-state-on-startup",
"true",
@ -180,8 +180,8 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [
@ -337,7 +337,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--replication-restore-state-on-startup",
"true",
@ -355,7 +355,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--replication-restore-state-on-startup",
"true",
@ -373,7 +373,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--replication-restore-state-on-startup",
"true",
@ -392,7 +392,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r
"7691",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10014",
"--replication-restore-state-on-startup",
"true",
@ -410,8 +410,8 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [
@ -624,7 +624,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--replication-restore-state-on-startup",
"true",
@ -642,7 +642,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--replication-restore-state-on-startup",
"true",
@ -660,7 +660,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--replication-restore-state-on-startup",
"true",
@ -679,7 +679,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco
"7691",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10014",
"--replication-restore-state-on-startup",
"true",
@ -697,8 +697,8 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [
@ -911,7 +911,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
"7688",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10011",
"--replication-restore-state-on-startup",
"true",
@ -929,7 +929,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
"7689",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10012",
"--replication-restore-state-on-startup",
"true",
@ -947,7 +947,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
"7687",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10013",
"--replication-restore-state-on-startup",
"true",
@ -966,7 +966,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
"7691",
"--log-level",
"TRACE",
"--coordinator-server-port",
"--management-port",
"10014",
"--replication-restore-state-on-startup",
"true",
@ -984,8 +984,8 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery):
"--bolt-port",
"7690",
"--log-level=TRACE",
"--raft-server-id=1",
"--raft-server-port=10111",
"--coordinator-id=1",
"--coordinator-port=10111",
],
"log_file": "coordinator.log",
"setup_queries": [

View File

@ -1,19 +1,19 @@
ha_cluster: &ha_cluster
cluster:
replica_1:
args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"]
args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--management-port=10011"]
log_file: "replication-e2e-replica1.log"
setup_queries: []
replica_2:
args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"]
args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--management-port=10012"]
log_file: "replication-e2e-replica2.log"
setup_queries: []
main:
args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"]
args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--management-port=10013"]
log_file: "replication-e2e-main.log"
setup_queries: []
coordinator:
args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--raft-server-id=1", "--raft-server-port=10111"]
args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--coordinator-id=1", "--coordinator-port=10111"]
log_file: "replication-e2e-coordinator.log"
setup_queries: [
"REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",

View File

@ -37,10 +37,9 @@ auto ParseDatabaseEndpoints(const std::string &database_endpoints_str) {
const auto db_endpoints_strs = memgraph::utils::SplitView(database_endpoints_str, ",");
std::vector<memgraph::io::network::Endpoint> database_endpoints;
for (const auto &db_endpoint_str : db_endpoints_strs) {
const auto maybe_host_port = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
MG_ASSERT(maybe_host_port);
auto const [ip, port] = *maybe_host_port;
database_endpoints.emplace_back(std::string(ip), port);
auto maybe_endpoint = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
MG_ASSERT(maybe_endpoint);
database_endpoints.emplace_back(std::move(*maybe_endpoint));
}
return database_endpoints;
}

View File

@ -178,8 +178,16 @@ PROCESS_RESULTS() {
CLUSTER_UP() {
PRINT_CONTEXT
"$script_dir/jepsen/docker/bin/up" --daemon
sleep 10
local cnt=0
while [[ "$cnt" < 5 ]]; do
if ! "$script_dir/jepsen/docker/bin/up" --daemon; then
cnt=$((cnt + 1))
continue
else
sleep 10
break
fi
done
# Ensure all SSH connections between Jepsen containers work
for node in $(docker ps --filter name=jepsen* --filter status=running --format "{{.Names}}"); do
if [ "$node" == "jepsen-control" ]; then

View File

@ -632,10 +632,12 @@ def run_isolated_workload_without_authorization(vendor_runner, client, queries,
def setup_indices_and_import_dataset(client, vendor_runner, generated_queries, workload, storage_mode):
vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT)
if benchmark_context.vendor_name == "memgraph":
# Neo4j will get started just before import -> without this if statement it would try to start it twice
vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT)
log.info("Executing database index setup")
start_time = time.time()
import_results = None
if generated_queries:
client.execute(queries=workload.indexes_generator(), num_workers=1)
log.info("Finished setting up indexes.")

View File

@ -127,8 +127,6 @@ def run_full_benchmarks(
],
]
assert not realistic or not mixed, "Cannot run both realistic and mixed workload, please select one!"
if realistic:
# Configurations for full workload
for count, write, read, update, analytical in realistic:

View File

@ -0,0 +1,99 @@
#!/bin/bash
# Currently only pokec dataset is modified to be used with memgraph on-disk storage
# Wrap pushd/popd so their output (the directory stack listing) is discarded.
pushd () { command pushd "$@" > /dev/null; }
popd () { command popd "$@" > /dev/null; }

# Directory containing this script; every relative path used below is resolved against it.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Abort instead of running the benchmarks from whatever directory the caller happened to be in.
pushd "$SCRIPT_DIR" || exit 1
# Print the usage text to stdout, then terminate the script with status 0.
function show_help() {
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " -n, --neo4j-path Path to Neo4j binary" \
        " -m, --memgraph-path Path to Memgraph binary" \
        " -w, --num-workers Number of workers for benchmark and import" \
        " -d, --dataset_size dataset_size (small, medium, large)" \
        " -h, --help Show this help message"
    exit 0
}
# Default values, used for every option that is not given on the command line.
neo4j_path="/usr/share/neo4j/bin/neo4j"
memgraph_path="../../build/memgraph"
num_workers=12
dataset_size="small"

# Parse command line arguments.
# Every option except -h/--help consumes the following argument as its value.
while [[ $# -gt 0 ]]; do
    key="$1"
    case $key in
        -h|--help)
            show_help
            ;;
        -n|--neo4j-path|-m|--memgraph-path|-w|--num-workers|-d|--dataset_size)
            # Reject a trailing option without a value instead of silently storing "".
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $key" >&2
                exit 1
            fi
            case $key in
                -n|--neo4j-path)    neo4j_path="$2" ;;
                -m|--memgraph-path) memgraph_path="$2" ;;
                -w|--num-workers)   num_workers="$2" ;;
                -d|--dataset_size)  dataset_size="$2" ;;
            esac
            shift
            shift
            ;;
        *)
            echo "Invalid option: $1" >&2
            # show_help terminates its caller, so run it in a subshell: the usage text is
            # still printed (to stderr), and the script itself fails with a non-zero status.
            ( show_help ) >&2
            exit 1
            ;;
    esac
done
# All result files of this run are collected in one directory per dataset size.
results_dir="pokec_${dataset_size}_results"
mkdir -p "$results_dir"

# Run Python: Mgbench - Neo4j
# NOTE(review): $neo4j_path is documented as the path to the Neo4j binary, yet it is used
# below as a directory prefix for conf/neo4j.conf -- confirm which of the two is intended.
echo "Running Python: Mgbench - Neo4j"
python3 benchmark.py vendor-native \
    --vendor-binary "$neo4j_path" \
    --vendor-name neo4j \
    --num-workers-for-benchmark "$num_workers" \
    --num-workers-for-import "$num_workers" \
    --no-load-query-counts \
    --export-results "${results_dir}/neo4j_${dataset_size}_pokec.json" \
    "pokec_disk/${dataset_size}/*/*" \
    --vendor-specific "config=$neo4j_path/conf/neo4j.conf" \
    --no-authorization

# Run Python: Mgbench - Memgraph - on-disk
echo "Running Python: Mgbench - Memgraph - on-disk"
python3 benchmark.py vendor-native \
    --vendor-binary "$memgraph_path" \
    --vendor-name memgraph \
    --num-workers-for-benchmark "$num_workers" \
    --num-workers-for-import "$num_workers" \
    --no-load-query-counts \
    --export-results-on-disk-txn "${results_dir}/on_disk_${dataset_size}_pokec.json" \
    --export-results "${results_dir}/on_disk_export_${dataset_size}_pokec.json" \
    "pokec_disk/${dataset_size}/*/*" \
    --no-authorization \
    --vendor-specific "data-directory=benchmark_datadir" "storage-mode=ON_DISK_TRANSACTIONAL"

# Compare the Neo4j results with the Memgraph on-disk transactional results.
echo "Comparing results"
python3 compare_results.py --compare \
    "${results_dir}/neo4j_${dataset_size}_pokec.json" \
    "${results_dir}/on_disk_${dataset_size}_pokec.json" \
    --output \
    "${results_dir}/neo4j_vs_mg_ondisk_results.html" \
    --different-vendors

View File

@ -634,7 +634,7 @@ class Neo4j(BaseRunner):
exit_proc = subprocess.run(args=[self._neo4j_binary, "stop"], capture_output=True, check=True)
return exit_proc.returncode, usage
else:
return 0
return 0, 0
def start_db_init(self, workload):
if self._performance_tracking:

View File

@ -160,12 +160,7 @@ class Workload(ABC):
raise ValueError("Vendor does not have INDEX for dataset!")
def _set_local_files(self) -> None:
if not self.disk_workload:
if self.LOCAL_FILE is not None:
self._local_file = self.LOCAL_FILE.get(self._variant, None)
else:
self._local_file = None
else:
if self.disk_workload and self._vendor != "neo4j":
if self.LOCAL_FILE_NODES is not None:
self._local_file_nodes = self.LOCAL_FILE_NODES.get(self._variant, None)
else:
@ -175,14 +170,14 @@ class Workload(ABC):
self._local_file_edges = self.LOCAL_FILE_EDGES.get(self._variant, None)
else:
self._local_file_edges = None
else:
if self.LOCAL_FILE is not None:
self._local_file = self.LOCAL_FILE.get(self._variant, None)
else:
self._local_file = None
def _set_url_files(self) -> None:
if not self.disk_workload:
if self.URL_FILE is not None:
self._url_file = self.URL_FILE.get(self._variant, None)
else:
self._url_file = None
else:
if self.disk_workload and self._vendor != "neo4j":
if self.URL_FILE_NODES is not None:
self._url_file_nodes = self.URL_FILE_NODES.get(self._variant, None)
else:
@ -191,6 +186,11 @@ class Workload(ABC):
self._url_file_edges = self.URL_FILE_EDGES.get(self._variant, None)
else:
self._url_file_edges = None
else:
if self.URL_FILE is not None:
self._url_file = self.URL_FILE.get(self._variant, None)
else:
self._url_file = None
def _set_local_index_file(self) -> None:
if self.LOCAL_INDEX_FILE is not None:
@ -205,10 +205,10 @@ class Workload(ABC):
self._url_index = None
def prepare(self, directory):
if not self.disk_workload:
self._prepare_dataset_for_in_memory_workload(directory)
else:
if self.disk_workload and self._vendor != "neo4j":
self._prepare_dataset_for_on_disk_workload(directory)
else:
self._prepare_dataset_for_in_memory_workload(directory)
if self._local_index is not None:
print("Using local index file:", self._local_index)

View File

@ -13,7 +13,8 @@ import random
from benchmark_context import BenchmarkContext
from workloads.base import Workload
from workloads.importers.disk_importer_pokec import ImporterPokec
from workloads.importers.disk_importer_pokec import DiskImporterPokec
from workloads.importers.importer_pokec import ImporterPokec
class Pokec(Workload):
@ -22,6 +23,12 @@ class Pokec(Workload):
DEFAULT_VARIANT = "small"
FILE = None
URL_FILE = {
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_small_import.cypher",
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_medium_import.cypher",
"large": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_large.setup.cypher.gz",
}
URL_FILE_NODES = {
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_small_import_nodes.cypher",
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_medium_import_nodes.cypher",
@ -42,7 +49,7 @@ class Pokec(Workload):
URL_INDEX_FILE = {
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/memgraph.cypher",
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/neo4j.cypher",
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/neo4j.cypher",
}
PROPERTIES_ON_EDGES = False
@ -51,15 +58,26 @@ class Pokec(Workload):
super().__init__(variant, benchmark_context=benchmark_context, disk_workload=True)
def custom_import(self) -> bool:
importer = ImporterPokec(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
index_file=self._file_index,
dataset_nodes_file=self._node_file,
dataset_edges_file=self._edge_file,
variant=self._variant,
)
return importer.execute_import()
if self._vendor == "neo4j":
importer = ImporterPokec(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
index_file=self._file_index,
dataset_file=self._file,
variant=self._variant,
)
return importer.execute_import()
else:
importer = DiskImporterPokec(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
index_file=self._file_index,
dataset_nodes_file=self._node_file,
dataset_edges_file=self._edge_file,
variant=self._variant,
)
return importer.execute_import()
# Helpers used to generate the queries
def _get_random_vertex(self):
@ -214,12 +232,22 @@ class Pokec(Workload):
# OK
def benchmark__arango__allshortest_paths(self):
vertex_from, vertex_to = self._get_random_from_to()
return (
memgraph = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
"RETURN extract(n in nodes(p) | n.id) AS path",
{"from": vertex_from, "to": vertex_to},
)
neo4j = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p = allShortestPaths((n)-[*..2]->(m)) "
"RETURN [node in nodes(p) | node.id] AS path",
{"from": vertex_from, "to": vertex_to},
)
if self._vendor == "neo4j":
return neo4j
else:
return memgraph
# Our benchmark queries

View File

@ -17,7 +17,7 @@ from constants import *
from runners import BaseRunner
class ImporterPokec:
class DiskImporterPokec:
def __init__(
self,
benchmark_context: BenchmarkContext,

View File

@ -167,30 +167,62 @@ class Pokec(Workload):
def benchmark__arango__shortest_path(self):
vertex_from, vertex_to = self._get_random_from_to()
return (
memgraph = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=(n)-[*bfs..15]->(m) "
"RETURN extract(n in nodes(p) | n.id) AS path",
{"from": vertex_from, "to": vertex_to},
)
neo4j = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=shortestPath((n)-[*..15]->(m)) "
"RETURN [n in nodes(p) | n.id] AS path",
{"from": vertex_from, "to": vertex_to},
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__arango__shortest_path_with_filter(self):
vertex_from, vertex_to = self._get_random_from_to()
return (
memgraph = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) "
"RETURN extract(n in nodes(p) | n.id) AS path",
{"from": vertex_from, "to": vertex_to},
)
neo4j = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=shortestPath((n)-[*..15]->(m)) "
"WHERE all(node in nodes(p) WHERE node.age >= 18) "
"RETURN [n in nodes(p) | n.id] AS path",
{"from": vertex_from, "to": vertex_to},
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__arango__allshortest_paths(self):
vertex_from, vertex_to = self._get_random_from_to()
return (
memgraph = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
"RETURN extract(n in nodes(p) | n.id) AS path",
{"from": vertex_from, "to": vertex_to},
)
neo4j = (
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
"MATCH p = allShortestPaths((n)-[*..2]->(m)) "
"RETURN [node in nodes(p) | node.id] AS path",
{"from": vertex_from, "to": vertex_to},
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
# Our benchmark queries

View File

@ -446,9 +446,16 @@ target_link_libraries(${test_prefix}raft_log_serialization gflags mg-coordinatio
target_include_directories(${test_prefix}raft_log_serialization PRIVATE ${CMAKE_SOURCE_DIR}/include)
endif()
# Test Raft log serialization
# Test CoordinatorClusterState
if(MG_ENTERPRISE)
add_unit_test(coordinator_cluster_state.cpp)
target_link_libraries(${test_prefix}coordinator_cluster_state gflags mg-coordination mg-repl_coord_glue)
target_include_directories(${test_prefix}coordinator_cluster_state PRIVATE ${CMAKE_SOURCE_DIR}/include)
endif()
# Test routing table
if(MG_ENTERPRISE)
add_unit_test(routing_table.cpp)
target_link_libraries(${test_prefix}routing_table gflags mg-coordination mg-repl_coord_glue)
target_include_directories(${test_prefix}routing_table PRIVATE ${CMAKE_SOURCE_DIR}/include)
endif()

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -114,6 +114,14 @@ class TestSession final : public Session<TestInputStream, TestOutputStream> {
bool Authenticate(const std::string & /*username*/, const std::string & /*password*/) override { return true; }
#ifdef MG_ENTERPRISE
// Stub ROUTE handler for the test session: every argument is ignored and an empty map is returned.
auto Route(std::map<std::string, Value> const & /*routing*/,
           std::vector<memgraph::communication::bolt::Value> const & /*bookmarks*/,
           std::map<std::string, Value> const & /*extra*/) -> std::map<std::string, Value> override {
  std::map<std::string, Value> empty_result;
  return empty_result;
}
#endif
std::optional<std::string> GetServerNameForInit() override { return std::nullopt; }
void Configure(const std::map<std::string, memgraph::communication::bolt::Value> &) override {}
@ -1027,104 +1035,115 @@ TEST(BoltSession, Noop) {
}
}
TEST(BoltSession, Route) {
// Memgraph does not support route message, but it handles it
{
SCOPED_TRACE("v1");
INIT_VARS;
TEST(BoltSession, Route){{SCOPED_TRACE("v1");
INIT_VARS;
ExecuteHandshake(input_stream, session, output);
ExecuteInit(input_stream, session, output);
ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException);
EXPECT_EQ(session.state_, State::Close);
}
{
SCOPED_TRACE("v4");
INIT_VARS;
ExecuteHandshake(input_stream, session, output);
ExecuteInit(input_stream, session, output);
ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException);
EXPECT_EQ(session.state_, State::Close);
}
#ifdef MG_ENTERPRISE
{
SCOPED_TRACE("v4");
INIT_VARS;
ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
ExecuteInit(input_stream, session, output, true);
ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)));
static constexpr uint8_t expected_resp[] = {
0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/,
0x40,
0xb1 /*TinyStruct1*/,
0x7f /*Failure*/,
0xa2 /*TinyMap with 2 items*/,
0x84 /*TinyString with 4 chars*/,
'c',
'o',
'd',
'e',
0x82 /*TinyString with 2 chars*/,
'6',
'6',
0x87 /*TinyString with 7 chars*/,
'm',
'e',
's',
's',
'a',
'g',
'e',
0xd0 /*String*/,
0x2b /*With 43 chars*/,
'R',
'o',
'u',
't',
'e',
' ',
'm',
'e',
's',
's',
'a',
'g',
'e',
' ',
'i',
's',
' ',
'n',
'o',
't',
' ',
's',
'u',
'p',
'p',
'o',
'r',
't',
'e',
'd',
' ',
'i',
'n',
' ',
'M',
'e',
'm',
'g',
'r',
'a',
'p',
'h',
'!',
0x00 /*Terminating zeros*/,
0x00,
};
EXPECT_EQ(input_stream.size(), 0U);
CheckOutput(output, expected_resp, sizeof(expected_resp));
EXPECT_EQ(session.state_, State::Error);
ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
ExecuteInit(input_stream, session, output, true);
ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)));
SCOPED_TRACE("Try to reset connection after ROUTE failed");
ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req)));
EXPECT_EQ(input_stream.size(), 0U);
CheckOutput(output, success_resp, sizeof(success_resp));
EXPECT_EQ(session.state_, State::Idle);
}
EXPECT_EQ(session.state_, State::Idle);
CheckSuccessMessage(output);
}
#else
{
SCOPED_TRACE("v4");
INIT_VARS;
ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
ExecuteInit(input_stream, session, output, true);
ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)));
static constexpr uint8_t expected_resp[] = {
0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/,
0x40,
0xb1 /*TinyStruct1*/,
0x7f /*Failure*/,
0xa2 /*TinyMap with 2 items*/,
0x84 /*TinyString with 4 chars*/,
'c',
'o',
'd',
'e',
0x82 /*TinyString with 2 chars*/,
'6',
'6',
0x87 /*TinyString with 7 chars*/,
'm',
'e',
's',
's',
'a',
'g',
'e',
0xd0 /*String*/,
0x2b /*With 43 chars*/,
'R',
'o',
'u',
't',
'e',
' ',
'm',
'e',
's',
's',
'a',
'g',
'e',
' ',
'i',
's',
' ',
'n',
'o',
't',
' ',
's',
'u',
'p',
'p',
'o',
'r',
't',
'e',
'd',
' ',
'i',
'n',
' ',
'M',
'e',
'm',
'g',
'r',
'a',
'p',
'h',
'!',
0x00 /*Terminating zeros*/,
0x00,
};
EXPECT_EQ(input_stream.size(), 0U);
CheckOutput(output, expected_resp, sizeof(expected_resp));
EXPECT_EQ(session.state_, State::Error);
SCOPED_TRACE("Try to reset connection after ROUTE failed");
ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req)));
EXPECT_EQ(input_stream.size(), 0U);
CheckOutput(output, success_resp, sizeof(success_resp));
EXPECT_EQ(session.state_, State::Idle);
}
#endif
}
TEST(BoltSession, Rollback) {

View File

@ -179,3 +179,35 @@ TEST_F(ClearingOldDiskDataTest, TestNumOfEntriesWithEdgeValueUpdate) {
ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 5);
}
// Checks that, after each of two consecutive commits, the timestamp loaded from the durable metadata exists and
// equals the storage's in-memory `timestamp_`, and that the RocksDB entry count grows by one per commit.
TEST_F(ClearingOldDiskDataTest, TestTimestampAfterCommit) {
  auto *rocksdb_handle = disk_storage->GetRocksDBStorage()->db_;
  ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(rocksdb_handle), 0);

  // First transaction: a single vertex carrying one label and one property.
  auto first_accessor = disk_storage->Access(ReplicationRole::MAIN);
  auto first_vertex = first_accessor->CreateVertex();
  auto first_label = first_accessor->NameToLabel("DiskLabel");
  auto first_property = first_accessor->NameToProperty("DiskProperty");
  ASSERT_TRUE(first_vertex.AddLabel(first_label).HasValue());
  ASSERT_TRUE(first_vertex.SetProperty(first_property, memgraph::storage::PropertyValue(10)).HasValue());
  ASSERT_FALSE(first_accessor->Commit().HasError());
  ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(rocksdb_handle), 1);

  auto timestamp_after_first_commit = disk_storage->GetDurableMetadata()->LoadTimestampIfExists();
  ASSERT_TRUE(timestamp_after_first_commit.has_value());
  ASSERT_EQ(disk_storage->timestamp_, timestamp_after_first_commit);

  // Second transaction: another vertex with a different label and property.
  auto second_accessor = disk_storage->Access(ReplicationRole::MAIN);
  auto second_vertex = second_accessor->CreateVertex();
  auto second_label = second_accessor->NameToLabel("DiskLabel2");
  auto second_property = second_accessor->NameToProperty("DiskProperty2");
  ASSERT_TRUE(second_vertex.AddLabel(second_label).HasValue());
  ASSERT_TRUE(second_vertex.SetProperty(second_property, memgraph::storage::PropertyValue(10)).HasValue());
  ASSERT_FALSE(second_accessor->Commit().HasError());
  ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(rocksdb_handle), 2);

  // The persisted timestamp must be reloaded: it is expected to track the second commit as well.
  auto timestamp_after_second_commit = disk_storage->GetDurableMetadata()->LoadTimestampIfExists();
  ASSERT_TRUE(timestamp_after_second_commit.has_value());
  ASSERT_EQ(disk_storage->timestamp_, timestamp_after_second_commit);
}

View File

@ -10,6 +10,7 @@
// licenses/APL.txt.
#include "nuraft/coordinator_cluster_state.hpp"
#include "io/network/endpoint.hpp"
#include "nuraft/coordinator_state_machine.hpp"
#include "replication_coordination_glue/role.hpp"
@ -21,11 +22,12 @@
#include "libnuraft/nuraft.hxx"
using memgraph::coordination::CoordinatorClientConfig;
using memgraph::coordination::CoordinatorClusterState;
using memgraph::coordination::CoordinatorStateMachine;
using memgraph::coordination::InstanceState;
using memgraph::coordination::CoordinatorToReplicaConfig;
using memgraph::coordination::RaftLogAction;
using memgraph::coordination::ReplicationInstanceState;
using memgraph::io::network::Endpoint;
using memgraph::replication_coordination_glue::ReplicationMode;
using memgraph::replication_coordination_glue::ReplicationRole;
using nuraft::buffer;
@ -42,20 +44,22 @@ class CoordinatorClusterStateTest : public ::testing::Test {
"MG_tests_unit_coordinator_cluster_state"};
};
TEST_F(CoordinatorClusterStateTest, InstanceStateSerialization) {
InstanceState instance_state{
CoordinatorClientConfig{"instance3",
"127.0.0.1",
10112,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
.ssl = std::nullopt},
TEST_F(CoordinatorClusterStateTest, ReplicationInstanceStateSerialization) {
ReplicationInstanceState instance_state{
CoordinatorToReplicaConfig{.instance_name = "instance3",
.mgt_server = Endpoint{"127.0.0.1", 10112},
.bolt_server = Endpoint{"127.0.0.1", 7687},
.replication_client_info = {.instance_name = "instance_name",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10001}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt},
ReplicationRole::MAIN};
nlohmann::json j = instance_state;
InstanceState deserialized_instance_state = j.get<InstanceState>();
ReplicationInstanceState deserialized_instance_state = j.get<ReplicationInstanceState>();
EXPECT_EQ(instance_state.config, deserialized_instance_state.config);
EXPECT_EQ(instance_state.status, deserialized_instance_state.status);
@ -65,13 +69,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
auto coordinator_cluster_state = memgraph::coordination::CoordinatorClusterState{};
{
CoordinatorClientConfig config{"instance1",
"127.0.0.1",
10111,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance1",
.mgt_server = Endpoint{"127.0.0.1", 10111},
.bolt_server = Endpoint{"127.0.0.1", 7687},
.replication_client_info = {.instance_name = "instance1",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10001}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -80,13 +87,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.DoAction(payload, action);
}
{
CoordinatorClientConfig config{"instance2",
"127.0.0.1",
10112,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10002},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance2",
.mgt_server = Endpoint{"127.0.0.1", 10112},
.bolt_server = Endpoint{"127.0.0.1", 7688},
.replication_client_info = {.instance_name = "instance2",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10002}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -95,13 +105,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.DoAction(payload, action);
}
{
CoordinatorClientConfig config{"instance3",
"127.0.0.1",
10113,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10003},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance3",
.mgt_server = Endpoint{"127.0.0.1", 10113},
.bolt_server = Endpoint{"127.0.0.1", 7689},
.replication_client_info = {.instance_name = "instance3",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10003}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -110,13 +123,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.DoAction(payload, action);
}
{
CoordinatorClientConfig config{"instance4",
"127.0.0.1",
10114,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10004},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance4",
.mgt_server = Endpoint{"127.0.0.1", 10114},
.bolt_server = Endpoint{"127.0.0.1", 7690},
.replication_client_info = {.instance_name = "instance4",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10004}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -125,13 +141,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.DoAction(payload, action);
}
{
CoordinatorClientConfig config{"instance5",
"127.0.0.1",
10115,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10005},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance5",
.mgt_server = Endpoint{"127.0.0.1", 10115},
.bolt_server = Endpoint{"127.0.0.1", 7691},
.replication_client_info = {.instance_name = "instance5",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10005}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -140,13 +159,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.DoAction(payload, action);
}
{
CoordinatorClientConfig config{"instance6",
"127.0.0.1",
10116,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10006},
auto config =
CoordinatorToReplicaConfig{.instance_name = "instance6",
.mgt_server = Endpoint{"127.0.0.1", 10116},
.bolt_server = Endpoint{"127.0.0.1", 7692},
.replication_client_info = {.instance_name = "instance6",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10006}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
@ -159,5 +181,6 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) {
coordinator_cluster_state.Serialize(data);
auto deserialized_coordinator_cluster_state = CoordinatorClusterState::Deserialize(*data);
ASSERT_EQ(coordinator_cluster_state.GetInstances(), deserialized_coordinator_cluster_state.GetInstances());
ASSERT_EQ(coordinator_cluster_state.GetReplicationInstances(),
deserialized_coordinator_cluster_state.GetReplicationInstances());
}

View File

@ -9,7 +9,8 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "io/network/endpoint.hpp"
#include "nuraft/coordinator_state_machine.hpp"
#include "nuraft/raft_log_action.hpp"
#include "utils/file.hpp"
@ -19,10 +20,11 @@
#include <gtest/gtest.h>
#include "json/json.hpp"
using memgraph::coordination::CoordinatorClientConfig;
using memgraph::coordination::CoordinatorStateMachine;
using memgraph::coordination::CoordinatorToReplicaConfig;
using memgraph::coordination::RaftLogAction;
using memgraph::coordination::ReplClientInfo;
using memgraph::coordination::ReplicationClientInfo;
using memgraph::io::network::Endpoint;
using memgraph::replication_coordination_glue::ReplicationMode;
using memgraph::utils::UUID;
@ -36,26 +38,29 @@ class RaftLogSerialization : public ::testing::Test {
};
TEST_F(RaftLogSerialization, ReplClientInfo) {
ReplClientInfo info{"instance_name", ReplicationMode::SYNC, "127.0.0.1", 10111};
ReplicationClientInfo info{.instance_name = "instance_name",
.replication_mode = ReplicationMode::SYNC,
.replication_server = Endpoint{"127.0.0.1", 10111}};
nlohmann::json j = info;
ReplClientInfo info2 = j.get<memgraph::coordination::ReplClientInfo>();
ReplicationClientInfo info2 = j.get<memgraph::coordination::ReplicationClientInfo>();
ASSERT_EQ(info, info2);
}
TEST_F(RaftLogSerialization, CoordinatorClientConfig) {
CoordinatorClientConfig config{"instance3",
"127.0.0.1",
10112,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
.ssl = std::nullopt};
TEST_F(RaftLogSerialization, CoordinatorToReplicaConfig) {
CoordinatorToReplicaConfig config{.instance_name = "instance3",
.mgt_server = Endpoint{"127.0.0.1", 10112},
.replication_client_info = {.instance_name = "instance_name",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10001}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
nlohmann::json j = config;
CoordinatorClientConfig config2 = j.get<memgraph::coordination::CoordinatorClientConfig>();
CoordinatorToReplicaConfig config2 = j.get<memgraph::coordination::CoordinatorToReplicaConfig>();
ASSERT_EQ(config, config2);
}
@ -96,8 +101,8 @@ TEST_F(RaftLogSerialization, RaftLogActionDemote) {
ASSERT_EQ(action, action2);
}
TEST_F(RaftLogSerialization, RaftLogActionUpdateUUID) {
auto action = RaftLogAction::UPDATE_UUID;
TEST_F(RaftLogSerialization, RaftLogActionUpdateUUIDForInstance) {
auto action = RaftLogAction::UPDATE_UUID_FOR_INSTANCE;
nlohmann::json j = action;
RaftLogAction action2 = j.get<memgraph::coordination::RaftLogAction>();
@ -106,19 +111,20 @@ TEST_F(RaftLogSerialization, RaftLogActionUpdateUUID) {
}
TEST_F(RaftLogSerialization, RegisterInstance) {
CoordinatorClientConfig config{"instance3",
"127.0.0.1",
10112,
std::chrono::seconds{1},
std::chrono::seconds{5},
std::chrono::seconds{10},
{"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001},
.ssl = std::nullopt};
CoordinatorToReplicaConfig config{.instance_name = "instance3",
.mgt_server = Endpoint{"127.0.0.1", 10112},
.replication_client_info = {.instance_name = "instance_name",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10001}},
.instance_health_check_frequency_sec = std::chrono::seconds{1},
.instance_down_timeout_sec = std::chrono::seconds{5},
.instance_get_uuid_frequency_sec = std::chrono::seconds{10},
.ssl = std::nullopt};
auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config);
auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer);
ASSERT_EQ(action, RaftLogAction::REGISTER_REPLICATION_INSTANCE);
ASSERT_EQ(config, std::get<CoordinatorClientConfig>(payload));
ASSERT_EQ(config, std::get<CoordinatorToReplicaConfig>(payload));
}
TEST_F(RaftLogSerialization, UnregisterInstance) {
@ -129,10 +135,14 @@ TEST_F(RaftLogSerialization, UnregisterInstance) {
}
TEST_F(RaftLogSerialization, SetInstanceAsMain) {
auto buffer = CoordinatorStateMachine::SerializeSetInstanceAsMain("instance3");
auto instance_uuid_update =
memgraph::coordination::InstanceUUIDUpdate{.instance_name = "instance3", .uuid = memgraph::utils::UUID{}};
auto buffer = CoordinatorStateMachine::SerializeSetInstanceAsMain(instance_uuid_update);
auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer);
ASSERT_EQ(action, RaftLogAction::SET_INSTANCE_AS_MAIN);
ASSERT_EQ("instance3", std::get<std::string>(payload));
ASSERT_EQ(instance_uuid_update.instance_name,
std::get<memgraph::coordination::InstanceUUIDUpdate>(payload).instance_name);
ASSERT_EQ(instance_uuid_update.uuid, std::get<memgraph::coordination::InstanceUUIDUpdate>(payload).uuid);
}
TEST_F(RaftLogSerialization, SetInstanceAsReplica) {
@ -142,10 +152,10 @@ TEST_F(RaftLogSerialization, SetInstanceAsReplica) {
ASSERT_EQ("instance3", std::get<std::string>(payload));
}
TEST_F(RaftLogSerialization, UpdateUUID) {
TEST_F(RaftLogSerialization, UpdateUUIDForNewMain) {
UUID uuid;
auto buffer = CoordinatorStateMachine::SerializeUpdateUUID(uuid);
auto buffer = CoordinatorStateMachine::SerializeUpdateUUIDForNewMain(uuid);
auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer);
ASSERT_EQ(action, RaftLogAction::UPDATE_UUID);
ASSERT_EQ(action, RaftLogAction::UPDATE_UUID_OF_NEW_MAIN);
ASSERT_EQ(uuid, std::get<UUID>(payload));
}

View File

@ -0,0 +1,176 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include "auth/auth.hpp"
#include "coordination/coordinator_instance.hpp"
#include "flags/run_time_configurable.hpp"
#include "interpreter_faker.hpp"
#include "io/network/endpoint.hpp"
#include "license/license.hpp"
#include "replication_handler/replication_handler.hpp"
#include "storage/v2/config.hpp"
#include "utils/file.hpp"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "json/json.hpp"
using memgraph::coordination::CoordinatorInstance;
using memgraph::coordination::CoordinatorToCoordinatorConfig;
using memgraph::coordination::CoordinatorToReplicaConfig;
using memgraph::coordination::RaftState;
using memgraph::coordination::ReplicationClientInfo;
using memgraph::io::network::Endpoint;
using memgraph::replication::ReplicationHandler;
using memgraph::replication_coordination_glue::ReplicationMode;
using memgraph::storage::Config;
// class MockCoordinatorInstance : CoordinatorInstance {
// auto AddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> void override {}
// };
class RoutingTableTest : public ::testing::Test {
protected:
std::filesystem::path main_data_directory{std::filesystem::temp_directory_path() /
"MG_tests_unit_coordinator_cluster_state"};
std::filesystem::path repl1_data_directory{std::filesystem::temp_directory_path() /
"MG_test_unit_storage_v2_replication_repl"};
std::filesystem::path repl2_data_directory{std::filesystem::temp_directory_path() /
"MG_test_unit_storage_v2_replication_repl2"};
void SetUp() override { Clear(); }
void TearDown() override { Clear(); }
Config main_conf = [&] {
Config config{
.durability =
{
.snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
},
.salient.items = {.properties_on_edges = true},
};
UpdatePaths(config, main_data_directory);
return config;
}();
Config repl1_conf = [&] {
Config config{
.durability =
{
.snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
},
.salient.items = {.properties_on_edges = true},
};
UpdatePaths(config, repl1_data_directory);
return config;
}();
Config repl2_conf = [&] {
Config config{
.durability =
{
.snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL,
},
.salient.items = {.properties_on_edges = true},
};
UpdatePaths(config, repl2_data_directory);
return config;
}();
const std::string local_host = ("127.0.0.1");
const std::array<uint16_t, 2> ports{10000, 20000};
const std::array<std::string, 2> replicas = {"REPLICA1", "REPLICA2"};
private:
void Clear() {
if (std::filesystem::exists(main_data_directory)) std::filesystem::remove_all(main_data_directory);
if (std::filesystem::exists(repl1_data_directory)) std::filesystem::remove_all(repl1_data_directory);
if (std::filesystem::exists(repl2_data_directory)) std::filesystem::remove_all(repl2_data_directory);
}
};
// Bundles the objects these tests need to stand up a Memgraph instance in-process: auth, system state,
// replication state, the DBMS handler, an access handle to the database returned by `dbms.Get()`, a reference to
// that database, and a replication handler.
//
// NOTE: members are initialized in DECLARATION order (see the member list at the bottom), not in the order written
// in the constructor's initializer list. `system_` has no explicit initializer and is default-constructed after
// `auth` and before `repl_state`, so it already exists when `repl_handler` receives it in enterprise builds.
struct MinMemgraph {
// Constructs all members from the storage config `conf`.
// - `auth` is rooted at `<conf.durability.storage_directory>/auth` with a default auth config.
// - Under MG_ENTERPRISE, `dbms` additionally receives `auth` and `true`, and `repl_handler` additionally receives
//   `system_` and `auth`; otherwise only the two leading arguments are passed.
MinMemgraph(const memgraph::storage::Config &conf)
: auth{conf.durability.storage_directory / "auth", memgraph::auth::Auth::Config{/* default */}},
repl_state{ReplicationStateRootPath(conf)},
dbms{conf, repl_state
#ifdef MG_ENTERPRISE
,
auth, true
#endif
},
db_acc{dbms.Get()},
db{*db_acc.get()},
repl_handler(repl_state, dbms
#ifdef MG_ENTERPRISE
,
system_, auth
#endif
) {
}
// Declaration order below is load-bearing: it is the initialization order. Do not reorder without re-checking
// the dependencies in the constructor (e.g. `dbms` uses `repl_state`/`auth`, `db` dereferences `db_acc`).
memgraph::auth::SynchedAuth auth;
memgraph::system::System system_;
memgraph::replication::ReplicationState repl_state;
memgraph::dbms::DbmsHandler dbms;
memgraph::dbms::DatabaseAccess db_acc;
memgraph::dbms::Database &db;
ReplicationHandler repl_handler;
};
;
// A freshly constructed coordinator asked for its routing table with a single "address" entry is expected to
// report exactly one row: that same address, tagged with the "ROUTE" role.
TEST_F(RoutingTableTest, GetSingleRouterRoutingTable) {
  CoordinatorInstance coordinator;
  std::map<std::string, std::string> routing_context{{"address", "localhost:7688"}};

  auto table = coordinator.GetRoutingTable(routing_context);
  ASSERT_EQ(table.size(), 1);

  // Copy the only row out of the table and check its address list and its role.
  auto const route_row = table[0];
  auto const expected_addresses = std::vector<std::string>{"localhost:7688"};
  ASSERT_EQ(route_row.first, expected_addresses);
  ASSERT_EQ(route_row.second, "ROUTE");
}
// Appends a "register replication instance" log entry to a RaftState built with no-op callbacks and then queries
// all coordinators.
// NOTE(review): the routing-table assertions below are commented out, so this test currently only checks that the
// calls complete; it asserts nothing about the result.
TEST_F(RoutingTableTest, GetMixedRoutingTable) {
  auto raft_state = RaftState::MakeRaftState([] {}, [] {});
  // Only consumed by the disabled assertions at the end of the test.
  std::map<std::string, std::string> routing_context{{"address", "localhost:7690"}};

  raft_state.AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig{
      .instance_name = "instance2",
      .mgt_server = Endpoint{"127.0.0.1", 10011},
      .bolt_server = Endpoint{"127.0.0.1", 7687},
      .replication_client_info = ReplicationClientInfo{.instance_name = "instance2",
                                                       .replication_mode = ReplicationMode::ASYNC,
                                                       .replication_server = Endpoint{"127.0.0.1", 10001}}});
  raft_state.GetAllCoordinators();

  // auto routing_table = raft_state.GetRoutingTable(routing_context);
  // ASSERT_EQ(routing_table.size(), 1);
  // auto const routers = routing_table[0];
  // ASSERT_EQ(routers.second, "ROUTE");
}
// TEST_F(RoutingTableTest, GetMultipleRoutersRoutingTable) {
//
// CoordinatorInstance instance1;
// instance1.AddCoordinatorInstance(CoordinatorToCoordinatorConfig{.coordinator_server_id = 1,
// .bolt_server = Endpoint{"127.0.0.1", 7689},
// .coordinator_server = Endpoint{"127.0.0.1",
// 10111}});
//
// auto routing = std::map<std::string, std::string>{{"address", "localhost:7688"}};
// auto routing_table = instance1.GetRoutingTable(routing);
//
// ASSERT_EQ(routing_table.size(), 1);
//
// auto const routers = routing_table[0];
// ASSERT_EQ(routers.second, "ROUTE");
// ASSERT_EQ(routers.first.size(), 2);
// auto const expected_routers = std::vector<std::string>{"localhost:7689", "localhost:7688"};
// ASSERT_EQ(routers.first, expected_routers);
// }

View File

@ -11,8 +11,9 @@
#include <gtest/gtest.h>
#include "coordination/coordinator_config.hpp"
#include "coordination/coordinator_communication_config.hpp"
#include "coordination/coordinator_slk.hpp"
#include "io/network/endpoint.hpp"
#include "replication/config.hpp"
#include "replication_coordination_glue/mode.hpp"
#include "slk_common.hpp"
@ -20,6 +21,8 @@
#include "storage/v2/replication/slk.hpp"
#include "storage/v2/temporal.hpp"
using memgraph::io::network::Endpoint;
TEST(SlkAdvanced, PropertyValueList) {
std::vector<memgraph::storage::PropertyValue> original{
memgraph::storage::PropertyValue("hello world!"),
@ -119,24 +122,19 @@ TEST(SlkAdvanced, PropertyValueComplex) {
}
TEST(SlkAdvanced, ReplicationClientConfigs) {
using ReplicationClientInfo = memgraph::coordination::CoordinatorClientConfig::ReplicationClientInfo;
using ReplicationClientInfo = memgraph::coordination::ReplicationClientInfo;
using ReplicationClientInfoVec = std::vector<ReplicationClientInfo>;
using ReplicationMode = memgraph::replication_coordination_glue::ReplicationMode;
ReplicationClientInfoVec original{ReplicationClientInfo{.instance_name = "replica1",
.replication_mode = ReplicationMode::SYNC,
.replication_ip_address = "127.0.0.1",
.replication_port = 10000},
.replication_server = Endpoint{"127.0.0.1", 10000}},
ReplicationClientInfo{.instance_name = "replica2",
.replication_mode = ReplicationMode::ASYNC,
.replication_ip_address = "127.0.1.1",
.replication_port = 10010},
ReplicationClientInfo{
.instance_name = "replica3",
.replication_mode = ReplicationMode::ASYNC,
.replication_ip_address = "127.1.1.1",
.replication_port = 1110,
}};
.replication_server = Endpoint{"127.0.0.1", 10010}},
ReplicationClientInfo{.instance_name = "replica3",
.replication_mode = ReplicationMode::ASYNC,
.replication_server = Endpoint{"127.0.0.1", 10011}}};
memgraph::slk::Loopback loopback;
auto builder = loopback.GetBuilder();