Only leader performing callbacks
This commit is contained in:
parent
cf80687d1d
commit
6e758d3b5a
src/coordination
tests/e2e/high_availability_experimental
@ -252,20 +252,28 @@ auto CoordinatorData::SetInstanceToMain(std::string instance_name) -> SetInstanc
|
|||||||
|
|
||||||
auto CoordinatorData::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus {
|
auto CoordinatorData::RegisterInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus {
|
||||||
auto lock = std::lock_guard{coord_data_lock_};
|
auto lock = std::lock_guard{coord_data_lock_};
|
||||||
if (std::ranges::any_of(repl_instances_, [&config](ReplicationInstance const &instance) {
|
|
||||||
return instance.InstanceName() == config.instance_name;
|
auto const name_matches = [&config](ReplicationInstance const &instance) {
|
||||||
})) {
|
return instance.InstanceName() == config.instance_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, name_matches)) {
|
||||||
return RegisterInstanceCoordinatorStatus::NAME_EXISTS;
|
return RegisterInstanceCoordinatorStatus::NAME_EXISTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std::ranges::any_of(repl_instances_, [&config](ReplicationInstance const &instance) {
|
auto const socket_address_matches = [&config](ReplicationInstance const &instance) {
|
||||||
return instance.SocketAddress() == config.SocketAddress();
|
return instance.SocketAddress() == config.SocketAddress();
|
||||||
})) {
|
};
|
||||||
|
|
||||||
|
if (std::ranges::any_of(repl_instances_, socket_address_matches)) {
|
||||||
return RegisterInstanceCoordinatorStatus::ENDPOINT_EXISTS;
|
return RegisterInstanceCoordinatorStatus::ENDPOINT_EXISTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
repl_instances_.emplace_back(this, std::move(config), replica_succ_cb_, replica_fail_cb_);
|
auto *repl_instance = &repl_instances_.emplace_back(this, std::move(config), replica_succ_cb_, replica_fail_cb_);
|
||||||
|
if (self_.IsLeader()) {
|
||||||
|
repl_instance->StartFrequentCheck();
|
||||||
|
}
|
||||||
return RegisterInstanceCoordinatorStatus::SUCCESS;
|
return RegisterInstanceCoordinatorStatus::SUCCESS;
|
||||||
|
|
||||||
} catch (CoordinatorRegisterInstanceException const &) {
|
} catch (CoordinatorRegisterInstanceException const &) {
|
||||||
|
@ -35,6 +35,8 @@ CoordinatorInstance::CoordinatorInstance()
|
|||||||
state_machine_ = cs_new<CoordinatorStateMachine>();
|
state_machine_ = cs_new<CoordinatorStateMachine>();
|
||||||
logger_ = nullptr;
|
logger_ = nullptr;
|
||||||
|
|
||||||
|
// TODO: (andi) Maybe params file
|
||||||
|
|
||||||
// ASIO options
|
// ASIO options
|
||||||
asio_service::options asio_opts;
|
asio_service::options asio_opts;
|
||||||
asio_opts.thread_pool_size_ = 1; // TODO: (andi) Improve this
|
asio_opts.thread_pool_size_ = 1; // TODO: (andi) Improve this
|
||||||
@ -94,5 +96,7 @@ auto CoordinatorInstance::GetAllCoordinators() const -> std::vector<ptr<srv_conf
|
|||||||
return all_srv_configs;
|
return all_srv_configs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto CoordinatorInstance::IsLeader() const -> bool { return raft_server_->is_leader(); }
|
||||||
|
|
||||||
} // namespace memgraph::coordination
|
} // namespace memgraph::coordination
|
||||||
#endif
|
#endif
|
||||||
|
@ -38,9 +38,12 @@ class CoordinatorInstance {
|
|||||||
|
|
||||||
auto InstanceName() const -> std::string;
|
auto InstanceName() const -> std::string;
|
||||||
auto RaftSocketAddress() const -> std::string;
|
auto RaftSocketAddress() const -> std::string;
|
||||||
|
|
||||||
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void;
|
auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void;
|
||||||
auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>;
|
auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>;
|
||||||
|
|
||||||
|
auto IsLeader() const -> bool;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ptr<state_machine> state_machine_;
|
ptr<state_machine> state_machine_;
|
||||||
ptr<state_mgr> state_manager_;
|
ptr<state_mgr> state_manager_;
|
||||||
|
@ -51,6 +51,7 @@ class ReplicationInstance {
|
|||||||
HealthCheckCallback main_fail_cb) -> bool;
|
HealthCheckCallback main_fail_cb) -> bool;
|
||||||
auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool;
|
auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool;
|
||||||
|
|
||||||
|
auto StartFrequentCheck() -> void;
|
||||||
auto PauseFrequentCheck() -> void;
|
auto PauseFrequentCheck() -> void;
|
||||||
auto ResumeFrequentCheck() -> void;
|
auto ResumeFrequentCheck() -> void;
|
||||||
|
|
||||||
|
@ -25,7 +25,6 @@ ReplicationInstance::ReplicationInstance(CoordinatorData *data, CoordinatorClien
|
|||||||
if (!client_.DemoteToReplica()) {
|
if (!client_.DemoteToReplica()) {
|
||||||
throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica", client_.InstanceName());
|
throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica", client_.InstanceName());
|
||||||
}
|
}
|
||||||
client_.StartFrequentCheck();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ReplicationInstance::OnSuccessPing() -> void {
|
auto ReplicationInstance::OnSuccessPing() -> void {
|
||||||
@ -75,6 +74,7 @@ auto ReplicationInstance::DemoteToReplica(HealthCheckCallback replica_succ_cb, H
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto ReplicationInstance::StartFrequentCheck() -> void { client_.StartFrequentCheck(); }
|
||||||
auto ReplicationInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); }
|
auto ReplicationInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); }
|
||||||
auto ReplicationInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); }
|
auto ReplicationInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); }
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
find_package(gflags REQUIRED)
|
find_package(gflags REQUIRED)
|
||||||
|
|
||||||
copy_e2e_python_files(ha_experimental coordinator.py)
|
copy_e2e_python_files(ha_experimental coordinator.py)
|
||||||
copy_e2e_python_files(ha_experimental automatic_failover.py)
|
copy_e2e_python_files(ha_experimental single_coordinator.py)
|
||||||
copy_e2e_python_files(ha_experimental distributed_coordinators.py)
|
copy_e2e_python_files(ha_experimental distributed_coordinators.py)
|
||||||
copy_e2e_python_files(ha_experimental manual_setting_replicas.py)
|
copy_e2e_python_files(ha_experimental manual_setting_replicas.py)
|
||||||
copy_e2e_python_files(ha_experimental not_replicate_from_old_main.py)
|
copy_e2e_python_files(ha_experimental not_replicate_from_old_main.py)
|
||||||
|
@ -29,10 +29,49 @@ interactive_mg_runner.MEMGRAPH_BINARY = os.path.normpath(os.path.join(interactiv
|
|||||||
TEMP_DIR = tempfile.TemporaryDirectory().name
|
TEMP_DIR = tempfile.TemporaryDirectory().name
|
||||||
|
|
||||||
MEMGRAPH_INSTANCES_DESCRIPTION = {
|
MEMGRAPH_INSTANCES_DESCRIPTION = {
|
||||||
"coordinator1": {
|
"instance_1": {
|
||||||
"args": [
|
"args": [
|
||||||
"--bolt-port",
|
"--bolt-port",
|
||||||
"7687",
|
"7687",
|
||||||
|
"--log-level",
|
||||||
|
"TRACE",
|
||||||
|
"--coordinator-server-port",
|
||||||
|
"10011",
|
||||||
|
],
|
||||||
|
"log_file": "instance_1.log",
|
||||||
|
"data_directory": f"{TEMP_DIR}/instance_1",
|
||||||
|
"setup_queries": [],
|
||||||
|
},
|
||||||
|
"instance_2": {
|
||||||
|
"args": [
|
||||||
|
"--bolt-port",
|
||||||
|
"7688",
|
||||||
|
"--log-level",
|
||||||
|
"TRACE",
|
||||||
|
"--coordinator-server-port",
|
||||||
|
"10012",
|
||||||
|
],
|
||||||
|
"log_file": "instance_2.log",
|
||||||
|
"data_directory": f"{TEMP_DIR}/instance_2",
|
||||||
|
"setup_queries": [],
|
||||||
|
},
|
||||||
|
"instance_3": {
|
||||||
|
"args": [
|
||||||
|
"--bolt-port",
|
||||||
|
"7689",
|
||||||
|
"--log-level",
|
||||||
|
"TRACE",
|
||||||
|
"--coordinator-server-port",
|
||||||
|
"10013",
|
||||||
|
],
|
||||||
|
"log_file": "instance_3.log",
|
||||||
|
"data_directory": f"{TEMP_DIR}/instance_3",
|
||||||
|
"setup_queries": [],
|
||||||
|
},
|
||||||
|
"coordinator_1": {
|
||||||
|
"args": [
|
||||||
|
"--bolt-port",
|
||||||
|
"7690",
|
||||||
"--log-level=TRACE",
|
"--log-level=TRACE",
|
||||||
"--raft-server-id=1",
|
"--raft-server-id=1",
|
||||||
"--raft-server-port=10111",
|
"--raft-server-port=10111",
|
||||||
@ -40,10 +79,10 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
|
|||||||
"log_file": "coordinator1.log",
|
"log_file": "coordinator1.log",
|
||||||
"setup_queries": [],
|
"setup_queries": [],
|
||||||
},
|
},
|
||||||
"coordinator2": {
|
"coordinator_2": {
|
||||||
"args": [
|
"args": [
|
||||||
"--bolt-port",
|
"--bolt-port",
|
||||||
"7688",
|
"7691",
|
||||||
"--log-level=TRACE",
|
"--log-level=TRACE",
|
||||||
"--raft-server-id=2",
|
"--raft-server-id=2",
|
||||||
"--raft-server-port=10112",
|
"--raft-server-port=10112",
|
||||||
@ -51,10 +90,10 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
|
|||||||
"log_file": "coordinator2.log",
|
"log_file": "coordinator2.log",
|
||||||
"setup_queries": [],
|
"setup_queries": [],
|
||||||
},
|
},
|
||||||
"coordinator3": {
|
"coordinator_3": {
|
||||||
"args": [
|
"args": [
|
||||||
"--bolt-port",
|
"--bolt-port",
|
||||||
"7689",
|
"7692",
|
||||||
"--log-level=TRACE",
|
"--log-level=TRACE",
|
||||||
"--raft-server-id=3",
|
"--raft-server-id=3",
|
||||||
"--raft-server-port=10113",
|
"--raft-server-port=10113",
|
||||||
@ -63,6 +102,10 @@ MEMGRAPH_INSTANCES_DESCRIPTION = {
|
|||||||
"setup_queries": [
|
"setup_queries": [
|
||||||
"ADD COORDINATOR 1 ON '127.0.0.1:10111'",
|
"ADD COORDINATOR 1 ON '127.0.0.1:10111'",
|
||||||
"ADD COORDINATOR 2 ON '127.0.0.1:10112'",
|
"ADD COORDINATOR 2 ON '127.0.0.1:10112'",
|
||||||
|
"REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';",
|
||||||
|
"REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';",
|
||||||
|
"REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';",
|
||||||
|
"SET INSTANCE instance_3 TO MAIN",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -72,73 +115,83 @@ def test_coordinators_communication():
|
|||||||
safe_execute(shutil.rmtree, TEMP_DIR)
|
safe_execute(shutil.rmtree, TEMP_DIR)
|
||||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||||
|
|
||||||
coordinator3_cursor = connect(host="localhost", port=7689).cursor()
|
coordinator3_cursor = connect(host="localhost", port=7692).cursor()
|
||||||
|
|
||||||
def check_coordinator3():
|
def check_coordinator3():
|
||||||
return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
|
return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES")))
|
||||||
|
|
||||||
expected_cluster = [
|
expected_cluster_coord3 = [
|
||||||
("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"),
|
("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"),
|
||||||
("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"),
|
("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"),
|
||||||
("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"),
|
("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"),
|
||||||
|
("instance_1", "", "127.0.0.1:10011", True, "replica"),
|
||||||
|
("instance_2", "", "127.0.0.1:10012", True, "replica"),
|
||||||
|
("instance_3", "", "127.0.0.1:10013", True, "main"),
|
||||||
]
|
]
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator3)
|
mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3)
|
||||||
|
|
||||||
coordinator1_cursor = connect(host="localhost", port=7687).cursor()
|
coordinator1_cursor = connect(host="localhost", port=7690).cursor()
|
||||||
|
|
||||||
def check_coordinator1():
|
def check_coordinator1():
|
||||||
return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
|
return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator1)
|
# TODO: (andi) This should be solved eventually
|
||||||
|
expected_cluster_not_shared = [
|
||||||
|
("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"),
|
||||||
|
("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"),
|
||||||
|
("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"),
|
||||||
|
]
|
||||||
|
|
||||||
coordinator2_cursor = connect(host="localhost", port=7688).cursor()
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1)
|
||||||
|
|
||||||
|
coordinator2_cursor = connect(host="localhost", port=7691).cursor()
|
||||||
|
|
||||||
def check_coordinator2():
|
def check_coordinator2():
|
||||||
return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
|
return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator2)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2)
|
||||||
|
|
||||||
|
|
||||||
def test_coordinators_communication_with_restarts():
|
def test_coordinators_communication_with_restarts():
|
||||||
safe_execute(shutil.rmtree, TEMP_DIR)
|
safe_execute(shutil.rmtree, TEMP_DIR)
|
||||||
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION)
|
||||||
|
|
||||||
expected_cluster = [
|
expected_cluster_not_shared = [
|
||||||
("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"),
|
("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"),
|
||||||
("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"),
|
("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"),
|
||||||
("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"),
|
("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"),
|
||||||
]
|
]
|
||||||
|
|
||||||
coordinator1_cursor = connect(host="localhost", port=7687).cursor()
|
coordinator1_cursor = connect(host="localhost", port=7690).cursor()
|
||||||
|
|
||||||
def check_coordinator1():
|
def check_coordinator1():
|
||||||
return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
|
return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES")))
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator1)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1)
|
||||||
|
|
||||||
coordinator2_cursor = connect(host="localhost", port=7688).cursor()
|
coordinator2_cursor = connect(host="localhost", port=7691).cursor()
|
||||||
|
|
||||||
def check_coordinator2():
|
def check_coordinator2():
|
||||||
return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
|
return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES")))
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator2)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2)
|
||||||
|
|
||||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator1")
|
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1")
|
||||||
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator1")
|
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1")
|
||||||
coordinator1_cursor = connect(host="localhost", port=7687).cursor()
|
coordinator1_cursor = connect(host="localhost", port=7690).cursor()
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator1)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1)
|
||||||
|
|
||||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator1")
|
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1")
|
||||||
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator2")
|
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_2")
|
||||||
|
|
||||||
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator1")
|
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1")
|
||||||
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator2")
|
interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_2")
|
||||||
coordinator1_cursor = connect(host="localhost", port=7687).cursor()
|
coordinator1_cursor = connect(host="localhost", port=7690).cursor()
|
||||||
coordinator2_cursor = connect(host="localhost", port=7688).cursor()
|
coordinator2_cursor = connect(host="localhost", port=7691).cursor()
|
||||||
|
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator1)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1)
|
||||||
mg_sleep_and_assert(expected_cluster, check_coordinator2)
|
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -28,9 +28,9 @@ workloads:
|
|||||||
args: ["high_availability_experimental/coordinator.py"]
|
args: ["high_availability_experimental/coordinator.py"]
|
||||||
<<: *ha_cluster
|
<<: *ha_cluster
|
||||||
|
|
||||||
- name: "Automatic failover"
|
- name: "Single coordinator"
|
||||||
binary: "tests/e2e/pytest_runner.sh"
|
binary: "tests/e2e/pytest_runner.sh"
|
||||||
args: ["high_availability_experimental/automatic_failover.py"]
|
args: ["high_availability_experimental/single_coordinator.py"]
|
||||||
|
|
||||||
- name: "Disabled manual setting of replication cluster"
|
- name: "Disabled manual setting of replication cluster"
|
||||||
binary: "tests/e2e/pytest_runner.sh"
|
binary: "tests/e2e/pytest_runner.sh"
|
||||||
|
Loading…
Reference in New Issue
Block a user