Add information to SHOW REPLICAS to express how up-to-date a replica is (#412)

* Add test
* Add implementation and adapted test
* Update workloads.yaml to have a timeout > 0
* Update tests (failing due to merging of "add replica state")
parent 7a2bbd4bb3 · commit 65a7ba01da
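After this change SHOW REPLICAS returns two additional columns, current_timestamp_of_replica and number_of_timestamp_behind_master, next to the existing name, socket_address, sync_mode, timeout and state fields. A minimal sketch of reading them from a client, assuming the pymgclient driver and a main instance on 127.0.0.1:7687 (driver, host and port are illustrative, not part of this commit):

# Sketch only: query SHOW REPLICAS and read the two new freshness columns.
# Assumes the pymgclient driver; host/port are illustrative.
import mgclient

connection = mgclient.connect(host="127.0.0.1", port=7687)
connection.autocommit = True
cursor = connection.cursor()

cursor.execute("SHOW REPLICAS;")
columns = [column.name for column in cursor.description]
for row in cursor.fetchall():
    replica = dict(zip(columns, row))
    print(
        replica["name"],
        replica["current_timestamp_of_replica"],
        replica["number_of_timestamp_behind_master"],
    )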
@@ -231,6 +231,11 @@ class ReplQueryHandler final : public query::ReplicationQueryHandler {
       if (repl_info.timeout) {
         replica.timeout = *repl_info.timeout;
       }
+
+      replica.current_timestamp_of_replica = repl_info.timestamp_info.current_timestamp_of_replica;
+      replica.current_number_of_timestamp_behind_master =
+          repl_info.timestamp_info.current_number_of_timestamp_behind_master;
+
       switch (repl_info.state) {
         case storage::replication::ReplicaState::READY:
           replica.state = ReplicationQuery::ReplicaState::READY;
@@ -245,6 +250,7 @@ class ReplQueryHandler final : public query::ReplicationQueryHandler {
           replica.state = ReplicationQuery::ReplicaState::INVALID;
           break;
       }
+
       return replica;
     };
 
@@ -512,7 +518,13 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
     }
 
     case ReplicationQuery::Action::SHOW_REPLICAS: {
-      callback.header = {"name", "socket_address", "sync_mode", "timeout", "state"};
+      callback.header = {"name",
+                         "socket_address",
+                         "sync_mode",
+                         "timeout",
+                         "current_timestamp_of_replica",
+                         "number_of_timestamp_behind_master",
+                         "state"};
       callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, replica_nfields = callback.header.size()] {
         const auto &replicas = handler.ShowReplicas();
         auto typed_replicas = std::vector<std::vector<TypedValue>>{};
@@ -539,6 +551,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
             typed_replica.emplace_back(TypedValue());
           }
 
+          typed_replica.emplace_back(TypedValue(static_cast<int64_t>(replica.current_timestamp_of_replica)));
+          typed_replica.emplace_back(
+              TypedValue(static_cast<int64_t>(replica.current_number_of_timestamp_behind_master)));
+
           switch (replica.state) {
             case ReplicationQuery::ReplicaState::READY:
               typed_replica.emplace_back(TypedValue("ready"));
@@ -127,6 +127,8 @@ class ReplicationQueryHandler {
     std::string socket_address;
     ReplicationQuery::SyncMode sync_mode;
     std::optional<double> timeout;
+    uint64_t current_timestamp_of_replica;
+    uint64_t current_number_of_timestamp_behind_master;
     ReplicationQuery::ReplicaState state;
   };
 
@@ -537,6 +537,33 @@ std::vector<Storage::ReplicationClient::RecoveryStep> Storage::ReplicationClient
   return recovery_steps;
 }
 
+Storage::TimestampInfo Storage::ReplicationClient::GetTimestampInfo() {
+  Storage::TimestampInfo info;
+  info.current_timestamp_of_replica = 0;
+  info.current_number_of_timestamp_behind_master = 0;
+
+  try {
+    auto stream{rpc_client_->Stream<replication::TimestampRpc>()};
+    const auto response = stream.AwaitResponse();
+    const auto is_success = response.success;
+    if (!is_success) {
+      replica_state_.store(replication::ReplicaState::INVALID);
+      HandleRpcFailure();
+    }
+    auto main_time_stamp = storage_->last_commit_timestamp_.load();
+    info.current_timestamp_of_replica = response.current_commit_timestamp;
+    info.current_number_of_timestamp_behind_master = response.current_commit_timestamp - main_time_stamp;
+  } catch (const rpc::RpcFailedException &) {
+    {
+      std::unique_lock client_guard(client_lock_);
+      replica_state_.store(replication::ReplicaState::INVALID);
+    }
+    HandleRpcFailure();  // mutex already unlocked, if the new enqueued task dispatches immediately it probably won't block
+  }
+
+  return info;
+}
+
 ////// TimeoutDispatcher //////
 void Storage::ReplicationClient::TimeoutDispatcher::WaitForTaskToFinish() {
   // Wait for the previous timeout task to finish
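GetTimestampInfo() above derives both new columns on the main instance: the replica returns its last commit timestamp over the new TimestampRpc, and the lag is that value minus main's own last_commit_timestamp_. A small sketch of that arithmetic, with illustrative names and the values the e2e test below expects (both sides at timestamp 4 after one CREATE, so the lag is 0):

# Sketch of the arithmetic in GetTimestampInfo(): the replica reports its last
# commit timestamp, and the lag is that value minus main's last commit timestamp.
# Function and variable names are illustrative, not part of the commit.
def timestamp_info(replica_commit_timestamp: int, main_commit_timestamp: int) -> dict:
    return {
        "current_timestamp_of_replica": replica_commit_timestamp,
        "current_number_of_timestamp_behind_master": replica_commit_timestamp - main_commit_timestamp,
    }

# In the e2e test below, main and the replicas all report 4 after one CREATE,
# so SHOW REPLICAS shows current_timestamp_of_replica=4 and a lag of 0.
assert timestamp_info(4, 4) == {
    "current_timestamp_of_replica": 4,
    "current_number_of_timestamp_behind_master": 0,
}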
@@ -124,6 +124,8 @@ class Storage::ReplicationClient {
 
   const auto &Endpoint() const { return rpc_client_->Endpoint(); }
 
+  Storage::TimestampInfo GetTimestampInfo();
+
  private:
   void FinalizeTransactionReplicationInternal();
 
@@ -80,6 +80,10 @@ Storage::ReplicationServer::ReplicationServer(Storage *storage, io::network::End
     spdlog::debug("Received CurrentWalRpc");
     this->CurrentWalHandler(req_reader, res_builder);
   });
+  rpc_server_->Register<replication::TimestampRpc>([this](auto *req_reader, auto *res_builder) {
+    spdlog::debug("Received TimestampRpc");
+    this->TimestampHandler(req_reader, res_builder);
+  });
   rpc_server_->Start();
 }
 
@@ -284,6 +288,14 @@ void Storage::ReplicationServer::LoadWal(replication::Decoder *decoder) {
   }
 }
 
+void Storage::ReplicationServer::TimestampHandler(slk::Reader *req_reader, slk::Builder *res_builder) {
+  replication::TimestampReq req;
+  slk::Load(&req, req_reader);
+
+  replication::TimestampRes res{true, storage_->last_commit_timestamp_.load()};
+  slk::Save(res, res_builder);
+}
+
 Storage::ReplicationServer::~ReplicationServer() {
   if (rpc_server_) {
     rpc_server_->Shutdown();
@@ -34,6 +34,7 @@ class Storage::ReplicationServer {
   void SnapshotHandler(slk::Reader *req_reader, slk::Builder *res_builder);
   void WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder);
   void CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder);
+  void TimestampHandler(slk::Reader *req_reader, slk::Builder *res_builder);
 
   void LoadWal(replication::Decoder *decoder);
   uint64_t ReadAndApplyDelta(durability::BaseDecoder *decoder);
@@ -67,6 +67,12 @@ cpp<#
     ((success :bool)
      (current-commit-timestamp :uint64_t))))
 
+(lcp:define-rpc timestamp
+  (:request ())
+  (:response
+    ((success :bool)
+     (current-commit-timestamp :uint64_t))))
+
 (lcp:pop-namespace) ;; replication
 (lcp:pop-namespace) ;; storage
 (lcp:pop-namespace) ;; memgraph
@@ -1954,7 +1954,8 @@ std::vector<Storage::ReplicaInfo> Storage::ReplicasInfo() {
     replica_info.reserve(clients.size());
     std::transform(clients.begin(), clients.end(), std::back_inserter(replica_info),
                    [](const auto &client) -> ReplicaInfo {
-                     return {client->Name(), client->Mode(), client->Timeout(), client->Endpoint(), client->State()};
+                     return {client->Name(), client->Mode(), client->Timeout(),
+                             client->Endpoint(), client->State(), client->GetTimestampInfo()};
                    });
     return replica_info;
   });
@@ -425,12 +425,18 @@ class Storage final {
 
   ReplicationRole GetReplicationRole() const;
 
+  struct TimestampInfo {
+    uint64_t current_timestamp_of_replica;
+    uint64_t current_number_of_timestamp_behind_master;
+  };
+
   struct ReplicaInfo {
     std::string name;
     replication::ReplicationMode mode;
     std::optional<double> timeout;
     io::network::Endpoint endpoint;
     replication::ReplicaState state;
+    TimestampInfo timestamp_info;
   };
 
   std::vector<ReplicaInfo> ReplicasInfo();
@@ -12,6 +12,7 @@
 import sys
 
 import pytest
+import time
 
 from common import execute_and_fetch_all
 
@@ -27,5 +28,84 @@ def test_show_replication_role(port, role, connection):
     assert data[0][0] == role
 
 
+def test_show_replicas(connection):
+    cursor = connection(7687, "main").cursor()
+    actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
+
+    expected_column_names = {
+        "name",
+        "socket_address",
+        "sync_mode",
+        "timeout",
+        "current_timestamp_of_replica",
+        "number_of_timestamp_behind_master",
+        "state",
+    }
+    actual_column_names = {x.name for x in cursor.description}
+    assert expected_column_names == actual_column_names
+
+    expected_data = {
+        ("replica_1", "127.0.0.1:10001", "sync", 2.0, 0, 0, "ready"),
+        ("replica_2", "127.0.0.1:10002", "sync", 1.0, 0, 0, "ready"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 0, 0, "ready"),
+    }
+    assert expected_data == actual_data
+
+
+def test_show_replicas_while_inserting_data(connection):
+    # Goal is to check the timestamp are correctly computed from the information we get from replicas.
+    # 0/ Check original state of replicas.
+    # 1/ Add some data on main.
+    # 2/ Check state of replicas.
+    # 3/ Execute a read only query.
+    # 4/ Check that the states have not changed.
+
+    # 0/
+    cursor = connection(7687, "main").cursor()
+    actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
+
+    expected_column_names = {
+        "name",
+        "socket_address",
+        "sync_mode",
+        "timeout",
+        "current_timestamp_of_replica",
+        "number_of_timestamp_behind_master",
+        "state",
+    }
+    actual_column_names = {x.name for x in cursor.description}
+    assert expected_column_names == actual_column_names
+
+    expected_data = {
+        ("replica_1", "127.0.0.1:10001", "sync", 2.0, 0, 0, "ready"),
+        ("replica_2", "127.0.0.1:10002", "sync", 1.0, 0, 0, "ready"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 0, 0, "ready"),
+    }
+    assert expected_data == actual_data
+
+    # 1/
+    execute_and_fetch_all(cursor, "CREATE (n1:Number {name: 'forty_two', value:42});")
+    time.sleep(1)
+
+    # 2/
+    expected_data = {
+        ("replica_1", "127.0.0.1:10001", "sync", 2.0, 4, 0, "ready"),
+        ("replica_2", "127.0.0.1:10002", "sync", 1.0, 4, 0, "ready"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 4, 0, "ready"),
+    }
+    actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
+    print("actual_data=" + str(actual_data))
+    print("expected_data=" + str(expected_data))
+    assert expected_data == actual_data
+
+    # 3/
+    res = execute_and_fetch_all(cursor, "MATCH (node) return node;")
+    assert 1 == len(res)
+
+    # 4/
+    actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
+    assert expected_data == actual_data
+
+
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__, "-rA"]))
@@ -78,16 +78,24 @@ def test_show_replicas(connection):
 
     # 1/
     actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
-    EXPECTED_COLUMN_NAMES = {"name", "socket_address", "sync_mode", "timeout", "state"}
+    EXPECTED_COLUMN_NAMES = {
+        "name",
+        "socket_address",
+        "sync_mode",
+        "timeout",
+        "current_timestamp_of_replica",
+        "number_of_timestamp_behind_master",
+        "state",
+    }
 
     actual_column_names = {x.name for x in cursor.description}
     assert EXPECTED_COLUMN_NAMES == actual_column_names
 
     expected_data = {
-        ("replica_1", "127.0.0.1:10001", "sync", 0, "ready"),
-        ("replica_2", "127.0.0.1:10002", "sync", 1.0, "ready"),
-        ("replica_3", "127.0.0.1:10003", "async", None, "ready"),
-        ("replica_4", "127.0.0.1:10004", "async", None, "ready"),
+        ("replica_1", "127.0.0.1:10001", "sync", 0, 0, 0, "ready"),
+        ("replica_2", "127.0.0.1:10002", "sync", 1.0, 0, 0, "ready"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 0, 0, "ready"),
+        ("replica_4", "127.0.0.1:10004", "async", None, 0, 0, "ready"),
     }
     assert expected_data == actual_data
 
@@ -95,9 +103,9 @@ def test_show_replicas(connection):
     execute_and_fetch_all(cursor, "DROP REPLICA replica_2")
     actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
     expected_data = {
-        ("replica_1", "127.0.0.1:10001", "sync", 0, "ready"),
-        ("replica_3", "127.0.0.1:10003", "async", None, "ready"),
-        ("replica_4", "127.0.0.1:10004", "async", None, "ready"),
+        ("replica_1", "127.0.0.1:10001", "sync", 0, 0, 0, "ready"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 0, 0, "ready"),
+        ("replica_4", "127.0.0.1:10004", "async", None, 0, 0, "ready"),
     }
     assert expected_data == actual_data
 
@@ -110,9 +118,9 @@ def test_show_replicas(connection):
     time.sleep(2)
    actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;"))
     expected_data = {
-        ("replica_1", "127.0.0.1:10001", "sync", 0, "invalid"),
-        ("replica_3", "127.0.0.1:10003", "async", None, "invalid"),
-        ("replica_4", "127.0.0.1:10004", "async", None, "invalid"),
+        ("replica_1", "127.0.0.1:10001", "sync", 0, 0, 0, "invalid"),
+        ("replica_3", "127.0.0.1:10003", "async", None, 0, 0, "invalid"),
+        ("replica_4", "127.0.0.1:10004", "async", None, 0, 0, "invalid"),
     }
     assert expected_data == actual_data
 
@@ -69,7 +69,7 @@ workloads:
       args: ["--bolt-port", "7687", "--log-level=TRACE"]
       log_file: "replication-e2e-main.log"
       setup_queries: [
-        "REGISTER REPLICA replica_1 SYNC WITH TIMEOUT 0 TO '127.0.0.1:10001'",
+        "REGISTER REPLICA replica_1 SYNC WITH TIMEOUT 2 TO '127.0.0.1:10001'",
         "REGISTER REPLICA replica_2 SYNC WITH TIMEOUT 1 TO '127.0.0.1:10002'",
         "REGISTER REPLICA replica_3 ASYNC TO '127.0.0.1:10003'"
       ]