Allow the RsmClient to store multiple in-flight requests. Update the ShardRequestManager to use the new request tokens and refactor some bug-prone aspects of it

This commit is contained in:
Tyler Neely 2022-11-15 17:52:38 +00:00
parent 5c0e41ed44
commit 631d18465b
2 changed files with 188 additions and 154 deletions

View File

@ -14,6 +14,7 @@
#include <iostream>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "io/address.hpp"
@ -36,6 +37,21 @@ using memgraph::io::rsm::WriteRequest;
using memgraph::io::rsm::WriteResponse;
using memgraph::utils::BasicResult;
class AsyncRequestToken {
size_t id_;
public:
AsyncRequestToken(size_t id) : id_(id) {}
size_t GetId() const { return id_; }
};
template <typename RequestT, typename ResponseT>
struct AsyncRequest {
Time start_time;
RequestT request;
ResponseFuture<ResponseT> future;
};
template <typename IoImpl, typename WriteRequestT, typename WriteResponseT, typename ReadRequestT,
typename ReadResponseT>
class RsmClient {
@ -47,13 +63,10 @@ class RsmClient {
/// State for single async read/write operations. In the future this could become a map
/// of async operations that can be accessed via an ID etc...
std::optional<Time> async_read_before_;
std::optional<ResponseFuture<ReadResponse<ReadResponseT>>> async_read_;
ReadRequestT current_read_request_;
std::unordered_map<size_t, AsyncRequest<ReadRequestT, ReadResponse<ReadResponseT>>> async_reads_;
std::unordered_map<size_t, AsyncRequest<WriteRequestT, WriteResponse<WriteResponseT>>> async_writes_;
std::optional<Time> async_write_before_;
std::optional<ResponseFuture<WriteResponse<WriteResponseT>>> async_write_;
WriteRequestT current_write_request_;
size_t async_token_generator_ = 0;
void SelectRandomLeader() {
std::uniform_int_distribution<size_t> addr_distrib(0, (server_addrs_.size() - 1));
@ -156,42 +169,56 @@ class RsmClient {
}
/// AsyncRead methods
void SendAsyncReadRequest(const ReadRequestT &req) {
MG_ASSERT(!async_read_);
AsyncRequestToken SendAsyncReadRequest(const ReadRequestT &req) {
size_t token = async_token_generator_++;
ReadRequest<ReadRequestT> read_req = {.operation = req};
if (!async_read_before_) {
async_read_before_ = io_.Now();
}
current_read_request_ = std::move(req);
async_read_ = io_.template Request<ReadRequest<ReadRequestT>, ReadResponse<ReadResponseT>>(leader_, read_req);
AsyncRequest<ReadRequestT, ReadResponse<ReadResponseT>> async_request{
.start_time = io_.Now(),
.request = std::move(req),
.future = io_.template Request<ReadRequest<ReadRequestT>, ReadResponse<ReadResponseT>>(leader_, read_req),
};
async_reads_.emplace(token, std::move(async_request));
return AsyncRequestToken(token);
}
std::optional<BasicResult<TimedOut, ReadResponseT>> PollAsyncReadRequest() {
MG_ASSERT(async_read_);
void ResendAsyncReadRequest(AsyncRequestToken &token) {
auto &async_request = async_reads_.at(token.GetId());
if (!async_read_->IsReady()) {
ReadRequest<ReadRequestT> read_req = {.operation = async_request.request};
async_request.future =
io_.template Request<ReadRequest<ReadRequestT>, ReadResponse<ReadResponseT>>(leader_, read_req);
}
std::optional<BasicResult<TimedOut, ReadResponseT>> PollAsyncReadRequest(AsyncRequestToken &token) {
auto &async_request = async_reads_.at(token.GetId());
if (!async_request.future.IsReady()) {
return std::nullopt;
}
return AwaitAsyncReadRequest();
}
std::optional<BasicResult<TimedOut, ReadResponseT>> AwaitAsyncReadRequest() {
ResponseResult<ReadResponse<ReadResponseT>> get_response_result = std::move(*async_read_).Wait();
async_read_.reset();
std::optional<BasicResult<TimedOut, ReadResponseT>> AwaitAsyncReadRequest(AsyncRequestToken &token) {
auto &async_request = async_reads_.at(token.GetId());
ResponseResult<ReadResponse<ReadResponseT>> get_response_result = std::move(async_request.future).Wait();
const Duration overall_timeout = io_.GetDefaultTimeout();
const bool past_time_out = io_.Now() < *async_read_before_ + overall_timeout;
const bool past_time_out = io_.Now() > async_request.start_time + overall_timeout;
const bool result_has_error = get_response_result.HasError();
if (result_has_error && past_time_out) {
// TODO static assert the exact type of error.
spdlog::debug("client timed out while trying to communicate with leader server {}", leader_.ToString());
async_read_before_ = std::nullopt;
async_reads_.erase(token.GetId());
return TimedOut{};
}
if (!result_has_error) {
ResponseEnvelope<ReadResponse<ReadResponseT>> &&get_response_envelope = std::move(get_response_result.GetValue());
ReadResponse<ReadResponseT> &&read_get_response = std::move(get_response_envelope.message);
@ -199,54 +226,69 @@ class RsmClient {
PossiblyRedirectLeader(read_get_response);
if (read_get_response.success) {
async_read_before_ = std::nullopt;
async_reads_.erase(token.GetId());
return std::move(read_get_response.read_return);
}
SendAsyncReadRequest(current_read_request_);
} else if (result_has_error) {
} else {
SelectRandomLeader();
SendAsyncReadRequest(current_read_request_);
}
ResendAsyncReadRequest(token);
return std::nullopt;
}
/// AsyncWrite methods
void SendAsyncWriteRequest(const WriteRequestT &req) {
MG_ASSERT(!async_write_);
AsyncRequestToken SendAsyncWriteRequest(const WriteRequestT &req) {
size_t token = async_token_generator_++;
WriteRequest<WriteRequestT> write_req = {.operation = req};
if (!async_write_before_) {
async_write_before_ = io_.Now();
}
current_write_request_ = std::move(req);
async_write_ = io_.template Request<WriteRequest<WriteRequestT>, WriteResponse<WriteResponseT>>(leader_, write_req);
AsyncRequest<WriteRequestT, WriteResponse<WriteResponseT>> async_request{
.start_time = io_.Now(),
.request = std::move(req),
.future = io_.template Request<WriteRequest<WriteRequestT>, WriteResponse<WriteResponseT>>(leader_, write_req),
};
async_writes_.emplace(token, std::move(async_request));
return AsyncRequestToken(token);
}
std::optional<BasicResult<TimedOut, WriteResponseT>> PollAsyncWriteRequest() {
MG_ASSERT(async_write_);
void ResendAsyncWriteRequest(AsyncRequestToken &token) {
auto &async_request = async_writes_.at(token.GetId());
if (!async_write_->IsReady()) {
WriteRequest<WriteRequestT> write_req = {.operation = async_request.request};
async_request.future =
io_.template Request<WriteRequest<WriteRequestT>, WriteResponse<WriteResponseT>>(leader_, write_req);
}
std::optional<BasicResult<TimedOut, WriteResponseT>> PollAsyncWriteRequest(AsyncRequestToken &token) {
auto &async_request = async_writes_.at(token.GetId());
if (!async_request.future.IsReady()) {
return std::nullopt;
}
return AwaitAsyncWriteRequest();
}
std::optional<BasicResult<TimedOut, WriteResponseT>> AwaitAsyncWriteRequest() {
ResponseResult<WriteResponse<WriteResponseT>> get_response_result = std::move(*async_write_).Wait();
async_write_.reset();
std::optional<BasicResult<TimedOut, WriteResponseT>> AwaitAsyncWriteRequest(AsyncRequestToken &token) {
auto &async_request = async_writes_.at(token.GetId());
ResponseResult<WriteResponse<WriteResponseT>> get_response_result = std::move(async_request.future).Wait();
const Duration overall_timeout = io_.GetDefaultTimeout();
const bool past_time_out = io_.Now() < *async_write_before_ + overall_timeout;
const bool past_time_out = io_.Now() > async_request.start_time + overall_timeout;
const bool result_has_error = get_response_result.HasError();
if (result_has_error && past_time_out) {
// TODO static assert the exact type of error.
spdlog::debug("client timed out while trying to communicate with leader server {}", leader_.ToString());
async_write_before_ = std::nullopt;
async_writes_.erase(token.GetId());
return TimedOut{};
}
if (!result_has_error) {
ResponseEnvelope<WriteResponse<WriteResponseT>> &&get_response_envelope =
std::move(get_response_result.GetValue());
@ -255,14 +297,15 @@ class RsmClient {
PossiblyRedirectLeader(write_get_response);
if (write_get_response.success) {
async_write_before_ = std::nullopt;
async_writes_.erase(token.GetId());
return std::move(write_get_response.write_return);
}
SendAsyncWriteRequest(current_write_request_);
} else if (result_has_error) {
} else {
SelectRandomLeader();
SendAsyncWriteRequest(current_write_request_);
}
ResendAsyncWriteRequest(token);
return std::nullopt;
}
};

View File

@ -72,6 +72,13 @@ class RsmStorageClientManager {
std::map<Shard, TStorageClient> cli_cache_;
};
template <typename TRequest>
struct ShardRequestState {
memgraph::coordinator::Shard shard;
TRequest request;
std::optional<io::rsm::AsyncRequestToken> async_request_token;
};
template <typename TRequest>
struct ExecutionState {
using CompoundKey = memgraph::io::rsm::ShardRsmKey;
@ -91,14 +98,13 @@ struct ExecutionState {
// it pulled all the requested data from the given Shard, it will be removed from the Vector. When the Vector becomes
// empty, it means that all of the requests have completed succefully.
// TODO(gvolfing)
// Maybe make this into a more complex object to be able to keep track of paginated resutls. E.g. instead of a vector
// Maybe make this into a more complex object to be able to keep track of paginated results. E.g. instead of a vector
// of Shards make it into a std::vector<std::pair<Shard, PaginatedResultType>> (probably a struct instead of a pair)
// where PaginatedResultType is an enum signaling the progress on the given request. This way we can easily check if
// a partial response on a shard(if there is one) is finished and we can send off the request for the next batch.
std::vector<Shard> shard_cache;
// 1-1 mapping with `shard_cache`.
// A vector that tracks request metadata for each shard (For example, next_id for a ScanAll on Shard A)
std::vector<TRequest> requests;
std::vector<ShardRequestState<TRequest>> requests;
State state = INITIALIZING;
};
@ -259,8 +265,8 @@ class ShardRequestManager : public ShardRequestManagerInterface {
};
std::map<Shard, PaginatedResponseState> paginated_response_tracker;
for (const auto &shard : state.shard_cache) {
paginated_response_tracker.insert(std::make_pair(shard, PaginatedResponseState::Pending));
for (const auto &request : state.requests) {
paginated_response_tracker.insert(std::make_pair(request.shard, PaginatedResponseState::Pending));
}
do {
@ -278,15 +284,14 @@ class ShardRequestManager : public ShardRequestManagerInterface {
MG_ASSERT(!new_vertices.empty());
MaybeInitializeExecutionState(state, new_vertices);
std::vector<CreateVerticesResponse> responses;
auto &shard_cache_ref = state.shard_cache;
// 1. Send the requests.
SendAllRequests(state, shard_cache_ref);
SendAllRequests(state);
// 2. Block untill all the futures are exhausted
do {
AwaitOnResponses(state, responses);
} while (!state.shard_cache.empty());
} while (!state.requests.empty());
MaybeCompleteState(state);
// TODO(kostasrim) Before returning start prefetching the batch (this shall be done once we get MgFuture as return
@ -299,11 +304,9 @@ class ShardRequestManager : public ShardRequestManagerInterface {
MG_ASSERT(!new_edges.empty());
MaybeInitializeExecutionState(state, new_edges);
std::vector<CreateExpandResponse> responses;
auto &shard_cache_ref = state.shard_cache;
size_t id{0};
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end(); ++id) {
auto &storage_client = GetStorageClientForShard(*shard_it);
WriteRequests req = state.requests[id];
for (auto &request : state.requests) {
auto &storage_client = GetStorageClientForShard(request.shard);
WriteRequests req = request.request;
auto write_response_result = storage_client.SendWriteRequest(std::move(req));
if (write_response_result.HasError()) {
throw std::runtime_error("CreateVertices request timedout");
@ -315,9 +318,9 @@ class ShardRequestManager : public ShardRequestManagerInterface {
throw std::runtime_error("CreateExpand request did not succeed");
}
responses.push_back(mapped_response);
shard_it = shard_cache_ref.erase(shard_it);
}
// We are done with this state
state.requests.clear();
MaybeCompleteState(state);
return responses;
}
@ -330,15 +333,14 @@ class ShardRequestManager : public ShardRequestManagerInterface {
// must be fetched again with an ExpandOne(Edges.dst)
MaybeInitializeExecutionState(state, std::move(request));
std::vector<ExpandOneResponse> responses;
auto &shard_cache_ref = state.shard_cache;
// 1. Send the requests.
SendAllRequests(state, shard_cache_ref);
SendAllRequests(state);
// 2. Block untill all the futures are exhausted
do {
AwaitOnResponses(state, responses);
} while (!state.shard_cache.empty());
} while (!state.requests.empty());
std::vector<ExpandOneResultRow> result_rows;
const auto total_row_count = std::accumulate(
responses.begin(), responses.end(), 0,
@ -402,13 +404,17 @@ class ShardRequestManager : public ShardRequestManagerInterface {
if (!per_shard_request_table.contains(shard)) {
CreateVerticesRequest create_v_rqst{.transaction_id = transaction_id_};
per_shard_request_table.insert(std::pair(shard, std::move(create_v_rqst)));
state.shard_cache.push_back(shard);
}
per_shard_request_table[shard].new_vertices.push_back(std::move(new_vertex));
}
for (auto &[shard, rqst] : per_shard_request_table) {
state.requests.push_back(std::move(rqst));
for (auto &[shard, request] : per_shard_request_table) {
ShardRequestState<CreateVerticesRequest> shard_request_state{
.shard = shard,
.request = request,
.async_request_token = std::nullopt,
};
state.requests.emplace_back(std::move(shard_request_state));
}
state.state = ExecutionState<CreateVerticesRequest>::EXECUTING;
}
@ -445,8 +451,12 @@ class ShardRequestManager : public ShardRequestManagerInterface {
}
for (auto &[shard, request] : per_shard_request_table) {
state.shard_cache.push_back(shard);
state.requests.push_back(std::move(request));
ShardRequestState<CreateExpandRequest> shard_request_state{
.shard = shard,
.request = request,
.async_request_token = std::nullopt,
};
state.requests.emplace_back(std::move(shard_request_state));
}
state.state = ExecutionState<CreateExpandRequest>::EXECUTING;
}
@ -470,11 +480,18 @@ class ShardRequestManager : public ShardRequestManagerInterface {
for (auto &shards : multi_shards) {
for (auto &[key, shard] : shards) {
MG_ASSERT(!shard.empty());
state.shard_cache.push_back(std::move(shard));
ScanVerticesRequest rqst;
rqst.transaction_id = transaction_id_;
rqst.start_id.second = storage::conversions::ConvertValueVector(key);
state.requests.push_back(std::move(rqst));
ScanVerticesRequest request;
request.transaction_id = transaction_id_;
request.start_id.second = storage::conversions::ConvertValueVector(key);
ShardRequestState<ScanVerticesRequest> shard_request_state{
.shard = shard,
.request = std::move(request),
.async_request_token = std::nullopt,
};
state.requests.emplace_back(std::move(shard_request_state));
}
}
state.state = ExecutionState<ScanVerticesRequest>::EXECUTING;
@ -497,13 +514,18 @@ class ShardRequestManager : public ShardRequestManagerInterface {
shards_map_.GetShardForKey(vertex.first.id, storage::conversions::ConvertPropertyVector(vertex.second));
if (!per_shard_request_table.contains(shard)) {
per_shard_request_table.insert(std::pair(shard, top_level_rqst_template));
state.shard_cache.push_back(shard);
}
per_shard_request_table[shard].src_vertices.push_back(vertex);
}
for (auto &[shard, rqst] : per_shard_request_table) {
state.requests.push_back(std::move(rqst));
for (auto &[shard, request] : per_shard_request_table) {
ShardRequestState<ExpandOneRequest> shard_request_state{
.shard = shard,
.request = request,
.async_request_token = std::nullopt,
};
state.requests.emplace_back(std::move(shard_request_state));
}
state.state = ExecutionState<ExpandOneRequest>::EXECUTING;
}
@ -533,65 +555,46 @@ class ShardRequestManager : public ShardRequestManagerInterface {
}
void SendAllRequests(ExecutionState<ScanVerticesRequest> &state) {
int64_t shard_idx = 0;
for (const auto &request : state.requests) {
const auto &current_shard = state.shard_cache[shard_idx];
for (auto &request : state.requests) {
const auto &current_shard = request.shard;
auto &storage_client = GetStorageClientForShard(current_shard);
ReadRequests req = request;
storage_client.SendAsyncReadRequest(request);
ReadRequests req = request.request;
++shard_idx;
request.async_request_token = storage_client.SendAsyncReadRequest(request.request);
}
}
void SendAllRequests(ExecutionState<CreateVerticesRequest> &state,
std::vector<memgraph::coordinator::Shard> &shard_cache_ref) {
size_t id = 0;
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end(); ++shard_it) {
// This is fine because all new_vertices of each request end up on the same shard
const auto labels = state.requests[id].new_vertices[0].label_ids;
auto req_deep_copy = state.requests[id];
void SendAllRequests(ExecutionState<CreateVerticesRequest> &state) {
for (auto &request : state.requests) {
auto req_deep_copy = request.request;
for (auto &new_vertex : req_deep_copy.new_vertices) {
new_vertex.label_ids.erase(new_vertex.label_ids.begin());
}
auto &storage_client = GetStorageClientForShard(*shard_it);
auto &storage_client = GetStorageClientForShard(request.shard);
WriteRequests req = req_deep_copy;
storage_client.SendAsyncWriteRequest(req);
++id;
request.async_request_token = storage_client.SendAsyncWriteRequest(req);
}
}
void SendAllRequests(ExecutionState<ExpandOneRequest> &state,
std::vector<memgraph::coordinator::Shard> &shard_cache_ref) {
size_t id = 0;
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end(); ++shard_it) {
auto &storage_client = GetStorageClientForShard(*shard_it);
ReadRequests req = state.requests[id];
storage_client.SendAsyncReadRequest(req);
++id;
void SendAllRequests(ExecutionState<ExpandOneRequest> &state) {
for (auto &request : state.requests) {
auto &storage_client = GetStorageClientForShard(request.shard);
ReadRequests req = request.request;
request.async_request_token = storage_client.SendAsyncReadRequest(req);
}
}
void AwaitOnResponses(ExecutionState<CreateVerticesRequest> &state, std::vector<CreateVerticesResponse> &responses) {
auto &shard_cache_ref = state.shard_cache;
int64_t request_idx = 0;
for (auto &request : state.requests) {
auto &storage_client = GetStorageClientForShard(request.shard);
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end();) {
// This is fine because all new_vertices of each request end up on the same shard
const auto labels = state.requests[request_idx].new_vertices[0].label_ids;
auto &storage_client = GetStorageClientForShard(*shard_it);
auto poll_result = storage_client.AwaitAsyncWriteRequest();
if (!poll_result) {
++shard_it;
++request_idx;
continue;
auto poll_result = storage_client.AwaitAsyncWriteRequest(request.async_request_token.value());
while (!poll_result) {
poll_result = storage_client.AwaitAsyncWriteRequest(request.async_request_token.value());
}
if (poll_result->HasError()) {
@ -605,26 +608,17 @@ class ShardRequestManager : public ShardRequestManagerInterface {
throw std::runtime_error("CreateVertices request did not succeed");
}
responses.push_back(response);
shard_it = shard_cache_ref.erase(shard_it);
// Needed to maintain the 1-1 mapping between the ShardCache and the requests.
auto it = state.requests.begin() + request_idx;
state.requests.erase(it);
}
state.requests.clear();
}
void AwaitOnResponses(ExecutionState<ExpandOneRequest> &state, std::vector<ExpandOneResponse> &responses) {
auto &shard_cache_ref = state.shard_cache;
int64_t request_idx = 0;
for (auto &request : state.requests) {
auto &storage_client = GetStorageClientForShard(request.shard);
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end();) {
auto &storage_client = GetStorageClientForShard(*shard_it);
auto poll_result = storage_client.PollAsyncReadRequest();
if (!poll_result) {
++shard_it;
++request_idx;
continue;
auto poll_result = storage_client.AwaitAsyncReadRequest(request.async_request_token.value());
while (!poll_result) {
poll_result = storage_client.AwaitAsyncReadRequest(request.async_request_token.value());
}
if (poll_result->HasError()) {
@ -642,36 +636,28 @@ class ShardRequestManager : public ShardRequestManagerInterface {
}
responses.push_back(std::move(response));
shard_it = shard_cache_ref.erase(shard_it);
// Needed to maintain the 1-1 mapping between the ShardCache and the requests.
auto it = state.requests.begin() + request_idx;
state.requests.erase(it);
}
state.requests.clear();
}
void AwaitOnPaginatedRequests(ExecutionState<ScanVerticesRequest> &state,
std::vector<ScanVerticesResponse> &responses,
std::map<Shard, PaginatedResponseState> &paginated_response_tracker) {
auto &shard_cache_ref = state.shard_cache;
std::vector<int> to_erase{};
// Find the first request that is not holding a paginated response.
int64_t request_idx = 0;
for (auto shard_it = shard_cache_ref.begin(); shard_it != shard_cache_ref.end();) {
if (paginated_response_tracker.at(*shard_it) != PaginatedResponseState::Pending) {
++shard_it;
++request_idx;
for (int i = 0; i < state.requests.size(); i++) {
auto &request = state.requests[i];
// only operate on paginated requests
if (paginated_response_tracker.at(request.shard) != PaginatedResponseState::Pending) {
continue;
}
auto &storage_client = GetStorageClientForShard(*shard_it);
auto &storage_client = GetStorageClientForShard(request.shard);
auto await_result = storage_client.AwaitAsyncReadRequest();
if (!await_result) {
// Redirection has occured.
++shard_it;
++request_idx;
continue;
// drive it to completion
auto await_result = storage_client.AwaitAsyncReadRequest(request.async_request_token.value());
while (!await_result) {
await_result = storage_client.AwaitAsyncReadRequest(request.async_request_token.value());
}
if (await_result->HasError()) {
@ -685,17 +671,22 @@ class ShardRequestManager : public ShardRequestManagerInterface {
}
if (!response.next_start_id) {
paginated_response_tracker.erase((*shard_it));
shard_cache_ref.erase(shard_it);
// Needed to maintain the 1-1 mapping between the ShardCache and the requests.
auto it = state.requests.begin() + request_idx;
state.requests.erase(it);
paginated_response_tracker.erase(request.shard);
to_erase.push_back(i);
} else {
state.requests[request_idx].start_id.second = response.next_start_id->second;
paginated_response_tracker[*shard_it] = PaginatedResponseState::PartiallyFinished;
request.request.start_id.second = response.next_start_id->second;
paginated_response_tracker[request.shard] = PaginatedResponseState::PartiallyFinished;
}
responses.push_back(std::move(response));
// reverse sort to_erase to remove requests in reverse order for correctness
std::sort(to_erase.begin(), to_erase.end(), std::greater<>());
auto requests_begin = state.requests.begin();
for (int i : to_erase) {
state.requests.erase(requests_begin + i);
}
}
}