Compare commits

...

1 Commits

Author SHA1 Message Date
Matej Ferencevic
d4315b3242 Prepare release v0.11.0 2018-07-04 23:01:56 +02:00
215 changed files with 144 additions and 24135 deletions

View File

@ -137,9 +137,6 @@ if (USE_READLINE)
endif()
endif()
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost 1.62 REQUIRED COMPONENTS iostreams serialization)
# OpenSSL
find_package(OpenSSL REQUIRED)
@ -193,7 +190,6 @@ option(EXPERIMENTAL "Build experimental binaries" OFF)
option(CUSTOMERS "Build customer binaries" OFF)
option(TEST_COVERAGE "Generate coverage reports from running memgraph" OFF)
option(TOOLS "Build tools binaries" ON)
option(MG_COMMUNITY "Build Memgraph Community Edition" OFF)
option(ASAN "Build with Address Sanitizer. To get a reasonable performance option should be used only in Release or RelWithDebInfo build " OFF)
option(TSAN "Build with Thread Sanitizer. To get a reasonable performance option should be used only in Release or RelWithDebInfo build " OFF)
option(UBSAN "Build with Undefined Behaviour Sanitizer" OFF)
@ -208,10 +204,6 @@ if (TEST_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
if (MG_COMMUNITY)
add_definitions(-DMG_COMMUNITY)
endif()
if (ASAN)
# Enable Address Sanitizer and get nicer stack traces in error messages.
# NOTE: AddressSanitizer uses llvm-symbolizer binary from the Clang

View File

@ -3,7 +3,6 @@
- build_debug/memgraph
- build_release/memgraph
- build_release/tools/src/mg_import_csv
- build_release/tools/src/mg_statsd
- config
filename: binaries.tar.gz

View File

@ -33,13 +33,8 @@
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1200 make -j$THREADS memgraph tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot
# Generate distributed card fraud dataset.
cd ../tests/distributed/card_fraud
./generate_dataset.sh
cd ../../..
# Checkout to parent commit and initialize.
cd ../parent
cd ../../parent
git checkout HEAD~1
TIMEOUT=1200 ./init
@ -88,7 +83,3 @@
cd ../../docs/user_technical
# TODO (mferencevic): uncomment this once couscous is replaced with pandoc
#./bundle_community
# Generate distributed card fraud dataset.
cd ../../tests/distributed/card_fraud
./generate_dataset.sh

View File

@ -13,36 +13,12 @@ set(memgraph_src_files
communication/helpers.cpp
communication/init.cpp
communication/bolt/v1/decoder/decoded_value.cpp
communication/rpc/client.cpp
communication/rpc/protocol.cpp
communication/rpc/server.cpp
data_structures/concurrent/skiplist_gc.cpp
database/config.cpp
database/counters.cpp
database/graph_db.cpp
database/graph_db_accessor.cpp
database/state_delta.cpp
distributed/bfs_rpc_clients.cpp
distributed/bfs_subcursor.cpp
distributed/cluster_discovery_master.cpp
distributed/cluster_discovery_worker.cpp
distributed/coordination.cpp
distributed/coordination_master.cpp
distributed/coordination_worker.cpp
distributed/durability_rpc_clients.cpp
distributed/durability_rpc_server.cpp
distributed/index_rpc_server.cpp
distributed/plan_consumer.cpp
distributed/plan_dispatcher.cpp
distributed/cache.cpp
distributed/data_manager.cpp
distributed/data_rpc_clients.cpp
distributed/data_rpc_server.cpp
distributed/produce_rpc_server.cpp
distributed/pull_rpc_clients.cpp
distributed/serialization.cpp
distributed/updates_rpc_clients.cpp
distributed/updates_rpc_server.cpp
durability/paths.cpp
durability/recovery.cpp
durability/snapshooter.cpp
@ -61,41 +37,16 @@ set(memgraph_src_files
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/typed_value.cpp
stats/metrics.cpp
stats/stats.cpp
storage/concurrent_id_mapper_master.cpp
storage/concurrent_id_mapper_worker.cpp
storage/dynamic_graph_partitioner/dgp.cpp
storage/dynamic_graph_partitioner/vertex_migrator.cpp
storage/edge_accessor.cpp
storage/locking/record_lock.cpp
storage/property_value.cpp
storage/property_value_store.cpp
storage/record_accessor.cpp
storage/vertex_accessor.cpp
transactions/engine_master.cpp
transactions/engine_single_node.cpp
transactions/engine_worker.cpp
transactions/snapshot.cpp
)
# -----------------------------------------------------------------------------
# Registers one Cap'n Proto schema for C++ code generation. Each schema gets
# its own custom command, so editing one schema does not regenerate the others.
#
# Argument:
#   capnp_src_file - schema path, relative to CMAKE_CURRENT_SOURCE_DIR.
#
# Side effects (in the CALLER's scope, via PARENT_SCOPE):
#   generated_capnp_files - gains the generated .c++ and .h paths
#   memgraph_src_files    - gains the generated .c++ path
#
# NOTE: the generated files are written next to the schema in the source tree.
function(add_capnp capnp_src_file)
  set(cpp_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.c++)
  set(h_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.h)
  # VERBATIM keeps argument escaping identical across generators/platforms.
  add_custom_command(OUTPUT ${cpp_file} ${h_file}
    COMMAND ${CAPNP_EXE} compile -o${CAPNP_CXX_EXE} ${capnp_src_file} -I ${CMAKE_CURRENT_SOURCE_DIR}
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file} capnproto-proj
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    VERBATIM)
  # Update the caller's generated_capnp_files
  set(generated_capnp_files ${generated_capnp_files} ${cpp_file} ${h_file} PARENT_SCOPE)
  # Update the caller's memgraph_src_files
  set(memgraph_src_files ${memgraph_src_files} ${cpp_file} PARENT_SCOPE)
endfunction()
# Lisp C++ Preprocessing
set(lcp_exe ${CMAKE_SOURCE_DIR}/tools/lcp)
@ -135,67 +86,19 @@ function(add_lcp lcp_file)
set(generated_lcp_files ${generated_lcp_files} ${h_file} ${cpp_file} ${capnp_file} PARENT_SCOPE)
endfunction(add_lcp)
add_lcp(database/counters_rpc_messages.lcp CAPNP_SCHEMA @0x95a2c3ea3871e945)
add_capnp(database/counters_rpc_messages.capnp)
add_lcp(database/state_delta.lcp CAPNP_SCHEMA @0xdea01657b3563887)
add_capnp(database/state_delta.capnp)
add_lcp(distributed/bfs_rpc_messages.lcp CAPNP_SCHEMA @0x8e508640b09b6d2a)
add_capnp(distributed/bfs_rpc_messages.capnp)
add_lcp(distributed/coordination_rpc_messages.lcp CAPNP_SCHEMA @0x93df0c4703cf98fb)
add_capnp(distributed/coordination_rpc_messages.capnp)
add_lcp(distributed/data_rpc_messages.lcp CAPNP_SCHEMA @0xc1c8a341ba37aaf5)
add_capnp(distributed/data_rpc_messages.capnp)
add_lcp(distributed/durability_rpc_messages.lcp CAPNP_SCHEMA @0xf5e53bc271e2163d)
add_capnp(distributed/durability_rpc_messages.capnp)
add_lcp(distributed/index_rpc_messages.lcp CAPNP_SCHEMA @0xa8aab46862945bd6)
add_capnp(distributed/index_rpc_messages.capnp)
add_lcp(distributed/plan_rpc_messages.lcp CAPNP_SCHEMA @0xfcbc48dc9f106d28)
add_capnp(distributed/plan_rpc_messages.capnp)
add_lcp(distributed/pull_produce_rpc_messages.lcp CAPNP_SCHEMA @0xa78a9254a73685bd)
add_capnp(distributed/pull_produce_rpc_messages.capnp)
add_lcp(distributed/storage_gc_rpc_messages.lcp CAPNP_SCHEMA @0xd705663dfe36cf81)
add_capnp(distributed/storage_gc_rpc_messages.capnp)
add_lcp(distributed/token_sharing_rpc_messages.lcp CAPNP_SCHEMA @0x8f295db54ec4caec)
add_capnp(distributed/token_sharing_rpc_messages.capnp)
add_lcp(distributed/transactional_cache_cleaner_rpc_messages.lcp CAPNP_SCHEMA @0xe2be6183a1ff9e11)
add_capnp(distributed/transactional_cache_cleaner_rpc_messages.capnp)
add_lcp(distributed/updates_rpc_messages.lcp CAPNP_SCHEMA @0x82d5f38d73c7b53a)
add_capnp(distributed/updates_rpc_messages.capnp)
add_lcp(query/plan/operator.lcp CAPNP_SCHEMA @0xe5cae8d045d30c42)
add_capnp(query/plan/operator.capnp)
add_lcp(stats/stats_rpc_messages.lcp CAPNP_SCHEMA @0xc19a87c81b9b4512)
add_capnp(stats/stats_rpc_messages.capnp)
add_lcp(storage/concurrent_id_mapper_rpc_messages.lcp CAPNP_SCHEMA @0xa6068dae93d225dd)
add_capnp(storage/concurrent_id_mapper_rpc_messages.capnp)
add_lcp(transactions/engine_rpc_messages.lcp CAPNP_SCHEMA @0xde02b7c49180cad5)
add_capnp(transactions/engine_rpc_messages.capnp)
add_custom_target(generate_lcp DEPENDS ${generated_lcp_files})
# Registering capnp must come after registering lcp files.
add_capnp(communication/rpc/messages.capnp)
add_capnp(distributed/serialization.capnp)
add_capnp(durability/recovery.capnp)
add_capnp(query/common.capnp)
add_capnp(query/frontend/ast/ast.capnp)
add_capnp(query/frontend/semantic/symbol.capnp)
add_capnp(storage/serialization.capnp)
add_capnp(transactions/common.capnp)
add_capnp(utils/serialization.capnp)
add_custom_target(generate_capnp DEPENDS generate_lcp ${generated_capnp_files})
# -----------------------------------------------------------------------------
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
# memgraph_lib depend on these libraries
set(MEMGRAPH_ALL_LIBS stdc++fs Threads::Threads fmt cppitertools
antlr_opencypher_parser_lib dl glog gflags capnp kj
antlr_opencypher_parser_lib dl glog gflags
${OPENSSL_LIBRARIES}
${Boost_IOSTREAMS_LIBRARY_RELEASE}
${Boost_SERIALIZATION_LIBRARY_RELEASE}
mg-utils mg-io)
if (USE_LTALLOC)
@ -214,7 +117,6 @@ target_link_libraries(memgraph_lib ${MEMGRAPH_ALL_LIBS})
target_include_directories(memgraph_lib PRIVATE ${OPENSSL_INCLUDE_DIR})
add_dependencies(memgraph_lib generate_opencypher_parser)
add_dependencies(memgraph_lib generate_lcp)
add_dependencies(memgraph_lib generate_capnp)
# STATIC library used to store key-value pairs
add_library(kvstore_lib STATIC storage/kvstore.cpp)

View File

@ -7,6 +7,8 @@
#include "communication/bolt/v1/encoder/chunked_encoder_buffer.hpp"
#include "communication/bolt/v1/encoder/client_encoder.hpp"
#include "communication/client.hpp"
#include "query/typed_value.hpp"
#include "utils/exceptions.hpp"

View File

@ -10,7 +10,6 @@
#include "communication/bolt/v1/decoder/decoded_value.hpp"
#include "communication/bolt/v1/state.hpp"
#include "database/graph_db.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "query/typed_value.hpp"
#include "utils/exceptions.hpp"
@ -122,13 +121,6 @@ State HandleRun(TSession &session, State state, Marker marker) {
return State::Result;
}
session.db_accessor_->AdvanceCommand();
if (session.db_.type() == database::GraphDb::Type::DISTRIBUTED_MASTER) {
auto tx_id = session.db_accessor_->transaction_id();
auto futures =
session.db_.pull_clients().NotifyAllTransactionCommandAdvanced(
tx_id);
for (auto &future : futures) future.wait();
}
}
auto &params_map = params.ValueMap();

View File

@ -1,23 +0,0 @@
#pragma once
#include "communication/rpc/messages.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft {
// Tag naming the two Raft peer RPC kinds carried by the envelopes below.
enum class RpcType { REQUEST_VOTE, APPEND_ENTRIES };
// Envelope for a peer-to-peer Raft request. It always carries both payloads;
// presumably `type` says which one is meaningful -- confirm against the code
// that fills in and consumes this struct.
template <class State>
struct PeerRpcRequest {
RpcType type;
RequestVoteRequest request_vote;
AppendEntriesRequest<State> append_entries;
};
// Envelope for a peer-to-peer Raft reply. It always carries both payloads;
// presumably `type` says which one is meaningful -- confirm against the code
// that fills in and consumes this struct.
struct PeerRpcReply {
RpcType type;
RequestVoteReply request_vote;
AppendEntriesReply append_entries;
};
} // namespace communication::raft

View File

@ -1,699 +0,0 @@
#pragma once
#include <algorithm>
#include "fmt/format.h"
#include "glog/logging.h"
namespace communication::raft {
namespace impl {
/* Builds the member state: restores the persisted term and vote from storage,
 * allocates one `RaftPeerState` per entry of `config_.members` and arms the
 * first election timer. */
template <class State>
RaftMemberImpl<State>::RaftMemberImpl(RaftNetworkInterface<State> &network,
                                      RaftStorageInterface<State> &storage,
                                      const MemberId &id,
                                      const RaftConfig &config)
    : network_(network), storage_(storage), id_(id), config_(config) {
  std::lock_guard<std::mutex> guard(mutex_);

  tie(term_, voted_for_) = storage_.GetTermAndVotedFor();

  for (const auto &member_id : config_.members) {
    peer_states_[member_id] = std::make_unique<RaftPeerState>();
  }

  SetElectionTimer();
}
/* Destructor: only calls `Stop()`. No threads are joined here. */
template <class State>
RaftMemberImpl<State>::~RaftMemberImpl() {
Stop();
}
/* Requests shutdown: sets `exiting_` (logging once, on the first call) while
 * holding `mutex_`, then wakes every waiter on `state_changed_` after the lock
 * has been released. */
template <class State>
void RaftMemberImpl<State>::Stop() {
  std::unique_lock<std::mutex> guard(mutex_);
  if (!exiting_) {
    LogInfo("Stopping...");
    exiting_ = true;
  }
  guard.unlock();

  state_changed_.notify_all();
}
/* Logs a formatted message at INFO level, prefixed with this member's id and
 * current term. `format`/`args` are passed to `fmt::format`. */
template <class State>
template <class... Args>
void RaftMemberImpl<State>::LogInfo(const std::string &format,
                                    Args &&... args) {
  const std::string message =
      fmt::format(format, std::forward<Args>(args)...);
  LOG(INFO) << fmt::format("[id = {}, term = {}] {}", id_, term_, message)
            << std::endl;
}
/* Election timer loop. Holding `mutex_`, it starts a new election whenever
 * `next_election_time_` has passed, then sleeps on `state_changed_` until that
 * deadline or a notification. Returns once `exiting_` is set. */
template <class State>
void RaftMemberImpl<State>::TimerThreadMain() {
  std::unique_lock<std::mutex> guard(mutex_);
  while (true) {
    if (exiting_) {
      return;
    }
    const bool election_due = Clock::now() >= next_election_time_;
    if (election_due) {
      StartNewElection();
    }
    state_changed_.wait_until(guard, next_election_time_);
  }
}
/* Worker loop for a single peer. Depending on the current mode it issues
 * RequestVote (candidate) or AppendEntries (leader) RPCs to `peer_id`, or
 * sleeps on `state_changed_` until there is something to do. Runs until
 * `exiting_` is set. */
template <class State>
void RaftMemberImpl<State>::PeerThreadMain(std::string peer_id) {
  /* Take the lock before touching shared state: `LogInfo` reads `term_` and
   * the loop below reads `mode_` and `exiting_`. */
  std::unique_lock<std::mutex> lock(mutex_);

  RaftPeerState &peer_state = *peer_states_[peer_id];

  LogInfo("Peer thread started for {}", peer_id);

  /* This loop will either call a function that issues an RPC or wait on the
   * condition variable. It must not do both! Lock on `mutex_` is released while
   * waiting for RPC response, which might cause us to miss a notification on
   * `state_changed_` conditional variable and wait indefinitely. The safest
   * thing to do is to assume some important part of state was modified while we
   * were waiting for the response and loop around to check. */
  while (!exiting_) {
    TimePoint now = Clock::now();
    /* Default: sleep until notified. A default-constructed `TimePoint` is the
     * clock epoch, which would make `wait_until` return immediately and turn
     * this loop into a busy spin (e.g. for a candidate whose vote request to
     * this peer is already done). */
    TimePoint wait_until = TimePoint::max();

    if (mode_ != RaftMode::FOLLOWER && peer_state.backoff_until > now) {
      wait_until = peer_state.backoff_until;
    } else {
      switch (mode_) {
        case RaftMode::FOLLOWER:
          /* Nothing to send; wait for a state change. */
          break;
        case RaftMode::CANDIDATE:
          if (!peer_state.request_vote_done) {
            RequestVote(peer_id, peer_state, lock);
            continue;
          }
          /* Vote already requested; wait for a state change. */
          break;
        case RaftMode::LEADER:
          if (peer_state.next_index <= storage_.GetLastLogIndex() ||
              now >= peer_state.next_heartbeat_time) {
            AppendEntries(peer_id, peer_state, lock);
            continue;
          } else {
            wait_until = peer_state.next_heartbeat_time;
          }
          break;
      }
    }

    state_changed_.wait_until(lock, wait_until);
  }

  LogInfo("Peer thread exiting for {}", peer_id);
}
/* Steps down from candidate/leader to follower: forgets the known leader,
 * switches mode and re-arms the election timer. Must not be called while
 * already a follower (debug-checked). */
template <class State>
void RaftMemberImpl<State>::CandidateOrLeaderTransitionToFollower() {
  DCHECK(mode_ != RaftMode::FOLLOWER)
      << "`CandidateOrLeaderTransitionToFollower` called from follower mode";

  leader_ = {};
  mode_ = RaftMode::FOLLOWER;

  SetElectionTimer();
}
/* Promotes a candidate to leader: records itself as leader, disables the
 * election timer and appends a no-op entry for the current term to the log.
 * Must only be called in candidate mode (debug-checked). */
template <class State>
void RaftMemberImpl<State>::CandidateTransitionToLeader() {
  DCHECK(mode_ == RaftMode::CANDIDATE)
      << "`CandidateTransitionToLeader` called while not in candidate mode";

  /* We don't want to trigger elections while in leader mode. */
  next_election_time_ = TimePoint::max();
  leader_ = id_;
  mode_ = RaftMode::LEADER;

  /* [Raft thesis, Section 6.4]
   * "The Leader Completeness Property guarantees that a leader has all
   * committed entries, but at the start of its term, it may not know which
   * those are. To find out, it needs to commit an entry from its term. Raft
   * handles this by having each leader commit a blank no-op entry into the log
   * at the start of its term. As soon as this no-op entry is committed, the
   * leader's commit index will be at least as large as any other servers'
   * during its term." */
  LogEntry<State> noop_entry;
  noop_entry.command = std::experimental::nullopt;
  noop_entry.term = term_;
  storage_.AppendLogEntry(noop_entry);
}
/* Compares a term seen in an RPC with our own. If `new_term` is larger, adopts
 * it (clearing the vote), steps down to follower and returns true; otherwise
 * changes nothing and returns false. Must not be called in follower mode
 * (debug-checked). */
template <class State>
bool RaftMemberImpl<State>::CandidateOrLeaderNoteTerm(const TermId new_term) {
  DCHECK(mode_ != RaftMode::FOLLOWER)
      << "`CandidateOrLeaderNoteTerm` called from follower mode";

  /* Our term is already current: nothing to do. */
  if (term_ >= new_term) {
    return false;
  }

  /* [Raft thesis, Section 3.3]
   * "Current terms are exchanged whenever servers communicate; if one server's
   * current term is smaller than the other's, then it updates its current term
   * to the larger value. If a candidate or leader discovers that its term is
   * out of date, it immediately reverts to follower state." */
  UpdateTermAndVotedFor(new_term, {});
  CandidateOrLeaderTransitionToFollower();
  return true;
}
/* Sets the current term and vote, clears the known leader and writes the new
 * term/vote pair to storage. */
template <class State>
void RaftMemberImpl<State>::UpdateTermAndVotedFor(
    const TermId new_term,
    const std::experimental::optional<MemberId> &new_voted_for) {
  leader_ = {};
  voted_for_ = new_voted_for;
  term_ = new_term;

  storage_.WriteTermAndVotedFor(term_, voted_for_);
}
/* Re-arms the election timer: picks a uniformly random number between
 * `config_.leader_timeout_min` and `config_.leader_timeout_max` (their
 * `count()` values), interprets it as milliseconds and schedules the next
 * election that far from now. */
template <class State>
void RaftMemberImpl<State>::SetElectionTimer() {
  /* [Raft thesis, section 3.4]
   * "Raft uses randomized election timeouts to ensure that split votes are rare
   * and that they are resolved quickly. To prevent split votes in the first
   * place, election timeouts are chosen randomly from a fixed interval (e.g.,
   * 150-300 ms)." */
  std::uniform_int_distribution<uint64_t> timeout_dist(
      config_.leader_timeout_min.count(), config_.leader_timeout_max.count());
  const Clock::duration timeout =
      std::chrono::milliseconds(timeout_dist(rng_));
  next_election_time_ = Clock::now() + timeout;
}
/* Starts an election: bumps the term, votes for itself, becomes a candidate,
 * re-arms the election timer and resets the per-peer bookkeeping for every
 * other member. In a single-node cluster it becomes leader immediately.
 * Finally wakes all threads waiting on `state_changed_`.
 *
 * The function does not lock `mutex_` itself; `TimerThreadMain` calls it while
 * holding the lock. */
template <class State>
void RaftMemberImpl<State>::StartNewElection() {
LogInfo("Starting new election");
/* [Raft thesis, section 3.4]
* "To begin an election, a follower increments its current term and
* transitions to candidate state. It then votes for itself and issues
* RequestVote RPCs in parallel to each of the other servers in the cluster."
*/
UpdateTermAndVotedFor(term_ + 1, id_);
mode_ = RaftMode::CANDIDATE;
/* [Raft thesis, section 3.4]
* "Each candidate restarts its randomized election timeout at the start of an
* election, and it waits for that timeout to elapse before starting the next
* election; this reduces the likelihood of another split vote in the new
* election." */
SetElectionTimer();
/* Reset replication/vote state for every member except ourselves. */
for (const auto &peer_id : config_.members) {
if (peer_id == id_) {
continue;
}
auto &peer_state = peer_states_[peer_id];
peer_state->request_vote_done = false;
peer_state->voted_for_me = false;
peer_state->match_index = 0;
peer_state->next_index = storage_.GetLastLogIndex() + 1;
/* [Raft thesis, section 3.5]
* "Until the leader has discovered where it and the follower's logs match,
* the leader can send AppendEntries with no entries (like heartbeats) to
* save bandwidth. Then, once the matchIndex immediately precedes the
* nextIndex, the leader should begin to send the actual entries." */
peer_state->suppress_log_entries = true;
/* [Raft thesis, section 3.4]
* "Once a candidate wins an election, it becomes leader. It then sends
* heartbeat messages to all of the other servers to establish its authority
* and prevent new elections."
*
* This will make newly elected leader send heartbeats immediately.
*/
peer_state->next_heartbeat_time = TimePoint::min();
peer_state->backoff_until = TimePoint::min();
}
// We already have the majority if we're in a single node cluster.
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
/* Notify peer threads to start issuing RequestVote RPCs. */
state_changed_.notify_all();
}
/* Returns true when this candidate holds a strict majority of votes. Its own
 * vote always counts; a peer counts once its `voted_for_me` flag is set. Must
 * only be called in candidate mode (debug-checked). */
template <class State>
bool RaftMemberImpl<State>::CountVotes() {
  DCHECK(mode_ == RaftMode::CANDIDATE)
      << "`CountVotes` should only be called from candidate mode";

  int votes = 0;
  for (const auto &member_id : config_.members) {
    const bool counts =
        member_id == id_ || peer_states_[member_id]->voted_for_me;
    if (counts) {
      ++votes;
    }
  }

  return 2 * votes > config_.members.size();
}
/* Sends one RequestVote RPC to `peer_id` and processes the reply.
 *
 * `lock` must hold `mutex_` on entry. It is released for the duration of the
 * network call and re-acquired afterwards, so all state is re-validated before
 * the reply is used.
 *
 * Outcomes:
 *  - RPC failed: `peer_state.backoff_until` is pushed `config_.rpc_backoff`
 *    into the future and nothing else changes.
 *  - Term/mode changed while waiting, or we are exiting: reply is ignored.
 *  - Reply carries a higher term: we step down to follower.
 *  - Otherwise the vote result is recorded and, on reaching a majority, we
 *    become leader. */
template <class State>
void RaftMemberImpl<State>::RequestVote(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Requesting vote from {}", peer_id);
RequestVoteRequest request;
request.candidate_term = term_;
request.candidate_id = id_;
request.last_log_index = storage_.GetLastLogIndex();
request.last_log_term = storage_.GetLogTerm(request.last_log_index);
RequestVoteReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendRequestVote(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
/* State may have changed while the lock was released; a reply to a request
* from an older term or a different mode is meaningless now. */
if (term_ != request.candidate_term || mode_ != RaftMode::CANDIDATE ||
exiting_) {
LogInfo("Ignoring RequestVote RPC reply from {}", peer_id);
return;
}
/* A higher term in the reply forces us back to follower. */
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(reply.term == term_) << "Stale RequestVote RPC reply";
peer_state.request_vote_done = true;
if (reply.vote_granted) {
peer_state.voted_for_me = true;
LogInfo("Got vote from {}", peer_id);
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
} else {
LogInfo("Vote denied from {}", peer_id);
}
state_changed_.notify_all();
}
/* Leader-only: recomputes the commit index from the peers' `match_index`
 * values plus our own last log index, and advances `commit_index_` if the
 * resulting entry belongs to the current term. */
template <class State>
void RaftMemberImpl<State>::AdvanceCommitIndex() {
  DCHECK(mode_ == RaftMode::LEADER)
      << "`AdvanceCommitIndex` can only be called from leader mode";

  /* Gather how far each log is known to match, ours included. */
  std::vector<LogIndex> replicated;
  for (const auto &peer_entry : peer_states_) {
    replicated.push_back(peer_entry.second->match_index);
  }
  replicated.push_back(storage_.GetLastLogIndex());

  /* Sorting through reverse iterators leaves the values in descending
   * order. */
  std::sort(replicated.rbegin(), replicated.rend());

  const LogIndex candidate_index =
      replicated[(config_.members.size() - 1) / 2];

  LogInfo("Trying to advance commit index {} to {}", commit_index_,
          candidate_index);

  /* This can happen because we reset `match_index` to 0 for every peer when
   * elected. */
  if (candidate_index <= commit_index_) {
    return;
  }

  /* [Raft thesis, section 3.6.2]
   * "(...) Raft never commits log entries from previous terms by counting
   * replicas. Only log entries from the leader's current term are committed by
   * counting replicas; once an entry from the current term has been committed
   * in this way, then all prior entries are committed indirectly because of the
   * Log Matching Property." */
  if (storage_.GetLogTerm(candidate_index) != term_) {
    LogInfo("Cannot commit log entry from previous term");
    return;
  }

  commit_index_ = std::max(commit_index_, candidate_index);
}
/* Sends one AppendEntries RPC (log entries or a bare heartbeat) to `peer_id`
 * and processes the reply.
 *
 * `lock` must hold `mutex_` on entry. It is released for the duration of the
 * network call and re-acquired afterwards, so state is re-validated before the
 * reply is used.
 *
 * Outcomes:
 *  - RPC failed: log entries are suppressed for this peer and a backoff is set.
 *  - Term changed while waiting, or we are exiting: reply is ignored.
 *  - Reply carries a higher term: we step down to follower.
 *  - Success: `match_index`/`next_index` move forward, the commit index is
 *    re-evaluated and the next heartbeat is scheduled.
 *  - Rejection: `next_index` is decremented so the next attempt probes one
 *    entry earlier. */
template <class State>
void RaftMemberImpl<State>::AppendEntries(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Appending entries to {}", peer_id);
AppendEntriesRequest<State> request;
request.leader_term = term_;
request.leader_id = id_;
request.prev_log_index = peer_state.next_index - 1;
request.prev_log_term = storage_.GetLogTerm(peer_state.next_index - 1);
/* Only ship entries once a matching point was found and there is something
* new to send; otherwise this is a heartbeat. */
if (!peer_state.suppress_log_entries &&
peer_state.next_index <= storage_.GetLastLogIndex()) {
request.entries = storage_.GetLogSuffix(peer_state.next_index);
} else {
request.entries = {};
}
request.leader_commit = commit_index_;
AppendEntriesReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendAppendEntries(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
/* There is probably something wrong with this peer, let's avoid sending log
* entries. */
peer_state.suppress_log_entries = true;
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
/* State may have changed while the lock was released. */
if (term_ != request.leader_term || exiting_) {
return;
}
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(mode_ == RaftMode::LEADER)
<< "Elected leader for term should never change";
DCHECK(reply.term == term_) << "Got stale AppendEntries reply";
if (reply.success) {
/* We've found a match, we can start sending log entries. */
peer_state.suppress_log_entries = false;
LogIndex new_match_index = request.prev_log_index + request.entries.size();
DCHECK(peer_state.match_index <= new_match_index)
<< "`match_index` should increase monotonically within a term";
peer_state.match_index = new_match_index;
AdvanceCommitIndex();
peer_state.next_index = peer_state.match_index + 1;
peer_state.next_heartbeat_time = Clock::now() + config_.heartbeat_interval;
} else {
DCHECK(peer_state.next_index > 1)
<< "Log replication should not fail for first log entry.";
--peer_state.next_index;
}
state_changed_.notify_all();
}
/* Handles an incoming RequestVote RPC under `mutex_` and returns the reply.
 *
 * The vote is denied when the candidate's term is stale, when our log is more
 * up-to-date than the candidate's, or when we already voted for a different
 * candidate in this term. Otherwise the vote is persisted, the election timer
 * is re-armed and the vote is granted. A request with a higher term first
 * moves us to that term (stepping down if we were candidate/leader). */
template <class State>
RequestVoteReply RaftMemberImpl<State>::OnRequestVote(
const RequestVoteRequest &request) {
std::lock_guard<std::mutex> lock(mutex_);
LogInfo("RequestVote RPC request from {}", request.candidate_id);
RequestVoteReply reply;
/* [Raft thesis, Section 3.3]
* "If a server receives a request with a stale term number, it rejects the
* request." */
if (request.candidate_term < term_) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (request.candidate_term > term_) {
if (mode_ != RaftMode::FOLLOWER) {
CandidateOrLeaderTransitionToFollower();
}
UpdateTermAndVotedFor(request.candidate_term, {});
}
/* [Raft thesis, Section 3.6.1]
* "Raft uses the voting process to prevent a candidate from winning an
* election unless its log contains all committed entries. (...) The
* RequestVote RPC implements this restriction: the RPC includes information
* about the candidate's log, and the voter denies its vote if its own log is
* more up-to-date than that of the candidate. Raft determines which of two
* logs is more up-to-date by comparing the index and term of the last entries
* in the logs. If the logs have last entries with different terms, then the
* log with the later term is more up-to-date. If the logs end with the same
* term, then whichever log is longer is more up-to-date." */
LogIndex my_last_log_index = storage_.GetLastLogIndex();
TermId my_last_log_term = storage_.GetLogTerm(my_last_log_index);
if (my_last_log_term > request.last_log_term ||
(my_last_log_term == request.last_log_term &&
my_last_log_index > request.last_log_index)) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.4]
* "Each server will vote for at most one candidate in a given term, on a
* first-come-first-served basis."
*/
/* We voted for someone else in this term. */
if (request.candidate_term == term_ && voted_for_ &&
*voted_for_ != request.candidate_id) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* Now we know we will vote for this candidate, because its term is at least
* as big as ours and we haven't voted for anyone else. */
UpdateTermAndVotedFor(request.candidate_term, request.candidate_id);
/* [Raft thesis, Section 3.4]
* "A server remains in follower state as long as it receives valid RPCs from
* a leader or candidate." */
SetElectionTimer();
state_changed_.notify_all();
reply.term = request.candidate_term;
reply.vote_granted = true;
return reply;
}
/* Handles an incoming AppendEntries RPC (log replication or heartbeat) under
 * `mutex_` and returns the reply.
 *
 * The request is rejected when the leader's term is stale or when our log has
 * no entry matching (`prev_log_index`, `prev_log_term`). Otherwise conflicting
 * entries are truncated, missing entries are appended, the commit index is
 * advanced and the election timer is re-armed. A request with a higher term
 * first moves us to that term; a candidate receiving a request for its own
 * term steps down to follower. */
template <class State>
AppendEntriesReply RaftMemberImpl<State>::OnAppendEntries(
    const AppendEntriesRequest<State> &request) {
  std::lock_guard<std::mutex> lock(mutex_);
  LogInfo("AppendEntries RPC request from {}", request.leader_id);

  AppendEntriesReply reply;

  /* [Raft thesis, Section 3.3]
   * "If a server receives a request with a stale term number, it rejects the
   * request." */
  if (request.leader_term < term_) {
    reply.term = term_;
    reply.success = false;
    return reply;
  }

  /* [Raft thesis, Section 3.3]
   * "Current terms are exchanged whenever servers communicate; if one server's
   * current term is smaller than the other's, then it updates its current term
   * to the larger value. If a candidate or leader discovers that its term is
   * out of date, it immediately reverts to follower state." */
  if (request.leader_term > term_) {
    if (mode_ != RaftMode::FOLLOWER) {
      CandidateOrLeaderTransitionToFollower();
    }
    UpdateTermAndVotedFor(request.leader_term, {});
  }

  /* [Raft thesis, Section 3.4]
   * "While waiting for votes, a candidate may receive an AppendEntries RPC from
   * another server claiming to be leader. If the leader's term (included in its
   * RPC) is at least as large as the candidate's current term, then the
   * candidate recognizes the leader as legitimate and returns to follower
   * state." */
  if (mode_ == RaftMode::CANDIDATE && request.leader_term == term_) {
    CandidateOrLeaderTransitionToFollower();
  }

  DCHECK(mode_ != RaftMode::LEADER)
      << "Leader cannot accept `AppendEntries` RPC";
  DCHECK(term_ == request.leader_term) << "Term should be equal to request "
                                          "term when accepting `AppendEntries` "
                                          "RPC";

  leader_ = request.leader_id;

  /* [Raft thesis, Section 3.4]
   * "A server remains in follower state as long as it receives valid RPCs from
   * a leader or candidate." */
  SetElectionTimer();
  state_changed_.notify_all();

  /* [Raft thesis, Section 3.5]
   * "When sending an AppendEntries RPC, the leader includes the index and term
   * of the entry in its log that immediately precedes the new entries. If the
   * follower does not find an entry in its log with the same index and term,
   * then it refuses the new entries." */
  if (request.prev_log_index > storage_.GetLastLogIndex() ||
      storage_.GetLogTerm(request.prev_log_index) != request.prev_log_term) {
    reply.term = term_;
    reply.success = false;
    return reply;
  }

  /* [Raft thesis, Section 3.5]
   * "To bring a follower's log into consistency with its own, the leader must
   * find the latest log entry where the two logs agree, delete any entries in
   * the follower's log after that point, and send the follower all of the
   * leader's entries after that point." */

  /* Entry at `request.prev_log_index` is the last entry where ours and leader's
   * logs agree. It's time to replace the tail of the log with new entries from
   * the leader. We have to be careful here as duplicated AppendEntries RPCs
   * could cause data loss.
   *
   * There is a possibility that an old AppendEntries RPC is duplicated and
   * received after processing newer one. For example, leader appends entry 3
   * and then entry 4, but follower receives entry 3, then entry 4, and then
   * entry 3 again. We have to be careful not to delete entry 4 from log when
   * processing the last RPC. */
  LogIndex index = request.prev_log_index;
  auto it = request.entries.begin();
  /* Skip the entries we already have; stop at the first missing or
   * conflicting one. */
  for (; it != request.entries.end(); ++it) {
    ++index;
    if (index > storage_.GetLastLogIndex()) {
      break;
    }
    if (storage_.GetLogTerm(index) != it->term) {
      LogInfo("Truncating log suffix from index {}", index);
      DCHECK(commit_index_ < index)
          << "Committed entries should never be truncated from the log";
      storage_.TruncateLogSuffix(index);
      break;
    }
  }

  LogInfo("Appending {} out of {} logs from {}.", request.entries.end() - it,
          request.entries.size(), request.leader_id);

  for (; it != request.entries.end(); ++it) {
    storage_.AppendLogEntry(*it);
  }

  /* [Raft paper, Figure 2, AppendEntries receiver rule 5]
   * "If leaderCommit > commitIndex, set commitIndex = min(leaderCommit, index
   * of last new entry)."
   *
   * Only entries up to `prev_log_index + entries.size()` are known to match
   * the leader's log. Anything past that in our log is unverified (or may not
   * exist at all), so it must not be marked committed, e.g. when the leader
   * sends a bare heartbeat while its commit index is ahead of what we hold. */
  const LogIndex last_new_index =
      request.prev_log_index + request.entries.size();
  commit_index_ = std::max(commit_index_,
                           std::min(request.leader_commit, last_new_index));

  /* Let's bump election timer once again, we don't want to take down the leader
   * because of our long disk writes. */
  SetElectionTimer();
  state_changed_.notify_all();

  reply.term = term_;
  reply.success = true;
  return reply;
}
/* Appends `command` to the log if this member is the leader.
 *
 * Returns NOT_LEADER right away when not in leader mode. With `blocking` false
 * it returns OK as soon as the entry is appended locally. With `blocking` true
 * it waits until the entry is committed (OK), or until the term changes or the
 * member is shutting down (NOT_LEADER). */
template <class State>
ClientResult RaftMemberImpl<State>::AddCommand(
    const typename State::Change &command, bool blocking) {
  std::unique_lock<std::mutex> guard(mutex_);

  if (mode_ != RaftMode::LEADER) {
    return ClientResult::NOT_LEADER;
  }

  LogEntry<State> entry;
  entry.term = term_;
  entry.command = command;
  storage_.AppendLogEntry(entry);

  // Entry is already replicated if this is a single node cluster.
  AdvanceCommitIndex();

  state_changed_.notify_all();

  if (!blocking) {
    return ClientResult::OK;
  }

  const LogIndex index = storage_.GetLastLogIndex();

  /* Sleep until leadership is lost, shutdown starts, or the entry commits. */
  state_changed_.wait(guard, [&] {
    return exiting_ || term_ != entry.term || commit_index_ >= index;
  });

  /* Losing leadership or shutting down takes precedence over a commit. */
  const bool still_leading = !exiting_ && term_ == entry.term;
  return still_leading ? ClientResult::OK : ClientResult::NOT_LEADER;
}
} // namespace impl
/* Builds the implementation object, starts the election timer thread and one
 * worker thread per other member listed in `config.members`, then starts the
 * network layer with this member as its handler. */
template <class State>
RaftMember<State>::RaftMember(RaftNetworkInterface<State> &network,
                              RaftStorageInterface<State> &storage,
                              const MemberId &id, const RaftConfig &config)
    : network_(network), impl_(network, storage, id, config) {
  timer_thread_ =
      std::thread(&impl::RaftMemberImpl<State>::TimerThreadMain, &impl_);

  for (const auto &member_id : config.members) {
    /* No worker thread for ourselves. */
    if (member_id == id) {
      continue;
    }
    peer_threads_.emplace_back(&impl::RaftMemberImpl<State>::PeerThreadMain,
                               &impl_, member_id);
  }

  network_.Start(*this);
}
/* Signals the implementation to stop, then joins the timer thread followed by
 * every peer thread. */
template <class State>
RaftMember<State>::~RaftMember() {
  impl_.Stop();

  timer_thread_.join();
  for (auto &worker : peer_threads_) {
    worker.join();
  }
}
/* Forwards to `impl_.AddCommand` and returns its result unchanged. */
template <class State>
ClientResult RaftMember<State>::AddCommand(
const typename State::Change &command, bool blocking) {
return impl_.AddCommand(command, blocking);
}
/* Forwards an incoming RequestVote RPC to `impl_` and returns its reply. */
template <class State>
RequestVoteReply RaftMember<State>::OnRequestVote(
const RequestVoteRequest &request) {
return impl_.OnRequestVote(request);
}
/* Forwards an incoming AppendEntries RPC to `impl_` and returns its reply. */
template <class State>
AppendEntriesReply RaftMember<State>::OnAppendEntries(
const AppendEntriesRequest<State> &request) {
return impl_.OnAppendEntries(request);
}
} // namespace communication::raft

View File

@ -1,277 +0,0 @@
#pragma once
#include <chrono>
#include <condition_variable>
#include <experimental/optional>
#include <map>
#include <mutex>
#include <random>
#include <set>
#include <thread>
#include <vector>
#include "boost/serialization/vector.hpp"
#include "glog/logging.h"
#include "utils/serialization.hpp"
namespace communication::raft {
template <class State>
class RaftMember;
/* Result type returned by `AddCommand`. */
enum class ClientResult { NOT_LEADER, OK };
/* Clock and time point types used for the timers declared in this file
 * (`next_election_time_`, `next_heartbeat_time`, `backoff_until`). */
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;
/* Identifier of a cluster member (see `RaftConfig::members`). */
using MemberId = std::string;
/* Raft term number. */
using TermId = uint64_t;
using ClientId = uint64_t;
using CommandId = uint64_t;
/* Index of an entry in the Raft log. */
using LogIndex = uint64_t;
/* A single Raft log entry: the term it was created in and an optional state
 * change. Entries compare equal when both the term and the command are equal.
 *
 * NOTE(review): `term` is declared as `int` while terms elsewhere use `TermId`
 * (uint64_t), so assigning a `TermId` narrows. Changing the type would alter
 * the serialized layout -- confirm before touching. */
template <class State>
struct LogEntry {
int term;
std::experimental::optional<typename State::Change> command;
bool operator==(const LogEntry &rhs) const {
return term == rhs.term && command == rhs.command;
}
bool operator!=(const LogEntry &rhs) const { return !(*this == rhs); }
/* Serializes `term` followed by `command`; the order defines the format. */
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &command;
}
};
/* Raft RPC requests and replies as described in [Raft thesis, Figure 3.1]. */
/* Payload of the RequestVote RPC. */
struct RequestVoteRequest {
  TermId candidate_term;
  MemberId candidate_id;
  LogIndex last_log_index;
  TermId last_log_term;

  /* Archives the fields in declaration order. */
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar & candidate_term & candidate_id & last_log_index & last_log_term;
  }
};
/* Reply to the RequestVote RPC. */
struct RequestVoteReply {
  TermId term;
  bool vote_granted;

  /* Archives the fields in declaration order. */
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar & term & vote_granted;
  }
};
/* Payload of the AppendEntries RPC; `entries` carries the log entries being
 * sent. */
template <class State>
struct AppendEntriesRequest {
  TermId leader_term;
  MemberId leader_id;
  LogIndex prev_log_index;
  TermId prev_log_term;
  std::vector<LogEntry<State>> entries;
  LogIndex leader_commit;

  /* Archives the fields in declaration order. */
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar & leader_term & leader_id & prev_log_index & prev_log_term & entries &
        leader_commit;
  }
};
/* Reply to the AppendEntries RPC. */
struct AppendEntriesReply {
  TermId term;
  bool success;

  /* Archives the fields in declaration order. */
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar & term & success;
  }
};
/* Abstract transport used by a Raft member to send RPCs to its peers. */
template <class State>
class RaftNetworkInterface {
public:
virtual ~RaftNetworkInterface() = default;
/* These functions return false if the RPC failed for some reason (e.g. cannot
 * establish connection or request cancelled). Otherwise `reply` contains the
 * response from the peer. */
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) = 0;
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) = 0;
/* This will be called once the RaftMember is ready to start receiving RPCs.
 * `RaftMember`'s constructor calls it as its last step. */
virtual void Start(RaftMember<State> &member) = 0;
};
/* Abstract storage for Raft state: the current term, the vote cast in that
 * term, and the log. */
template <class State>
class RaftStorageInterface {
public:
virtual ~RaftStorageInterface() = default;
/* Stores the term together with the (optional) member voted for. */
virtual void WriteTermAndVotedFor(
const TermId term,
const std::experimental::optional<std::string> &voted_for) = 0;
/* Returns the stored term and vote as a pair. */
virtual std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() = 0;
/* Appends `entry` at the end of the log. */
virtual void AppendLogEntry(const LogEntry<State> &entry) = 0;
/* NOTE(review): the in-memory implementation treats log indices as 1-based
 * and returns term 0 for index 0 -- confirm this is the intended contract for
 * every implementation. */
virtual TermId GetLogTerm(const LogIndex index) = 0;
virtual LogEntry<State> GetLogEntry(const LogIndex index) = 0;
/* Returns the entries from `index` up to the end of the log. */
virtual std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) = 0;
virtual LogIndex GetLastLogIndex() = 0;
/* Removes the entries from `index` up to the end of the log. */
virtual void TruncateLogSuffix(const LogIndex index) = 0;
};
/* Static configuration of a Raft member. */
struct RaftConfig {
/* Ids of all cluster members. `RaftMember` starts a peer thread for every id
 * here that differs from its own. */
std::vector<MemberId> members;
/* NOTE(review): presumably the bounds of the randomized election timeout --
 * confirm against `SetElectionTimer`. */
std::chrono::milliseconds leader_timeout_min;
std::chrono::milliseconds leader_timeout_max;
/* NOTE(review): presumably the interval between leader heartbeats -- confirm. */
std::chrono::milliseconds heartbeat_interval;
/* NOTE(review): presumably the wait before retrying a failed RPC -- confirm. */
std::chrono::milliseconds rpc_backoff;
};
namespace impl {
/* Role of a member; `RaftMemberImpl::mode_` starts out as FOLLOWER. */
enum class RaftMode { FOLLOWER, CANDIDATE, LEADER };
/* Per-peer bookkeeping; `RaftMemberImpl::peer_states_` keeps one instance for
 * each peer id.
 * NOTE(review): the field meanings below are inferred from their names and
 * standard Raft terminology -- confirm against the implementation. */
struct RaftPeerState {
/* Vote-request bookkeeping for this peer. */
bool request_vote_done;
bool voted_for_me;
/* Replication progress for this peer. */
LogIndex match_index;
LogIndex next_index;
bool suppress_log_entries;
/* Timers for this peer. */
Clock::time_point next_heartbeat_time;
Clock::time_point backoff_until;
};
/* Holds the state of a Raft member and the logic operating on it.
 * `RaftMember` owns one instance and runs `TimerThreadMain` and
 * `PeerThreadMain` on threads it creates. */
template <class State>
class RaftMemberImpl {
public:
explicit RaftMemberImpl(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id, const RaftConfig &config);
~RaftMemberImpl();
/* Called by `RaftMember`'s destructor before it joins the worker threads. */
void Stop();
/* Entry points of the worker threads started by `RaftMember`. */
void TimerThreadMain();
void PeerThreadMain(std::string peer_id);
void UpdateTermAndVotedFor(
const TermId new_term,
const std::experimental::optional<MemberId> &new_voted_for);
void CandidateOrLeaderTransitionToFollower();
void CandidateTransitionToLeader();
bool CandidateOrLeaderNoteTerm(const TermId new_term);
void StartNewElection();
void SetElectionTimer();
bool CountVotes();
void RequestVote(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
void AdvanceCommitIndex();
void AppendEntries(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
/* Handlers for RPCs received from peers. */
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
/* Handler for client commands; see `RaftMember::AddCommand`. */
ClientResult AddCommand(const typename State::Change &command, bool blocking);
template <class... Args>
void LogInfo(const std::string &, Args &&...);
RaftNetworkInterface<State> &network_;
RaftStorageInterface<State> &storage_;
MemberId id_;
RaftConfig config_;
TermId term_;
RaftMode mode_ = RaftMode::FOLLOWER;
std::experimental::optional<MemberId> voted_for_ = std::experimental::nullopt;
std::experimental::optional<MemberId> leader_ = std::experimental::nullopt;
TimePoint next_election_time_;
LogIndex commit_index_ = 0;
bool exiting_ = false;
std::map<std::string, std::unique_ptr<RaftPeerState>> peer_states_;
/* This mutex protects all of the internal state. */
std::mutex mutex_;
/* Used to notify waiting threads that some of the internal state has changed.
 * It is notified when the following events occur:
 * - mode change
 * - election start
 * - `next_election_time_` update on RPC from leader or candidate
 * - destructor is called
 * - `commit_index_` is advanced
 */
std::condition_variable state_changed_;
std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}());
};
} // namespace impl
/* Public facade of a Raft member. It owns the implementation object and the
 * threads that drive it; the constructor starts the threads and the destructor
 * stops the implementation and joins them. */
template <class State>
class RaftMember final {
public:
explicit RaftMember(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage, const MemberId &id,
const RaftConfig &config);
~RaftMember();
/* Submits a client command; forwarded to the implementation. */
ClientResult AddCommand(const typename State::Change &command, bool blocking);
/* Handlers for RPCs received from peers; forwarded to the implementation. */
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
private:
RaftNetworkInterface<State> &network_;
impl::RaftMemberImpl<State> impl_;
/* Timer thread for triggering elections. */
std::thread timer_thread_;
/* One thread per peer for outgoing RPCs. */
std::vector<std::thread> peer_threads_;
};
} // namespace communication::raft
#include "raft-inl.hpp"

View File

@ -1,120 +0,0 @@
#pragma once
#include <unordered_map>
#include "glog/logging.h"
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
#include "communication/rpc/client.hpp"
#include "communication/rpc/server.hpp"
#include "io/network/endpoint.hpp"
/* Implementation of `RaftNetworkInterface` using RPC. Raft RPC requests and
* responses are wrapped in `PeerRpcRequest` and `PeerRpcReply`. */
// TODO(mtomic): Unwrap RPCs and use separate request-response protocols instead
// of `PeerProtocol`, or at least use a union to avoid sending unnecessary data
// over the wire.
namespace communication::raft {
/* RPC protocol pairing the wrapped peer request with the wrapped peer reply. */
template <class State>
using PeerProtocol = rpc::RequestResponse<PeerRpcRequest<State>, PeerRpcReply>;
template <class State>
class RpcNetwork : public RaftNetworkInterface<State> {
public:
/* Keeps a reference to `server` and takes ownership of `directory`, which
 * maps member ids to their network endpoints. */
RpcNetwork(rpc::Server &server,
std::unordered_map<std::string, io::network::Endpoint> directory)
: server_(server), directory_(std::move(directory)) {}
/* NOTE(review): the whole body is commented out, so this currently does
 * nothing: no handler is registered on `server_` and `member` is unused. The
 * disabled code dispatched incoming peer requests to `member.OnRequestVote` /
 * `member.OnAppendEntries` based on the request type. */
virtual void Start(RaftMember<State> &member) override {
// TODO: Serialize RPC via Cap'n Proto
// server_.Register<PeerProtocol<State>>(
// [&member](const auto &req_reader, auto *res_builder) {
// PeerRpcRequest<State> request;
// request.Load(req_reader);
// PeerRpcReply reply;
// reply.type = request.type;
// switch (request.type) {
// case RpcType::REQUEST_VOTE:
// reply.request_vote = member.OnRequestVote(request.request_vote);
// break;
// case RpcType::APPEND_ENTRIES:
// reply.append_entries =
// member.OnAppendEntries(request.append_entries);
// break;
// default:
// LOG(ERROR) << "Unknown RPC type: "
// << static_cast<int>(request.type);
// }
// reply.Save(res_builder);
// });
}
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::REQUEST_VOTE;
req.request_vote = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.request_vote;
return true;
}
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::APPEND_ENTRIES;
req.append_entries = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.append_entries;
return true;
}
private:
bool SendRpc(const MemberId &recipient, const PeerRpcRequest<State> &request,
PeerRpcReply &reply) {
auto &client = GetClient(recipient);
auto response = client.template Call<PeerProtocol<State>>(request);
if (!response) {
return false;
}
reply = *response;
return true;
}
/* Returns the RPC client for member `id`, creating and caching it on first
 * use.
 *
 * The endpoint is looked up with `find` instead of `operator[]`: the latter
 * would silently insert a default-constructed endpoint for an unknown member
 * id and create a client that talks to it. An unknown id is treated as a
 * fatal error instead. */
rpc::Client &GetClient(const MemberId &id) {
  auto client_it = clients_.find(id);
  if (client_it != clients_.end()) {
    return client_it->second;
  }
  const auto endpoint_it = directory_.find(id);
  CHECK(endpoint_it != directory_.end())
      << "No endpoint known for Raft member '" << id << "'";
  // `try_emplace` constructs the client in place from the endpoint.
  return clients_.try_emplace(id, endpoint_it->second).first->second;
}
rpc::Server &server_;
// TODO(mtomic): how to update and distribute this?
std::unordered_map<MemberId, io::network::Endpoint> directory_;
std::unordered_map<MemberId, rpc::Client> clients_;
};
} // namespace communication::raft

View File

@ -1,239 +0,0 @@
/**
* @file
*
* Raft log is stored inside a folder. Each log entry is stored in a file named
* by its index. There is a special file named "metadata" which stores Raft
* metadata and also the last log index, which is used on startup to identify
* which log entry files are valid.
*/
#pragma once
#include <fcntl.h>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/iostreams/device/file_descriptor.hpp"
#include "boost/iostreams/stream.hpp"
#include "communication/raft/raft.hpp"
#include "communication/raft/storage/memory.hpp"
#include "utils/file.hpp"
namespace communication::raft {
/* Contents of the "metadata" file: the stored term and vote together with the
 * index of the last valid log entry file. */
struct SimpleFileStorageMetadata {
  TermId term;
  std::experimental::optional<MemberId> voted_for;
  LogIndex last_log_index;

  /* Archives the fields in declaration order. */
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar &term;
    ar &voted_for;
    ar &last_log_index;
  }
};
template <class State>
class SimpleFileStorage : public RaftStorageInterface<State> {
public:
/* Opens the log directory `parent_dir` and restores the in-memory state from
 * it. When there is no "metadata" file the storage starts empty. Otherwise the
 * term, the vote and log entries 1..last_log_index are loaded. Any failure to
 * open the directory, open an entry file or deserialize is fatal. */
explicit SimpleFileStorage(const fs::path &parent_dir) : memory_storage_() {
try {
dir_ = utils::OpenDir(parent_dir);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Error opening log directory: {}", e.what());
}
auto md = utils::TryOpenFile(dir_, "metadata", O_RDONLY);
if (!md) {
// No metadata: leave the in-memory storage in its default (empty) state.
LOG(WARNING) << fmt::format("No metadata file found in directory '{}'",
parent_dir);
return;
}
// The stream borrows the descriptor; `md` stays responsible for closing it.
boost::iostreams::file_descriptor_source src(
md->Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(src);
boost::archive::binary_iarchive iar(is);
SimpleFileStorageMetadata metadata;
try {
iar >> metadata;
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Failed to deserialize Raft metadata: " << e.what();
}
LOG(INFO) << fmt::format(
"Read term = {} and voted_for = {} from storage", metadata.term,
metadata.voted_for ? *metadata.voted_for : "(none)");
memory_storage_.term_ = metadata.term;
memory_storage_.voted_for_ = metadata.voted_for;
memory_storage_.log_.reserve(metadata.last_log_index);
// Entry files are named by their 1-based index; only those up to
// `last_log_index` are read.
for (LogIndex idx = 1; idx <= metadata.last_log_index; ++idx) {
utils::File entry_file;
try {
entry_file = utils::OpenFile(dir_, fmt::format("{}", idx), O_RDONLY);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open entry file {}: {}", idx,
e.what());
}
// These locals intentionally shadow the metadata stream objects above.
boost::iostreams::file_descriptor_source src(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(
src);
boost::archive::binary_iarchive iar(is);
LogEntry<State> entry;
try {
iar >> entry;
memory_storage_.log_.emplace_back(std::move(entry));
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to deserialize log entry {}: {}", idx,
e.what());
}
}
LOG(INFO) << fmt::format("Read {} log entries", metadata.last_log_index);
}
/* Updates the term and vote in memory, rewrites the metadata file and then
 * fsyncs the log directory. A failed directory fsync is fatal. */
void WriteTermAndVotedFor(
TermId term,
const std::experimental::optional<MemberId> &voted_for) override {
memory_storage_.WriteTermAndVotedFor(term, voted_for);
WriteMetadata();
// Metadata file might be newly created so we have to fsync the directory.
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
/* Served from the in-memory copy; no disk access. */
std::pair<TermId, std::experimental::optional<MemberId>> GetTermAndVotedFor()
override {
return memory_storage_.GetTermAndVotedFor();
}
/* Appends `entry` in memory and persists it: the entry is written to a file
 * named after its index and fsynced, then the metadata is rewritten, then the
 * directory is fsynced. Every I/O failure is fatal. */
void AppendLogEntry(const LogEntry<State> &entry) override {
memory_storage_.AppendLogEntry(entry);
utils::File entry_file;
try {
// After the in-memory append, the last log index is this entry's index.
entry_file = utils::OpenFile(
dir_, fmt::format("{}", memory_storage_.GetLastLogIndex()),
O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open log entry file: {}", e.what());
}
// The stream borrows the descriptor; `entry_file` stays responsible for it.
boost::iostreams::file_descriptor_sink sink(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << entry;
os.flush();
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to serialize log entry: {}", e.what());
}
try {
utils::Fsync(entry_file);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to write log entry file to disk: {}",
e.what());
}
// We update the metadata only after the log entry file is written to
// disk. This ensures that no file in range [1, last_log_index] is
// corrupted.
WriteMetadata();
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
/* Served from the in-memory copy of the log; no disk access. */
TermId GetLogTerm(const LogIndex index) override {
return memory_storage_.GetLogTerm(index);
}
/* Served from the in-memory copy of the log; no disk access. */
LogEntry<State> GetLogEntry(const LogIndex index) override {
return memory_storage_.GetLogEntry(index);
}
/* Served from the in-memory copy of the log; no disk access. */
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) override {
return memory_storage_.GetLogSuffix(index);
}
/* Served from the in-memory copy of the log; no disk access. */
LogIndex GetLastLogIndex() override {
return memory_storage_.GetLastLogIndex();
}
void TruncateLogSuffix(const LogIndex index) override {
return memory_storage_.TruncateLogSuffix(index);
}
private:
InMemoryStorage<State> memory_storage_;
utils::File dir_;
void WriteMetadata() {
// We first write data to a temporary file, ensure data is safely written
// to disk, and then rename the file. Since rename is an atomic operation,
// "metadata" file won't get corrupted in case of program crash.
utils::File md_tmp;
try {
md_tmp =
OpenFile(dir_, "metadata.new", O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open temporary metadata file: {}",
e.what());
}
boost::iostreams::file_descriptor_sink sink(
md_tmp.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << SimpleFileStorageMetadata{
memory_storage_.GetTermAndVotedFor().first,
memory_storage_.GetTermAndVotedFor().second,
memory_storage_.GetLastLogIndex()};
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Error serializing Raft metadata";
}
os.flush();
try {
utils::Fsync(md_tmp);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format(
"Failed to write temporary metadata file to disk: {}", e.what());
}
try {
utils::Rename(dir_, "metadata.new", dir_, "metadata");
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to move temporary metadata file: {}",
e.what());
}
}
};
} // namespace communication::raft

View File

@ -1,63 +0,0 @@
#pragma once
#include "communication/raft/raft.hpp"
namespace communication::raft {
/* `RaftStorageInterface` implementation that keeps everything in memory.
 * Log indices are 1-based: entry `i` lives at `log_[i - 1]`, and index 0
 * denotes "before the first entry" (its term is reported as 0). */
template <class State>
class InMemoryStorage : public RaftStorageInterface<State> {
 public:
  InMemoryStorage()
      : term_(0), voted_for_(std::experimental::nullopt), log_() {}

  InMemoryStorage(const TermId term,
                  const std::experimental::optional<std::string> &voted_for,
                  const std::vector<LogEntry<State>> log)
      : term_(term), voted_for_(voted_for), log_(log) {}

  void WriteTermAndVotedFor(
      const TermId term,
      const std::experimental::optional<std::string> &voted_for) override {
    term_ = term;
    voted_for_ = voted_for;
  }

  std::pair<TermId, std::experimental::optional<MemberId>> GetTermAndVotedFor()
      override {
    return {term_, voted_for_};
  }

  void AppendLogEntry(const LogEntry<State> &entry) override {
    log_.push_back(entry);
  }

  /* Returns the term of entry `index`, or 0 for index 0. */
  TermId GetLogTerm(const LogIndex index) override {
    // `index` is unsigned, so only the upper bound needs checking.
    CHECK(index <= log_.size()) << "Trying to read nonexistent log entry";
    return index > 0 ? log_[index - 1].term : 0;
  }

  LogEntry<State> GetLogEntry(const LogIndex index) override {
    CHECK(1 <= index && index <= log_.size())
        << "Trying to get nonexistent log entry";
    return log_[index - 1];
  }

  /* Returns a copy of entries `index`..last. */
  std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) override {
    CHECK(1 <= index && index <= log_.size())
        << "Trying to get nonexistent log entries";
    return std::vector<LogEntry<State>>(log_.begin() + index - 1, log_.end());
  }

  LogIndex GetLastLogIndex() override { return log_.size(); }

  /* Removes entries `index`..last. */
  void TruncateLogSuffix(const LogIndex index) override {
    // The previous `1 <= index <= log_.size()` parsed as
    // `(1 <= index) <= log_.size()` and therefore did not validate the range;
    // index 0 or an index past the end reached `erase` with an invalid
    // iterator.
    CHECK(1 <= index && index <= log_.size())
        << "Trying to remove nonexistent log entries";
    log_.erase(log_.begin() + index - 1, log_.end());
  }

  TermId term_;
  std::experimental::optional<MemberId> voted_for_;
  std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft

View File

@ -1,141 +0,0 @@
#include <functional>
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft::test_utils {
/* State type that carries no data; all of its changes compare equal and
 * serialization writes nothing. */
struct DummyState {
  struct Change {
    bool operator==(const Change &) const { return true; }
    bool operator!=(const Change &rhs) const { return !(*this == rhs); }

    template <class TArchive>
    void serialize(TArchive &, unsigned int) {}
  };

  template <class TArchive>
  void serialize(TArchive &, unsigned int) {}
};
/* State type holding a single integer. A `Change` pairs an operation type
 * with an integer operand; two changes are equal when both match. */
struct IntState {
  int x;

  struct Change {
    enum Type { ADD, SUB, SET };
    Type t;
    int d;

    bool operator==(const Change &rhs) const {
      if (t != rhs.t) return false;
      return d == rhs.d;
    }
    bool operator!=(const Change &rhs) const {
      return t != rhs.t || d != rhs.d;
    }

    /* Archives the type followed by the operand. */
    template <class TArchive>
    void serialize(TArchive &ar, unsigned int) {
      ar &t;
      ar &d;
    }
  };

  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar &x;
  }
};
/* Implementations of `RaftNetworkInterface` for simpler unit testing. */
/* `NoOpNetworkInterface` never delivers anything: both send functions report
 * failure and `Start` does nothing -- it's like a server disconnected from the
 * network. */
template <class State>
class NoOpNetworkInterface : public RaftNetworkInterface<State> {
 public:
  ~NoOpNetworkInterface() override {}

  bool SendRequestVote(const MemberId &, const RequestVoteRequest &,
                       RequestVoteReply &) override {
    return false;
  }

  bool SendAppendEntries(const MemberId &, const AppendEntriesRequest<State> &,
                         AppendEntriesReply &) override {
    return false;
  }

  void Start(RaftMember<State> &) override {}
};
/* `NextReplyNetworkInterface` is driven by two public fields: `on_request_`
 * and the optional `next_reply_`. Every `Send*` call first wraps the request
 * and passes it to the `on_request_` callback. Afterwards, if `next_reply_` is
 * not set the call returns false; otherwise the matching part of `next_reply_`
 * is copied into `reply` and the call returns true. */
template <class State>
class NextReplyNetworkInterface : public RaftNetworkInterface<State> {
 public:
  ~NextReplyNetworkInterface() override {}

  bool SendRequestVote(const MemberId &, const RequestVoteRequest &request,
                       RequestVoteReply &reply) override {
    PeerRpcRequest<State> wrapped;
    wrapped.type = RpcType::REQUEST_VOTE;
    wrapped.request_vote = request;
    on_request_(wrapped);
    // Checked only after the callback has run.
    const bool has_reply = static_cast<bool>(next_reply_);
    if (has_reply) {
      DCHECK(next_reply_->type == RpcType::REQUEST_VOTE)
          << "`next_reply_` type doesn't match the request type";
      reply = next_reply_->request_vote;
    }
    return has_reply;
  }

  bool SendAppendEntries(const MemberId &,
                         const AppendEntriesRequest<State> &request,
                         AppendEntriesReply &reply) override {
    PeerRpcRequest<State> wrapped;
    wrapped.type = RpcType::APPEND_ENTRIES;
    wrapped.append_entries = request;
    on_request_(wrapped);
    // Checked only after the callback has run.
    const bool has_reply = static_cast<bool>(next_reply_);
    if (has_reply) {
      DCHECK(next_reply_->type == RpcType::APPEND_ENTRIES)
          << "`next_reply_` type doesn't match the request type";
      reply = next_reply_->append_entries;
    }
    return has_reply;
  }

  void Start(RaftMember<State> &) override {}

  std::function<void(const PeerRpcRequest<State> &)> on_request_;
  std::experimental::optional<PeerRpcReply> next_reply_;
};
/* `RaftStorageInterface` implementation that stores nothing: writes are
 * discarded and reads report an empty log at term 0. Asking for a log entry
 * is a fatal error because no entry ever exists. */
template <class State>
class NoOpStorageInterface : public RaftStorageInterface<State> {
 public:
  NoOpStorageInterface() {}

  void WriteTermAndVotedFor(
      const TermId,
      const std::experimental::optional<std::string> &) override {}

  std::pair<TermId, std::experimental::optional<MemberId>> GetTermAndVotedFor()
      override {
    return {0, {}};
  }

  void AppendLogEntry(const LogEntry<State> &) override {}

  TermId GetLogTerm(const LogIndex) override { return 0; }

  LogEntry<State> GetLogEntry(const LogIndex) override {
    // Previously `assert(false)` with no return statement: with NDEBUG the
    // function fell off its end, which is undefined behaviour.
    LOG(FATAL) << "NoOpStorageInterface does not hold any log entries";
    return LogEntry<State>{};  // Not reached; keeps every path returning.
  }

  std::vector<LogEntry<State>> GetLogSuffix(const LogIndex) override {
    return {};
  }

  LogIndex GetLastLogIndex() override { return 0; }

  void TruncateLogSuffix(const LogIndex) override {}

  // Unused by this class's own methods; `term_` is initialized so that reading
  // it is well defined.
  TermId term_ = 0;
  std::experimental::optional<MemberId> voted_for_;
  std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft::test_utils

View File

@ -1,100 +0,0 @@
#include <chrono>
#include <thread>
#include "gflags/gflags.h"
#include "communication/rpc/client.hpp"
DEFINE_HIDDEN_bool(rpc_random_latency, false,
"If a random wait should happen on each RPC call, to "
"simulate network latency.");
namespace communication::rpc {
// Only stores the endpoint; the connection is established lazily in `Send`.
Client::Client(const io::network::Endpoint &endpoint) : endpoint_(endpoint) {}
// Sends `message` to the remote endpoint and waits for the response.
//
// The wire format in both directions is a `MessageSize` length prefix followed
// by the flat-array Cap'n Proto message. The whole exchange runs under
// `mutex_`. On any connection, write or read failure the connection is dropped
// (`client_` is reset) and `nullopt` is returned.
std::experimental::optional<::capnp::FlatArrayMessageReader> Client::Send(
::capnp::MessageBuilder *message) {
std::lock_guard<std::mutex> guard(mutex_);
if (FLAGS_rpc_random_latency) {
// Sleep for a random duration drawn from `rand_`, scaled by 1000 and
// interpreted as microseconds.
auto microseconds = (int)(1000 * rand_(gen_));
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
}
// Check if the connection is broken (if we haven't used the client for a
// long time the server could have died).
if (client_ && client_->ErrorStatus()) {
client_ = std::experimental::nullopt;
}
// Connect to the remote server.
if (!client_) {
client_.emplace(&context_);
if (!client_->Connect(endpoint_)) {
LOG(ERROR) << "Couldn't connect to remote address " << endpoint_;
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
}
// Serialize and send request.
auto request_words = ::capnp::messageToFlatArray(*message);
auto request_bytes = request_words.asBytes();
// The length prefix is a `MessageSize`, so larger messages cannot be framed.
CHECK(request_bytes.size() <= std::numeric_limits<MessageSize>::max())
<< fmt::format(
"Trying to send message of size {}, max message size is {}",
request_bytes.size(), std::numeric_limits<MessageSize>::max());
MessageSize request_data_size = request_bytes.size();
// NOTE(review): the trailing `true` presumably signals that more data
// follows -- confirm against `communication::Client::Write`.
if (!client_->Write(reinterpret_cast<uint8_t *>(&request_data_size),
sizeof(MessageSize), true)) {
LOG(ERROR) << "Couldn't send request size to " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
if (!client_->Write(request_bytes.begin(), request_bytes.size())) {
LOG(ERROR) << "Couldn't send request data to " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
// Receive response data size.
if (!client_->Read(sizeof(MessageSize))) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
MessageSize response_data_size =
*reinterpret_cast<MessageSize *>(client_->GetData());
client_->ShiftData(sizeof(MessageSize));
// Receive response data.
if (!client_->Read(response_data_size)) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
// Read the response message.
auto data = ::kj::arrayPtr(client_->GetData(), response_data_size);
// Our data is word aligned and padded to 64bit because we use regular
// (non-packed) serialization of Cap'n Proto. So we can use reinterpret_cast.
auto data_words =
::kj::arrayPtr(reinterpret_cast<::capnp::word *>(data.begin()),
reinterpret_cast<::capnp::word *>(data.end()));
::capnp::FlatArrayMessageReader response_message(data_words.asConst());
// NOTE(review): the reader is built over the client's buffer and the buffer
// is shifted right afterwards -- confirm that `ShiftData` leaves the bytes the
// returned reader refers to intact.
client_->ShiftData(response_data_size);
return std::experimental::make_optional(std::move(response_message));
}
// Drops the current connection, if there is one.
void Client::Abort() {
  if (client_) {
    // We need to call Shutdown on the client to abort any pending read or
    // write operations.
    client_->Shutdown();
    client_ = std::experimental::nullopt;
  }
}
} // namespace communication::rpc

View File

@ -1,101 +0,0 @@
#pragma once
#include <experimental/optional>
#include <memory>
#include <mutex>
#include <random>
#include <capnp/message.h>
#include <capnp/serialize.h>
#include <glog/logging.h>
#include "communication/client.hpp"
#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
/// Client is thread safe, but it is recommended to use thread_local clients.
class Client {
public:
explicit Client(const io::network::Endpoint &endpoint);
/// Call function can initiate only one request at the time. Function blocks
/// until there is a response. If there was an error `nullopt` is returned.
/// The arguments are forwarded to the constructor of the request type.
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> Call(
Args &&... args) {
return CallWithLoad<TRequestResponse>(
[](const auto &reader) {
typename TRequestResponse::Response response;
response.Load(reader);
return response;
},
std::forward<Args>(args)...);
}
/// Same as `Call` but the first argument is a response loading function.
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> CallWithLoad(
std::function<typename TRequestResponse::Response(
const typename TRequestResponse::Response::Capnp::Reader &)>
load,
Args &&... args) {
typename TRequestResponse::Request request(std::forward<Args>(args)...);
auto req_type = TRequestResponse::Request::TypeInfo;
VLOG(12) << "[RpcClient] sent " << req_type.name;
::capnp::MallocMessageBuilder req_msg;
{
// Wrap the request in a `Message` envelope tagged with its type id.
auto builder = req_msg.initRoot<capnp::Message>();
builder.setTypeId(req_type.id);
auto data_builder = builder.initData();
auto req_builder =
data_builder
.template initAs<typename TRequestResponse::Request::Capnp>();
request.Save(&req_builder);
}
auto maybe_response = Send(&req_msg);
if (!maybe_response) {
return std::experimental::nullopt;
}
auto res_msg = maybe_response->getRoot<capnp::Message>();
auto res_type = TRequestResponse::Response::TypeInfo;
if (res_msg.getTypeId() != res_type.id) {
// Since message_id was checked in private Call function, this means
// something is very wrong (probably on the server side).
LOG(ERROR) << "Message response was of unexpected type";
// NOTE(review): `client_` is reset here without holding `mutex_`, unlike
// the resets inside `Send` -- confirm this is safe.
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
VLOG(12) << "[RpcClient] received " << res_type.name;
auto data_reader =
res_msg.getData()
.template getAs<typename TRequestResponse::Response::Capnp>();
return std::experimental::make_optional(load(data_reader));
}
/// Call this function from another thread to abort a pending RPC call.
void Abort();
private:
/// Sends the message and returns a reader over the response, or `nullopt`
/// on failure.
std::experimental::optional<::capnp::FlatArrayMessageReader> Send(
::capnp::MessageBuilder *message);
io::network::Endpoint endpoint_;
// TODO (mferencevic): currently the RPC client is hardcoded not to use SSL
communication::ClientContext context_;
// Underlying connection; empty until the first `Send` and after any error.
std::experimental::optional<communication::Client> client_;
// Held for the whole duration of `Send`.
std::mutex mutex_;
// Random generator for simulated network latency (enable with a flag).
// Distribution parameters are rule-of-thumb chosen.
std::mt19937 gen_{std::random_device{}()};
std::lognormal_distribution<> rand_{0.0, 1.11};
};
} // namespace communication::rpc

View File

@ -1,68 +0,0 @@
#pragma once
#include <mutex>
#include <stack>
#include "communication/rpc/client.hpp"
namespace communication::rpc {
/**
 * Pool of RPC clients for a single endpoint. A call borrows an idle client,
 * or creates a new one when none is idle, runs the RPC without holding the
 * pool lock, and then returns the client to the pool. This lets multiple
 * threads send RPCs to the same server without blocking each other.
 */
class ClientPool {
 public:
  explicit ClientPool(const io::network::Endpoint &endpoint)
      : endpoint_(endpoint) {}

  /// Same contract as `Client::Call`, executed on a pooled client.
  template <class TRequestResponse, class... Args>
  std::experimental::optional<typename TRequestResponse::Response> Call(
      Args &&... args) {
    auto do_call = [&](const auto &client) {
      return client->template Call<TRequestResponse>(
          std::forward<Args>(args)...);
    };
    return WithUnusedClient(do_call);
  }

  /// Same contract as `Client::CallWithLoad`, executed on a pooled client.
  template <class TRequestResponse, class... Args>
  std::experimental::optional<typename TRequestResponse::Response> CallWithLoad(
      std::function<typename TRequestResponse::Response(
          const typename TRequestResponse::Response::Capnp::Reader &)>
          load,
      Args &&... args) {
    auto do_call = [&](const auto &client) {
      return client->template CallWithLoad<TRequestResponse>(
          load, std::forward<Args>(args)...);
    };
    return WithUnusedClient(do_call);
  }

 private:
  /// Runs `fun` with a client taken from (and afterwards returned to) the
  /// pool. The pool mutex is held only while the stack is touched.
  template <class TFun>
  auto WithUnusedClient(const TFun &fun) {
    std::unique_ptr<Client> borrowed;
    {
      std::lock_guard<std::mutex> guard(mutex_);
      if (!unused_clients_.empty()) {
        borrowed = std::move(unused_clients_.top());
        unused_clients_.pop();
      } else {
        borrowed = std::make_unique<Client>(endpoint_);
      }
    }
    auto result = fun(borrowed);
    std::lock_guard<std::mutex> guard(mutex_);
    unused_clients_.push(std::move(borrowed));
    return result;
  }

  io::network::Endpoint endpoint_;
  std::mutex mutex_;
  std::stack<std::unique_ptr<Client>> unused_clients_;
};
} // namespace communication::rpc

View File

@ -1,9 +0,0 @@
@0xd3832c9a1a3d8ec7;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("communication::rpc::capnp");
struct Message {
# Envelope used for every RPC request and response.
typeId @0 :UInt64;
# Id of the message type carried in `data`; the RPC client sets it from the
# message's `TypeInfo` and the server uses it to look up the callback.
data @1 :AnyPointer;
# The request or response itself, stored as the type's own Cap'n Proto struct.
}

View File

@ -1,54 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
namespace communication::rpc {
/// Type of the length prefix sent before every serialized RPC message.
using MessageSize = uint32_t;
/// Type information on a RPC message.
/// Each message should have a static member `TypeInfo` with this information.
/// All comparisons between `MessageType` values look at `id` only; `name` is
/// ignored.
struct MessageType {
  /// Unique ID for a message.
  uint64_t id;
  /// Pretty name of the type.
  std::string name;
};

inline bool operator==(const MessageType &a, const MessageType &b) {
  return a.id == b.id;
}

inline bool operator!=(const MessageType &a, const MessageType &b) {
  return !(a == b);
}

inline bool operator<(const MessageType &a, const MessageType &b) {
  return a.id < b.id;
}

inline bool operator<=(const MessageType &a, const MessageType &b) {
  return !(b < a);
}

inline bool operator>(const MessageType &a, const MessageType &b) {
  return b < a;
}

inline bool operator>=(const MessageType &a, const MessageType &b) {
  return !(a < b);
}
/// Each RPC is defined via this struct, which simply names its request and
/// response types.
///
/// `TRequest` and `TResponse` are required to be classes which have a static
/// member `TypeInfo` of `MessageType` type. This is used for proper
/// registration and deserialization of RPC types. Additionally, both `TRequest`
/// and `TResponse` are required to define a nested `Capnp` type, which
/// corresponds to the Cap'n Proto schema type, as well as define the following
/// serialization functions:
///   * void Save(Capnp::Builder *, ...) const
///   * void Load(const Capnp::Reader &, ...)
template <typename TRequest, typename TResponse>
struct RequestResponse {
  typedef TRequest Request;
  typedef TResponse Response;
};
} // namespace communication::rpc

View File

@ -1,77 +0,0 @@
#include <limits>
#include <sstream>

#include "capnp/message.h"
#include "capnp/serialize.h"
#include "fmt/format.h"

#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/rpc/server.hpp"
#include "utils/demangle.hpp"

namespace communication::rpc {

/// Binds the session to its owning server and to the connection's streams.
/// Only references are stored.
Session::Session(Server &server, communication::InputStream &input_stream,
                 communication::OutputStream &output_stream)
    : server_(server),
      input_stream_(input_stream),
      output_stream_(output_stream) {}

/// Processes at most one request from the input stream.
///
/// Wire format: a `MessageSize` length prefix followed by that many bytes of a
/// flat (non-packed) Cap'n Proto `capnp::Message`. Returns without doing
/// anything when the prefix or the payload has not fully arrived yet.
///
/// @throws SessionException when the length prefix is not a whole number of
///         Cap'n Proto words, when no callback is registered for the request's
///         type ID, when the response does not fit into `MessageSize`, or when
///         writing the response fails.
void Session::Execute() {
  // Wait until the whole length prefix has arrived.
  if (input_stream_.size() < sizeof(MessageSize)) return;
  MessageSize request_len =
      *reinterpret_cast<MessageSize *>(input_stream_.data());

  // The payload is reinterpreted as an array of Cap'n Proto words below, so a
  // length that is not a whole number of words cannot describe a valid
  // message. Reject it up front instead of building a word range whose end
  // pointer is misaligned.
  if (request_len % sizeof(::capnp::word) != 0) {
    throw SessionException(fmt::format(
        "Received request of size {}, which is not a multiple of {}",
        request_len, sizeof(::capnp::word)));
  }

  uint64_t request_size = sizeof(MessageSize) + request_len;
  input_stream_.Resize(request_size);
  // Wait until the whole payload has arrived.
  if (input_stream_.size() < request_size) return;

  // Read the request message.
  auto data =
      ::kj::arrayPtr(input_stream_.data() + sizeof(request_len), request_len);
  // Our data is word aligned and padded to 64bit because we use regular
  // (non-packed) serialization of Cap'n Proto. So we can use reinterpret_cast.
  // NOTE(review): the payload starts sizeof(MessageSize) bytes past
  // input_stream_.data(); confirm that this address is really word aligned.
  auto data_words =
      ::kj::arrayPtr(reinterpret_cast<::capnp::word *>(data.begin()),
                     reinterpret_cast<::capnp::word *>(data.end()));
  ::capnp::FlatArrayMessageReader request_message(data_words.asConst());
  auto request = request_message.getRoot<capnp::Message>();
  // NOTE(review): `request` still points into the input stream's buffer and is
  // read after this Shift; confirm Shift leaves those bytes intact when more
  // data is already buffered behind the request.
  input_stream_.Shift(sizeof(MessageSize) + request_len);

  auto callbacks_accessor = server_.callbacks_.access();
  auto it = callbacks_accessor.find(request.getTypeId());
  if (it == callbacks_accessor.end()) {
    // Throw exception to close the socket and cleanup the session.
    throw SessionException(
        "Session trying to execute an unregistered RPC call!");
  }

  VLOG(12) << "[RpcServer] received " << it->second.req_type.name;

  ::capnp::MallocMessageBuilder response_message;
  // callback fills the message data
  auto response_builder = response_message.initRoot<capnp::Message>();
  it->second.callback(request, &response_builder);

  // Serialize and send response
  auto response_words = ::capnp::messageToFlatArray(response_message);
  auto response_bytes = response_words.asBytes();
  if (response_bytes.size() > std::numeric_limits<MessageSize>::max()) {
    throw SessionException(fmt::format(
        "Trying to send response of size {}, max response size is {}",
        response_bytes.size(), std::numeric_limits<MessageSize>::max()));
  }

  // Length prefix first, then the payload.
  MessageSize response_size = response_bytes.size();
  if (!output_stream_.Write(reinterpret_cast<uint8_t *>(&response_size),
                            sizeof(MessageSize), true)) {
    throw SessionException("Couldn't send response size!");
  }
  if (!output_stream_.Write(response_bytes.begin(), response_bytes.size())) {
    throw SessionException("Couldn't send response data!");
  }

  VLOG(12) << "[RpcServer] sent " << it->second.res_type.name;
}

}  // namespace communication::rpc

View File

@ -1,55 +0,0 @@
#pragma once
#include <chrono>
#include <cstdint>
#include <memory>
#include "communication/rpc/messages.hpp"
#include "communication/session.hpp"
/**
* @brief Protocol
*
* Has classes and functions that implement the server side of our
* RPC protocol.
*
* Message layout: MessageSize message_size,
* message_size bytes serialized_message
*/
namespace communication::rpc {
// Forward declaration of class Server
class Server;
/**
 * This class is thrown when the Session wants to indicate that a fatal error
 * occurred during execution.
 */
class SessionException : public utils::BasicException {
// Reuse the constructors of utils::BasicException unchanged.
using utils::BasicException::BasicException;
};
/**
 * Distributed Protocol Session
 *
 * This class is responsible for handling a single client connection.
 */
class Session {
public:
/**
 * Creates a session that serves requests for `server`, reading from
 * `input_stream` and writing to `output_stream`. Only references are kept.
 */
Session(Server &server, communication::InputStream &input_stream,
communication::OutputStream &output_stream);
/**
 * Executes the protocol after data has been read into the stream.
 * Goes through the protocol states in order to execute commands from the
 * client.
 */
void Execute();
private:
// Server whose registered callbacks are used to answer requests.
Server &server_;
// Source of incoming request bytes.
communication::InputStream &input_stream_;
// Sink for outgoing response bytes.
communication::OutputStream &output_stream_;
};
} // namespace communication::rpc

View File

@ -1,17 +0,0 @@
#include "communication/rpc/server.hpp"
namespace communication::rpc {
// Constructs the underlying communication server on `endpoint`, passing this
// object, the server context, -1, the name "RPC" and `workers_count` to it.
// NOTE(review): the meaning of the -1 argument is not visible here; confirm
// against communication::Server.
Server::Server(const io::network::Endpoint &endpoint,
size_t workers_count)
: server_(endpoint, *this, &context_, -1, "RPC", workers_count) {}
// Shuts the underlying server down and blocks until the shutdown completes.
void Server::StopProcessingCalls() {
server_.Shutdown();
server_.AwaitShutdown();
}
// Returns the endpoint reported by the underlying server.
const io::network::Endpoint &Server::endpoint() const {
return server_.endpoint();
}
} // namespace communication::rpc

View File

@ -1,86 +0,0 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

#include "capnp/any.h"

#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "data_structures/queue.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"

namespace communication::rpc {

/// RPC server: keeps a map from request type ID to callback and serves
/// incoming connections through `Session` objects.
class Server {
 public:
  /// Starts serving on `endpoint` with `workers_count` workers.
  Server(const io::network::Endpoint &endpoint,
         size_t workers_count = std::thread::hardware_concurrency());
  Server(const Server &) = delete;
  Server(Server &&) = delete;
  Server &operator=(const Server &) = delete;
  Server &operator=(Server &&) = delete;

  /// Shuts the underlying server down and waits for it to finish.
  void StopProcessingCalls();

  /// Endpoint of the underlying server.
  const io::network::Endpoint &endpoint() const;

  /// Registers `callback` as the handler of `TRequestResponse`.
  ///
  /// The callback receives the typed request reader and a builder for the
  /// typed response. Registration is keyed by the request's `TypeInfo.id`;
  /// registering the same request type twice is a fatal error (CHECK).
  template <class TRequestResponse>
  void Register(std::function<
                void(const typename TRequestResponse::Request::Capnp::Reader &,
                     typename TRequestResponse::Response::Capnp::Builder *)>
                    callback) {
    RpcCallback rpc;
    rpc.req_type = TRequestResponse::Request::TypeInfo;
    rpc.res_type = TRequestResponse::Response::TypeInfo;
    // Adapt the typed callback to the generic `capnp::Message` envelope: unwrap
    // the request payload, stamp the response type ID and hand the callback a
    // builder for the typed response payload. `callback` was taken by value,
    // so it is moved into the closure instead of being copied again.
    rpc.callback = [callback = std::move(callback)](const auto &reader,
                                                    auto *builder) {
      auto req_data =
          reader.getData()
              .template getAs<typename TRequestResponse::Request::Capnp>();
      builder->setTypeId(TRequestResponse::Response::TypeInfo.id);
      auto data_builder = builder->initData();
      auto res_builder =
          data_builder
              .template initAs<typename TRequestResponse::Response::Capnp>();
      callback(req_data, &res_builder);
    };
    auto callbacks_accessor = callbacks_.access();
    auto got =
        callbacks_accessor.insert(TRequestResponse::Request::TypeInfo.id, rpc);
    CHECK(got.second) << "Callback for that message type already registered";
    VLOG(12) << "[RpcServer] register " << rpc.req_type.name << " -> "
             << rpc.res_type.name;
  }

  /// Removes the handler of `TRequestResponse`; removing a handler that was
  /// never registered is a fatal error (CHECK).
  template <typename TRequestResponse>
  void UnRegister() {
    const MessageType &type = TRequestResponse::Request::TypeInfo;
    auto callbacks_accessor = callbacks_.access();
    auto deleted = callbacks_accessor.remove(type.id);
    CHECK(deleted) << "Trying to remove unknown message type callback";
  }

 private:
  // Session looks callbacks up directly in `callbacks_`.
  friend class Session;

  /// A registered handler together with the type info of its request and
  /// response (used for logging).
  struct RpcCallback {
    MessageType req_type;
    std::function<void(const capnp::Message::Reader &,
                       capnp::Message::Builder *)>
        callback;
    MessageType res_type;
  };

  // Request type ID -> handler.
  ConcurrentMap<uint64_t, RpcCallback> callbacks_;

  std::mutex mutex_;
  // TODO (mferencevic): currently the RPC server is hardcoded not to use SSL
  communication::ServerContext context_;
  // Declared after `context_` because its initializer takes `&context_`.
  communication::Server<Session, Server> server_;
};

}  // namespace communication::rpc

View File

@ -32,41 +32,6 @@ DEFINE_string(properties_on_disk, "",
"Property names of properties which will be stored on available "
"disk. Property names have to be separated with comma (,).");
#ifndef MG_COMMUNITY
// Distributed master/worker flags.
// These flags are compiled out when MG_COMMUNITY is defined.
// NOTE(review): if FLAG_IN_RANGE treats its upper bound as inclusive, the
// worker_id range below admits 1 << gid::kWorkerIdSize itself; confirm that
// this value is a valid worker ID.
DEFINE_VALIDATED_HIDDEN_int32(worker_id, 0,
                              "ID of a worker in a distributed system. Ignored "
                              "in single-node.",
                              FLAG_IN_RANGE(0, 1 << gid::kWorkerIdSize));
DEFINE_HIDDEN_string(master_host, "0.0.0.0",
                     "For master node indicates the host served on. For worker "
                     "node indicates the master location.");
DEFINE_VALIDATED_HIDDEN_int32(
    master_port, 0,
    "For master node the port on which to serve. For "
    "worker node indicates the master's port.",
    FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_HIDDEN_string(worker_host, "0.0.0.0",
                     "For worker node indicates the host served on. For master "
                     "node this flag is not used.");
DEFINE_VALIDATED_HIDDEN_int32(
    worker_port, 0,
    "For master node it's unused. For worker node "
    "indicates the port on which to serve. If zero (default value), a port is "
    "chosen at random. Sent to the master when registering worker node.",
    FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
// Defaults to the hardware concurrency, but never less than one worker.
DEFINE_VALIDATED_HIDDEN_int32(rpc_num_workers,
                              std::max(std::thread::hardware_concurrency(), 1U),
                              "Number of workers (RPC)",
                              FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(recovering_cluster_size, 0,
                       "Number of workers (including master) in the "
                       "previously snapshooted/wal cluster.",
                       FLAG_IN_RANGE(0, INT32_MAX));
DEFINE_bool(dynamic_graph_partitioner_enabled, false,
            "If the dynamic graph partitioner should be enabled.");
#endif
// clang-format off
database::Config::Config()
// Durability flags.
@ -81,17 +46,5 @@ database::Config::Config()
query_execution_time_sec{FLAGS_query_execution_time_sec},
// Data location.
properties_on_disk(utils::Split(FLAGS_properties_on_disk, ","))
#ifndef MG_COMMUNITY
,
// Distributed flags.
dynamic_graph_partitioner_enabled{FLAGS_dynamic_graph_partitioner_enabled},
rpc_num_workers{FLAGS_rpc_num_workers},
worker_id{FLAGS_worker_id},
master_endpoint{FLAGS_master_host,
static_cast<uint16_t>(FLAGS_master_port)},
worker_endpoint{FLAGS_worker_host,
static_cast<uint16_t>(FLAGS_worker_port)},
recovering_cluster_size{FLAGS_recovering_cluster_size}
#endif
{}
// clang-format on

View File

@ -1,7 +1,5 @@
#include "database/counters.hpp"
#include "database/counters_rpc_messages.hpp"
namespace database {
int64_t SingleNodeCounters::Get(const std::string &name) {
@ -16,33 +14,4 @@ void SingleNodeCounters::Set(const std::string &name, int64_t value) {
if (!name_counter_pair.second) name_counter_pair.first->second.store(value);
}
/// Registers the counter RPC handlers on `server`, so that remote calls to
/// `CountersGetRpc` / `CountersSetRpc` are answered from this object's
/// `Get` / `Set`.
MasterCounters::MasterCounters(communication::rpc::Server &server)
    : rpc_server_(server) {
  rpc_server_.Register<CountersGetRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        CountersGetRes res(Get(req_reader.getName()));
        res.Save(res_builder);
      });

  rpc_server_.Register<CountersSetRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        Set(req_reader.getName(), req_reader.getValue());
        // The RPC callback returns void and reports its result only through
        // `res_builder`. The previous body returned a heap-allocated
        // CountersSetRes that was silently discarded and never touched the
        // builder; save the response the same way the Get handler does.
        CountersSetRes res;
        res.Save(res_builder);
      });
}
/// Stores a reference to the client pool through which all counter calls are
/// sent.
WorkerCounters::WorkerCounters(
    communication::rpc::ClientPool &master_client_pool)
    : master_client_pool_(master_client_pool) {}

/// Fetches the counter `name` with a `CountersGetRpc` call and returns the
/// value from the reply. A missing reply is a fatal error.
int64_t WorkerCounters::Get(const std::string &name) {
  auto reply = master_client_pool_.Call<CountersGetRpc>(name);
  CHECK(reply) << "CountersGetRpc failed";
  return reply->value;
}

/// Sends `name` and `value` with a `CountersSetRpc` call. A missing reply is a
/// fatal error.
void WorkerCounters::Set(const std::string &name, int64_t value) {
  auto reply = master_client_pool_.Call<CountersSetRpc>(name, value);
  CHECK(reply) << "CountersSetRpc failed";
}
} // namespace database

View File

@ -4,8 +4,6 @@
#include <cstdint>
#include <string>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
namespace database {
@ -41,25 +39,4 @@ class SingleNodeCounters : public Counters {
ConcurrentMap<std::string, std::atomic<int64_t>> counters_;
};
/**
 * Implementation for distributed master.
 *
 * Behaves like SingleNodeCounters and keeps a reference to the RPC server it
 * was constructed with.
 */
class MasterCounters : public SingleNodeCounters {
public:
explicit MasterCounters(communication::rpc::Server &server);
private:
// RPC server passed to the constructor; not owned.
communication::rpc::Server &rpc_server_;
};
/**
 * Implementation for distributed worker.
 *
 * Overrides both Counters operations and keeps a reference to an RPC client
 * pool instead of local counter storage.
 */
class WorkerCounters : public Counters {
public:
explicit WorkerCounters(communication::rpc::ClientPool &master_client_pool);
int64_t Get(const std::string &name) override;
void Set(const std::string &name, int64_t value) override;
private:
// Client pool passed to the constructor; not owned.
communication::rpc::ClientPool &master_client_pool_;
};
} // namespace database

View File

@ -1,23 +0,0 @@
#>cpp
#pragma once
#include <string>
#include "communication/rpc/messages.hpp"
#include "database/counters_rpc_messages.capnp.h"
cpp<#
(lcp:namespace database)
(lcp:capnp-namespace "database")
;; RPC for reading a counter: the request carries the counter's name, the
;; response carries a 64-bit value.
(lcp:define-rpc counters-get
(:request ((name "std::string")))
(:response ((value :int64_t))))
;; RPC for writing a counter: the request carries the counter's name and the
;; 64-bit value; the response has no fields.
(lcp:define-rpc counters-set
(:request ((name "std::string")
(value :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; database

View File

@ -2,41 +2,14 @@
#include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
#include "database/storage_gc_master.hpp"
#include "database/graph_db_accessor.hpp"
#include "database/storage_gc_single_node.hpp"
#include "database/storage_gc_worker.hpp"
#include "distributed/bfs_rpc_clients.hpp"
#include "distributed/bfs_rpc_server.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_worker.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_server.hpp"
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "distributed/durability_rpc_server.hpp"
#include "distributed/index_rpc_server.hpp"
#include "distributed/plan_consumer.hpp"
#include "distributed/plan_dispatcher.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/token_sharing_rpc_server.hpp"
#include "distributed/transactional_cache_cleaner.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "durability/paths.hpp"
#include "durability/recovery.hpp"
#include "durability/snapshooter.hpp"
#include "storage/concurrent_id_mapper_master.hpp"
#include "storage/concurrent_id_mapper_single_node.hpp"
#include "storage/concurrent_id_mapper_worker.hpp"
#include "transactions/engine_master.hpp"
#include "transactions/engine_single_node.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/file.hpp"
#include "utils/flag_validation.hpp"
@ -44,6 +17,7 @@ using namespace std::literals::chrono_literals;
using namespace storage;
namespace database {
namespace impl {
class PrivateBase : public GraphDb {
@ -76,22 +50,6 @@ class PrivateBase : public GraphDb {
std::make_unique<Storage>(WorkerId(), config_.properties_on_disk);
}
// Default implementations of the role-specific accessors: each one aborts via
// LOG(FATAL). Master overrides pull_clients, plan_dispatcher and
// index_rpc_clients; Worker overrides produce_server and plan_consumer.
distributed::PullRpcClients &pull_clients() override {
LOG(FATAL) << "Remote pull clients only available in master.";
}
distributed::ProduceRpcServer &produce_server() override {
LOG(FATAL) << "Remote produce server only available in worker.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan consumer only available in distributed worker.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan dispatcher only available in distributed master.";
}
distributed::IndexRpcClients &index_rpc_clients() override {
LOG(FATAL) << "Index RPC clients only available in distributed master.";
}
protected:
std::unique_ptr<Storage> storage_ =
std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk);
@ -128,7 +86,6 @@ struct TypemapPack {
class SingleNode : public PrivateBase {
public:
explicit SingleNode(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override { return GraphDb::Type::SINGLE_NODE; }
IMPL_GETTERS
tx::SingleNodeEngine tx_engine_{&wal_};
@ -139,33 +96,6 @@ class SingleNode : public PrivateBase {
storage_->PropertiesOnDisk()};
database::SingleNodeCounters counters_;
// A single node reports exactly one worker, with ID 0.
std::vector<int> GetWorkerIds() const override { return {0}; }
// None of the distributed components exist in single-node mode; every accessor
// below aborts via LOG(FATAL) instead of returning.
distributed::BfsRpcServer &bfs_subcursor_server() override {
LOG(FATAL) << "Subcursor server not available in single-node.";
}
distributed::BfsRpcClients &bfs_subcursor_clients() override {
LOG(FATAL) << "Subcursor clients not available in single-node.";
}
distributed::DataRpcServer &data_server() override {
LOG(FATAL) << "Remote data server not available in single-node.";
}
distributed::DataRpcClients &data_clients() override {
LOG(FATAL) << "Remote data clients not available in single-node.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan Dispatcher not available in single-node.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan Consumer not available in single-node.";
}
distributed::UpdatesRpcServer &updates_server() override {
LOG(FATAL) << "Remote updates server not available in single-node.";
}
distributed::UpdatesRpcClients &updates_clients() override {
LOG(FATAL) << "Remote updates clients not available in single-node.";
}
distributed::DataManager &data_manager() override {
LOG(FATAL) << "Remote data manager not available in single-node.";
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
@ -175,170 +105,6 @@ class SingleNode : public PrivateBase {
}
};
// Expands to the accessor overrides shared by the Master and Worker
// implementations. The expanding class must have members named coordination_,
// bfs_subcursor_server_, bfs_subcursor_clients_, data_server_, data_clients_,
// updates_server_, updates_clients_ and data_manager_.
#define IMPL_DISTRIBUTED_GETTERS \
std::vector<int> GetWorkerIds() const override { \
return coordination_.GetWorkerIds(); \
} \
distributed::BfsRpcServer &bfs_subcursor_server() override { \
return bfs_subcursor_server_; \
} \
distributed::BfsRpcClients &bfs_subcursor_clients() override { \
return bfs_subcursor_clients_; \
} \
distributed::DataRpcServer &data_server() override { return data_server_; } \
distributed::DataRpcClients &data_clients() override { \
return data_clients_; \
} \
distributed::UpdatesRpcServer &updates_server() override { \
return updates_server_; \
} \
distributed::UpdatesRpcClients &updates_clients() override { \
return updates_clients_; \
} \
distributed::DataManager &data_manager() override { return data_manager_; }
// Private implementation of the distributed master: owns the RPC server, the
// master transaction engine, the coordination object and the distributed
// clients/servers built on top of them.
class Master : public PrivateBase {
public:
explicit Master(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_MASTER;
}
// Makes a local snapshot and forces the workers to do the same. Snapshot is
// written here only if workers successfully created their own snapshot
bool MakeSnapshot(GraphDbAccessor &accessor) override {
auto workers_snapshot =
durability_rpc_clients_.MakeSnapshot(accessor.transaction_id());
if (!workers_snapshot.get()) return false;
// This can be further optimized by creating master snapshot at the same
// time as workers snapshots but this forces us to delete the master
// snapshot if we succeed in creating it and workers somehow fail. Because
// we have an assumption that every snapshot that exists on master with some
// tx_id visibility also exists on workers
return PrivateBase::MakeSnapshot(accessor);
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanDispatcher &plan_dispatcher() override {
return plan_dispatcher_;
}
distributed::PullRpcClients &pull_clients() override { return pull_clients_; }
distributed::IndexRpcClients &index_rpc_clients() override {
return index_rpc_clients_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
// Recreate the garbage collector with the same arguments as the member
// initializer below.
storage_gc_ = std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
}
// Members are initialized in declaration order, and many initializers refer
// to members declared above them, so the order below is significant.
communication::rpc::Server server_{
config_.master_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
// NOTE(review): tx_engine_ receives rpc_worker_clients_, which is declared
// (and therefore constructed) further below; confirm the engine's constructor
// only stores the reference and does not use it.
tx::MasterEngine tx_engine_{server_, rpc_worker_clients_, &wal_};
distributed::MasterCoordination coordination_{server_.endpoint()};
std::unique_ptr<StorageGcMaster> storage_gc_ =
std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
TypemapPack<MasterConcurrentIdMapper> typemap_pack_{server_};
database::MasterCounters counters_{server_};
distributed::BfsSubcursorStorage subcursor_storage_{this};
distributed::BfsRpcServer bfs_subcursor_server_{this, &server_,
&subcursor_storage_};
distributed::BfsRpcClients bfs_subcursor_clients_{this, &subcursor_storage_,
&rpc_worker_clients_};
distributed::DurabilityRpcClients durability_rpc_clients_{
rpc_worker_clients_};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanDispatcher plan_dispatcher_{rpc_worker_clients_};
distributed::PullRpcClients pull_clients_{rpc_worker_clients_};
distributed::IndexRpcClients index_rpc_clients_{rpc_worker_clients_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::TransactionalCacheCleaner cache_cleaner_{
tx_engine_, updates_server_, data_manager_};
distributed::ClusterDiscoveryMaster cluster_discovery_{server_, coordination_,
rpc_worker_clients_};
distributed::TokenSharingRpcClients token_sharing_clients_{
&rpc_worker_clients_};
distributed::TokenSharingRpcServer token_sharing_server_{
this, config_.worker_id, &coordination_, &server_,
&token_sharing_clients_};
};
// Private implementation of a distributed worker: owns its own RPC server and
// the worker-side engine, clients and servers. Several members are built on
// rpc_worker_clients_.GetClientPool(0).
// NOTE(review): client pool 0 presumably addresses the master; confirm.
class Worker : public PrivateBase {
public:
// Runs after all members are constructed, then registers this worker under
// config.worker_id through cluster discovery.
explicit Worker(const Config &config) : PrivateBase(config) {
cluster_discovery_.RegisterWorker(config.worker_id);
}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_WORKER;
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanConsumer &plan_consumer() override { return plan_consumer_; }
distributed::ProduceRpcServer &produce_server() override {
return produce_server_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
// Recreate the garbage collector with the same arguments as the member
// initializer below.
storage_gc_ = std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
}
// Members are initialized in declaration order, and many initializers refer
// to members declared above them, so the order below is significant.
communication::rpc::Server server_{
config_.worker_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
distributed::WorkerCoordination coordination_{server_,
config_.master_endpoint};
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
tx::WorkerEngine tx_engine_{rpc_worker_clients_.GetClientPool(0)};
std::unique_ptr<StorageGcWorker> storage_gc_ =
std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
TypemapPack<WorkerConcurrentIdMapper> typemap_pack_{
rpc_worker_clients_.GetClientPool(0)};
database::WorkerCounters counters_{rpc_worker_clients_.GetClientPool(0)};
distributed::BfsSubcursorStorage subcursor_storage_{this};
distributed::BfsRpcServer bfs_subcursor_server_{this, &server_,
&subcursor_storage_};
distributed::BfsRpcClients bfs_subcursor_clients_{this, &subcursor_storage_,
&rpc_worker_clients_};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanConsumer plan_consumer_{server_};
distributed::ProduceRpcServer produce_server_{*this, tx_engine_, server_,
plan_consumer_};
distributed::IndexRpcServer index_rpc_server_{*this, server_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::WorkerTransactionalCacheCleaner cache_cleaner_{
tx_engine_, server_, produce_server_, updates_server_, data_manager_};
distributed::DurabilityRpcServer durability_rpc_server_{*this, server_};
distributed::ClusterDiscoveryWorker cluster_discovery_{
server_, coordination_, rpc_worker_clients_.GetClientPool(0)};
distributed::TokenSharingRpcClients token_sharing_clients_{
&rpc_worker_clients_};
distributed::TokenSharingRpcServer token_sharing_server_{
this, config_.worker_id, &coordination_, &server_,
&token_sharing_clients_};
};
#undef IMPL_GETTERS
PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
: impl_(std::move(impl)) {
if (impl_->config_.durability_enabled)
@ -346,61 +112,18 @@ PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
// Durability recovery.
{
auto db_type = impl_->type();
// What we should recover.
std::experimental::optional<durability::RecoveryInfo>
required_recovery_info;
if (db_type == Type::DISTRIBUTED_WORKER) {
required_recovery_info = dynamic_cast<impl::Worker *>(impl_.get())
->cluster_discovery_.recovery_info();
}
// What we recover.
std::experimental::optional<durability::RecoveryInfo> recovery_info;
// Recover only if necessary.
if ((db_type != Type::DISTRIBUTED_WORKER &&
impl_->config_.db_recover_on_startup) ||
(db_type == Type::DISTRIBUTED_WORKER && required_recovery_info)) {
if (impl_->config_.db_recover_on_startup) {
recovery_info = durability::Recover(impl_->config_.durability_directory,
*impl_, required_recovery_info);
}
// Post-recovery setup and checking.
switch (db_type) {
case Type::DISTRIBUTED_MASTER:
dynamic_cast<impl::Master *>(impl_.get())
->coordination_.SetRecoveryInfo(recovery_info);
if (recovery_info) {
CHECK(impl_->config_.recovering_cluster_size > 0)
<< "Invalid cluster recovery size flag. Recovered cluster size "
"should be at least 1";
while (dynamic_cast<impl::Master *>(impl_.get())
->coordination_.CountRecoveredWorkers() !=
impl_->config_.recovering_cluster_size - 1) {
LOG(INFO) << "Waiting for workers to finish recovering..";
std::this_thread::sleep_for(2s);
}
}
// Start the dynamic graph partitioner inside token sharing server
if (impl_->config_.dynamic_graph_partitioner_enabled) {
dynamic_cast<impl::Master *>(impl_.get())
->token_sharing_server_.StartTokenSharing();
}
break;
case Type::DISTRIBUTED_WORKER:
if (required_recovery_info != recovery_info)
LOG(FATAL) << "Memgraph worker failed to recover the database state "
"recovered on the master";
dynamic_cast<impl::Worker *>(impl_.get())
->cluster_discovery_.NotifyWorkerRecovered();
break;
case Type::SINGLE_NODE:
break;
}
}
if (impl_->config_.durability_enabled) {
@ -434,14 +157,12 @@ PublicBase::~PublicBase() {
// If we are not a worker we can do a snapshot on exit if it's enabled. Doing
// this on the master forces workers to do the same through rpcs
if (impl_->config_.snapshot_on_exit &&
impl_->type() != Type::DISTRIBUTED_WORKER) {
if (impl_->config_.snapshot_on_exit) {
GraphDbAccessor dba(*this);
MakeSnapshot(dba);
}
}
// Each accessor below forwards to the same-named accessor of the private
// implementation object.
GraphDb::Type PublicBase::type() const { return impl_->type(); }
Storage &PublicBase::storage() { return impl_->storage(); }
durability::WriteAheadLog &PublicBase::wal() { return impl_->wal(); }
tx::Engine &PublicBase::tx_engine() { return impl_->tx_engine(); }
@ -460,42 +181,6 @@ int PublicBase::WorkerId() const { return impl_->WorkerId(); }
// Every function below forwards to the same-named member of the private
// implementation. Which of the distributed accessors return a component and
// which abort with LOG(FATAL) is decided by the concrete implementation class.
std::vector<int> PublicBase::GetWorkerIds() const {
return impl_->GetWorkerIds();
}
distributed::BfsRpcServer &PublicBase::bfs_subcursor_server() {
return impl_->bfs_subcursor_server();
}
distributed::BfsRpcClients &PublicBase::bfs_subcursor_clients() {
return impl_->bfs_subcursor_clients();
}
distributed::DataRpcServer &PublicBase::data_server() {
return impl_->data_server();
}
distributed::DataRpcClients &PublicBase::data_clients() {
return impl_->data_clients();
}
distributed::PlanDispatcher &PublicBase::plan_dispatcher() {
return impl_->plan_dispatcher();
}
distributed::IndexRpcClients &PublicBase::index_rpc_clients() {
return impl_->index_rpc_clients();
}
distributed::PlanConsumer &PublicBase::plan_consumer() {
return impl_->plan_consumer();
}
distributed::PullRpcClients &PublicBase::pull_clients() {
return impl_->pull_clients();
}
distributed::ProduceRpcServer &PublicBase::produce_server() {
return impl_->produce_server();
}
distributed::UpdatesRpcServer &PublicBase::updates_server() {
return impl_->updates_server();
}
distributed::UpdatesRpcClients &PublicBase::updates_clients() {
return impl_->updates_clients();
}
distributed::DataManager &PublicBase::data_manager() {
return impl_->data_manager();
}
bool PublicBase::MakeSnapshot(GraphDbAccessor &accessor) {
return impl_->MakeSnapshot(accessor);
@ -524,31 +209,4 @@ MasterBase::~MasterBase() { snapshot_creator_ = nullptr; }
// Builds the single-node private implementation from `config` and hands it to
// MasterBase.
SingleNode::SingleNode(Config config)
: MasterBase(std::make_unique<impl::SingleNode>(config)) {}
/// Builds the distributed-master private implementation from `config` and
/// hands it to MasterBase.
Master::Master(Config config)
    : MasterBase(std::make_unique<impl::Master>(config)) {}

/// Endpoint of the master's own RPC server.
io::network::Endpoint Master::endpoint() const {
  auto *master_impl = dynamic_cast<impl::Master *>(impl_.get());
  return master_impl->server_.endpoint();
}

/// Endpoint that the master's coordination reports for `worker_id`.
io::network::Endpoint Master::GetEndpoint(int worker_id) {
  auto *master_impl = dynamic_cast<impl::Master *>(impl_.get());
  return master_impl->coordination_.GetEndpoint(worker_id);
}
/// Builds the distributed-worker private implementation from `config` and
/// hands it to PublicBase.
Worker::Worker(Config config)
    : PublicBase(std::make_unique<impl::Worker>(config)) {}

/// Endpoint of the worker's own RPC server.
io::network::Endpoint Worker::endpoint() const {
  auto *worker_impl = dynamic_cast<impl::Worker *>(impl_.get());
  return worker_impl->server_.endpoint();
}

/// Endpoint that the worker's coordination reports for `worker_id`.
io::network::Endpoint Worker::GetEndpoint(int worker_id) {
  auto *worker_impl = dynamic_cast<impl::Worker *>(impl_.get());
  return worker_impl->coordination_.GetEndpoint(worker_id);
}

/// Delegates to the worker coordination's WaitForShutdown.
void Worker::WaitForShutdown() {
  auto *worker_impl = dynamic_cast<impl::Worker *>(impl_.get());
  worker_impl->coordination_.WaitForShutdown();
}
} // namespace database

View File

@ -14,21 +14,6 @@
#include "transactions/engine.hpp"
#include "utils/scheduler.hpp"
// Forward declarations of the distributed components that the GraphDb
// interface exposes by reference, so this header does not have to include
// their definitions.
namespace distributed {
class BfsRpcServer;
class BfsRpcClients;
class DataRpcServer;
class DataRpcClients;
class PlanDispatcher;
class PlanConsumer;
class PullRpcClients;
class ProduceRpcServer;
class UpdatesRpcServer;
class UpdatesRpcClients;
class DataManager;
class IndexRpcClients;
} // namespace distributed
namespace database {
/// Database configuration. Initialized from flags, but modifiable.
@ -84,12 +69,9 @@ struct Config {
*/
class GraphDb {
public:
enum class Type { SINGLE_NODE, DISTRIBUTED_MASTER, DISTRIBUTED_WORKER };
GraphDb() {}
virtual ~GraphDb() {}
virtual Type type() const = 0;
virtual Storage &storage() = 0;
virtual durability::WriteAheadLog &wal() = 0;
virtual tx::Engine &tx_engine() = 0;
@ -102,25 +84,6 @@ class GraphDb {
virtual int WorkerId() const = 0;
virtual std::vector<int> GetWorkerIds() const = 0;
// Supported only in distributed master and worker, not in single-node.
virtual distributed::BfsRpcServer &bfs_subcursor_server() = 0;
virtual distributed::BfsRpcClients &bfs_subcursor_clients() = 0;
virtual distributed::DataRpcServer &data_server() = 0;
virtual distributed::DataRpcClients &data_clients() = 0;
virtual distributed::UpdatesRpcServer &updates_server() = 0;
virtual distributed::UpdatesRpcClients &updates_clients() = 0;
virtual distributed::DataManager &data_manager() = 0;
// Supported only in distributed master.
virtual distributed::PullRpcClients &pull_clients() = 0;
virtual distributed::PlanDispatcher &plan_dispatcher() = 0;
virtual distributed::IndexRpcClients &index_rpc_clients() = 0;
// Supported only in distributed worker.
// TODO remove once end2end testing is possible.
virtual distributed::ProduceRpcServer &produce_server() = 0;
virtual distributed::PlanConsumer &plan_consumer() = 0;
// Makes a snapshot from the visibility of the given accessor
virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0;
@ -146,7 +109,6 @@ class PrivateBase;
// initialization and cleanup.
class PublicBase : public GraphDb {
public:
Type type() const override;
Storage &storage() override;
durability::WriteAheadLog &wal() override;
tx::Engine &tx_engine() override;
@ -157,18 +119,6 @@ class PublicBase : public GraphDb {
void CollectGarbage() override;
int WorkerId() const override;
std::vector<int> GetWorkerIds() const override;
distributed::BfsRpcServer &bfs_subcursor_server() override;
distributed::BfsRpcClients &bfs_subcursor_clients() override;
distributed::DataRpcServer &data_server() override;
distributed::DataRpcClients &data_clients() override;
distributed::PlanDispatcher &plan_dispatcher() override;
distributed::IndexRpcClients &index_rpc_clients() override;
distributed::PlanConsumer &plan_consumer() override;
distributed::PullRpcClients &pull_clients() override;
distributed::ProduceRpcServer &produce_server() override;
distributed::UpdatesRpcServer &updates_server() override;
distributed::UpdatesRpcClients &updates_clients() override;
distributed::DataManager &data_manager() override;
bool is_accepting_transactions() const { return is_accepting_transactions_; }
bool MakeSnapshot(GraphDbAccessor &accessor) override;
@ -201,24 +151,4 @@ class SingleNode : public MasterBase {
explicit SingleNode(Config config = Config());
};
/** Public handle of a distributed master database. */
class Master : public MasterBase {
public:
/** Creates the master from `config` (a default-constructed Config if omitted). */
explicit Master(Config config = Config());
/** Gets this master's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
};
/**
 * Database class derived directly from impl::PublicBase (not from MasterBase)
 * that exposes endpoint lookups for itself and for other workers.
 */
class Worker : public impl::PublicBase {
public:
/** Constructs the worker; `config` defaults to a default-constructed Config. */
explicit Worker(Config config = Config());
/** Gets this worker's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
// NOTE(review): presumably blocks the caller until the worker is told to shut
// down -- confirm against the implementation, which is not visible here.
void WaitForShutdown();
};
} // namespace database

View File

@ -4,9 +4,6 @@
#include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "storage/address_types.hpp"
#include "storage/edge.hpp"
#include "storage/edge_accessor.hpp"
@ -92,26 +89,6 @@ VertexAccessor GraphDbAccessor::InsertVertex(
return va;
}
// Creates a vertex on the worker `worker_id` through the updates RPC client
// and mirrors it in this transaction's data-manager cache so that it can be
// accessed locally. Must not be called with the id of the local worker.
VertexAccessor GraphDbAccessor::InsertVertexIntoRemote(
    int worker_id, const std::vector<storage::Label> &labels,
    const std::unordered_map<storage::Property, query::TypedValue>
        &properties) {
  CHECK(worker_id != db().WorkerId())
      << "Not allowed to call InsertVertexIntoRemote for local worker";

  // The remote side creates the vertex and hands back its gid.
  gid::Gid remote_gid = db().updates_clients().CreateVertex(
      worker_id, transaction_id(), labels, properties);

  // Build a local copy that carries the same labels and properties.
  auto cached_vertex = std::make_unique<Vertex>();
  cached_vertex->labels_ = labels;
  for (auto &property : properties) {
    cached_vertex->properties_.set(property.first, property.second);
  }

  // Cache the copy under the returned gid; nullptr is passed in the slot
  // preceding the freshly built record.
  db().data_manager()
      .Elements<Vertex>(transaction_id())
      .emplace(remote_gid, nullptr, std::move(cached_vertex));

  return VertexAccessor({remote_gid, worker_id}, *this);
}
std::experimental::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
gid::Gid gid, bool current_state) {
VertexAccessor record_accessor(db_.storage().LocalAddress<Vertex>(gid),
@ -144,8 +121,6 @@ EdgeAccessor GraphDbAccessor::FindEdge(gid::Gid gid, bool current_state) {
void GraphDbAccessor::BuildIndex(storage::Label label,
storage::Property property) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
DCHECK(db_.type() != GraphDb::Type::DISTRIBUTED_WORKER)
<< "BuildIndex invoked on worker";
db_.storage().index_build_tx_in_progress_.access().insert(transaction_.id_);
@ -192,13 +167,6 @@ void GraphDbAccessor::BuildIndex(storage::Label label,
std::experimental::optional<std::vector<utils::Future<bool>>>
index_rpc_completions;
// Notify all workers to start building an index if we are the master since
// they don't have to wait anymore
if (db_.type() == GraphDb::Type::DISTRIBUTED_MASTER) {
index_rpc_completions.emplace(db_.index_rpc_clients().GetBuildIndexFutures(
label, property, transaction_id(), this->db_.WorkerId()));
}
// Add transaction to the build_tx_in_progress as this transaction doesn't
// change data and shouldn't block other parallel index creations
auto read_transaction_id = dba.transaction().id_;
@ -352,14 +320,6 @@ bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
bool check_empty) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (!vertex_accessor.is_local()) {
auto address = vertex_accessor.address();
db().updates_clients().RemoveVertex(address.worker_id(), transaction_id(),
address.gid(), check_empty);
// We can't know if we are going to be able to remove vertex until deferred
// updates on a remote worker are executed
return true;
}
vertex_accessor.SwitchNew();
// it's possible the vertex was removed already in this transaction
// due to it getting matched multiple times by some patterns
@ -402,59 +362,33 @@ EdgeAccessor GraphDbAccessor::InsertEdge(
storage::EdgeAddress edge_address;
Vertex *from_updated;
if (from.is_local()) {
auto edge_accessor =
InsertOnlyEdge(from.address(), to.address(), edge_type, requested_gid);
edge_address = edge_accessor.address(),
from.SwitchNew();
from_updated = &from.update();
auto edge_accessor =
InsertOnlyEdge(from.address(), to.address(), edge_type, requested_gid);
edge_address = edge_accessor.address(),
// TODO when preparing WAL for distributed, most likely never use
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
// in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, edge_accessor.gid(), from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
from.SwitchNew();
from_updated = &from.update();
} else {
edge_address = db().updates_clients().CreateEdge(transaction_id(), from, to,
edge_type);
// TODO when preparing WAL for distributed, most likely never use
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
// in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, edge_accessor.gid(), from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
from_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(from.gid());
// Create an Edge and insert it into the Cache so we see it locally.
db().data_manager()
.Elements<Edge>(transaction_id())
.emplace(
edge_address.gid(), nullptr,
std::make_unique<Edge>(from.address(), to.address(), edge_type));
}
from_updated->out_.emplace(
db_.storage().LocalizedAddressIfPossible(to.address()), edge_address,
edge_type);
Vertex *to_updated;
if (to.is_local()) {
// ensure that the "to" accessor has the latest version (Switch new)
// WARNING: must do that after the above "from.update()" for cases when
// we are creating a cycle and "from" and "to" are the same vlist
to.SwitchNew();
to_updated = &to.update();
} else {
// The RPC call for the `to` side is already handled if `from` is not local.
if (from.is_local() ||
from.address().worker_id() != to.address().worker_id()) {
db().updates_clients().AddInEdge(
transaction_id(), from,
db().storage().GlobalizedAddress(edge_address), to, edge_type);
}
to_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(to.gid());
}
// ensure that the "to" accessor has the latest version (Switch new)
// WARNING: must do that after the above "from.update()" for cases when
// we are creating a cycle and "from" and "to" are the same vlist
to.SwitchNew();
to_updated = &to.update();
to_updated->in_.emplace(
db_.storage().LocalizedAddressIfPossible(from.address()), edge_address,
edge_type);
@ -492,35 +426,17 @@ int64_t GraphDbAccessor::EdgesCount() const {
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
bool remove_in_edge) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (edge.is_local()) {
// it's possible the edge was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
edge.SwitchNew();
if (edge.current().is_expired_by(transaction_)) return;
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
edge.address().local()->remove(edge.current_, transaction_);
wal().Emplace(
database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
} else {
auto edge_addr = edge.GlobalAddress();
auto from_addr = db().storage().GlobalizedAddress(edge.from_addr());
CHECK(edge_addr.worker_id() == from_addr.worker_id())
<< "Edge and it's 'from' vertex not on the same worker";
auto to_addr = db().storage().GlobalizedAddress(edge.to_addr());
db().updates_clients().RemoveEdge(transaction_id(), edge_addr.worker_id(),
edge_addr.gid(), from_addr.gid(),
to_addr);
// it's possible the edge was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
edge.SwitchNew();
if (edge.current().is_expired_by(transaction_)) return;
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
// Another RPC is necessary only if the first did not handle vertices on
// both sides.
if (edge_addr.worker_id() != to_addr.worker_id()) {
db().updates_clients().RemoveInEdge(transaction_id(), to_addr.worker_id(),
to_addr.gid(), edge_addr);
}
}
edge.address().local()->remove(edge.current_, transaction_);
wal().Emplace(database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
}
storage::Label GraphDbAccessor::Label(const std::string &label_name) {

View File

@ -9,7 +9,6 @@
#include "glog/logging.h"
#include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "query/typed_value.hpp"
#include "storage/address_types.hpp"
#include "storage/edge_accessor.hpp"
@ -78,13 +77,6 @@ class GraphDbAccessor {
VertexAccessor InsertVertex(std::experimental::optional<gid::Gid>
requested_gid = std::experimental::nullopt);
/** Creates a new Vertex on the given worker. It is NOT allowed to call this
* function with this worker's id. */
VertexAccessor InsertVertexIntoRemote(
int worker_id, const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/**
* Removes the vertex of the given accessor. If the vertex has any outgoing or
* incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to

View File

@ -3,22 +3,17 @@
#include "communication/bolt/v1/decoder/decoder.hpp"
#include "communication/bolt/v1/encoder/primitive_encoder.hpp"
#include "database/state_delta.capnp.h"
#include "durability/hashed_file_reader.hpp"
#include "durability/hashed_file_writer.hpp"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "storage/property_value.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:namespace database)
(lcp:capnp-namespace "database")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
(lcp:capnp-type-conversion "gid::Gid" "UInt64")
(lcp:capnp-type-conversion "storage::Label" "Storage.Common")
@ -108,7 +103,7 @@ in StateDeltas.")
"Defines StateDelta type. For each type the comment indicates which values
need to be stored. All deltas have the transaction_id member, so that's
omitted in the comment.")
(:serialize :capnp))
(:serialize))
#>cpp
StateDelta() = default;
StateDelta(const enum Type &type, tx::TransactionId tx_id)
@ -174,6 +169,6 @@ omitted in the comment.")
/// Applies CRUD delta to database accessor. Fails on other types of deltas
void Apply(GraphDbAccessor &dba) const;
cpp<#)
(:serialize :capnp))
(:serialize))
(lcp:pop-namespace) ;; database

View File

@ -6,7 +6,6 @@
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/storage.hpp"
#include "mvcc/version_list.hpp"
#include "stats/metrics.hpp"
#include "storage/deferred_deleter.hpp"
#include "storage/edge.hpp"
#include "storage/garbage_collector.hpp"

View File

@ -1,68 +0,0 @@
#pragma once
#include <mutex>
#include "database/storage_gc.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
namespace database {
/// Storage garbage collector variant that, besides the base `StorageGc` work,
/// tracks the transaction id each worker reported as safe and only clears the
/// commit log up to the minimum over all of them.
class StorageGcMaster : public StorageGc {
 public:
  using StorageGc::StorageGc;

  /// Registers a `RanLocalGcRpc` handler on `rpc_server` which records, per
  /// worker id, the oldest active transaction that worker reported.
  StorageGcMaster(Storage &storage, tx::Engine &tx_engine, int pause_sec,
                  communication::rpc::Server &rpc_server,
                  distributed::MasterCoordination &coordination)
      : StorageGc(storage, tx_engine, pause_sec),
        rpc_server_(rpc_server),
        coordination_(coordination) {
    rpc_server_.Register<distributed::RanLocalGcRpc>(
        [this](const auto &req_reader, auto *res_builder) {
          distributed::RanLocalGcReq request;
          request.Load(req_reader);
          std::lock_guard<std::mutex> guard(worker_safe_transaction_mutex_);
          worker_safe_transaction_[request.worker_id] =
              request.local_oldest_active;
        });
  }

  ~StorageGcMaster() {
    // The scheduler has to be stopped before this object is destroyed;
    // otherwise a scheduled task might call into methods of this class, which
    // could end in a "pure virtual method called" error because the base class
    // does not implement them.
    scheduler_.Stop();
    rpc_server_.UnRegister<distributed::RanLocalGcRpc>();
  }

  /// Clears the commit log up to the smallest transaction id that is safe
  /// both locally and for every worker.
  ///
  /// Workers report when it is safe, from their perspective, to delete every
  /// transaction older than `oldest_active`, i.e. no future transaction with
  /// an id larger than or equal to `oldest_active` will query the commit log
  /// about the state of the transactions being deleted.
  void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
    auto local_safe = GetClogSafeTransaction(oldest_active);
    if (!local_safe) return;

    tx::TransactionId min_safe = *local_safe;
    {
      std::lock_guard<std::mutex> guard(worker_safe_transaction_mutex_);
      for (auto worker_id : coordination_.GetWorkerIds()) {
        // Id 0 is this instance itself, so it is skipped.
        if (worker_id != 0) {
          min_safe = std::min(min_safe, worker_safe_transaction_[worker_id]);
        }
      }
    }

    // A value of 0 means that some worker has not reported back yet.
    if (min_safe > 0) {
      tx_engine_.GarbageCollectCommitLog(min_safe);
      LOG(INFO) << "Clearing master commit log with tx: " << min_safe;
    }
  }

  communication::rpc::Server &rpc_server_;
  distributed::MasterCoordination &coordination_;
  // Maps a worker id to the oldest active transaction which is safe for
  // deletion from that worker's perspective.
  std::unordered_map<int, tx::TransactionId> worker_safe_transaction_;
  std::mutex worker_safe_transaction_mutex_;
};
} // namespace database

View File

@ -1,46 +0,0 @@
#pragma once
#include "communication/rpc/client_pool.hpp"
#include "database/storage_gc.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
#include "transactions/engine_worker.hpp"
#include "transactions/transaction.hpp"
namespace database {
class StorageGcWorker : public StorageGc {
public:
StorageGcWorker(Storage &storage, tx::Engine &tx_engine, int pause_sec,
communication::rpc::ClientPool &master_client_pool,
int worker_id)
: StorageGc(storage, tx_engine, pause_sec),
master_client_pool_(master_client_pool),
worker_id_(worker_id) {}
~StorageGcWorker() {
// We have to stop scheduler before destroying this class because otherwise
// a task might try to utilize methods in this class which might cause pure
// virtual method called since they are not implemented for the base class.
scheduler_.Stop();
}
void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
// We first need to delete transactions that we can delete to be sure that
// the locks are released as well. Otherwise some new transaction might
// try to acquire a lock which hasn't been released (if the transaction
// cache cleaner was not scheduled at this time), and take a look into the
// commit log which no longer contains that transaction id.
dynamic_cast<tx::WorkerEngine &>(tx_engine_)
.ClearTransactionalCache(oldest_active);
auto safe_to_delete = GetClogSafeTransaction(oldest_active);
if (safe_to_delete) {
master_client_pool_.Call<distributed::RanLocalGcRpc>(*safe_to_delete,
worker_id_);
tx_engine_.GarbageCollectCommitLog(*safe_to_delete);
}
}
communication::rpc::ClientPool &master_client_pool_;
int worker_id_;
};
} // namespace database

View File

@ -1,178 +0,0 @@
#include "distributed/bfs_rpc_messages.hpp"
#include "distributed/data_manager.hpp"
#include "bfs_rpc_clients.hpp"
namespace distributed {
// Stores the given pointers without taking ownership: `db` is used for the
// local worker id and the data manager, `subcursor_storage` serves subcursors
// that live on this worker, and `clients` issues RPCs to the other workers.
BfsRpcClients::BfsRpcClients(
database::GraphDb *db, distributed::BfsSubcursorStorage *subcursor_storage,
distributed::RpcWorkerClients *clients)
: db_(db), subcursor_storage_(subcursor_storage), clients_(clients) {}
// Creates one BFS subcursor per worker: remotely through
// `CreateBfsSubcursorRpc` and locally through the subcursor storage. Returns
// a map from worker id to the id of the subcursor created on that worker.
std::unordered_map<int16_t, int64_t> BfsRpcClients::CreateBfsSubcursors(
    tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
    const std::vector<storage::EdgeType> &edge_types,
    query::GraphView graph_view) {
  // The RPC requests are issued before the local subcursor is created.
  auto remote_results =
      clients_->ExecuteOnWorkers<std::pair<int16_t, int64_t>>(
          db_->WorkerId(), [tx_id, direction, &edge_types, graph_view](
                               int worker_id, auto &client) {
            auto response = client.template Call<CreateBfsSubcursorRpc>(
                tx_id, direction, edge_types, graph_view);
            CHECK(response) << "CreateBfsSubcursor RPC failed!";
            return std::make_pair(worker_id, response->member);
          });

  std::unordered_map<int16_t, int64_t> ids_by_worker;
  ids_by_worker.emplace(
      db_->WorkerId(),
      subcursor_storage_->Create(tx_id, direction, edge_types, graph_view));

  // Every worker may contribute exactly one subcursor id.
  for (auto &pending : remote_results) {
    auto inserted = ids_by_worker.emplace(pending.get());
    CHECK(inserted.second) << "CreateBfsSubcursors failed: duplicate worker id";
  }
  return ids_by_worker;
}
// Hands the complete worker-id -> subcursor-id map to every subcursor: to the
// remote ones through `RegisterSubcursorsRpc` and to the local one directly.
void BfsRpcClients::RegisterSubcursors(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
  auto pending_rpcs = clients_->ExecuteOnWorkers<void>(
      db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
        auto response =
            client.template Call<RegisterSubcursorsRpc>(subcursor_ids);
        CHECK(response) << "RegisterSubcursors RPC failed!";
      });

  const auto local_subcursor_id = subcursor_ids.at(db_->WorkerId());
  subcursor_storage_->Get(local_subcursor_id)
      ->RegisterSubcursors(subcursor_ids);
}
// Removes the subcursor of every worker listed in `subcursor_ids`: remote
// ones through `RemoveBfsSubcursorRpc`, the local one by erasing it from the
// subcursor storage.
void BfsRpcClients::RemoveBfsSubcursors(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
  auto pending_rpcs = clients_->ExecuteOnWorkers<void>(
      db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
        auto response = client.template Call<RemoveBfsSubcursorRpc>(
            subcursor_ids.at(worker_id));
        CHECK(response) << "RemoveBfsSubcursor RPC failed!";
      });

  const auto local_subcursor_id = subcursor_ids.at(db_->WorkerId());
  subcursor_storage_->Erase(local_subcursor_id);
}
// Pulls the next vertex from the subcursor `subcursor_id` on `worker_id`.
// A local subcursor is queried directly. For a remote one the vertex records
// received in the RPC response are stored in the data-manager cache of
// `dba`'s transaction before an accessor to the vertex is returned. Returns
// nullopt when the remote subcursor yielded no vertex.
std::experimental::optional<VertexAccessor> BfsRpcClients::Pull(
    int16_t worker_id, int64_t subcursor_id, database::GraphDbAccessor *dba) {
  const bool is_local = worker_id == db_->WorkerId();
  if (is_local) {
    return subcursor_storage_->Get(subcursor_id)->Pull();
  }

  auto response =
      clients_->GetClientPool(worker_id).Call<SubcursorPullRpc>(subcursor_id);
  CHECK(response) << "SubcursorPull RPC failed!";
  if (!response->vertex) return std::experimental::nullopt;

  auto &pulled = *response->vertex;
  db_->data_manager()
      .Elements<Vertex>(dba->transaction_id())
      .emplace(pulled.global_address.gid(),
               std::move(pulled.old_element_output),
               std::move(pulled.new_element_output));
  return VertexAccessor(pulled.global_address, *dba);
}
// Asks every subcursor (remote ones through `ExpandLevelRpc`, the local one
// directly) to expand one level. Returns true if at least one of them
// reported an expansion. All remote results are always collected.
bool BfsRpcClients::ExpandLevel(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
  auto remote_results = clients_->ExecuteOnWorkers<bool>(
      db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
        auto response =
            client.template Call<ExpandLevelRpc>(subcursor_ids.at(worker_id));
        CHECK(response) << "ExpandLevel RPC failed!";
        return response->member;
      });

  bool any_expanded =
      subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))->ExpandLevel();
  for (auto &pending : remote_results) {
    // Fetch the result unconditionally so that no future is left unread.
    const bool remote_expanded = pending.get();
    any_expanded = any_expanded || remote_expanded;
  }
  return any_expanded;
}
// Sets the BFS source vertex on the subcursor of the worker that
// `source_address` points to; `source_address` must be a global address.
void BfsRpcClients::SetSource(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids,
    storage::VertexAddress source_address) {
  CHECK(source_address.is_remote())
      << "SetSource should be called with global address";

  int owner_id = source_address.worker_id();

  // The source lives on this worker: talk to the local subcursor directly.
  if (owner_id == db_->WorkerId()) {
    subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
        ->SetSource(source_address);
    return;
  }

  auto response = clients_->GetClientPool(owner_id).Call<SetSourceRpc>(
      subcursor_ids.at(owner_id), source_address);
  CHECK(response) << "SetSourceRpc failed!";
}
// Forwards an expansion over `edge` to the non-local `vertex` to the subcursor
// on the worker that owns the vertex and returns the flag from the response.
bool BfsRpcClients::ExpandToRemoteVertex(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids,
    EdgeAccessor edge, VertexAccessor vertex) {
  CHECK(!vertex.is_local())
      << "ExpandToRemoteVertex should not be called with local vertex";

  int owner_id = vertex.address().worker_id();
  auto response =
      clients_->GetClientPool(owner_id).Call<ExpandToRemoteVertexRpc>(
          subcursor_ids.at(owner_id), edge.GlobalAddress(),
          vertex.GlobalAddress());
  CHECK(response) << "ExpandToRemoteVertex RPC failed!";
  return response->member;
}
// Converts a `ReconstructPathRes` into a `PathSegment`.
//
// Every edge received in `res` is stored in the data-manager cache of `dba`'s
// transaction (its old/new records are moved out of `res`) and an accessor to
// it is appended to the segment. `next_vertex` and `next_edge` are copied
// from the response unchanged.
PathSegment BuildPathSegment(ReconstructPathRes *res,
                             database::GraphDbAccessor *dba) {
  std::vector<EdgeAccessor> edges;
  // The final size is known up front, so avoid repeated reallocation.
  edges.reserve(res->edges.size());
  for (auto &edge : res->edges) {
    dba->db()
        .data_manager()
        .Elements<Edge>(dba->transaction_id())
        .emplace(edge.global_address.gid(), std::move(edge.old_element_output),
                 std::move(edge.new_element_output));
    edges.emplace_back(edge.global_address, *dba);
  }
  // Move the accessors into the segment instead of copying the whole vector.
  return PathSegment{std::move(edges), res->next_vertex, res->next_edge};
}
// Reconstructs the path segment that ends in `vertex`.
//
// If the vertex is owned by this worker the local subcursor is used directly;
// otherwise `ReconstructPathRpc` is sent to the owning worker and the edges
// from the response are stored in the cache of `dba`'s transaction.
PathSegment BfsRpcClients::ReconstructPath(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids,
    storage::VertexAddress vertex, database::GraphDbAccessor *dba) {
  int worker_id = vertex.worker_id();
  if (worker_id == db_->WorkerId()) {
    return subcursor_storage_->Get(subcursor_ids.at(worker_id))
        ->ReconstructPath(vertex);
  }

  auto res = clients_->GetClientPool(worker_id).Call<ReconstructPathRpc>(
      subcursor_ids.at(worker_id), vertex);
  // Fail in the same way as every other RPC in this class instead of letting
  // `res.value()` throw on an empty optional.
  CHECK(res) << "ReconstructPath RPC failed!";
  return BuildPathSegment(&res.value(), dba);
}
// Reconstructs the path segment that ends with `edge`.
//
// If the edge is owned by this worker the local subcursor is used directly;
// otherwise `ReconstructPathRpc` is sent to the owning worker and the edges
// from the response are stored in the cache of `dba`'s transaction.
PathSegment BfsRpcClients::ReconstructPath(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids,
    storage::EdgeAddress edge, database::GraphDbAccessor *dba) {
  int worker_id = edge.worker_id();
  if (worker_id == db_->WorkerId()) {
    return subcursor_storage_->Get(subcursor_ids.at(worker_id))
        ->ReconstructPath(edge);
  }

  auto res = clients_->GetClientPool(worker_id).Call<ReconstructPathRpc>(
      subcursor_ids.at(worker_id), edge);
  // Fail in the same way as every other RPC in this class instead of letting
  // `res.value()` throw on an empty optional.
  CHECK(res) << "ReconstructPath RPC failed!";
  return BuildPathSegment(&res.value(), dba);
}
// Tells every subcursor (remote ones through `PrepareForExpandRpc`, the local
// one directly) to prepare for the next expansion, forwarding `clear`.
void BfsRpcClients::PrepareForExpand(
    const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear) {
  auto pending_rpcs = clients_->ExecuteOnWorkers<void>(
      db_->WorkerId(), [clear, &subcursor_ids](int worker_id, auto &client) {
        auto response = client.template Call<PrepareForExpandRpc>(
            subcursor_ids.at(worker_id), clear);
        CHECK(response) << "PrepareForExpand RPC failed!";
      });

  const auto local_subcursor_id = subcursor_ids.at(db_->WorkerId());
  subcursor_storage_->Get(local_subcursor_id)->PrepareForExpand(clear);
}
} // namespace distributed

View File

@ -1,62 +0,0 @@
/// @file
#pragma once
#include "distributed/bfs_subcursor.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "transactions/transaction.hpp"
namespace distributed {
/// Along with `BfsRpcServer`, this class is used to expose the `BfsSubcursor`
/// interface over the network so that subcursors can communicate during the
/// traversal. It is just a thin wrapper making RPC calls that also takes
/// care of storing remote data into the cache upon receipt. Special care is
/// taken to avoid sending local RPCs; instead, the subcursor storage is
/// accessed directly.
class BfsRpcClients {
public:
/// Stores the given pointers; none of them is owned by this class.
BfsRpcClients(database::GraphDb *db,
distributed::BfsSubcursorStorage *subcursor_storage,
distributed::RpcWorkerClients *clients);
/// Creates a subcursor on every worker (including the local one) and returns
/// a map from worker id to the id of the subcursor created there.
std::unordered_map<int16_t, int64_t> CreateBfsSubcursors(
tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
const std::vector<storage::EdgeType> &edge_types,
query::GraphView graph_view);
/// Passes the full worker-id -> subcursor-id map to every subcursor.
void RegisterSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
/// Removes the subcursors listed in `subcursor_ids` from all workers.
void RemoveBfsSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
/// Pulls the next vertex from the given subcursor. Data of a remotely pulled
/// vertex is stored in the cache of `dba`'s transaction.
std::experimental::optional<VertexAccessor> Pull(
int16_t worker_id, int64_t subcursor_id, database::GraphDbAccessor *dba);
/// Expands one level on every subcursor; returns true if any of them reported
/// an expansion.
bool ExpandLevel(const std::unordered_map<int16_t, int64_t> &subcursor_ids);
/// Sets the source vertex on the subcursor of the worker that owns it.
/// `source_address` has to be a global address.
void SetSource(const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress source_address);
/// Forwards an expansion to a vertex owned by another worker. `vertex` must
/// not be local.
bool ExpandToRemoteVertex(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
EdgeAccessor edge, VertexAccessor vertex);
/// Reconstructs a path segment starting from the given edge address; remote
/// edges in the result are stored in the cache of `dba`'s transaction.
PathSegment ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::EdgeAddress edge, database::GraphDbAccessor *dba);
/// Reconstructs a path segment starting from the given vertex address; remote
/// edges in the result are stored in the cache of `dba`'s transaction.
PathSegment ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress vertex, database::GraphDbAccessor *dba);
/// Prepares every subcursor for the next expansion, forwarding `clear`.
void PrepareForExpand(
const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear);
private:
// Used for the local worker id and the data-manager cache.
database::GraphDb *db_;
// Serves subcursors that live on this worker, bypassing RPC.
distributed::BfsSubcursorStorage *subcursor_storage_;
// Issues RPCs to the other workers.
distributed::RpcWorkerClients *clients_;
};
} // namespace distributed

View File

@ -1,280 +0,0 @@
#>cpp
#pragma once
#include <tuple>
#include "communication/rpc/messages.hpp"
#include "distributed/bfs_rpc_messages.capnp.h"
#include "distributed/bfs_subcursor.hpp"
#include "query/plan/operator.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'ast "/query/frontend/ast/ast.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'query "/query/common.capnp")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "storage::EdgeAddress" "Storage.Address")
(lcp:capnp-type-conversion "storage::VertexAddress" "Storage.Address")
;; Returns the C++ capnp-save code for a graph element pointer MEMBER.
;; The generated code writes a vertex or an edge (selected at compile time by
;; TElement) into BUILDER when the pointer is set, and marks the union as null
;; otherwise.
(defun save-element (builder member)
#>cpp
if (${member}) {
if constexpr (std::is_same<TElement, Vertex>::value) {
auto builder = ${builder}.initVertex();
SaveVertex(*${member}, &builder, worker_id);
} else {
auto builder = ${builder}.initEdge();
SaveEdge(*${member}, &builder, worker_id);
}
} else {
${builder}.setNull();
}
cpp<#)
;; Returns the C++ capnp-load code for a graph element MEMBER.
;; MEMBER names the "...input" slot; the loaded value is assigned to the
;; matching "...output" slot (the trailing "input" is replaced by "output").
;; Nothing is assigned when READER holds the null variant.
(defun load-element (reader member)
(let ((output-member (cl-ppcre:regex-replace "input$" member "output")))
#>cpp
if (!${reader}.isNull()) {
if constexpr (std::is_same<TElement, Vertex>::value) {
const auto reader = ${reader}.getVertex();
${output-member} = LoadVertex(reader);
} else {
const auto reader = ${reader}.getEdge();
${output-member} = LoadEdge(reader);
}
}
cpp<#))
;; Serializable wrapper around a vertex or an edge (template parameter
;; TElement) identified by its global address.
;; The "...input" slots are raw pointers that are only written when saving
;; (their load functions are empty); the "...output" slots are owning pointers
;; that are only filled when loading (their save functions are empty).
(lcp:define-struct (serialized-graph-element t-element) ()
((global-address "storage::Address<mvcc::VersionList<TElement>>"
:capnp-type "Storage.Address")
;; Old record to send; saved as a presence flag followed by the element.
(old-element-input "TElement *"
:save-fun
"if (old_element_input) {
ar << true;
SaveElement(ar, *old_element_input, worker_id);
} else {
ar << false;
}"
:load-fun ""
:capnp-type '((null "Void") (vertex "Dis.Vertex") (edge "Dis.Edge"))
:capnp-save #'save-element :capnp-load #'load-element)
;; Old record as received; stays unset when the presence flag is false.
(old-element-output "std::unique_ptr<TElement>"
:save-fun ""
:load-fun
"bool has_old;
ar >> has_old;
if (has_old) {
if constexpr (std::is_same<TElement, Vertex>::value) {
old_element_output = std::move(LoadVertex(ar));
} else {
old_element_output = std::move(LoadEdge(ar));
}
}"
:capnp-save :dont-save)
;; New record to send; saved as a presence flag followed by the element.
(new-element-input "TElement *"
:save-fun
"if (new_element_input) {
ar << true;
SaveElement(ar, *new_element_input, worker_id);
} else {
ar << false;
}"
:load-fun ""
:capnp-type '((null "Void") (vertex "Dis.Vertex") (edge "Dis.Edge"))
:capnp-save #'save-element :capnp-load #'load-element)
;; New record as received; stays unset when the presence flag is false.
(new-element-output "std::unique_ptr<TElement>"
:save-fun ""
:load-fun
"bool has_new;
ar >> has_new;
if (has_new) {
if constexpr (std::is_same<TElement, Vertex>::value) {
new_element_output = std::move(LoadVertex(ar));
} else {
new_element_output = std::move(LoadEdge(ar));
}
}"
:capnp-save :dont-save)
;; Passed to the element save functions; never serialized itself.
(worker-id :int16_t :save-fun "" :load-fun "" :capnp-save :dont-save))
(:public
#>cpp
SerializedGraphElement(storage::Address<mvcc::VersionList<TElement>> global_address,
TElement *old_element_input, TElement *new_element_input,
int16_t worker_id)
: global_address(global_address),
old_element_input(old_element_input),
old_element_output(nullptr),
new_element_input(new_element_input),
new_element_output(nullptr),
worker_id(worker_id) {
CHECK(global_address.is_remote())
<< "Only global addresses should be used with SerializedGraphElement";
}
SerializedGraphElement(const RecordAccessor<TElement> &accessor)
: SerializedGraphElement(accessor.GlobalAddress(), accessor.GetOld(),
accessor.GetNew(),
accessor.db_accessor().db().WorkerId()) {}
SerializedGraphElement() {}
cpp<#)
(:serialize :capnp :type-args '(vertex edge)))
#>cpp
using SerializedVertex = SerializedGraphElement<Vertex>;
using SerializedEdge = SerializedGraphElement<Edge>;
cpp<#
;; RPC asking a worker to create a BFS subcursor for the given transaction,
;; expansion direction and graph view. The response carries the id of the
;; created subcursor.
;; NOTE(review): edge-types is marked :dont-save with empty save/load
;; functions, so it is not transmitted -- see the TODO below.
(lcp:define-rpc create-bfs-subcursor
(:request
((tx-id "tx::TransactionId" :capnp-type "UInt64")
(direction "query::EdgeAtom::Direction"
:capnp-type "Ast.EdgeAtom.Direction" :capnp-init nil
:capnp-save (lcp:capnp-save-enum "::query::capnp::EdgeAtom::Direction"
"query::EdgeAtom::Direction"
'(in out both))
:capnp-load (lcp:capnp-load-enum "::query::capnp::EdgeAtom::Direction"
"query::EdgeAtom::Direction"
'(in out both)))
;; TODO(mtomic): Why isn't edge-types serialized?
(edge-types "std::vector<storage::EdgeType>"
:save-fun "" :load-fun "" :capnp-save :dont-save)
(graph-view "query::GraphView"
:capnp-type "Query.GraphView" :capnp-init nil
:capnp-save (lcp:capnp-save-enum "::query::capnp::GraphView"
"query::GraphView"
'(old new))
:capnp-load (lcp:capnp-load-enum "::query::capnp::GraphView"
"query::GraphView"
'(old new)))))
(:response ((member :int64_t))))
;; RPC that delivers the complete worker-id -> subcursor-id map to a worker.
;; The map is serialized entry by entry as boxed int16 keys and boxed int64
;; values. The response is empty.
(lcp:define-rpc register-subcursors
(:request ((subcursor-ids "std::unordered_map<int16_t, int64_t>"
:capnp-type "Utils.Map(Utils.BoxInt16, Utils.BoxInt64)"
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<utils::capnp::BoxInt16, utils::capnp::BoxInt64>(
${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
key_builder.setValue(entry.first);
auto value_builder = builder->initValue();
value_builder.setValue(entry.second);
});
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<utils::capnp::BoxInt16, utils::capnp::BoxInt64>(
&${member}, ${reader},
[](const auto &reader) {
int16_t key = reader.getKey().getValue();
int64_t value = reader.getValue().getValue();
return std::make_pair(key, value);
});
cpp<#))))
(:response ()))
;; RPC asking a worker to remove a subcursor; `member` is the subcursor id.
;; The response is empty.
(lcp:define-rpc remove-bfs-subcursor
(:request ((member :int64_t)))
(:response ()))
;; RPC asking a worker to expand one level on the subcursor whose id is the
;; request `member`; the response `member` is the boolean result of that
;; expansion.
(lcp:define-rpc expand-level
(:request ((member :int64_t)))
(:response ((member :bool))))
;; RPC pulling the next vertex from the subcursor whose id is the request
;; `member`. The response holds the serialized vertex, or an empty optional
;; when there is none.
(lcp:define-rpc subcursor-pull
(:request ((member :int64_t)))
(:response ((vertex "std::experimental::optional<SerializedVertex>" :initarg :move
:capnp-type "Utils.Optional(SerializedGraphElement)"
:capnp-save (lcp:capnp-save-optional "capnp::SerializedGraphElement" "SerializedVertex")
:capnp-load (lcp:capnp-load-optional "capnp::SerializedGraphElement" "SerializedVertex")))))
;; RPC setting the BFS source vertex on the given subcursor. The response is
;; empty.
(lcp:define-rpc set-source
(:request
((subcursor-id :int64_t)
(source "storage::VertexAddress")))
(:response ()))
;; RPC forwarding an expansion over `edge` to `vertex` to the subcursor on the
;; worker that owns the vertex; the response `member` is the boolean result.
(lcp:define-rpc expand-to-remote-vertex
(:request
((subcursor-id :int64_t)
(edge "storage::EdgeAddress")
(vertex "storage::VertexAddress")))
(:response ((member :bool))))
;; RPC asking a worker to reconstruct a path segment on the given subcursor.
;; The request names the starting point either as a vertex or as an edge
;; address (each constructor sets exactly one of the two). The response holds
;; the serialized edges of the segment plus an optional address (vertex or
;; edge, at most one of them) from which the reconstruction should continue.
(lcp:define-rpc reconstruct-path
  (:request
   ((subcursor-id :int64_t)
    (vertex "std::experimental::optional<storage::VertexAddress>"
            :capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::VertexAddress")
            :capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::VertexAddress"))
    (edge "std::experimental::optional<storage::EdgeAddress>"
          :capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::EdgeAddress")
          :capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::EdgeAddress")))
   (:public
    #>cpp
    using Capnp = capnp::ReconstructPathReq;
    static const communication::rpc::MessageType TypeInfo;

    ReconstructPathReq() {}

    ReconstructPathReq(int64_t subcursor_id, storage::VertexAddress vertex)
        : subcursor_id(subcursor_id),
          vertex(vertex),
          edge(std::experimental::nullopt) {}

    ReconstructPathReq(int64_t subcursor_id, storage::EdgeAddress edge)
        : subcursor_id(subcursor_id),
          vertex(std::experimental::nullopt),
          edge(edge) {}
    cpp<#))
  (:response
   ((subcursor-id :int64_t ;; TODO(mtomic): Unused?
                  :save-fun "" :load-fun "" :capnp-save :dont-save)
    (edges "std::vector<SerializedEdge>" :capnp-type "List(SerializedGraphElement)"
           :capnp-save (lcp:capnp-save-vector "capnp::SerializedGraphElement" "SerializedEdge")
           :capnp-load (lcp:capnp-load-vector "capnp::SerializedGraphElement" "SerializedEdge"))
    (next-vertex "std::experimental::optional<storage::VertexAddress>"
                 :capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::VertexAddress")
                 :capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::VertexAddress"))
    (next-edge "std::experimental::optional<storage::EdgeAddress>"
               :capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::EdgeAddress")
               :capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::EdgeAddress")))
   (:public
    #>cpp
    using Capnp = capnp::ReconstructPathRes;
    static const communication::rpc::MessageType TypeInfo;

    ReconstructPathRes() {}

    ReconstructPathRes(
        const std::vector<EdgeAccessor> &edge_accessors,
        std::experimental::optional<storage::VertexAddress> next_vertex,
        std::experimental::optional<storage::EdgeAddress> next_edge)
        : next_vertex(std::move(next_vertex)), next_edge(std::move(next_edge)) {
      // The parameters were moved from in the initializer list above, so the
      // invariant is checked on the members instead.
      CHECK(!static_cast<bool>(this->next_vertex) ||
            !static_cast<bool>(this->next_edge))
          << "At most one of `next_vertex` and `next_edge` should be set";
      edges.reserve(edge_accessors.size());
      for (const auto &edge : edge_accessors) {
        edges.emplace_back(edge);
      }
    }
    cpp<#)))
;; RPC telling the given subcursor to prepare for the next expansion; `clear`
;; is forwarded to the subcursor unchanged. The response is empty.
(lcp:define-rpc prepare-for-expand
(:request
((subcursor-id :int64_t)
(clear :bool)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,126 +0,0 @@
/// @file
#pragma once
#include <map>
#include "communication/rpc/server.hpp"
#include "distributed/bfs_rpc_messages.hpp"
#include "distributed/bfs_subcursor.hpp"
namespace distributed {
/// Along with `BfsRpcClients`, this class is used to expose `BfsSubcursor`
/// interface over the network so that subcursors can communicate during the
/// traversal. It is just a thin wrapper forwarding RPC calls to subcursors in
/// subcursor storage.
class BfsRpcServer {
public:
/// Registers one handler per BFS RPC on `server`. Each handler loads the
/// request, forwards it to the subcursor looked up in `subcursor_storage`
/// and saves the response. The three pointers are stored as-is; this class
/// does not take ownership of them.
BfsRpcServer(database::GraphDb *db, communication::rpc::Server *server,
BfsSubcursorStorage *subcursor_storage)
: db_(db), server_(server), subcursor_storage_(subcursor_storage) {
// Creates a new subcursor and replies with the value returned by `Create`.
server_->Register<CreateBfsSubcursorRpc>(
[this](const auto &req_reader, auto *res_builder) {
CreateBfsSubcursorReq req;
req.Load(req_reader);
CreateBfsSubcursorRes res(subcursor_storage_->Create(
req.tx_id, req.direction, req.edge_types, req.graph_view));
res.Save(res_builder);
});
// Picks this worker's own subcursor out of the received map (keyed by
// `db_->WorkerId()`) and hands the whole map to it.
server_->Register<RegisterSubcursorsRpc>(
[this](const auto &req_reader, auto *res_builder) {
RegisterSubcursorsReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_ids.at(db_->WorkerId()))
->RegisterSubcursors(req.subcursor_ids);
RegisterSubcursorsRes res;
res.Save(res_builder);
});
// Removes the subcursor whose ID is carried in `req.member`.
server_->Register<RemoveBfsSubcursorRpc>(
[this](const auto &req_reader, auto *res_builder) {
RemoveBfsSubcursorReq req;
req.Load(req_reader);
subcursor_storage_->Erase(req.member);
RemoveBfsSubcursorRes res;
res.Save(res_builder);
});
// Sets the BFS source on the addressed subcursor.
server_->Register<SetSourceRpc>(
[this](const auto &req_reader, auto *res_builder) {
SetSourceReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_id)->SetSource(req.source);
SetSourceRes res;
res.Save(res_builder);
});
// Expands one level and replies with the result of `ExpandLevel`.
server_->Register<ExpandLevelRpc>([this](const auto &req_reader,
auto *res_builder) {
ExpandLevelReq req;
req.Load(req_reader);
ExpandLevelRes res(subcursor_storage_->Get(req.member)->ExpandLevel());
res.Save(res_builder);
});
// Replies with the pulled vertex, or with a default-constructed response
// when the subcursor has nothing left to pull.
server_->Register<SubcursorPullRpc>(
[this](const auto &req_reader, auto *res_builder) {
SubcursorPullReq req;
req.Load(req_reader);
auto vertex = subcursor_storage_->Get(req.member)->Pull();
if (!vertex) {
SubcursorPullRes res;
res.Save(res_builder);
return;
}
SubcursorPullRes res(*vertex);
res.Save(res_builder);
});
// Forwards to `ExpandToLocalVertex`, which requires the vertex to be local
// to this worker, and replies with its result.
server_->Register<ExpandToRemoteVertexRpc>(
[this](const auto &req_reader, auto *res_builder) {
ExpandToRemoteVertexReq req;
req.Load(req_reader);
ExpandToRemoteVertexRes res(
subcursor_storage_->Get(req.subcursor_id)
->ExpandToLocalVertex(req.edge, req.vertex));
res.Save(res_builder);
});
// Reconstructs the locally stored path segment starting from either the
// vertex or the edge in the request; a request with neither set is fatal.
server_->Register<ReconstructPathRpc>([this](const auto &req_reader,
auto *res_builder) {
ReconstructPathReq req;
req.Load(req_reader);
auto subcursor = subcursor_storage_->Get(req.subcursor_id);
PathSegment result;
if (req.vertex) {
result = subcursor->ReconstructPath(*req.vertex);
} else if (req.edge) {
result = subcursor->ReconstructPath(*req.edge);
} else {
LOG(FATAL) << "`edge` or `vertex` should be set in ReconstructPathReq";
}
ReconstructPathRes res(result.edges, result.next_vertex,
result.next_edge);
res.Save(res_builder);
});
// Forwards the `clear` flag to `PrepareForExpand` on the addressed
// subcursor.
server_->Register<PrepareForExpandRpc>([this](const auto &req_reader,
auto *res_builder) {
PrepareForExpandReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_id)->PrepareForExpand(req.clear);
PrepareForExpandRes res;
res.Save(res_builder);
});
}
private:
// Used only to obtain this worker's ID.
database::GraphDb *db_;
// RPC server on which all handlers above are registered.
communication::rpc::Server *server_;
// Source of the subcursors the handlers operate on.
BfsSubcursorStorage *subcursor_storage_;
};
} // namespace distributed

View File

@ -1,196 +0,0 @@
#include <unordered_map>
#include "distributed/bfs_rpc_clients.hpp"
#include "query/plan/operator.hpp"
#include "storage/address_types.hpp"
#include "storage/vertex_accessor.hpp"
#include "bfs_subcursor.hpp"
namespace distributed {
using query::TypedValue;
/// Creates a subcursor whose accessor is built from `*db` and `tx_id`, stores
/// the traversal parameters and starts with empty traversal state (`Reset`).
ExpandBfsSubcursor::ExpandBfsSubcursor(
database::GraphDb *db, tx::TransactionId tx_id,
query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types, query::GraphView graph_view)
: dba_(*db, tx_id),
direction_(direction),
edge_types_(std::move(edge_types)),
graph_view_(graph_view) {
Reset();
}
/// Drops all traversal state: both frontiers, the visited map and the pull
/// position.
void ExpandBfsSubcursor::Reset() {
  to_visit_next_.clear();
  to_visit_current_.clear();
  processed_.clear();
  pull_index_ = 0;
}
/// Restarts the traversal from `source_address`: clears previous state, marks
/// the source as visited with no incoming edge and expands from it once.
void ExpandBfsSubcursor::SetSource(storage::VertexAddress source_address) {
  Reset();
  VertexAccessor source_vertex(source_address, dba_);
  SwitchAccessor(source_vertex, graph_view_);
  // The source has no in-edge, hence `nullopt`.
  processed_.emplace(source_vertex, std::experimental::nullopt);
  ExpandFromVertex(source_vertex);
}
/// Prepares for the next expansion step. With `clear` set the whole state is
/// reset; otherwise the "next" frontier becomes the "current" one and a new,
/// empty "next" frontier is started.
void ExpandBfsSubcursor::PrepareForExpand(bool clear) {
  if (clear) {
    Reset();
    return;
  }
  to_visit_current_.swap(to_visit_next_);
  to_visit_next_.clear();
}
/// Expands from every vertex of the current frontier and rewinds the pull
/// position. Returns true if at least one expansion succeeded.
bool ExpandBfsSubcursor::ExpandLevel() {
  bool any_expanded = false;
  for (const auto &frontier_entry : to_visit_current_) {
    // Always expand, even after a previous success; only the flag is OR-ed.
    const bool did_expand = ExpandFromVertex(frontier_entry.second);
    any_expanded = any_expanded || did_expand;
  }
  pull_index_ = 0;
  return any_expanded;
}
/// Returns the next not-yet-pulled vertex from `to_visit_next_` and advances
/// the pull position, or `nullopt` once every entry has been pulled.
std::experimental::optional<VertexAccessor> ExpandBfsSubcursor::Pull() {
  if (pull_index_ >= to_visit_next_.size()) {
    return std::experimental::nullopt;
  }
  auto &entry = to_visit_next_[pull_index_];
  ++pull_index_;
  return std::experimental::make_optional(entry.second);
}
/// Records `vertex` as reached through `edge` unless it was already visited.
/// The vertex must be local to this worker. Returns true if the vertex was
/// newly visited.
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
                                             VertexAccessor vertex) {
  CHECK(vertex.address().is_local())
      << "ExpandToLocalVertex called with remote vertex";
  edge = dba_.db().storage().LocalizedAddressIfPossible(edge);
  SwitchAccessor(vertex, graph_view_);

  // `processed_` and `to_visit_next_` are updated under the mutex.
  std::lock_guard<std::mutex> guard(mutex_);
  const bool newly_visited = processed_.emplace(vertex, edge).second;
  if (newly_visited) {
    to_visit_next_.emplace_back(edge, vertex);
  }
  return newly_visited;
}
/// Address-based overload: wraps `vertex` in an accessor bound to this
/// subcursor's database accessor and forwards to the accessor-based overload.
/// Returns true if the vertex was newly visited.
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
                                             storage::VertexAddress vertex) {
  // Build the accessor exactly once; the original constructed an extra,
  // unused one.
  return ExpandToLocalVertex(edge, VertexAccessor(vertex, dba_));
}
/// Reconstructs the locally stored part of a path that ends with the given
/// local edge. The edge itself is the first element of the returned segment;
/// reconstruction then continues from the edge's `from` vertex.
PathSegment ExpandBfsSubcursor::ReconstructPath(
    storage::EdgeAddress edge_address) {
  EdgeAccessor edge(edge_address, dba_);
  CHECK(edge.address().is_local()) << "ReconstructPath called with remote edge";
  DCHECK(edge.from_addr().is_local()) << "`from` vertex should always be local";
  DCHECK(!edge.to_addr().is_local()) << "`to` vertex should be remote when "
                                        "calling ReconstructPath with edge";

  PathSegment segment;
  segment.edges.emplace_back(edge);
  ReconstructPathHelper(edge.from(), &segment);
  return segment;
}
/// Reconstructs the locally stored part of the path leading to the given
/// local vertex.
PathSegment ExpandBfsSubcursor::ReconstructPath(
    storage::VertexAddress vertex_addr) {
  VertexAccessor vertex(vertex_addr, dba_);
  CHECK(vertex.address().is_local())
      << "ReconstructPath called with remote vertex";

  PathSegment segment;
  ReconstructPathHelper(vertex, &segment);
  return segment;
}
/// Walks the recorded in-edges backwards from `vertex`, appending each local
/// edge to `result->edges`. The walk stops when
///  - a vertex with no in-edge is reached,
///  - the in-edge is remote (`result->next_edge` is set), or
///  - the in-edge is local but the preceding vertex is remote
///    (`result->next_vertex` is set).
/// Every vertex on the walk must be present in `processed_`.
void ExpandBfsSubcursor::ReconstructPathHelper(VertexAccessor vertex,
                                               PathSegment *result) {
  auto it = processed_.find(vertex);
  CHECK(it != processed_.end())
      << "ReconstructPath called with unvisited vertex";
  auto in_edge_address = it->second;

  while (in_edge_address) {
    // In-edge is stored on another worker. It should be returned to master from
    // that worker, and path reconstruction should be continued there.
    if (in_edge_address->is_remote()) {
      result->next_edge = in_edge_address;
      break;
    }

    result->edges.emplace_back(*in_edge_address, dba_);
    auto &in_edge = result->edges.back();
    auto next_vertex_address =
        in_edge.from_is(vertex) ? in_edge.to_addr() : in_edge.from_addr();

    // We own the in-edge, but the next vertex on the path is stored on another
    // worker.
    if (next_vertex_address.is_remote()) {
      result->next_vertex = next_vertex_address;
      break;
    }

    vertex = VertexAccessor(next_vertex_address, dba_);
    // Look the vertex up explicitly: `operator[]` would silently insert an
    // empty entry for an unknown vertex and end the walk as if the source
    // had been reached.
    it = processed_.find(vertex);
    CHECK(it != processed_.end())
        << "ReconstructPath reached an unvisited vertex";
    in_edge_address = it->second;
  }
}
/// Expands over `edge` to `vertex`: handled in place when the vertex is local,
/// otherwise delegated to the BFS subcursor RPC clients. Returns true if the
/// expansion succeeded.
bool ExpandBfsSubcursor::ExpandToVertex(EdgeAccessor edge,
                                        VertexAccessor vertex) {
  // TODO(mtomic): lambda filtering in distributed
  if (vertex.is_local()) {
    return ExpandToLocalVertex(edge.address(), vertex);
  }
  return dba_.db().bfs_subcursor_clients().ExpandToRemoteVertex(
      subcursor_ids_, edge, vertex);
}
/// Tries to expand over every edge of `vertex` that matches the configured
/// direction and edge types. Out-edges are tried before in-edges. Returns true
/// if any expansion succeeded.
bool ExpandBfsSubcursor::ExpandFromVertex(VertexAccessor vertex) {
  const bool follow_out = direction_ != query::EdgeAtom::Direction::IN;
  const bool follow_in = direction_ != query::EdgeAtom::Direction::OUT;

  bool any_expanded = false;
  if (follow_out) {
    for (const EdgeAccessor &out_edge : vertex.out(&edge_types_)) {
      if (ExpandToVertex(out_edge, out_edge.to())) any_expanded = true;
    }
  }
  if (follow_in) {
    for (const EdgeAccessor &in_edge : vertex.in(&edge_types_)) {
      if (ExpandToVertex(in_edge, in_edge.from())) any_expanded = true;
    }
  }
  return any_expanded;
}
/// Stores the database pointer that is later passed to every subcursor
/// created by `Create`.
BfsSubcursorStorage::BfsSubcursorStorage(database::GraphDb *db) : db_(db) {}
/// Creates a new subcursor with the given traversal parameters, stores it
/// under a freshly allocated ID and returns that ID.
int64_t BfsSubcursorStorage::Create(tx::TransactionId tx_id,
                                    query::EdgeAtom::Direction direction,
                                    std::vector<storage::EdgeType> edge_types,
                                    query::GraphView graph_view) {
  std::lock_guard<std::mutex> guard(mutex_);

  // IDs are handed out sequentially while holding the mutex.
  const int64_t id = next_subcursor_id_;
  ++next_subcursor_id_;

  auto subcursor = std::make_unique<ExpandBfsSubcursor>(
      db_, tx_id, direction, std::move(edge_types), graph_view);
  auto got = storage_.emplace(id, std::move(subcursor));
  CHECK(got.second) << "Subcursor with ID " << id << " already exists";
  return id;
}
/// Removes the subcursor stored under `subcursor_id`. Fails a `CHECK` if no
/// such subcursor exists.
void BfsSubcursorStorage::Erase(int64_t subcursor_id) {
std::lock_guard<std::mutex> lock(mutex_);
auto removed = storage_.erase(subcursor_id);
CHECK(removed == 1) << "Subcursor with ID " << subcursor_id << " not found";
}
/// Returns a non-owning pointer to the subcursor stored under `subcursor_id`.
/// Fails a `CHECK` if no such subcursor exists. The mutex is held only for
/// the lookup, not while the caller uses the returned pointer.
ExpandBfsSubcursor *BfsSubcursorStorage::Get(int64_t subcursor_id) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = storage_.find(subcursor_id);
CHECK(it != storage_.end())
<< "Subcursor with ID " << subcursor_id << " not found";
return it->second.get();
}
} // namespace distributed

View File

@ -1,141 +0,0 @@
/// @file
#pragma once
#include <map>
#include <memory>
#include <unordered_map>
#include "glog/logging.h"
#include "query/plan/operator.hpp"
namespace database {
class GraphDb;
}
namespace distributed {
/// Path from BFS source to a vertex might span multiple workers. This struct
/// stores information describing segment of a path stored on a worker and
/// information necessary to continue path reconstruction on another worker.
struct PathSegment {
/// Edges of the segment, in the order path reconstruction appended them.
std::vector<EdgeAccessor> edges;
/// Set when reconstruction stopped because the next vertex on the path is
/// remote.
std::experimental::optional<storage::VertexAddress> next_vertex;
/// Set when reconstruction stopped because the next in-edge is remote.
std::experimental::optional<storage::EdgeAddress> next_edge;
};
/// Class storing the worker-local state of distributed BFS traversal. For each
/// traversal (uniquely identified by cursor id), there is one instance of this
/// class per worker, and those instances communicate via RPC calls.
class ExpandBfsSubcursor {
public:
/// Creates a subcursor operating on `db` within transaction `tx_id`, using
/// the given expansion direction, edge type filter and graph view.
ExpandBfsSubcursor(database::GraphDb *db, tx::TransactionId tx_id,
query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types,
query::GraphView graph_view);
/// Stores the subcursor IDs used when sending RPCs to other workers,
/// replacing any previously stored map.
void RegisterSubcursors(std::unordered_map<int16_t, int64_t> subcursor_ids) {
subcursor_ids_ = std::move(subcursor_ids);
}
/// Sets the source to be used for new expansion.
void SetSource(storage::VertexAddress source_address);
/// Notifies the subcursor that a new expansion should take place.
/// `to_visit_next_` must be moved to `to_visit_current_` synchronously for
/// all subcursors participating in expansion to avoid race condition with
/// `ExpandToRemoteVertex` RPC requests. Also used before setting new source
/// with `clear` set to true, to avoid a race condition similar to one
/// described above.
///
/// @param clear if set to true, `Reset` will be called instead of moving
/// `to_visit_next_`
void PrepareForExpand(bool clear);
/// Expands the BFS frontier once. Returns true if there was a successful
/// expansion.
bool ExpandLevel();
/// Pulls the next vertex in the current BFS frontier, if there is one.
std::experimental::optional<VertexAccessor> Pull();
/// Expands to a local vertex, if it wasn't already visited. Returns true if
/// expansion was successful.
bool ExpandToLocalVertex(storage::EdgeAddress edge, VertexAccessor vertex);
/// Same as above, but takes the vertex by address and builds the accessor
/// itself.
bool ExpandToLocalVertex(storage::EdgeAddress edge,
storage::VertexAddress vertex);
/// Reconstruct the part of path ending with given edge, stored on this
/// worker.
PathSegment ReconstructPath(storage::EdgeAddress edge_address);
/// Reconstruct the part of path to given vertex stored on this worker.
PathSegment ReconstructPath(storage::VertexAddress vertex_addr);
private:
/// Used to reset subcursor state before starting expansion from new source.
void Reset();
/// Expands to a local or remote vertex, returns true if expansion was
/// successful.
bool ExpandToVertex(EdgeAccessor edge, VertexAccessor vertex);
/// Tries to expand to all vertices connected to given one and returns true if
/// any of them was successful.
bool ExpandFromVertex(VertexAccessor vertex);
/// Helper for path reconstruction doing the actual work.
void ReconstructPathHelper(VertexAccessor vertex, PathSegment *result);
/// Database accessor through which all vertex and edge accessors are built.
database::GraphDbAccessor dba_;
/// IDs of subcursors on other workers, used when sending RPCs.
std::unordered_map<int16_t, int64_t> subcursor_ids_;
/// Expansion direction, edge type filter and graph view given at
/// construction.
query::EdgeAtom::Direction direction_;
std::vector<storage::EdgeType> edge_types_;
query::GraphView graph_view_;
/// Mutex protecting `to_visit_next_` and `processed_`, because there is a
/// race between expansions done locally using `ExpandToLocalVertex` and
/// incoming `ExpandToRemoteVertex` RPCs.
std::mutex mutex_;
/// List of visited vertices and their incoming edges. Local address is stored
/// for local edges, global address for remote edges.
std::unordered_map<VertexAccessor,
std::experimental::optional<storage::EdgeAddress>>
processed_;
/// List of vertices at the current expansion level.
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>>
to_visit_current_;
/// List of unvisited vertices reachable from current expansion level.
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>> to_visit_next_;
/// Index of the vertex from `to_visit_next_` to return on next pull.
size_t pull_index_;
};
/// Thread-safe storage for BFS subcursors.
class BfsSubcursorStorage {
public:
/// `db` is stored and passed to every subcursor created later.
explicit BfsSubcursorStorage(database::GraphDb *db);
/// Creates a subcursor with the given parameters and returns its new ID.
int64_t Create(tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types,
query::GraphView graph_view);
/// Removes the subcursor with the given ID; the ID must exist.
void Erase(int64_t subcursor_id);
/// Returns a non-owning pointer to the subcursor with the given ID; the ID
/// must exist.
ExpandBfsSubcursor *Get(int64_t subcursor_id);
private:
database::GraphDb *db_;
/// Guards `storage_` and `next_subcursor_id_`.
std::mutex mutex_;
/// Owns the subcursors, keyed by subcursor ID.
std::map<int64_t, std::unique_ptr<ExpandBfsSubcursor>> storage_;
/// ID that will be assigned to the next created subcursor.
int64_t next_subcursor_id_{0};
};
} // namespace distributed

View File

@ -1,99 +0,0 @@
#include "glog/logging.h"
#include "database/storage.hpp"
#include "distributed/cache.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
/// Returns the "new" record cached for `gid`, creating it on first use as a
/// clone of the "old" record. The entry for `gid` must already be cached.
template <typename TRecord>
TRecord *Cache<TRecord>::FindNew(gid::Gid gid) {
  std::lock_guard<std::mutex> guard(lock_);
  auto found = cache_.find(gid);
  DCHECK(found != cache_.end())
      << "FindNew for uninitialized remote Vertex/Edge";

  auto &old_record = found->second.first;
  auto &new_record = found->second.second;
  if (!new_record) {
    // Lazily materialize the writable copy from the old data.
    new_record = std::unique_ptr<TRecord>(old_record->CloneData());
  }
  return new_record.get();
}
/// Sets `old_record` and `new_record` to the cached records for `gid`. On a
/// cache miss the record is first fetched from worker `worker_id` through
/// `data_clients_`, with the cache lock released during the fetch.
template <typename TRecord>
void Cache<TRecord>::FindSetOldNew(tx::TransactionId tx_id, int worker_id,
gid::Gid gid, TRecord *&old_record,
TRecord *&new_record) {
{
// Fast path: the record is already cached.
std::lock_guard<std::mutex> guard(lock_);
auto found = cache_.find(gid);
if (found != cache_.end()) {
old_record = found->second.first.get();
new_record = found->second.second.get();
return;
}
}
// Slow path: fetch the record without holding the lock.
auto remote = data_clients_.RemoteElement<TRecord>(worker_id, tx_id, gid);
LocalizeAddresses(*remote);
// This logic is a bit strange because we need to make sure that someone
// else didn't get a response and updated the cache before we did and we
// need a lock for that, but we also need to check if we can now return
// that result - otherwise we could get inconsistent results for remote
// FindSetOldNew
std::lock_guard<std::mutex> guard(lock_);
// `emplace` leaves an already existing entry untouched, so the pointers
// below always refer to whatever entry is in the cache for `gid`.
auto it_pair = cache_.emplace(
gid, std::make_pair<rec_uptr, rec_uptr>(std::move(remote), nullptr));
old_record = it_pair.first->second.first.get();
new_record = it_pair.first->second.second.get();
}
/// Caches the given (old, new) records for `gid` after localizing their
/// addresses. If `gid` is already cached the call is a no-op and the passed
/// records are discarded.
template <typename TRecord>
void Cache<TRecord>::emplace(gid::Gid gid, rec_uptr old_record,
                             rec_uptr new_record) {
  if (old_record) LocalizeAddresses(*old_record);
  if (new_record) LocalizeAddresses(*new_record);

  std::lock_guard<std::mutex> guard(lock_);
  // We can't replace existing data because some accessors might be using
  // it.
  // TODO - consider if it's necessary and OK to copy just the data content.
  if (cache_.find(gid) != cache_.end()) return;
  cache_[gid] = std::make_pair(std::move(old_record), std::move(new_record));
}
/// Drops every cached record while holding the cache lock.
template <typename TRecord>
void Cache<TRecord>::ClearCache() {
  std::lock_guard<std::mutex> cache_guard(lock_);
  cache_.clear();
}
/// Localizes, where possible, the vertex and edge addresses of every in- and
/// out-edge entry stored in `vertex`.
template <>
void Cache<Vertex>::LocalizeAddresses(Vertex &vertex) {
  const auto localize_all = [this](auto &connections) {
    for (auto &connection : connections) {
      connection.vertex =
          storage_.LocalizedAddressIfPossible(connection.vertex);
      connection.edge = storage_.LocalizedAddressIfPossible(connection.edge);
    }
  };
  localize_all(vertex.in_.storage());
  localize_all(vertex.out_.storage());
}
/// Localizes, where possible, the `from_` and `to_` vertex addresses of
/// `edge`.
template <>
void Cache<Edge>::LocalizeAddresses(Edge &edge) {
edge.from_ = storage_.LocalizedAddressIfPossible(edge.from_);
edge.to_ = storage_.LocalizedAddressIfPossible(edge.to_);
}
template class Cache<Vertex>;
template class Cache<Edge>;
} // namespace distributed

View File

@ -1,62 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "distributed/data_rpc_clients.hpp"
#include "storage/gid.hpp"
namespace database {
class Storage;
}
namespace distributed {
/**
* Used for caching Vertices and Edges that are stored on another worker in a
* distributed system. Maps global IDs to (old, new) Vertex/Edge pointer
* pairs. It is possible that either "old" or "new" are nullptrs, but at
* least one must be not-null. The Cache is the owner of TRecord
* objects it points to.
*
* @tparam TRecord - Edge or Vertex
*/
template <typename TRecord>
class Cache {
using rec_uptr = std::unique_ptr<TRecord>;
public:
/// Both references are stored; the cache does not own either object.
Cache(database::Storage &storage, distributed::DataRpcClients &data_clients)
: storage_(storage), data_clients_(data_clients) {}
/// Returns the new data for the given ID. Creates it (as copy of old) if
/// necessary.
TRecord *FindNew(gid::Gid gid);
/// For the Vertex/Edge with the given global ID, looks for the data visible
/// from the given transaction's ID and command ID, and caches it. Sets the
/// given pointers to point to the fetched data. Analogue to
/// mvcc::VersionList::find_set_old_new.
void FindSetOldNew(tx::TransactionId tx_id, int worker_id, gid::Gid gid,
TRecord *&old_record, TRecord *&new_record);
/// Sets the given records as (old, new) data for the given gid. Does nothing
/// if data for that gid is already cached.
void emplace(gid::Gid gid, rec_uptr old_record, rec_uptr new_record);
/// Removes all the data from the cache.
void ClearCache();
private:
/// Used to localize addresses stored in cached records.
database::Storage &storage_;
/// Guards `cache_`.
std::mutex lock_;
/// Used to fetch records that are not cached yet.
distributed::DataRpcClients &data_clients_;
// TODO it'd be better if we had VertexData and EdgeData in here, as opposed
// to Vertex and Edge.
/// Maps a global ID to its (old, new) record pair.
std::unordered_map<gid::Gid, std::pair<rec_uptr, rec_uptr>> cache_;
// Localizes all the addresses in the record.
void LocalizeAddresses(TRecord &record);
};
} // namespace distributed

View File

@ -1,42 +0,0 @@
#include "communication/rpc/client_pool.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
/// Stores the given references and registers the master-side handlers for
/// `RegisterWorkerRpc` and `NotifyWorkerRecoveredRpc` on `server`.
ClusterDiscoveryMaster::ClusterDiscoveryMaster(
Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients)
: server_(server),
coordination_(coordination),
rpc_worker_clients_(rpc_worker_clients) {
// Worker registration: try to register the worker under its desired ID and,
// on success, announce it through `ClusterDiscoveryRpc`.
server_.Register<RegisterWorkerRpc>([this](const auto &req_reader,
auto *res_builder) {
RegisterWorkerReq req;
req.Load(req_reader);
bool registration_successful =
this->coordination_.RegisterWorker(req.desired_worker_id, req.endpoint);
if (registration_successful) {
// NOTE(review): the first argument (0) is presumably the ID of the worker
// to skip, i.e. the master itself - confirm against `ExecuteOnWorkers`.
rpc_worker_clients_.ExecuteOnWorkers<void>(
0, [req](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<ClusterDiscoveryRpc>(
req.desired_worker_id, req.endpoint);
CHECK(result) << "ClusterDiscoveryRpc failed";
});
}
// The response is sent on failure too; it always carries the recovery
// info and the currently known workers.
RegisterWorkerRes res(registration_successful,
this->coordination_.RecoveryInfo(),
this->coordination_.GetWorkers());
res.Save(res_builder);
});
// Recovery notification: records the worker ID from the request. Nothing is
// written to `res_builder`.
server_.Register<NotifyWorkerRecoveredRpc>(
[this](const auto &req_reader, auto *res_builder) {
this->coordination_.WorkerRecovered(req_reader.getMember());
});
}
} // namespace distributed

View File

@ -1,27 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/rpc_worker_clients.hpp"
namespace distributed {
using Server = communication::rpc::Server;
/** Handle cluster discovery on master.
*
* Cluster discovery on master handles worker registration and broadcasts new
* worker information to already registered workers, and already registered
* worker information to the new worker.
*/
class ClusterDiscoveryMaster final {
public:
/// Registers the discovery RPC handlers on `server`. All three references
/// are stored, so the referenced objects must outlive this instance.
ClusterDiscoveryMaster(Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients);
private:
/// Server on which the handlers are registered.
Server &server_;
/// Keeps track of registered and recovered workers.
MasterCoordination &coordination_;
/// Used to announce a newly registered worker.
RpcWorkerClients &rpc_worker_clients_;
};
} // namespace distributed

View File

@ -1,41 +0,0 @@
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
/// Stores the given references and registers the `ClusterDiscoveryRpc`
/// handler, which adds the announced worker to this worker's coordination.
ClusterDiscoveryWorker::ClusterDiscoveryWorker(
Server &server, WorkerCoordination &coordination,
communication::rpc::ClientPool &client_pool)
: server_(server), coordination_(coordination), client_pool_(client_pool) {
server_.Register<ClusterDiscoveryRpc>(
[this](const auto &req_reader, auto *res_builder) {
ClusterDiscoveryReq req;
req.Load(req_reader);
// Nothing is written to `res_builder`.
this->coordination_.RegisterWorker(req.worker_id, req.endpoint);
});
}
/// Asks the master to register this worker under `worker_id`. A failed RPC or
/// a rejected ID is fatal. On success the assigned ID, the recovery info and
/// all workers reported by the master are stored locally.
void ClusterDiscoveryWorker::RegisterWorker(int worker_id) {
  auto result =
      client_pool_.Call<RegisterWorkerRpc>(worker_id, server_.endpoint());
  CHECK(result) << "RegisterWorkerRpc failed";
  CHECK(result->registration_successful)
      << "Unable to assign requested ID (" << worker_id << ") to worker!";

  worker_id_ = worker_id;
  recovery_info_ = result->recovery_info;

  // Mirror the master's view of the cluster into the local coordination.
  for (auto &known_worker : result->workers) {
    coordination_.RegisterWorker(known_worker.first, known_worker.second);
  }
}
/// Tells the master that this worker finished recovery. `RegisterWorker` must
/// have been called first (`worker_id_` is -1 until then); a failed RPC is
/// fatal.
void ClusterDiscoveryWorker::NotifyWorkerRecovered() {
CHECK(worker_id_ >= 0)
<< "Workers id is not yet assigned, preform registration before "
"notifying that the recovery finished";
auto result = client_pool_.Call<NotifyWorkerRecoveredRpc>(worker_id_);
CHECK(result) << "NotifyWorkerRecoveredRpc failed";
}
} // namespace distributed

View File

@ -1,50 +0,0 @@
#pragma once
#include <experimental/optional>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/server.hpp"
#include "distributed/coordination_worker.hpp"
#include "durability/recovery.hpp"
namespace distributed {
using Server = communication::rpc::Server;
using ClientPool = communication::rpc::ClientPool;
/** Handle cluster discovery on worker.
*
* Cluster discovery on worker handles worker registration by sending an rpc
* request to master and processes received rpc response with other worker
* information.
*/
class ClusterDiscoveryWorker final {
public:
/// Registers the `ClusterDiscoveryRpc` handler on `server`. All three
/// references are stored, so the referenced objects must outlive this
/// instance.
ClusterDiscoveryWorker(Server &server, WorkerCoordination &coordination,
ClientPool &client_pool);
/**
* Registers a worker with the master.
*
* @param worker_id - Desired ID. If master can't assign the desired worker
* id, worker will exit.
*/
void RegisterWorker(int worker_id);
/**
* Notifies the master that the worker finished recovering. Assumes that the
* worker was already registered with master.
*/
void NotifyWorkerRecovered();
/** Returns the recovery info. Valid only after registration. */
auto recovery_info() const { return recovery_info_; }
private:
/// ID assigned by a successful `RegisterWorker`; -1 until then.
int worker_id_{-1};
/// Server on which the handler is registered; also provides the endpoint
/// sent to the master during registration.
Server &server_;
/// Local registry of known workers.
WorkerCoordination &coordination_;
/// Client pool used for the RPC calls made to the master.
communication::rpc::ClientPool &client_pool_;
/// Recovery info received from the master during registration.
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed

View File

@ -1,34 +0,0 @@
#include "glog/logging.h"
#include "distributed/coordination.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
/// Starts the worker map with a single entry: the master, under ID 0.
Coordination::Coordination(const Endpoint &master_endpoint) {
// The master is always worker 0.
workers_.emplace(0, master_endpoint);
}
/// Returns the endpoint registered for `worker_id`. Fails a `CHECK` if the ID
/// is unknown. Takes no lock.
Endpoint Coordination::GetEndpoint(int worker_id) {
auto found = workers_.find(worker_id);
CHECK(found != workers_.end()) << "No endpoint registered for worker id: "
<< worker_id;
return found->second;
}
std::vector<int> Coordination::GetWorkerIds() const {
std::vector<int> worker_ids;
for (auto worker : workers_) worker_ids.push_back(worker.first);
return worker_ids;
}
/// Adds `worker_id` with its endpoint to the worker map. Because `emplace` is
/// used, an already registered ID keeps its existing endpoint.
void Coordination::AddWorker(int worker_id, Endpoint endpoint) {
workers_.emplace(worker_id, endpoint);
}
/// Returns a copy of the whole worker-ID-to-endpoint map.
std::unordered_map<int, Endpoint> Coordination::GetWorkers() {
return workers_;
}
} // namespace distributed

View File

@ -1,36 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "io/network/endpoint.hpp"
namespace distributed {
/** Coordination base class. This class is not thread safe. */
class Coordination {
public:
/** Registers the master under worker ID 0 with the given endpoint. */
explicit Coordination(const io::network::Endpoint &master_endpoint);
/** Gets the endpoint for the given worker ID from the master. */
io::network::Endpoint GetEndpoint(int worker_id);
/** Returns all workers id, this includes master id(0). */
std::vector<int> GetWorkerIds() const;
/** Gets the mapping of worker id to worker endpoint including master (worker
* id = 0).
*/
std::unordered_map<int, io::network::Endpoint> GetWorkers();
protected:
/** Protected and non-virtual, so instances cannot be destroyed through a
* pointer to this base class from outside the hierarchy. */
~Coordination() {}
/** Adds a worker to coordination. */
void AddWorker(int worker_id, io::network::Endpoint endpoint);
private:
/** Worker ID to endpoint mapping; always contains the master under ID 0. */
std::unordered_map<int, io::network::Endpoint> workers_;
};
} // namespace distributed

View File

@ -1,92 +0,0 @@
#include <chrono>
#include <thread>
#include "glog/logging.h"
#include "communication/rpc/client.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
#include "io/network/utils.hpp"
namespace distributed {
/// Forwards the master endpoint to the base class, which registers it under
/// worker ID 0.
MasterCoordination::MasterCoordination(const Endpoint &master_endpoint)
: Coordination(master_endpoint) {}
bool MasterCoordination::RegisterWorker(int desired_worker_id,
Endpoint endpoint) {
// Worker's can't register before the recovery phase on the master is done to
// ensure the whole cluster is in a consistent state.
while (true) {
{
std::lock_guard<std::mutex> guard(lock_);
if (recovery_done_) break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
std::lock_guard<std::mutex> guard(lock_);
auto workers = GetWorkers();
// Check if the desired worker id already exists.
if (workers.find(desired_worker_id) != workers.end()) {
LOG(WARNING) << "Unable to assign requested ID (" << desired_worker_id
<< ") to worker at: " << endpoint;
// If the desired worker ID is already assigned, return -1 and don't add
// that worker to master coordination.
return false;
}
AddWorker(desired_worker_id, endpoint);
return true;
}
/// Adds `worker_id` to the set of recovered workers. Fails a `CHECK` if the
/// worker was already in the set.
/// NOTE(review): `lock_` is not taken here, unlike in the other mutating
/// methods - confirm this is only ever called from one thread at a time.
void MasterCoordination::WorkerRecovered(int worker_id) {
CHECK(recovered_workers_.insert(worker_id).second)
<< "Worker already notified about finishing recovery";
}
/// Locked wrapper around `Coordination::GetEndpoint`.
Endpoint MasterCoordination::GetEndpoint(int worker_id) {
  std::lock_guard<std::mutex> lock_holder(lock_);
  auto endpoint = Coordination::GetEndpoint(worker_id);
  return endpoint;
}
/// Shuts the cluster down: sends `StopWorkerRpc` to every worker except the
/// master itself, then waits until none of them accepts connections any more.
/// A failed stop request is fatal.
MasterCoordination::~MasterCoordination() {
  using namespace std::chrono_literals;
  std::lock_guard<std::mutex> guard(lock_);
  auto workers = GetWorkers();

  // First pass: ask every worker to stop. ID 0 is the master (self).
  for (const auto &kv : workers) {
    if (kv.first != 0) {
      communication::rpc::Client client(kv.second);
      auto result = client.Call<StopWorkerRpc>();
      CHECK(result) << "StopWorkerRpc failed for worker: " << kv.first;
    }
  }

  // Second pass: make sure all workers have died.
  for (const auto &kv : workers) {
    if (kv.first != 0) {
      while (io::network::CanEstablishConnection(kv.second)) {
        std::this_thread::sleep_for(500ms);
      }
    }
  }
}
/// Stores the master's recovery info and marks recovery as finished, which
/// unblocks pending `RegisterWorker` calls.
void MasterCoordination::SetRecoveryInfo(
    std::experimental::optional<durability::RecoveryInfo> info) {
  std::lock_guard<std::mutex> guard(lock_);
  // Both writes happen under the lock, so readers see them together.
  recovery_info_ = info;
  recovery_done_ = true;
}
/// Returns how many workers have reported finishing recovery so far.
/// NOTE(review): reads `recovered_workers_` without taking `lock_`.
int MasterCoordination::CountRecoveredWorkers() const {
return recovered_workers_.size();
}
/// Returns the recovery info stored by `SetRecoveryInfo`. Fails a `CHECK` if
/// called before `SetRecoveryInfo`.
std::experimental::optional<durability::RecoveryInfo>
MasterCoordination::RecoveryInfo() const {
std::lock_guard<std::mutex> guard(lock_);
CHECK(recovery_done_) << "RecoveryInfo requested before it's available";
return recovery_info_;
}
} // namespace distributed

View File

@ -1,61 +0,0 @@
#pragma once
#include <experimental/optional>
#include <mutex>
#include <set>
#include <unordered_map>
#include "distributed/coordination.hpp"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
/** Handles worker registration, getting of other workers' endpoints and
* coordinated shutdown in a distributed memgraph. Master side. */
class MasterCoordination final : public Coordination {
public:
/** Registers the master itself under worker ID 0. */
explicit MasterCoordination(const Endpoint &master_endpoint);
/** Shuts down all the workers and this master server. */
~MasterCoordination();
/** Registers a new worker with this master coordination.
*
* @param desired_worker_id - The ID the worker would like to have.
* @return True if the desired ID for the worker is available, or false
* if the desired ID is already taken.
*/
bool RegisterWorker(int desired_worker_id, Endpoint endpoint);
/*
* Worker `worker_id` finished with recovering, adds it to the set of
* recovered workers.
*/
void WorkerRecovered(int worker_id);
/// Returns the endpoint registered for `worker_id`, looked up under the lock.
Endpoint GetEndpoint(int worker_id);
/// Sets the recovery info. nullopt indicates nothing was recovered.
void SetRecoveryInfo(
std::experimental::optional<durability::RecoveryInfo> info);
/// Returns the recovery info; only valid after `SetRecoveryInfo` was called.
std::experimental::optional<durability::RecoveryInfo> RecoveryInfo() const;
/// Returns the number of workers that reported finishing recovery.
int CountRecoveredWorkers() const;
private:
// Most master functions aren't thread-safe.
mutable std::mutex lock_;
/// Indicates if the recovery phase is done.
bool recovery_done_{false};
/// Set of workers that finished successfully recovering
std::set<int> recovered_workers_;
/// Durability recovery info. If nullopt nothing was recovered.
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed

View File

@ -1,72 +0,0 @@
#>cpp
#pragma once
#include <experimental/optional>
#include <unordered_map>
#include "communication/rpc/messages.hpp"
#include "distributed/coordination_rpc_messages.capnp.h"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'dur "/durability/recovery.capnp")
(lcp:capnp-import 'io "/io/network/endpoint.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
;; RPC sent by a worker to the master to register itself.
;; Request: the worker ID the worker would like to have and its endpoint.
;; Response: whether registration succeeded, the master's recovery info
;; (optional) and the map of known workers (worker ID -> endpoint).
;; NOTE(review): `workers` is keyed by `int` in C++ but serialized with
;; `BoxInt16` keys - presumably worker IDs always fit in 16 bits; confirm.
(lcp:define-rpc register-worker
(:request
((desired-worker-id :int16_t)
(endpoint "io::network::Endpoint" :capnp-type "Io.Endpoint")))
(:response
((registration-successful :bool)
(recovery-info "std::experimental::optional<durability::RecoveryInfo>"
:capnp-type "Utils.Optional(Dur.RecoveryInfo)"
:capnp-save (lcp:capnp-save-optional "durability::capnp::RecoveryInfo"
"durability::RecoveryInfo")
:capnp-load (lcp:capnp-load-optional "durability::capnp::RecoveryInfo"
"durability::RecoveryInfo"))
(workers "std::unordered_map<int, io::network::Endpoint>"
:capnp-type "Utils.Map(Utils.BoxInt16, Io.Endpoint)"
;; Custom save: each map entry is written as a boxed key plus an endpoint.
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<utils::capnp::BoxInt16, io::network::capnp::Endpoint>(${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
key_builder.setValue(entry.first);
auto value_builder = builder->initValue();
entry.second.Save(&value_builder);
});
cpp<#)
;; Custom load: the inverse of the save lambda above.
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<utils::capnp::BoxInt16, io::network::capnp::Endpoint>(&${member}, ${reader},
[](const auto &reader) {
io::network::Endpoint value;
value.Load(reader.getValue());
return std::make_pair(reader.getKey().getValue(), value);
});
cpp<#)))))
;; RPC announcing a worker (ID and endpoint) to another worker. The response
;; carries no data.
(lcp:define-rpc cluster-discovery
(:request
((worker-id :int16_t)
(endpoint "io::network::Endpoint" :capnp-type "Io.Endpoint")))
(:response ()))
;; RPC telling a worker to shut down. Neither the request nor the response
;; carries any data.
(lcp:define-rpc stop-worker
(:request ())
(:response ()))
;; RPC with which a worker tells the master that it finished recovery.
;; `member` is the ID of the reporting worker.
;; NOTE(review): declared as int64 here, while worker IDs are int16/int in
;; the other messages and in `WorkerRecovered(int)` - confirm the intended
;; width.
(lcp:define-rpc notify-worker-recovered
(:request ((member :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,46 +0,0 @@
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "glog/logging.h"
#include "distributed/coordination_rpc_messages.hpp"
#include "distributed/coordination_worker.hpp"
namespace distributed {
using namespace std::literals::chrono_literals;
// Hands the master endpoint over to the Coordination base and remembers the
// RPC server; the server is only referenced, so it has to outlive this object.
WorkerCoordination::WorkerCoordination(
    communication::rpc::Server &server, const Endpoint &master_endpoint)
    : Coordination(master_endpoint),
      server_(server) {
}
// Records `endpoint` for `worker_id`. The insertion is serialized with the
// other users of `lock_`.
void WorkerCoordination::RegisterWorker(int worker_id, Endpoint endpoint) {
  std::lock_guard<std::mutex> registry_guard(lock_);
  AddWorker(worker_id, endpoint);
}
// Registers a StopWorkerRpc handler on the RPC server and blocks the calling
// thread until that RPC has been received.
//
// The synchronization primitives live on this function's stack, so the
// handler must be done with them before the waiter is allowed to return.
// NOTE(review): the handler stays registered after this function returns and
// still references the (by then destroyed) locals; a second StopWorkerRpc
// would touch dangling state. Confirm that the RPC is sent at most once or
// that the server is shut down right after this call.
void WorkerCoordination::WaitForShutdown() {
  std::mutex mutex;
  std::condition_variable cv;
  bool shutdown = false;

  server_.Register<StopWorkerRpc>(
      [&](const auto &req_reader, auto *res_builder) {
        std::lock_guard<std::mutex> guard(mutex);
        shutdown = true;
        // Notify while still holding the mutex. If the mutex were released
        // first, the waiter could observe `shutdown == true` (e.g. on a
        // spurious wakeup), return and destroy `cv` before notify_one() runs.
        cv.notify_one();
      });

  std::unique_lock<std::mutex> lk(mutex);
  cv.wait(lk, [&shutdown] { return shutdown; });
}
// Looks up the endpoint of `worker_id` through the Coordination base while
// holding `lock_`.
io::network::Endpoint WorkerCoordination::GetEndpoint(int worker_id) {
  std::lock_guard<std::mutex> registry_guard(lock_);
  auto endpoint = Coordination::GetEndpoint(worker_id);
  return endpoint;
}
} // namespace distributed

View File

@ -1,33 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "communication/rpc/server.hpp"
#include "distributed/coordination.hpp"
namespace distributed {
/** Handles worker registration, getting of other workers' endpoints and
* coordinated shutdown in a distributed memgraph. Worker side. */
class WorkerCoordination final : public Coordination {
using Endpoint = io::network::Endpoint;
public:
/** Keeps a reference to `server` (which therefore must outlive this object)
* and passes `master_endpoint` on to the `Coordination` base. */
WorkerCoordination(communication::rpc::Server &server,
const Endpoint &master_endpoint);
/** Registers the worker with the given endpoint. Takes `lock_`. */
void RegisterWorker(int worker_id, Endpoint endpoint);
/** Starts listening for a remote shutdown command (issued by the master).
* Blocks the calling thread until that has finished. */
void WaitForShutdown();
/** Returns the endpoint registered for `worker_id`. Takes `lock_`. */
Endpoint GetEndpoint(int worker_id);
private:
// RPC server on which the shutdown handler is registered.
communication::rpc::Server &server_;
// Serializes the calls RegisterWorker and GetEndpoint make into the base.
mutable std::mutex lock_;
};
} // namespace distributed

View File

@ -1,54 +0,0 @@
#include "database/storage.hpp"
#include "distributed/data_manager.hpp"
namespace distributed {
// Returns the cache stored in `collection` under `tx_id`. When there is no
// such entry yet, one is constructed in place (from the local storage and the
// data RPC clients) and returned instead.
template <typename TRecord>
Cache<TRecord> &DataManager::GetCache(CacheT<TRecord> &collection,
                                      tx::TransactionId tx_id) {
  auto accessor = collection.access();
  auto existing = accessor.find(tx_id);
  if (existing == accessor.end()) {
    auto inserted = accessor.emplace(
        tx_id, std::make_tuple(tx_id),
        std::make_tuple(std::ref(db_.storage()), std::ref(data_clients_)));
    return inserted.first->second;
  }
  return existing->second;
}
// Vertex flavour of Elements: get-or-create the vertex cache of `tx_id`.
template <>
Cache<Vertex> &DataManager::Elements<Vertex>(tx::TransactionId tx_id) {
  auto &vertex_cache = GetCache(vertices_caches_, tx_id);
  return vertex_cache;
}
// Edge flavour of Elements: get-or-create the edge cache of `tx_id`.
template <>
Cache<Edge> &DataManager::Elements<Edge>(tx::TransactionId tx_id) {
  auto &edge_cache = GetCache(edges_caches_, tx_id);
  return edge_cache;
}
// Only stores references to the database and to the data RPC clients; both
// have to outlive the manager.
DataManager::DataManager(
    database::GraphDb &db, distributed::DataRpcClients &data_clients)
    : db_(db),
      data_clients_(data_clients) {
}
// Empties the vertex cache and then the edge cache of `tx_id`. Because the
// caches are fetched through Elements, a missing cache gets created first.
void DataManager::ClearCacheForSingleTransaction(tx::TransactionId tx_id) {
  auto &vertex_cache = Elements<Vertex>(tx_id);
  vertex_cache.ClearCache();

  auto &edge_cache = Elements<Edge>(tx_id);
  edge_cache.ClearCache();
}
// Removes the vertex and edge caches of every transaction whose id is lower
// than `oldest_active`.
void DataManager::ClearTransactionalCache(tx::TransactionId oldest_active) {
  // Shared sweep over one cache collection.
  auto drop_expired = [oldest_active](auto &accessor) {
    for (auto &entry : accessor) {
      if (entry.first < oldest_active) accessor.remove(entry.first);
    }
  };

  auto vertex_access = vertices_caches_.access();
  drop_expired(vertex_access);

  auto edge_access = edges_caches_.access();
  drop_expired(edge_access);
}
} // namespace distributed

View File

@ -1,45 +0,0 @@
#pragma once
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "transactions/type.hpp"
class Vertex;
class Edge;
namespace distributed {
/// Handles remote data caches for edges and vertices, per transaction.
class DataManager {
// Concurrent map from a transaction id to that transaction's record cache.
template <typename TRecord>
using CacheT = ConcurrentMap<tx::TransactionId, Cache<TRecord>>;
// Helper, gets or inserts a data cache for the given transaction.
template <typename TRecord>
Cache<TRecord> &GetCache(CacheT<TRecord> &collection,
tx::TransactionId tx_id);
public:
/// Stores references to `db` and `data_clients`; neither is copied.
DataManager(database::GraphDb &db, distributed::DataRpcClients &data_clients);
/// Gets or creates the remote vertex/edge cache for the given transaction.
/// Only the `Vertex` and `Edge` specializations are defined.
template <typename TRecord>
Cache<TRecord> &Elements(tx::TransactionId tx_id);
/// Clears the contents of the vertex and edge caches of a single
/// transaction (the caches are created if they do not exist yet).
void ClearCacheForSingleTransaction(tx::TransactionId tx_id);
/// Clears the cache of local transactions that have expired. The signature of
/// this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
// Database whose storage is handed to newly created caches.
database::GraphDb &db_;
// RPC clients handed to newly created caches.
DataRpcClients &data_clients_;
// Per-transaction caches, one collection per record type.
CacheT<Vertex> vertices_caches_;
CacheT<Edge> edges_caches_;
};
} // namespace distributed

View File

@ -1,49 +0,0 @@
#include <unordered_map>
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_messages.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
// Fetches the edge `gid`, as seen by transaction `tx_id`, from `worker_id`
// via EdgeRpc. A failed RPC aborts through CHECK.
template <>
std::unique_ptr<Edge> DataRpcClients::RemoteElement(int worker_id,
                                                    tx::TransactionId tx_id,
                                                    gid::Gid gid) {
  auto &&pool = clients_.GetClientPool(worker_id);
  auto edge_res = pool.Call<EdgeRpc>(TxGidPair{tx_id, gid});
  CHECK(edge_res) << "EdgeRpc failed";
  return std::move(edge_res->edge_output);
}
// Fetches the vertex `gid`, as seen by transaction `tx_id`, from `worker_id`
// via VertexRpc. A failed RPC aborts through CHECK.
template <>
std::unique_ptr<Vertex> DataRpcClients::RemoteElement(int worker_id,
                                                      tx::TransactionId tx_id,
                                                      gid::Gid gid) {
  auto &&pool = clients_.GetClientPool(worker_id);
  auto vertex_res = pool.Call<VertexRpc>(TxGidPair{tx_id, gid});
  CHECK(vertex_res) << "VertexRpc failed";
  return std::move(vertex_res->vertex_output);
}
// Asks the workers for their vertex count as seen by transaction `tx_id` and
// returns a map of worker id -> vertex count.
//
// The calls are issued through ExecuteOnWorkers (with -1 passed as its first
// argument) and then awaited one by one; a failed RPC aborts through CHECK.
std::unordered_map<int, int64_t> DataRpcClients::VertexCounts(
    tx::TransactionId tx_id) {
  auto future_results = clients_.ExecuteOnWorkers<std::pair<int, int64_t>>(
      -1, [tx_id](int worker_id, communication::rpc::ClientPool &client_pool) {
        auto response = client_pool.Call<VertexCountRpc>(tx_id);
        CHECK(response) << "VertexCountRpc failed";
        return std::make_pair(worker_id, response->member);
      });

  std::unordered_map<int, int64_t> results;
  for (auto &result : future_results) {
    auto result_pair = result.get();
    // Store the 64-bit count directly. It used to be copied through an `int`
    // local, which silently truncated counts above INT_MAX.
    results[result_pair.first] = result_pair.second;
  }
  return results;
}
} // namespace distributed

View File

@ -1,31 +0,0 @@
#pragma once
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides access to other worker's data.
class DataRpcClients {
public:
/// Stores a reference to `clients`; the object is not copied.
DataRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Returns a remote worker's record (vertex/edge) data for the given params.
/// That worker must own the vertex/edge for the given id, and that vertex
/// must be visible in given transaction.
/// Only the `Vertex` and `Edge` specializations are defined.
template <typename TRecord>
std::unique_ptr<TRecord> RemoteElement(int worker_id, tx::TransactionId tx_id,
gid::Gid gid);
/// Returns (worker_id, vertex_count) for each worker and the number of
/// vertices on it from the perspective of transaction `tx_id`.
std::unordered_map<int, int64_t> VertexCounts(tx::TransactionId tx_id);
private:
// Client pools used for issuing the RPCs.
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,76 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/data_rpc_messages.capnp.h"
#include "distributed/serialization.hpp"
#include "storage/edge.hpp"
#include "storage/gid.hpp"
#include "storage/vertex.hpp"
#include "transactions/type.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-import 'dist "/distributed/serialization.capnp")
;; Pair of a transaction id and a global id (gid). Used as the request payload
;; of the `vertex` and `edge` RPCs; both members are 64-bit unsigned on the
;; wire.
(lcp:define-struct tx-gid-pair ()
((tx-id "tx::TransactionId" :capnp-type "UInt64")
(gid "gid::Gid" :capnp-type "UInt64"))
(:serialize :capnp))
;; RPC for fetching a vertex. The request is a (tx id, gid) pair.
;; The response uses different members on the two sides of the call:
;;  - `vertex-input` and `worker-id` are the inputs of saving,
;;  - `vertex-output` receives the vertex when the response is loaded.
(lcp:define-rpc vertex
(:request ((member "TxGidPair")))
(:response
;; Pointer to the vertex to send; only ever saved, never loaded into.
((vertex-input "const Vertex *"
:save-fun "SaveVertex(ar, *vertex_input, worker_id);" :load-fun ""
:capnp-type "Dist.Vertex"
:capnp-save
(lambda (builder member)
#>cpp
SaveVertex(*${member}, &${builder}, worker_id);
cpp<#)
:capnp-load
;; Loading fills `vertex_output` instead of this member.
(lambda (reader member)
(declare (ignore member))
#>cpp
vertex_output = LoadVertex<const capnp::Vertex::Reader>(${reader});
cpp<#))
;; Passed to SaveVertex; not serialized on its own.
(worker-id :int64_t :save-fun "" :load-fun "" :capnp-save :dont-save)
;; Set only by loading; has no constructor argument and is never saved.
(vertex-output "std::unique_ptr<Vertex>" :initarg nil
:save-fun "" :load-fun "vertex_output = LoadVertex(ar);"
:capnp-save :dont-save))))
;; RPC for fetching an edge. The request is a (tx id, gid) pair.
;; The response uses different members on the two sides of the call:
;;  - `edge-input` and `worker-id` are the inputs of saving,
;;  - `edge-output` receives the edge when the response is loaded.
(lcp:define-rpc edge
(:request ((member "TxGidPair")))
(:response
;; Pointer to the edge to send; only ever saved, never loaded into.
((edge-input "const Edge *"
:save-fun "SaveEdge(ar, *edge_input, worker_id);" :load-fun ""
:capnp-type "Dist.Edge"
:capnp-save
(lambda (builder member)
#>cpp
SaveEdge(*${member}, &${builder}, worker_id);
cpp<#)
:capnp-load
;; Loading fills `edge_output` instead of this member.
(lambda (reader member)
(declare (ignore member))
#>cpp
edge_output = LoadEdge<const capnp::Edge::Reader>(${reader});
cpp<#))
;; Passed to SaveEdge; not serialized on its own.
(worker-id :int64_t :save-fun "" :load-fun "" :capnp-save :dont-save)
;; Set only by loading; has no constructor argument and is never saved.
(edge-output "std::unique_ptr<Edge>" :initarg nil
:save-fun "" :load-fun "edge_output = LoadEdge(ar);"
:capnp-save :dont-save))))
;; RPC for counting vertices: the request member is a transaction id, the
;; response member is the resulting 64-bit count.
(lcp:define-rpc vertex-count
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ((member :int64_t))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,43 +0,0 @@
#include <memory>
#include "data_rpc_server.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/data_rpc_messages.hpp"
namespace distributed {
// Registers the three data RPC handlers (vertex, edge, vertex count) on
// `server`. Each handler opens a GraphDbAccessor for the transaction id found
// in the request.
DataRpcServer::DataRpcServer(database::GraphDb &db,
                             communication::rpc::Server &server)
    : db_(db), rpc_server_(server) {
  // Single vertex lookup; answers with the vertex's old record.
  rpc_server_.Register<VertexRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        const auto tx_gid = req_reader.getMember();
        database::GraphDbAccessor dba(db_, tx_gid.getTxId());
        auto vertex = dba.FindVertex(tx_gid.getGid(), false);
        auto old_vertex = vertex.GetOld();
        CHECK(old_vertex) << "Old record must exist when sending vertex by RPC";
        VertexRes response(old_vertex, db_.WorkerId());
        response.Save(res_builder);
      });

  // Single edge lookup; answers with the edge's old record.
  rpc_server_.Register<EdgeRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        const auto tx_gid = req_reader.getMember();
        database::GraphDbAccessor dba(db_, tx_gid.getTxId());
        auto edge = dba.FindEdge(tx_gid.getGid(), false);
        auto old_edge = edge.GetOld();
        CHECK(old_edge) << "Old record must exist when sending edge by RPC";
        EdgeRes response(old_edge, db_.WorkerId());
        response.Save(res_builder);
      });

  // Vertex count: iterates over `dba.Vertices(false)` and counts the elements.
  rpc_server_.Register<VertexCountRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        VertexCountReq request;
        request.Load(req_reader);
        database::GraphDbAccessor dba(db_, request.member);
        int64_t vertex_count = 0;
        for (auto vertex : dba.Vertices(false)) {
          ++vertex_count;
        }
        VertexCountRes response(vertex_count);
        response.Save(res_builder);
      });
}
} // namespace distributed

View File

@ -1,17 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
namespace distributed {
/// Serves this worker's data to others.
class DataRpcServer {
public:
/// Registers the data RPC handlers on `server`. Both arguments are stored
/// by reference.
DataRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
// Database the handlers read from.
database::GraphDb &db_;
// Server the handlers are registered on.
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,25 +0,0 @@
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"
namespace distributed {
// Starts an asynchronous task that sends MakeSnapshotRpc for transaction `tx`
// through ExecuteOnWorkers and waits for all the answers. The returned future
// yields true only if every call succeeded and reported `true`; a failed call
// counts as `false`.
utils::Future<bool> DurabilityRpcClients::MakeSnapshot(tx::TransactionId tx) {
  auto snapshot_everywhere = [this, tx] {
    auto worker_results = clients_.ExecuteOnWorkers<bool>(
        0, [tx](int worker_id, communication::rpc::ClientPool &client_pool) {
          auto res = client_pool.Call<MakeSnapshotRpc>(tx);
          return res ? res->member : false;
        });

    // Every future is awaited, even after a failure has been seen.
    bool all_created = true;
    for (auto &worker_result : worker_results) {
      all_created &= worker_result.get();
    }
    return all_created;
  };

  return utils::make_future(
      std::async(std::launch::async, std::move(snapshot_everywhere)));
}
} // namespace distributed

View File

@ -1,28 +0,0 @@
#pragma once
#include <future>
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides an ability to trigger snapshotting on other workers.
class DurabilityRpcClients {
public:
/// Stores a reference to `clients`; the object is not copied.
DurabilityRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
// Sends a snapshot request to workers and returns a future which becomes true
// if all workers successfully completed their snapshot creation, false
// otherwise.
// @param tx - transaction from which to take db snapshot
utils::Future<bool> MakeSnapshot(tx::TransactionId tx);
private:
// Client pools used for issuing the RPCs.
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "distributed/durability_rpc_messages.capnp.h"
#include "transactions/transaction.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
;; RPC asking the receiver to make a snapshot: the request member is a
;; transaction id, the response member is a boolean result.
(lcp:define-rpc make-snapshot
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ((member :bool))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,20 +0,0 @@
#include "distributed/durability_rpc_server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/durability_rpc_messages.hpp"
namespace distributed {
// Registers the MakeSnapshotRpc handler on `server`. The handler opens an
// accessor for the requested transaction, asks the database to make a
// snapshot through it and replies with the outcome.
DurabilityRpcServer::DurabilityRpcServer(database::GraphDb &db,
                                         communication::rpc::Server &server)
    : db_(db), rpc_server_(server) {
  auto on_make_snapshot = [this](const auto &req_reader, auto *res_builder) {
    database::GraphDbAccessor dba(this->db_, req_reader.getMember());
    MakeSnapshotRes response(this->db_.MakeSnapshot(dba));
    response.Save(res_builder);
  };
  rpc_server_.Register<MakeSnapshotRpc>(on_make_snapshot);
}
} // namespace distributed

View File

@ -1,21 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
// Forward declaration: this header only refers to GraphDb by reference, so
// the full definition is not needed here.
namespace database {
class GraphDb;
}  // namespace database
namespace distributed {
/// Server side of the durability RPCs: handles snapshot requests.
class DurabilityRpcServer {
public:
/// Registers the MakeSnapshotRpc handler on `server`. Both arguments are
/// stored by reference.
DurabilityRpcServer(database::GraphDb &db,
communication::rpc::Server &server);
private:
// Database on which snapshots are made.
database::GraphDb &db_;
// Server the handler is registered on.
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,25 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "distributed/index_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
;; RPC asking the receiver to build a label+property index. The request names
;; the label, the property and the transaction id to work under; the response
;; has no payload.
(lcp:define-rpc build-index
(:request
((label "storage::Label" :capnp-type "Storage.Common")
(property "storage::Property" :capnp-type "Storage.Common")
(tx-id "tx::TransactionId" :capnp-type "UInt64")))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,33 +0,0 @@
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/index_rpc_server.hpp"
namespace distributed {
// Registers the BuildIndexRpc handler on `server`. The handler either builds
// the requested label+property index itself or, when the index entry already
// exists, waits until the index is reported as existing.
IndexRpcServer::IndexRpcServer(database::GraphDb &db,
                               communication::rpc::Server &server)
    : db_(db), rpc_server_(server) {
  rpc_server_.Register<BuildIndexRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        BuildIndexReq request;
        request.Load(req_reader);
        database::LabelPropertyIndex::Key key{request.label, request.property};
        database::GraphDbAccessor dba(db_, request.tx_id);

        if (db_.storage().label_property_index_.CreateIndex(key)) {
          // The index entry was created by this call, so fill and enable it.
          dba.PopulateIndex(key);
          dba.EnableIndex(key);
          return;
        }

        // CreateIndex reported failure. On a distributed worker this means
        // the index should already be in the middle of being created, so
        // wait for it to show up; that way returning guarantees that the
        // index has been built. This assumes that no worker thread which is
        // creating an index will fail.
        while (!dba.LabelPropertyIndexExists(key.label_, key.property_)) {
          // TODO reconsider this constant, currently rule-of-thumb chosen
          std::this_thread::sleep_for(std::chrono::microseconds(100));
        }
      });
}
} // namespace distributed

View File

@ -1,22 +0,0 @@
#pragma once
namespace communication::rpc {
class Server;
}
namespace database {
class GraphDb;
}
namespace distributed {
/// Server side of the index RPCs: handles index build requests.
class IndexRpcServer {
public:
/// Registers the BuildIndexRpc handler on `server`. Both arguments are
/// stored by reference.
IndexRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
// Database whose indexes are built.
database::GraphDb &db_;
// Server the handler is registered on.
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,41 +0,0 @@
#include "distributed/plan_consumer.hpp"
namespace distributed {
// Registers the handlers that keep the local plan cache up to date:
// DispatchPlanRpc adds a plan, RemovePlanRpc drops one.
PlanConsumer::PlanConsumer(communication::rpc::Server &server)
    : server_(server) {
  // Store the received plan together with its symbol table and AST storage.
  server_.Register<DispatchPlanRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        DispatchPlanReq request;
        request.Load(req_reader);
        auto pack = std::make_unique<PlanPack>(
            request.plan, request.symbol_table, std::move(request.storage));
        plan_cache_.access().insert(request.plan_id, std::move(pack));
        DispatchPlanRes response;
        response.Save(res_builder);
      });

  // Forget the plan whose id is given in the request.
  server_.Register<RemovePlanRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        const auto plan_id = req_reader.getMember();
        plan_cache_.access().remove(plan_id);
      });
}
// Returns the cached pack for `plan_id`; aborts through CHECK when the plan
// is not in the cache.
PlanConsumer::PlanPack &PlanConsumer::PlanForId(int64_t plan_id) const {
  auto cache = plan_cache_.access();
  auto entry = cache.find(plan_id);
  CHECK(entry != cache.end())
      << "Missing plan and symbol table for plan id: " << plan_id;
  auto &pack = *entry->second;
  return pack;
}
// Collects the ids of all plans that are in the cache at the moment.
std::vector<int64_t> PlanConsumer::CachedPlanIds() const {
  std::vector<int64_t> ids;
  auto cache = plan_cache_.access();
  ids.reserve(cache.size());
  for (auto &entry : cache) {
    ids.emplace_back(entry.first);
  }
  return ids;
}
} // namespace distributed

View File

@ -1,44 +0,0 @@
#pragma once
#include <vector>
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan consumption from master. Creates and holds a local cache of
* plans. Worker side. */
class PlanConsumer {
public:
/** Everything that is cached for one plan: the operator tree, its symbol
* table and the AST storage received along with it. */
struct PlanPack {
PlanPack(std::shared_ptr<query::plan::LogicalOperator> plan,
query::SymbolTable symbol_table, query::AstStorage storage)
: plan(plan),
symbol_table(std::move(symbol_table)),
storage(std::move(storage)) {}
std::shared_ptr<query::plan::LogicalOperator> plan;
query::SymbolTable symbol_table;
const query::AstStorage storage;
};
/** Registers the plan dispatch/removal handlers on `server`, which is
* stored by reference. */
explicit PlanConsumer(communication::rpc::Server &server);
/** Return cached plan and symbol table for a given plan id. Aborts (CHECK)
* if no plan is cached under that id. */
PlanPack &PlanForId(int64_t plan_id) const;
/** Return the ids of all the cached plans. For testing. */
std::vector<int64_t> CachedPlanIds() const;
private:
communication::rpc::Server &server_;
// TODO remove unique_ptr. This is to get it to work, emplacing into a
// ConcurrentMap is tricky.
// Declared mutable because the const accessors above call access() on it.
mutable ConcurrentMap<int64_t, std::unique_ptr<PlanPack>> plan_cache_;
};
} // namespace distributed

View File

@ -1,35 +0,0 @@
#include <distributed/plan_dispatcher.hpp>
namespace distributed {
// Only remembers the worker clients; they are stored by reference.
PlanDispatcher::PlanDispatcher(RpcWorkerClients &clients)
    : clients_(clients) {
}
// Sends the plan (with its id and symbol table) through ExecuteOnWorkers and
// blocks until every call has finished. A failed RPC aborts through CHECK.
void PlanDispatcher::DispatchPlan(
    int64_t plan_id, std::shared_ptr<query::plan::LogicalOperator> plan,
    const query::SymbolTable &symbol_table) {
  // The task owns copies of everything it sends.
  auto send_plan = [plan_id, plan, symbol_table](
                       int worker_id,
                       communication::rpc::ClientPool &client_pool) {
    auto result =
        client_pool.Call<DispatchPlanRpc>(plan_id, plan, symbol_table);
    CHECK(result) << "DispatchPlanRpc failed";
  };

  auto pending = clients_.ExecuteOnWorkers<void>(0, std::move(send_plan));
  for (auto &call : pending) {
    call.wait();
  }
}
void PlanDispatcher::RemovePlan(int64_t plan_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [plan_id](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<RemovePlanRpc>(plan_id);
CHECK(result) << "Failed to remove plan from worker";
});
for (auto &future : futures) {
future.wait();
}
}
} // namespace distributed

View File

@ -1,30 +0,0 @@
#pragma once
#include "distributed/coordination.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan dispatching to all workers. Uses MasterCoordination to
* accomplish that. Master side.
*/
class PlanDispatcher {
public:
/** Stores a reference to `clients`; the object is not copied. */
explicit PlanDispatcher(RpcWorkerClients &clients);
/** Dispatch a plan to all workers and wait for their acknowledgement. */
void DispatchPlan(int64_t plan_id,
std::shared_ptr<query::plan::LogicalOperator> plan,
const query::SymbolTable &symbol_table);
/** Remove a plan from all workers and wait for their acknowledgement. */
void RemovePlan(int64_t plan_id);
private:
// Client pools used for issuing the RPCs.
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,59 +0,0 @@
#>cpp
#pragma once
#include "communication/rpc/messages.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
#include "distributed/plan_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-import 'plan "/query/plan/operator.capnp")
(lcp:capnp-import 'sem "/query/frontend/semantic/symbol.capnp")
;; Cap'n Proto load code for the `plan` member of dispatch-plan. Operators are
;; constructed and loaded through a shared LoadHelper; once loading is done the
;; helper's AST storage is moved into the message's `storage` member.
(defun load-plan (reader member)
#>cpp
query::plan::LogicalOperator::LoadHelper helper;
${member} = utils::LoadSharedPtr<query::plan::capnp::LogicalOperator, query::plan::LogicalOperator>(
${reader}, [&helper](const auto &reader) {
auto op = query::plan::LogicalOperator::Construct(reader);
op->Load(reader, &helper);
return op.release();
}, &helper.loaded_ops);
storage = std::move(helper.ast_storage);
cpp<#)
;; Cap'n Proto save code for the `plan` member of dispatch-plan. The operator
;; tree is saved through SaveSharedPtr with a SaveHelper that is shared by all
;; operators being saved.
(defun save-plan (builder member)
#>cpp
query::plan::LogicalOperator::SaveHelper helper;
utils::SaveSharedPtr<query::plan::capnp::LogicalOperator, query::plan::LogicalOperator>(
${member}, &${builder},
[&helper](auto *builder, const auto &val) {
val.Save(builder, &helper);
}, &helper.saved_ops);
cpp<#)
;; RPC that delivers a query plan. The request carries the plan id, the plan
;; itself and its symbol table; the response has no payload.
(lcp:define-rpc dispatch-plan
(:request
((plan-id :int64_t)
;; Serialized with the save-plan / load-plan functions.
(plan "std::shared_ptr<query::plan::LogicalOperator>"
:capnp-type "Utils.SharedPtr(Plan.LogicalOperator)"
:capnp-save #'save-plan :capnp-load #'load-plan)
(symbol-table "query::SymbolTable" :capnp-type "Sem.SymbolTable")
;; Never saved and has no constructor argument; it is filled in as a side
;; effect of loading the plan.
(storage "query::AstStorage" :initarg nil
:save-fun ""
:load-fun "storage = std::move(ar.template get_helper<query::AstStorage>(query::AstStorage::kHelperId));"
:capnp-save :dont-save)))
(:response ()))
;; RPC asking the receiver to drop a cached plan. The request member is the
;; plan id; the response has no payload.
(lcp:define-rpc remove-plan
(:request ((member :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,176 +0,0 @@
#include "distributed/produce_rpc_server.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "transactions/engine_worker.hpp"
namespace distributed {
// Sets up the state needed to execute `op` for transaction `tx_id`: an
// accessor, an execution context, a frame sized by the symbol table and a
// cursor made from the plan. The symbol table and the parameters are moved
// into the context once the frame has been sized.
ProduceRpcServer::OngoingProduce::OngoingProduce(
    database::GraphDb &db,
    tx::TransactionId tx_id,
    std::shared_ptr<query::plan::LogicalOperator> op,
    query::SymbolTable symbol_table,
    Parameters parameters,
    std::vector<query::Symbol> pull_symbols)
    : dba_{db, tx_id},
      context_(dba_),
      pull_symbols_(std::move(pull_symbols)),
      frame_(symbol_table.max_position()),
      cursor_(op->MakeCursor(dba_)) {
  context_.parameters_ = std::move(parameters);
  context_.symbol_table_ = std::move(symbol_table);
}
// Produces one result row. Accumulated rows are handed out first (taken from
// the back of the accumulation) after their values have been reconstructed;
// when nothing is accumulated the row comes straight from the cursor.
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::Pull() {
  if (accumulation_.empty()) return PullOneFromCursor();

  auto row = std::move(accumulation_.back());
  accumulation_.pop_back();

  for (auto &value : row) {
    try {
      query::ReconstructTypedValue(value);
    } catch (query::ReconstructionException &) {
      // Remember the failure so that later pulls report it as well.
      cursor_state_ = PullState::RECONSTRUCTION_ERROR;
      return std::make_pair(std::move(row), cursor_state_);
    }
  }
  return std::make_pair(std::move(row), PullState::CURSOR_IN_PROGRESS);
}
// Drains the cursor into `accumulation_`. Returns the first state that is
// not CURSOR_IN_PROGRESS, i.e. either exhaustion or an error.
PullState ProduceRpcServer::OngoingProduce::Accumulate() {
  for (;;) {
    auto pulled = PullOneFromCursor();
    if (pulled.second == PullState::CURSOR_IN_PROGRESS) {
      accumulation_.emplace_back(std::move(pulled.first));
      continue;
    }
    return pulled.second;
  }
}
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::PullOneFromCursor() {
std::vector<query::TypedValue> results;
// Check if we already exhausted this cursor (or it entered an error
// state). This happens when we accumulate before normal pull.
if (cursor_state_ != PullState::CURSOR_IN_PROGRESS) {
return std::make_pair(results, cursor_state_);
}
try {
if (cursor_->Pull(frame_, context_)) {
results.reserve(pull_symbols_.size());
for (const auto &symbol : pull_symbols_) {
results.emplace_back(std::move(frame_[symbol]));
}
} else {
cursor_state_ = PullState::CURSOR_EXHAUSTED;
}
} catch (const mvcc::SerializationError &) {
cursor_state_ = PullState::SERIALIZATION_ERROR;
} catch (const utils::LockTimeoutException &) {
cursor_state_ = PullState::LOCK_TIMEOUT_ERROR;
} catch (const RecordDeletedError &) {
cursor_state_ = PullState::UPDATE_DELETED_ERROR;
} catch (const query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
} catch (const query::RemoveAttachedVertexException &) {
cursor_state_ = PullState::UNABLE_TO_DELETE_VERTEX_ERROR;
} catch (const query::QueryRuntimeException &) {
cursor_state_ = PullState::QUERY_ERROR;
} catch (const query::HintedAbortError &) {
cursor_state_ = PullState::HINTED_ABORT_ERROR;
}
return std::make_pair(std::move(results), cursor_state_);
}
// Stores the collaborators by reference and registers two handlers on
// `server`: PullRpc (runs a remote pull) and TransactionCommandAdvancedRpc
// (updates the transaction's command and clears its data caches).
ProduceRpcServer::ProduceRpcServer(
    database::GraphDb &db,
    tx::Engine &tx_engine,
    communication::rpc::Server &server,
    const distributed::PlanConsumer &plan_consumer)
    : db_(db),
      produce_rpc_server_(server),
      plan_consumer_(plan_consumer),
      tx_engine_(tx_engine) {
  // Remote pull: load the request, execute it and send the result back.
  produce_rpc_server_.Register<PullRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        PullReq request;
        request.Load(req_reader);
        PullRes response(Pull(request));
        response.Save(res_builder);
      });

  // Command advance: `member` of the request is the transaction id.
  produce_rpc_server_.Register<TransactionCommandAdvancedRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        TransactionCommandAdvancedReq request;
        request.Load(req_reader);
        tx_engine_.UpdateCommand(request.member);
        db_.data_manager().ClearCacheForSingleTransaction(request.member);
        TransactionCommandAdvancedRes response;
        response.Save(res_builder);
      });
}
// Erases every ongoing produce whose key starts with `tx_id`, while holding
// the lock that guards the map.
void ProduceRpcServer::FinishAndClearOngoingProducePlans(
    tx::TransactionId tx_id) {
  std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
  auto it = ongoing_produces_.begin();
  while (it != ongoing_produces_.end()) {
    if (std::get<0>(it->first) != tx_id) {
      ++it;
      continue;
    }
    // erase() returns the iterator that follows the removed element.
    it = ongoing_produces_.erase(it);
  }
}
// Returns the OngoingProduce stored under (tx id, command id, plan id) of
// `req`, creating it from the cached plan when it does not exist yet. The
// lookup and the insertion happen under `ongoing_produces_lock_`; the
// returned reference is used after the lock has been released.
ProduceRpcServer::OngoingProduce &ProduceRpcServer::GetOngoingProduce(
    const PullReq &req) {
  auto produce_key = std::make_tuple(req.tx_id, req.command_id, req.plan_id);
  std::lock_guard<std::mutex> guard{ongoing_produces_lock_};

  auto existing = ongoing_produces_.find(produce_key);
  if (existing != ongoing_produces_.end()) return existing->second;

  const bool is_worker =
      db_.type() == database::GraphDb::Type::DISTRIBUTED_WORKER;
  if (is_worker) {
    // On the worker cache the snapshot to have one RPC less.
    auto &worker_engine = dynamic_cast<tx::WorkerEngine &>(tx_engine_);
    worker_engine.RunningTransaction(req.tx_id, req.tx_snapshot);
  }

  auto &plan_pack = plan_consumer_.PlanForId(req.plan_id);
  auto inserted = ongoing_produces_.emplace(
      std::piecewise_construct, std::forward_as_tuple(produce_key),
      std::forward_as_tuple(db_, req.tx_id, plan_pack.plan,
                            plan_pack.symbol_table, req.params, req.symbols));
  return inserted.first->second;
}
// Executes one remote pull. When the request asks for accumulation the
// cursor is drained first, and any outcome other than exhaustion is returned
// right away. After that up to `req.batch_size` rows are collected; the loop
// stops early on the first state that is not CURSOR_IN_PROGRESS.
PullResData ProduceRpcServer::Pull(const PullReq &req) {
  auto &produce = GetOngoingProduce(req);

  PullResData response(db_.WorkerId(), req.send_old, req.send_new);
  response.pull_state = PullState::CURSOR_IN_PROGRESS;

  if (req.accumulate) {
    response.pull_state = produce.Accumulate();
    // Anything but exhaustion is an error that has to be reported.
    if (response.pull_state != PullState::CURSOR_EXHAUSTED) {
      return response;
    }
  }

  for (int i = 0; i < req.batch_size; ++i) {
    auto pulled = produce.Pull();
    response.pull_state = pulled.second;
    if (pulled.second == PullState::CURSOR_IN_PROGRESS) {
      response.frames.emplace_back(std::move(pulled.first));
    } else {
      break;
    }
  }
  return response;
}
} // namespace distributed

View File

@ -1,92 +0,0 @@
#pragma once
#include <cstdint>
#include <map>
#include <mutex>
#include <utility>
#include <vector>
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/plan_consumer.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/frame.hpp"
#include "query/parameters.hpp"
#include "query/plan/operator.hpp"
#include "query/typed_value.hpp"
#include "transactions/engine.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Handles the execution of a plan on the worker, requested by the remote
/// master. Assumes that (tx_id, command_id, plan_id) uniquely identifies an
/// execution, and that there will never be parallel requests for the same
/// execution thus identified.
class ProduceRpcServer {
/// Encapsulates a Cursor execution in progress. Can be used for pulling a
/// single result from the execution, or pulling all and accumulating the
/// results. Accumulations are used for synchronizing updates in distributed
/// MG (see query::plan::Synchronize).
class OngoingProduce {
public:
OngoingProduce(database::GraphDb &db, tx::TransactionId tx_id,
std::shared_ptr<query::plan::LogicalOperator> op,
query::SymbolTable symbol_table, Parameters parameters,
std::vector<query::Symbol> pull_symbols);
/// Returns a vector of typed values (one for each `pull_symbol`), and an
/// indication of the pull result. The result data is valid only if the
/// returned state is CURSOR_IN_PROGRESS.
std::pair<std::vector<query::TypedValue>, PullState> Pull();
/// Accumulates all the frames pulled from the cursor and returns
/// CURSOR_EXHAUSTED. If an error occurs, an appropriate value is returned.
PullState Accumulate();
private:
// NOTE: the constructor initializes the members in this declaration order
// (the cursor is made from `dba_`), so keep the order intact.
database::GraphDbAccessor dba_;
query::Context context_;
std::vector<query::Symbol> pull_symbols_;
query::Frame frame_;
// Sticky state of the cursor; once it leaves CURSOR_IN_PROGRESS the cursor
// is not pulled any more.
PullState cursor_state_{PullState::CURSOR_IN_PROGRESS};
// Rows stored by Accumulate() and later handed out by Pull().
std::vector<std::vector<query::TypedValue>> accumulation_;
std::unique_ptr<query::plan::Cursor> cursor_;
/// Pulls and returns a single result from the cursor.
std::pair<std::vector<query::TypedValue>, PullState> PullOneFromCursor();
};
public:
/// Registers the pull and command-advance RPC handlers on `server`. All
/// arguments are stored by reference.
ProduceRpcServer(database::GraphDb &db, tx::Engine &tx_engine,
communication::rpc::Server &server,
const distributed::PlanConsumer &plan_consumer);
/// Finish and clear ongoing produces for all plans that are tied to a
/// transaction with tx_id.
void FinishAndClearOngoingProducePlans(tx::TransactionId tx_id);
private:
// Guards `ongoing_produces_`.
std::mutex ongoing_produces_lock_;
/// Mapping of (tx id, command id, plan id) to OngoingProduce.
/// The command_id should be the command_id at the initialization of a cursor
/// that can call ProduceRpcServer.
std::map<std::tuple<tx::TransactionId, tx::CommandId, int64_t>,
OngoingProduce>
ongoing_produces_;
database::GraphDb &db_;
communication::rpc::Server &produce_rpc_server_;
const distributed::PlanConsumer &plan_consumer_;
tx::Engine &tx_engine_;
/// Gets an ongoing produce for the given pull request. Creates a new one if
/// there is none currently existing.
OngoingProduce &GetOngoingProduce(const PullReq &req);
/// Performs a single remote pull for the given request.
PullResData Pull(const PullReq &req);
};
} // namespace distributed

View File

@ -1,547 +0,0 @@
#>cpp
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/pull_produce_rpc_messages.capnp.h"
#include "distributed/serialization.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "storage/address_types.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:in-impl
#>cpp
#include "database/graph_db_accessor.hpp"
#include "distributed/data_manager.hpp"
cpp<#)
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'sem "/query/frontend/semantic/symbol.capnp")
(lcp:capnp-import 'tx "/transactions/common.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "tx::CommandId" "UInt32")
(lcp:capnp-type-conversion "tx::Snapshot" "Tx.Snapshot")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
;; Verbatim C++ emitted into the generated header.
#>cpp
/// The default number of results returned via RPC from remote execution to the
/// master that requested it.
constexpr int kDefaultBatchSize = 20;
cpp<#
;; Execution state a worker reports together with every batch of pulled
;; results. The two `cursor-*` values describe normal progress. The `*-error`
;; values presumably mirror the query/transaction exceptions raised on the
;; worker -- TODO confirm against the code that fills in `pull_state`.
(lcp:define-enum pull-state
(cursor-exhausted
cursor-in-progress
serialization-error
lock-timeout-error
update-deleted-error
reconstruction-error
unable-to-delete-vertex-error
hinted-abort-error
query-error)
(:documentation "Returned along with a batch of results in the remote-pull
RPC. Indicates the state of execution on the worker.")
(:serialize))
;; Consumer-facing result of a pull: only the execution state and the result
;; frames, without the serialization bookkeeping that PullResData carries.
(lcp:define-struct pull-data ()
((pull-state "PullState")
(frames "std::vector<std::vector<query::TypedValue>>"))
(:documentation
"The data returned to the end consumer (the Pull operator). Contains only
the relevant parts of the response, ready for use."))
(lcp:define-struct pull-res-data ()
((pull-state "PullState"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::PullState" "PullState")
:capnp-load (lcp:capnp-load-enum "capnp::PullState" "PullState"))
;; Result rows of the pull. Every frame is a list of typed values; graph
;; elements (vertices, edges, paths) inside a frame are (de)serialized through
;; PullResData::Save/LoadGraphElement.
(frames "std::vector<std::vector<query::TypedValue>>"
        :capnp-type "List(List(Dis.TypedValue))"
        :capnp-save
        (lambda (builder member)
          #>cpp
          for (size_t frame_i = 0; frame_i < ${member}.size(); ++frame_i) {
            const auto &frame = ${member}[frame_i];
            auto frame_builder = ${builder}.init(frame_i, frame.size());
            for (size_t val_i = 0; val_i < frame.size(); ++val_i) {
              const auto &value = frame[val_i];
              auto value_builder = frame_builder[val_i];
              utils::SaveCapnpTypedValue(
                  value, &value_builder,
                  [this](const auto &value, auto *builder) {
                    this->SaveGraphElement(value, builder);
                  });
            }
          }
          cpp<#)
        :capnp-load
        (lambda (reader member)
          #>cpp
          ${member}.reserve(${reader}.size());
          for (const auto &frame_reader : ${reader}) {
            std::vector<query::TypedValue> current_frame;
            current_frame.reserve(frame_reader.size());
            for (const auto &value_reader : frame_reader) {
              query::TypedValue value;
              utils::LoadCapnpTypedValue(
                  value_reader, &value,
                  [this, dba](const auto &reader, auto *value) {
                    this->LoadGraphElement(dba, reader, value);
                  });
              // The local is not used again, so hand it over instead of
              // copying it.
              current_frame.emplace_back(std::move(value));
            }
            // Move the finished frame; copying would duplicate every value.
            ${member}.emplace_back(std::move(current_frame));
          }
          cpp<#))
;; Context of the producing side and temporary load-side caches. None of the
;; slots below is written into the Cap'n Proto message (`:dont-save`).
;; NOTE(review): the last sentence of the worker-id documentation ("Indicates
;; which of (old, new) records ...") describes send-old/send-new, not
;; worker-id.
(worker-id :int16_t :capnp-save :dont-save
:documentation
"Id of the worker on which the response is created, used for
serializing vertices (converting local to global addresses). Indicates which
of (old, new) records of a graph element should be sent.")
;; When set, the old (respectively new) record of every graph element is
;; serialized by SaveGraphElement.
(send-old :bool :capnp-save :dont-save)
(send-new :bool :capnp-save :dont-save)
;; Temporary caches used between deserialization and post-processing
;; (transferring the ownership of this data to a Cache).
(vertices "std::vector<GraphElementData<Vertex>>" :capnp-save :dont-save)
(edges "std::vector<GraphElementData<Edge>>" :capnp-save :dont-save)
(paths "std::vector<PathData>" :capnp-save :dont-save))
(:documentation
"The data of the remote pull response. Post-processing is required after
deserialization to initialize Vertex/Edge typed values in the frames (possibly
encapsulated in lists/maps) to their proper values. This requires a
GraphDbAccessor and therefore can't be done as part of deserialization.
TODO - make it possible to inject a &GraphDbAcessor from the Pull layer all
the way into RPC data deserialization to remove the requirement for
post-processing. The current approach of holding references to parts of the
frame (potentially embedded in lists/maps) is too error-prone.")
(:public
#>cpp
private:
cpp<#
;; Nested helper struct, templated on the record type (TRecord), that holds
;; one deserialized vertex or edge until post-processing takes it over.
(lcp:define-struct (graph-element-data t-record) ()
;; Address of the element's version list, followed by the optional old and new
;; records; the records are owned here through std::unique_ptr.
((global-address "storage::Address<mvcc::VersionList<TRecord>>")
(old-record "std::unique_ptr<TRecord>")
(new-record "std::unique_ptr<TRecord>")
(element-in-frame
"query::TypedValue *"
:documentation
"The position in frame is optional. This same structure is used for
deserializing path elements, in which case the vertex/edge in question is not
directly part of the frame."))
(:documentation
"Temp cache for deserialized vertices and edges. These objects are
created during deserialization. They are used immediatelly after during
post-processing. The vertex/edge data ownership gets transfered to the Cache,
and the `element_in_frame` reference is used to set the appropriate accessor
to the appropriate value. Not used on side that generates the response.")
(:public
#>cpp
// Takes ownership of both records; `element_in_frame` is stored as given.
GraphElementData(storage::Address<mvcc::VersionList<TRecord>> address,
std::unique_ptr<TRecord> old_record, std::unique_ptr<TRecord> new_record,
query::TypedValue *element_in_frame)
: global_address(address),
old_record(std::move(old_record)),
new_record(std::move(new_record)),
element_in_frame(element_in_frame) {}
cpp<#))
;; Deserialized path: its vertices and edges as GraphElementData entries, plus
;; a pointer to the typed value in the frame that the path belongs to.
(lcp:define-struct path-data ()
((vertices "std::vector<GraphElementData<Vertex>>")
(edges "std::vector<GraphElementData<Edge>>")
(path-in-frame "query::TypedValue *"))
(:public
#>cpp
// Starts with empty vertex/edge lists; the loader appends to them.
PathData(query::TypedValue *path_in_frame) : path_in_frame(path_in_frame) {}
cpp<#)
(:documentation "Same like `GraphElementData`, but for paths."))
#>cpp
public:
// NOTE(review): this constructor does not set worker_id, send_old or send_new;
// whether they get a defined value depends on the generated member
// declarations -- TODO confirm.
PullResData() {} // Default constructor required for serialization.
// Constructor for the producing side: remembers the id of the worker and
// which of the (old, new) records should be serialized.
PullResData(int worker_id, bool send_old, bool send_new)
: worker_id(worker_id), send_old(send_old), send_new(send_new) {}
// Move-only: copying is disabled, moving uses the defaults.
PullResData(const PullResData &) = delete;
PullResData &operator=(const PullResData &) = delete;
PullResData(PullResData &&) = default;
PullResData &operator=(PullResData &&) = default;
/// Serializes a typed value holding a vertex, an edge or a path.
///
/// Every graph element is written as its raw global address, followed by an
/// "exists" flag (plus the record when the flag is true) for the old record
/// and then for the new record. A path is written as its size followed by the
/// first vertex and then alternating edge/vertex pairs.
template <class TArchive>
void SaveGraphElement(TArchive &ar, const query::TypedValue &value) const {
  // Writes a single accessor. It is taken by value because serializing may
  // reconstruct it or switch it to the new record.
  auto write_element = [&ar, this](auto accessor) {
    ar << accessor.GlobalAddress().raw();
    // Neither record is at hand, so the accessor has to be reconstructed.
    if (!accessor.GetOld() && !accessor.GetNew()) {
      bool result = accessor.Reconstruct();
      CHECK(result) << "Attempting to serialize an element not visible to "
                       "current transaction.";
    }

    auto *old_record = accessor.GetOld();
    if (!send_old || !old_record) {
      ar << false;
    } else {
      ar << true;
      distributed::SaveElement(ar, *old_record, worker_id);
    }

    if (!send_new) {
      ar << false;
      return;
    }
    // Must call SwitchNew as that will trigger a potentially necessary
    // Reconstruct.
    accessor.SwitchNew();
    if (auto *new_record = accessor.GetNew()) {
      ar << true;
      distributed::SaveElement(ar, *new_record, worker_id);
    } else {
      ar << false;
    }
  };

  switch (value.type()) {
    case query::TypedValue::Type::Vertex: {
      write_element(value.ValueVertex());
      break;
    }
    case query::TypedValue::Type::Edge: {
      write_element(value.ValueEdge());
      break;
    }
    case query::TypedValue::Type::Path: {
      auto &path = value.ValuePath();
      ar << path.size();
      write_element(path.vertices()[0]);
      for (size_t step = 0; step < path.size(); ++step) {
        write_element(path.edges()[step]);
        write_element(path.vertices()[step + 1]);
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported graph element type: " << value.type();
  }
}
/// Loads a typed value that is a vertex/edge/path. Part of the
/// deserialization process, populates the temporary data caches which are
/// processed later.
template <class TArchive>
void LoadGraphElement(TArchive &ar, query::TypedValue::Type type,
query::TypedValue &value) {
auto load_edge = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadEdge(ar) : nullptr;
};
auto load_vertex = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadVertex(ar) : nullptr;
};
switch (type) {
case query::TypedValue::Type::Vertex: {
storage::VertexAddress::StorageT address;
ar >> address;
vertices.emplace_back(storage::VertexAddress(address), load_vertex(ar),
load_vertex(ar), &value);
break;
}
case query::TypedValue::Type::Edge: {
storage::VertexAddress::StorageT address;
ar >> address;
edges.emplace_back(storage::EdgeAddress(address), load_edge(ar),
load_edge(ar), &value);
break;
}
case query::TypedValue::Type::Path: {
size_t path_size;
ar >> path_size;
paths.emplace_back(&value);
auto &path_data = paths.back();
storage::VertexAddress::StorageT vertex_address;
storage::EdgeAddress::StorageT edge_address;
ar >> vertex_address;
path_data.vertices.emplace_back(storage::VertexAddress(vertex_address),
load_vertex(ar), load_vertex(ar),
nullptr);
for (size_t i = 0; i < path_size; ++i) {
ar >> edge_address;
path_data.edges.emplace_back(storage::EdgeAddress(edge_address),
load_edge(ar), load_edge(ar), nullptr);
ar >> vertex_address;
path_data.vertices.emplace_back(
storage::VertexAddress(vertex_address), load_vertex(ar),
load_vertex(ar), nullptr);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << type;
}
}
cpp<#)
(:private
#>cpp
/// Cap'n Proto variant of saving a vertex/edge/path typed value into the
/// given builder.
void SaveGraphElement(const query::TypedValue &,
distributed::capnp::TypedValue::Builder *) const;
/// Cap'n Proto variant of loading a vertex/edge/path typed value from the
/// given reader; the accessor is needed to build the resulting
/// vertex/edge accessors.
void LoadGraphElement(database::GraphDbAccessor *,
const distributed::capnp::TypedValue::Reader &,
query::TypedValue *);
cpp<#)
(:serialize :capnp :load-args '((dba "database::GraphDbAccessor *"))))
(lcp:in-impl
#>cpp
// Stores a typed value holding a vertex, an edge or a path into the given
// Cap'n Proto builder. For every element the raw global address is stored,
// the old record when `send_old` is set and the record exists, and the new
// record when `send_new` is set and the record exists.
void PullResData::SaveGraphElement(
    const query::TypedValue &value,
    distributed::capnp::TypedValue::Builder *builder) const {
  // Stores a single accessor. It is taken by value because serializing may
  // reconstruct it or switch it to the new record.
  auto store = [this](auto element, auto *element_builder) {
    element_builder->setAddress(element.GlobalAddress().raw());
    // With neither record at hand the accessor has to be reconstructed.
    if (!element.GetOld() && !element.GetNew()) {
      bool result = element.Reconstruct();
      CHECK(result) << "Attempting to serialize an element not visible to "
                       "current transaction.";
    }

    auto *old_record = element.GetOld();
    if (old_record && send_old) {
      auto old_builder = element_builder->initOld();
      distributed::SaveElement(*old_record, &old_builder, worker_id);
    }

    if (!send_new) return;
    // Must call SwitchNew as that will trigger a potentially necessary
    // Reconstruct.
    element.SwitchNew();
    if (auto *new_record = element.GetNew()) {
      auto new_builder = element_builder->initNew();
      distributed::SaveElement(*new_record, &new_builder, worker_id);
    }
  };

  switch (value.type()) {
    case query::TypedValue::Type::Vertex: {
      auto target = builder->initVertex();
      store(value.ValueVertex(), &target);
      break;
    }
    case query::TypedValue::Type::Edge: {
      auto target = builder->initEdge();
      store(value.ValueEdge(), &target);
      break;
    }
    case query::TypedValue::Type::Path: {
      const auto &path = value.ValuePath();
      auto path_builder = builder->initPath();

      auto vertex_list = path_builder.initVertices(path.vertices().size());
      for (size_t pos = 0; pos < path.vertices().size(); ++pos) {
        auto target = vertex_list[pos];
        store(path.vertices()[pos], &target);
      }

      auto edge_list = path_builder.initEdges(path.edges().size());
      for (size_t pos = 0; pos < path.edges().size(); ++pos) {
        auto target = edge_list[pos];
        store(path.edges()[pos], &target);
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported graph element type: " << value.type();
  }
}
// Loads a vertex, an edge or a path from the given Cap'n Proto reader into
// `value`. Every loaded record pair is handed to the data manager of the
// accessor's database (keyed by the accessor's transaction id and the
// element's gid), and `value` receives an accessor built from the global
// address.
void PullResData::LoadGraphElement(
    database::GraphDbAccessor *dba,
    const distributed::capnp::TypedValue::Reader &reader,
    query::TypedValue *value) {
  auto load_vertex = [dba](const auto &vertex_reader) {
    storage::VertexAddress global_address(vertex_reader.getAddress());
    // Old and new records are optional in the message.
    auto old_record =
        vertex_reader.hasOld()
            ? distributed::LoadVertex<const distributed::capnp::Vertex::Reader>(
                  vertex_reader.getOld())
            : nullptr;
    auto new_record =
        vertex_reader.hasNew()
            ? distributed::LoadVertex<const distributed::capnp::Vertex::Reader>(
                  vertex_reader.getNew())
            : nullptr;
    dba->db()
        .data_manager()
        .Elements<Vertex>(dba->transaction_id())
        .emplace(global_address.gid(), std::move(old_record),
                 std::move(new_record));
    return VertexAccessor(global_address, *dba);
  };
  auto load_edge = [dba](const auto &edge_reader) {
    storage::EdgeAddress global_address(edge_reader.getAddress());
    auto old_record =
        edge_reader.hasOld()
            ? distributed::LoadEdge<const distributed::capnp::Edge::Reader>(
                  edge_reader.getOld())
            : nullptr;
    auto new_record =
        edge_reader.hasNew()
            ? distributed::LoadEdge<const distributed::capnp::Edge::Reader>(
                  edge_reader.getNew())
            : nullptr;
    dba->db()
        .data_manager()
        .Elements<Edge>(dba->transaction_id())
        .emplace(global_address.gid(), std::move(old_record),
                 std::move(new_record));
    return EdgeAccessor(global_address, *dba);
  };
  switch (reader.which()) {
    case distributed::capnp::TypedValue::VERTEX:
      *value = load_vertex(reader.getVertex());
      break;
    case distributed::capnp::TypedValue::EDGE:
      *value = load_edge(reader.getEdge());
      break;
    case distributed::capnp::TypedValue::PATH: {
      auto vertices_reader = reader.getPath().getVertices();
      auto edges_reader = reader.getPath().getEdges();
      // The code below indexes vertices [0, edges.size()], so a message with
      // any other shape would be read out of bounds. Fail loudly instead.
      CHECK(vertices_reader.size() == edges_reader.size() + 1)
          << "Malformed path: expected exactly one more vertex than edges.";
      query::Path path(load_vertex(vertices_reader[0]));
      for (size_t i = 0; i < edges_reader.size(); ++i) {
        path.Expand(load_edge(edges_reader[i]));
        path.Expand(load_vertex(vertices_reader[i + 1]));
      }
      *value = path;
      break;
    }
    default:
      LOG(FATAL) << "Unsupported graph element type.";
  }
}
cpp<#)
;; Pull RPC. The request identifies the transaction, plan and command to
;; execute remotely and carries the query parameters and the symbols to pull;
;; the response carries a PullResData with the execution state and a batch of
;; result frames.
(lcp:define-rpc pull
(:request
((tx-id "tx::TransactionId")
(tx-snapshot "tx::Snapshot")
(plan-id :int64_t)
(command-id "tx::CommandId")
;; Query parameters. The archive load-fun reads each key as an `int` token
;; position; the Cap'n Proto form stores the key as a boxed Int64.
;; NOTE(review): the capnp-save body iterates over `params` directly while the
;; size is taken from `${member}` -- presumably the same object; confirm.
(params "Parameters"
:save-fun
"
ar << params.size();
for (auto &kv : params) {
ar << kv.first;
// Params never contain a vertex/edge, so save plan TypedValue.
utils::SaveTypedValue(ar, kv.second);
}
"
:load-fun
"
size_t params_size;
ar >> params_size;
for (size_t i = 0; i < params_size; ++i) {
int token_pos;
ar >> token_pos;
query::TypedValue param;
// Params never contain a vertex/edge, so load plan TypedValue.
utils::LoadTypedValue(ar, param);
params.Add(token_pos, param);
}
"
:capnp-type "Utils.Map(Utils.BoxInt64, Dis.TypedValue)"
:capnp-save
(lambda (builder member)
#>cpp
auto entries_builder = ${builder}.initEntries(${member}.size());
size_t i = 0;
for (auto &entry : params) {
auto builder = entries_builder[i];
auto key_builder = builder.initKey();
key_builder.setValue(entry.first);
auto value_builder = builder.initValue();
utils::SaveCapnpTypedValue(entry.second, &value_builder);
++i;
}
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
for (const auto &entry_reader : ${reader}.getEntries()) {
query::TypedValue value;
utils::LoadCapnpTypedValue(entry_reader.getValue(), &value);
${member}.Add(entry_reader.getKey().getValue(), value);
}
cpp<#))
(symbols "std::vector<query::Symbol>"
:capnp-type "List(Sem.Symbol)"
:capnp-save (lcp:capnp-save-vector "query::capnp::Symbol" "query::Symbol")
:capnp-load (lcp:capnp-load-vector "query::capnp::Symbol" "query::Symbol"))
(accumulate :bool)
(batch-size :int64_t)
;; Indicates which of (old, new) records of a graph element should be sent.
(send-old :bool)
(send-new :bool)))
(:response
;; The archive save-fun writes the frame count followed by ONE value count
;; (taken from the first frame), so that format relies on all frames having
;; the same number of values. Graph elements inside the values are delegated
;; to PullResData::Save/LoadGraphElement.
((data "PullResData" :initarg :move
:save-fun
"
ar << data.pull_state;
ar << data.frames.size();
// We need to indicate how many values are in each frame.
// Assume all the frames have an equal number of elements.
ar << (data.frames.size() == 0 ? 0 : data.frames[0].size());
for (const auto &frame : data.frames) {
for (const auto &value : frame) {
utils::SaveTypedValue<TArchive>(
ar, value, [this](TArchive &ar, const query::TypedValue &value) {
data.SaveGraphElement(ar, value);
});
}
}
"
:load-fun
"
ar >> data.pull_state;
size_t frame_count;
ar >> frame_count;
data.frames.reserve(frame_count);
size_t frame_size;
ar >> frame_size;
for (size_t i = 0; i < frame_count; ++i) {
data.frames.emplace_back();
auto &current_frame = data.frames.back();
current_frame.reserve(frame_size);
for (size_t j = 0; j < frame_size; ++j) {
current_frame.emplace_back();
utils::LoadTypedValue<TArchive>(
ar, current_frame.back(),
[this](TArchive &ar, query::TypedValue::TypedValue::Type type,
query::TypedValue &value) {
data.LoadGraphElement(ar, type, value);
});
}
}
"))
;; Loading the response needs a GraphDbAccessor, passed as the `dba` load
;; argument.
(:serialize :capnp :base t :load-args '((dba "database::GraphDbAccessor *")))))
;; TODO make a separate RPC for the continuation of an existing pull, as an
;; optimization not to have to send the full PullReqData pack every time.
;; RPC with an empty response whose request carries a single transaction id.
;; NOTE(review): the request slot is called `member` although it holds a
;; tx::TransactionId -- presumably the id of the transaction whose command
;; advanced; confirm before renaming.
(lcp:define-rpc transaction-command-advanced
(:request ((member "tx::TransactionId")))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,41 +0,0 @@
#include <functional>
#include "distributed/data_manager.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
// Asynchronously asks the given worker to execute the plan and returns a
// future of the pulled batch (execution state plus result frames).
//
// `dba` is captured by reference and used on the thread that runs the call,
// so it must outlive the returned future. `params` and `symbols` are copied
// into the task. Both the old and the new records are always requested
// (the two trailing `true` arguments of the RPC).
utils::Future<PullData> PullRpcClients::Pull(
    database::GraphDbAccessor &dba, int worker_id, int64_t plan_id,
    tx::CommandId command_id, const Parameters &params,
    const std::vector<query::Symbol> &symbols, bool accumulate,
    int batch_size) {
  return clients_.ExecuteOnWorker<PullData>(
      worker_id, [&dba, plan_id, command_id, params, symbols, accumulate,
                  batch_size](int worker_id, ClientPool &client_pool) {
        // Deserializing the response needs the accessor, hence the custom
        // load function.
        auto load_pull_res = [&dba](const auto &res_reader) {
          PullRes res;
          res.Load(res_reader, &dba);
          return res;
        };
        auto result = client_pool.CallWithLoad<PullRpc>(
            load_pull_res, dba.transaction_id(), dba.transaction().snapshot(),
            plan_id, command_id, params, symbols, accumulate, batch_size, true,
            true);
        // The result is dereferenced below; fail with a clear message rather
        // than dereferencing an empty result when the RPC did not succeed.
        CHECK(result) << "[PullRpc] failed";
        return PullData{result->data.pull_state,
                        std::move(result->data.frames)};
      });
}
// Sends TransactionCommandAdvancedRpc for `tx_id` to every worker except the
// one with id 0 (presumably the master itself -- confirm) and returns one
// future per notified worker. A failed call aborts via CHECK.
std::vector<utils::Future<void>>
PullRpcClients::NotifyAllTransactionCommandAdvanced(tx::TransactionId tx_id) {
  return clients_.ExecuteOnWorkers<void>(
      0, [tx_id](int worker_id, auto &client) {
        auto res = client.template Call<TransactionCommandAdvancedRpc>(tx_id);
        // The message names the RPC exactly as it is declared, so that it
        // can be found by searching for the RPC name.
        CHECK(res) << "TransactionCommandAdvancedRpc failed";
      });
}
} // namespace distributed

View File

@ -1,48 +0,0 @@
#pragma once
#include <vector>
#include "database/graph_db_accessor.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Provides means of calling for the execution of a plan on some remote worker,
/// and getting the results of that execution. The results are returned in
/// batches and are therefore accompanied with an enum indicator of the state of
/// remote execution.
class PullRpcClients {
using ClientPool = communication::rpc::ClientPool;
public:
/// Stores a reference to `clients`; it is not owned by this object.
PullRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Calls a remote pull asynchronously. IMPORTANT: take care not to call this
/// function for the same (tx_id, worker_id, plan_id, command_id) before the
/// previous call has ended.
///
/// @todo: it might be cleaner to split Pull into {InitRemoteCursor,
/// Pull, RemoteAccumulate}, but that's a lot of refactoring and more
/// RPC calls.
utils::Future<PullData> Pull(database::GraphDbAccessor &dba, int worker_id,
int64_t plan_id, tx::CommandId command_id,
const Parameters &params,
const std::vector<query::Symbol> &symbols,
bool accumulate,
int batch_size = kDefaultBatchSize);
/// Forwards to RpcWorkerClients::GetWorkerIds.
auto GetWorkerIds() { return clients_.GetWorkerIds(); }
/// Notifies workers that the command of the given transaction has advanced;
/// returns one future per issued notification.
std::vector<utils::Future<void>> NotifyAllTransactionCommandAdvanced(
tx::TransactionId tx_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,154 +0,0 @@
#pragma once
#include <functional>
#include <type_traits>
#include <unordered_map>
#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination.hpp"
#include "distributed/index_rpc_messages.hpp"
#include "distributed/token_sharing_rpc_messages.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "storage/types.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"
#include "utils/thread.hpp"
namespace distributed {
/** A cache of RPC clients (of the given name/kind) per MG distributed worker.
 * Thread safe. */
class RpcWorkerClients {
 public:
  explicit RpcWorkerClients(Coordination &coordination)
      : coordination_(coordination), thread_pool_(ThreadPoolSize()) {}

  RpcWorkerClients(const RpcWorkerClients &) = delete;
  RpcWorkerClients(RpcWorkerClients &&) = delete;
  RpcWorkerClients &operator=(const RpcWorkerClients &) = delete;
  RpcWorkerClients &operator=(RpcWorkerClients &&) = delete;

  /** Returns the client pool for the given worker, creating it (from the
   * endpoint reported by the coordination) on first use. The lookup and the
   * creation are done under the lock. */
  auto &GetClientPool(int worker_id) {
    std::lock_guard<std::mutex> guard{lock_};
    auto found = client_pools_.find(worker_id);
    if (found != client_pools_.end()) return found->second;
    return client_pools_
        .emplace(std::piecewise_construct, std::forward_as_tuple(worker_id),
                 std::forward_as_tuple(coordination_.GetEndpoint(worker_id)))
        .first->second;
  }

  auto GetWorkerIds() { return coordination_.GetWorkerIds(); }

  /** Asynchronously executes the given function on the rpc client for the
   * given worker id. Returns an `utils::Future` of the given `execute`
   * function's return type. */
  template <typename TResult>
  auto ExecuteOnWorker(
      int worker_id,
      std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
          execute) {
    auto &client_pool = GetClientPool(worker_id);
    return thread_pool_.Run(execute, worker_id, std::ref(client_pool));
  }

  /** Asynchronously executes the `execute` function on all worker rpc clients
   * except the one whose id is `skip_worker_id`. Returns a vector of futures
   * containing the results of the `execute` function. */
  template <typename TResult>
  auto ExecuteOnWorkers(
      int skip_worker_id,
      std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
          execute) {
    std::vector<utils::Future<TResult>> futures;
    for (auto &worker_id : coordination_.GetWorkerIds()) {
      if (worker_id == skip_worker_id) continue;
      // The call yields a temporary, which is moved from as is; wrapping it
      // in std::move would be redundant.
      futures.emplace_back(ExecuteOnWorker(worker_id, execute));
    }
    return futures;
  }

 private:
  /** Number of threads for the pool. `hardware_concurrency` is allowed to
   * return 0 when the value cannot be determined; a pool without threads
   * would never run a task, so at least one thread is always used. */
  static unsigned ThreadPoolSize() {
    const unsigned detected = std::thread::hardware_concurrency();
    return detected > 0 ? detected : 1;
  }

  // TODO make Coordination const, it's member GetEndpoint must be const too.
  Coordination &coordination_;
  std::unordered_map<int, communication::rpc::ClientPool> client_pools_;
  std::mutex lock_;
  utils::ThreadPool thread_pool_;
};
/** Issues the index building RPC through the shared worker clients. */
class IndexRpcClients {
 public:
  explicit IndexRpcClients(RpcWorkerClients &clients) : clients_(clients) {}

  /** Asks every worker except `worker_id` to build the (label, property)
   * index within the given transaction. Returns one future per contacted
   * worker; each yields whether the RPC call produced a result. */
  auto GetBuildIndexFutures(const storage::Label &label,
                            const storage::Property &property,
                            tx::TransactionId transaction_id, int worker_id) {
    using communication::rpc::ClientPool;
    auto build_index = [label, property, transaction_id](int /* target */,
                                                         ClientPool &pool) {
      auto response = pool.Call<BuildIndexRpc>(label, property, transaction_id);
      return static_cast<bool>(response);
    };
    return clients_.ExecuteOnWorkers<bool>(worker_id, build_index);
  }

 private:
  RpcWorkerClients &clients_;
};
/** Wrapper class around a RPC call to share token between workers.
*/
class TokenSharingRpcClients {
public:
explicit TokenSharingRpcClients(RpcWorkerClients *clients)
: clients_(clients) {}
auto TransferToken(int worker_id) {
return clients_->ExecuteOnWorker<void>(
worker_id,
[](int worker_id, communication::rpc::ClientPool &client_pool) {
CHECK(client_pool.Call<TokenTransferRpc>())
<< "Unable to transfer token";
});
}
private:
RpcWorkerClients *clients_;
};
/** Join ongoing produces on all workers.
*
* Sends a RPC request to all workers when a transaction is ending, notifying
* them to end all ongoing produces tied to that transaction.
*/
class OngoingProduceJoinerRpcClients {
public:
OngoingProduceJoinerRpcClients(RpcWorkerClients &clients)
: clients_(clients) {}
void JoinOngoingProduces(tx::TransactionId tx_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [tx_id](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result =
client_pool.Call<distributed::WaitOnTransactionEndRpc>(tx_id);
CHECK(result)
<< "[WaitOnTransactionEndRpc] failed to notify that transaction "
<< tx_id << " ended";
});
// We need to wait for all workers to destroy pending futures to avoid
// using already destroyed (released) transaction objects.
for (auto &future : futures) {
future.wait();
}
}
private:
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,71 +0,0 @@
@0xccb448f0b998d9c8;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("distributed::capnp");
# Global address of a graph element: its gid together with the id of the
# worker that the element lives on.
struct Address {
gid @0 :UInt64;
workerId @1 :Int16;
}
# One property of a vertex or an edge: the numeric id of the property and its
# value.
struct PropertyValue {
id @0 :UInt16;
value @1 :TypedValue;
}
# Edge record: addresses of the two endpoint vertices, the numeric id of the
# edge type and the edge's properties.
struct Edge {
from @0 :Address;
to @1 :Address;
typeId @2 :UInt16;
properties @3 :List(PropertyValue);
}
# Vertex record: outgoing and incoming edge entries, numeric label ids and the
# vertex's properties.
struct Vertex {
outEdges @0 :List(EdgeEntry);
inEdges @1 :List(EdgeEntry);
labelIds @2 :List(UInt16);
properties @3 :List(PropertyValue);
# A single connection of the vertex: the address of the vertex on the other
# end, the address of the edge itself and the numeric id of the edge type.
struct EdgeEntry {
vertexAddress @0 :Address;
edgeAddress @1 :Address;
edgeTypeId @2 :UInt16;
}
}
# A query value. Exactly one member of the union is set; it determines the
# type of the value.
struct TypedValue {
union {
nullType @0 :Void;
bool @1 :Bool;
integer @2 :Int64;
double @3 :Float64;
string @4 :Text;
list @5 :List(TypedValue);
map @6 :List(Entry);
vertex @7 :VertexAccessor;
edge @8 :EdgeAccessor;
path @9 :Path;
}
# Key/value pair of a map value.
struct Entry {
key @0 :Text;
value @1 :TypedValue;
}
# A vertex as seen by an accessor: the raw global address plus the old and new
# records. Either record may be left unset; readers check for its presence.
struct VertexAccessor {
address @0 :UInt64;
old @1 :Vertex;
new @2: Vertex;
}
# Same as VertexAccessor, but for an edge.
struct EdgeAccessor {
address @0 :UInt64;
old @1 :Edge;
new @2: Edge;
}
# A path stored as two separate lists; the loader expects one more vertex
# than edges.
struct Path {
vertices @0 :List(VertexAccessor);
edges @1 :List(EdgeAccessor);
}
}

View File

@ -1,120 +0,0 @@
#include "distributed/serialization.hpp"
namespace {
// Stores the address into the builder in its global form. A local address is
// converted by taking the gid of the element it points to and pairing it
// with the given worker id.
template <class TAddress>
void SaveAddress(TAddress address,
                 distributed::capnp::Address::Builder *builder,
                 int16_t worker_id) {
  if (address.is_local()) {
    builder->setGid(address.local()->gid_);
  } else {
    builder->setGid(address.gid());
  }
  if (address.is_local()) {
    builder->setWorkerId(worker_id);
  } else {
    builder->setWorkerId(address.worker_id());
  }
}
// Builds a (global) vertex address from the stored gid and worker id.
storage::VertexAddress LoadVertexAddress(
    const distributed::capnp::Address::Reader &address_reader) {
  return {address_reader.getGid(), address_reader.getWorkerId()};
}
// Builds a (global) edge address from the stored gid and worker id.
storage::EdgeAddress LoadEdgeAddress(
    const distributed::capnp::Address::Reader &address_reader) {
  return {address_reader.getGid(), address_reader.getWorkerId()};
}
// Writes every property (id and value) into consecutive slots of the given
// list builder. The builder has to be initialized by the caller with room
// for all of the properties.
void SaveProperties(
    const PropertyValueStore &props,
    ::capnp::List<distributed::capnp::PropertyValue>::Builder *builder) {
  int64_t slot = 0;
  for (const auto &property : props) {
    auto entry = (*builder)[slot];
    entry.setId(property.first.Id());
    auto value_builder = entry.initValue();
    utils::SaveCapnpTypedValue(property.second, &value_builder);
    ++slot;
  }
}
// Reads all properties from the list reader into a new property store.
PropertyValueStore LoadProperties(
    const ::capnp::List<distributed::capnp::PropertyValue>::Reader &reader) {
  PropertyValueStore loaded;
  for (const auto &entry : reader) {
    query::TypedValue value;
    utils::LoadCapnpTypedValue(entry.getValue(), &value);
    loaded.set(storage::Property(entry.getId()), value);
  }
  return loaded;
}
} // namespace
namespace distributed {
// Stores the vertex into the Cap'n Proto builder: outgoing edges, incoming
// edges, label ids and properties, in that order. Local addresses are
// converted to global ones using `worker_id`.
void SaveVertex(const Vertex &vertex, capnp::Vertex::Builder *builder,
                int16_t worker_id) {
  // Fills an already sized list builder with the given edge entries.
  auto write_edges = [worker_id](const auto &entries, auto *list_builder) {
    int64_t slot = 0;
    for (const auto &entry : entries) {
      auto entry_builder = (*list_builder)[slot];
      auto vertex_address = entry_builder.initVertexAddress();
      SaveAddress(entry.vertex, &vertex_address, worker_id);
      auto edge_address = entry_builder.initEdgeAddress();
      SaveAddress(entry.edge, &edge_address, worker_id);
      entry_builder.setEdgeTypeId(entry.edge_type.Id());
      ++slot;
    }
  };

  auto out_edges = builder->initOutEdges(vertex.out_.size());
  write_edges(vertex.out_, &out_edges);

  auto in_edges = builder->initInEdges(vertex.in_.size());
  write_edges(vertex.in_, &in_edges);

  auto label_ids = builder->initLabelIds(vertex.labels_.size());
  for (size_t pos = 0; pos < vertex.labels_.size(); ++pos) {
    label_ids.set(pos, vertex.labels_[pos].Id());
  }

  auto properties = builder->initProperties(vertex.properties_.size());
  SaveProperties(vertex.properties_, &properties);
}
// Cap'n Proto specialization: builds a new vertex from the reader.
template <>
std::unique_ptr<Vertex> LoadVertex(const capnp::Vertex::Reader &reader) {
  // Converts a list of stored edge entries into an Edges collection.
  auto read_edges = [](const auto &entries_reader) {
    Edges result;
    for (const auto &entry : entries_reader) {
      auto vertex_address = LoadVertexAddress(entry.getVertexAddress());
      auto edge_address = LoadEdgeAddress(entry.getEdgeAddress());
      storage::EdgeType edge_type(entry.getEdgeTypeId());
      result.emplace(vertex_address, edge_address, edge_type);
    }
    return result;
  };

  auto loaded = std::make_unique<Vertex>();
  loaded->out_ = read_edges(reader.getOutEdges());
  loaded->in_ = read_edges(reader.getInEdges());
  for (const auto &label_id : reader.getLabelIds()) {
    loaded->labels_.emplace_back(label_id);
  }
  loaded->properties_ = LoadProperties(reader.getProperties());
  return loaded;
}
// Stores the edge into the Cap'n Proto builder: both endpoint addresses
// (converted to global ones using `worker_id`), the edge type id and the
// properties.
void SaveEdge(const Edge &edge, capnp::Edge::Builder *builder,
              int16_t worker_id) {
  auto from_address = builder->initFrom();
  SaveAddress(edge.from_, &from_address, worker_id);

  auto to_address = builder->initTo();
  SaveAddress(edge.to_, &to_address, worker_id);

  builder->setTypeId(edge.edge_type_.Id());

  auto properties = builder->initProperties(edge.properties_.size());
  SaveProperties(edge.properties_, &properties);
}
// Cap'n Proto specialization: builds a new edge from the reader. Both
// endpoints of an edge are vertices, hence vertex addresses are loaded.
template <>
std::unique_ptr<Edge> LoadEdge(const capnp::Edge::Reader &reader) {
  auto from_vertex = LoadVertexAddress(reader.getFrom());
  auto to_vertex = LoadVertexAddress(reader.getTo());
  auto loaded = std::make_unique<Edge>(from_vertex, to_vertex,
                                       storage::EdgeType{reader.getTypeId()});
  loaded->properties_ = LoadProperties(reader.getProperties());
  return loaded;
}
} // namespace distributed

View File

@ -1,209 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "distributed/serialization.capnp.h"
#include "storage/address_types.hpp"
#include "storage/edge.hpp"
#include "storage/types.hpp"
#include "storage/vertex.hpp"
#include "utils/serialization.hpp"
namespace distributed {
namespace impl {
// Saves the given address into the given archive. Converts a local address to a
// global one, using the given worker_id.
//
// The gid is written first and the worker id second, which is the order the
// matching load code reads them in.
template <typename TArchive, typename TAddress>
void SaveAddress(TArchive &ar, TAddress address, int worker_id) {
  if (address.is_local()) {
    // A local address carries no gid/worker id of its own; take the gid from
    // the element it points to and pair it with the id of this worker.
    ar << address.local()->gid_;
    ar << worker_id;
  } else {
    ar << address.gid();
    ar << address.worker_id();
  }
}
// Writes the number of properties into the archive, followed by the id and
// the value of each property.
template <typename TArchive>
void SaveProperties(TArchive &ar, const PropertyValueStore &props) {
  ar << props.size();
  for (auto &property : props) {
    ar << property.first.Id();
    utils::SaveTypedValue(ar, property.second);
  }
}
} // namespace impl
void SaveVertex(const Vertex &vertex, capnp::Vertex::Builder *builder,
int16_t worker_id);
/**
 * Serializes a vertex into a Boost archive: outgoing edges, incoming edges,
 * labels and properties, in that order, each preceded by its element count.
 *
 * @param ar - Archive into which to serialize.
 * @param vertex - The vertex getting serialized.
 * @param worker_id - ID of the worker this is happening on. Necessary for local
 * to global address conversion.
 * @tparam TArchive - type of archive.
 */
template <typename TArchive>
void SaveVertex(TArchive &ar, const Vertex &vertex, int worker_id) {
  // Writes the entry count followed by (vertex address, edge address, edge
  // type id) for every entry.
  auto write_edges = [&ar, worker_id](auto &entries) {
    ar << entries.size();
    for (auto &entry : entries) {
      impl::SaveAddress(ar, entry.vertex, worker_id);
      impl::SaveAddress(ar, entry.edge, worker_id);
      ar << entry.edge_type.Id();
    }
  };
  write_edges(vertex.out_);
  write_edges(vertex.in_);

  ar << vertex.labels_.size();
  for (auto &label : vertex.labels_) {
    ar << label.Id();
  }

  impl::SaveProperties(ar, vertex.properties_);
}
void SaveEdge(const Edge &edge, capnp::Edge::Builder *builder,
int16_t worker_id);
/**
 * Serializes an edge into a Boost archive: the `from` address, the `to`
 * address, the edge type id and the properties, in that order.
 *
 * @param ar - Archive into which to serialize.
 * @param edge - The edge getting serialized.
 * @param worker_id - ID of the worker this is happening on. Necessary for local
 * to global address conversion.
 * @tparam TArchive - type of archive.
 */
template <typename TArchive>
void SaveEdge(TArchive &ar, const Edge &edge, int worker_id) {
  // Endpoints first, so that loading can construct the edge before reading
  // its properties.
  impl::SaveAddress(ar, edge.from_, worker_id);
  impl::SaveAddress(ar, edge.to_, worker_id);

  ar << edge.edge_type_.Id();

  impl::SaveProperties(ar, edge.properties_);
}
/// Overload of `SaveElement` for edges (Cap'n Proto); forwards to `SaveEdge`
/// so that generic code can pick the function by the record type.
inline void SaveElement(const Edge &record, capnp::Edge::Builder *builder,
                        int16_t worker_id) {
  SaveEdge(record, builder, worker_id);
}

/// Overload of `SaveElement` for vertices (Cap'n Proto); forwards to
/// `SaveVertex`.
inline void SaveElement(const Vertex &record, capnp::Vertex::Builder *builder,
                        int16_t worker_id) {
  SaveVertex(record, builder, worker_id);
}

/// Overload of `SaveElement` for edges (Boost archive); forwards to
/// `SaveEdge`.
template <typename TArchive>
void SaveElement(TArchive &ar, const Edge &record, int worker_id) {
  SaveEdge(ar, record, worker_id);
}

/// Overload of `SaveElement` for vertices (Boost archive); forwards to
/// `SaveVertex`.
template <typename TArchive>
void SaveElement(TArchive &ar, const Vertex &record, int worker_id) {
  SaveVertex(ar, record, worker_id);
}
namespace impl {
// Reads a gid followed by a worker id from the archive and returns the
// vertex address built from them.
template <typename TArchive>
storage::VertexAddress LoadVertexAddress(TArchive &ar) {
  gid::Gid gid_of_vertex;
  int id_of_worker;
  ar >> gid_of_vertex;
  ar >> id_of_worker;
  return {gid_of_vertex, id_of_worker};
}
template <typename TArchive>
void LoadProperties(TArchive &ar, PropertyValueStore &store) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Property::IdT prop;
ar >> prop;
query::TypedValue value;
utils::LoadTypedValue(ar, value);
store.set(storage::Property(prop), static_cast<PropertyValue>(value));
}
}
} // namespace impl
/**
* Loads a Vertex from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Vertex> LoadVertex(TArchive &ar) {
auto vertex = std::make_unique<Vertex>();
auto decode_edges = [&ar](Edges &edges) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
auto vertex_address = impl::LoadVertexAddress(ar);
storage::EdgeType::IdT edge_type;
gid::Gid edge_id;
ar >> edge_id;
int edge_worker_id;
ar >> edge_worker_id;
ar >> edge_type;
edges.emplace(vertex_address, {edge_id, edge_worker_id},
storage::EdgeType(edge_type));
}
};
decode_edges(vertex->out_);
decode_edges(vertex->in_);
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Label::IdT label;
ar >> label;
vertex->labels_.emplace_back(label);
}
impl::LoadProperties(ar, vertex->properties_);
return vertex;
}
/// Declares an explicit specialization of `LoadVertex` whose argument is a
/// `capnp::Vertex::Reader`. Only the declaration is visible here; presumably
/// the definition lives in the accompanying source file (verify).
template <>
std::unique_ptr<Vertex> LoadVertex(const capnp::Vertex::Reader &reader);
/**
* Loads an Edge from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Edge> LoadEdge(TArchive &ar) {
auto from = impl::LoadVertexAddress(ar);
auto to = impl::LoadVertexAddress(ar);
storage::EdgeType::IdT edge_type;
ar >> edge_type;
auto edge = std::make_unique<Edge>(from, to, storage::EdgeType{edge_type});
impl::LoadProperties(ar, edge->properties_);
return edge;
}
/// Declares an explicit specialization of `LoadEdge` whose argument is a
/// `capnp::Edge::Reader`. Only the declaration is visible here; presumably the
/// definition lives in the accompanying source file (verify).
template <>
std::unique_ptr<Edge> LoadEdge(const capnp::Edge::Reader &reader);
} // namespace distributed

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include "communication/rpc/messages.hpp"
#include "distributed/storage_gc_rpc_messages.capnp.h"
#include "io/network/endpoint.hpp"
#include "transactions/transaction.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
;; RPC `ran-local-gc`. The request carries two fields: `local-oldest-active`
;; (a tx::TransactionId, encoded as UInt64 in Cap'n Proto) and `worker-id`
;; (int16_t). The response has no fields.
(lcp:define-rpc ran-local-gc
(:request
((local-oldest-active "tx::TransactionId" :capnp-type "UInt64")
(worker-id :int16_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "distributed/token_sharing_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
;; RPC `token-transfer`. Both the request and the response are empty, so the
;; call itself is the whole message.
(lcp:define-rpc token-transfer
(:request ())
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,100 +0,0 @@
#pragma once
#include "distributed/rpc_worker_clients.hpp"
#include "storage/dynamic_graph_partitioner/dgp.hpp"
namespace communication::rpc {
class Server;
}
namespace database {
class GraphDb;
};
namespace distributed {
/// Shares the token between dynamic graph partitioners instances across workers
/// by passing the token from one worker to another, in a circular fashion. This
/// guarantees that no two workers will execute the dynamic graph partitioner
/// step in the same time.
class TokenSharingRpcServer {
 public:
  /// Registers the `TokenTransferRpc` handler on `server` and starts the
  /// background thread. The thread waits for the token, runs one partitioner
  /// step and then passes the token to the next worker.
  ///
  /// The raw pointers are stored and later dereferenced from the background
  /// thread, so the pointed-to objects must outlive this instance.
  TokenSharingRpcServer(database::GraphDb *db, int worker_id,
                        distributed::Coordination *coordination,
                        communication::rpc::Server *server,
                        distributed::TokenSharingRpcClients *clients)
      : worker_id_(worker_id),
        coordination_(coordination),
        server_(server),
        clients_(clients),
        dgp_(db) {
    // Receiving the RPC means this worker now holds the token.
    server_->Register<distributed::TokenTransferRpc>(
        [this](const auto &req_reader, auto *res_builder) { token_ = true; });

    runner_ = std::thread([this]() {
      while (true) {
        // Wait till we get the token
        while (!token_) {
          if (shutting_down_) break;
          std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        if (shutting_down_) break;

        token_ = false;
        dgp_.Run();

        // Transfer token to next: the smallest worker id greater than ours,
        // wrapping around to the smallest id overall.
        // NOTE(review): `workers[0]` assumes the coordination always reports
        // at least one worker id -- confirm.
        auto workers = coordination_->GetWorkerIds();
        std::sort(workers.begin(), workers.end());
        auto pos = std::upper_bound(workers.begin(), workers.end(), worker_id_);
        int next_worker = pos != workers.end() ? *pos : workers[0];
        clients_->TransferToken(next_worker);
      }
    });
  }

  /// Starts the token sharing server which in turn starts the dynamic graph
  /// partitioner.
  void StartTokenSharing() {
    started_ = true;
    token_ = true;
  }

  /// Stops the background thread and, on worker 0 after token sharing was
  /// started, blocks until the token has returned to this worker.
  ~TokenSharingRpcServer() {
    shutting_down_ = true;
    if (runner_.joinable()) runner_.join();
    if (started_ && worker_id_ == 0) {
      // Wait till we get the token back otherwise some worker might try to
      // migrate to another worker while that worker is shutting down or
      // something else bad might happen
      // TODO(dgleich): Solve this better in the future since this blocks
      // shutting down until spinner steps complete
      while (!token_) {
        std::this_thread::sleep_for(std::chrono::milliseconds(500));
      }
    }
  }

 private:
  int worker_id_;
  distributed::Coordination *coordination_;
  communication::rpc::Server *server_;
  distributed::TokenSharingRpcClients *clients_;

  // Set by `StartTokenSharing`; read in the destructor.
  std::atomic<bool> started_{false};
  // True while this worker holds the token.
  std::atomic<bool> token_{false};
  // Set by the destructor to make the background thread exit.
  std::atomic<bool> shutting_down_{false};

  std::thread runner_;
  DynamicGraphPartitioner dgp_;
};
} // namespace distributed

View File

@ -1,86 +0,0 @@
#pragma once
#include <functional>
#include <vector>
#include "communication/rpc/server.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "transactions/engine.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/scheduler.hpp"
namespace distributed {
/// Periodically calls `ClearTransactionalCache(oldest_transaction)` on all
/// registered objects.
class TransactionalCacheCleaner {
  /// The wait time between two releases of local transaction objects that have
  /// expired on the master.
  static constexpr std::chrono::seconds kCacheReleasePeriod{1};

 public:
  /// Registers every object in `caches` and starts the scheduler. Each run
  /// calls `ClearTransactionalCache` on all registered objects, passing the
  /// last element of `tx_engine.GlobalGcSnapshot()`.
  ///
  /// `tx_engine` and the caches are held by reference and must outlive this
  /// object.
  template <typename... T>
  TransactionalCacheCleaner(tx::Engine &tx_engine, T &... caches)
      : tx_engine_(tx_engine) {
    Register(caches...);
    cache_clearing_scheduler_.Run(
        "DistrTxCacheGc", kCacheReleasePeriod,
        [this]() { this->Clear(tx_engine_.GlobalGcSnapshot().back()); });
  }

 protected:
  /// Registers the given object for transactional cleaning. The object will
  /// periodically get its `ClearTransactionalCache(tx::TransactionId)` method
  /// called with the oldest active transaction id. Note that the ONLY
  /// guarantee for the call param is that there are no transactions alive that
  /// have an id lower than it.
  template <typename TCache>
  void Register(TCache &cache) {
    functions_.emplace_back([&cache](tx::TransactionId oldest_active) {
      cache.ClearTransactionalCache(oldest_active);
    });
  }

 private:
  /// Registers the first object, then recurses over the remaining ones.
  template <typename TCache, typename... T>
  void Register(TCache &cache, T &... caches) {
    Register(cache);
    Register(caches...);
  }

  /// Invokes every registered cleaning callback with `oldest_active`.
  void Clear(tx::TransactionId oldest_active) {
    for (auto &f : functions_) f(oldest_active);
  }

  tx::Engine &tx_engine_;
  // The id is passed by value, matching the registered lambdas and `Clear`, so
  // no callback can alter the id seen by the callbacks that run after it.
  std::vector<std::function<void(tx::TransactionId oldest_active)>> functions_;
  utils::Scheduler cache_clearing_scheduler_;
};
/// Worker-side cache cleaner. On top of the periodic clearing inherited from
/// `TransactionalCacheCleaner`, it registers the transaction engine itself for
/// cleaning and installs a `WaitOnTransactionEndRpc` handler that asks the
/// produce server to finish and clear the ongoing produce plans for the
/// transaction named in the request.
class WorkerTransactionalCacheCleaner : public TransactionalCacheCleaner {
 public:
  template <class... T>
  WorkerTransactionalCacheCleaner(tx::WorkerEngine &tx_engine,
                                  communication::rpc::Server &server,
                                  ProduceRpcServer &produce_server,
                                  T &... caches)
      : TransactionalCacheCleaner(tx_engine, caches...),
        rpc_server_(server),
        produce_server_(produce_server) {
    Register(tx_engine);
    rpc_server_.Register<WaitOnTransactionEndRpc>(
        [this](const auto &request, auto *response_builder) {
          produce_server_.FinishAndClearOngoingProducePlans(
              request.getMember());
        });
  }

 private:
  communication::rpc::Server &rpc_server_;
  ProduceRpcServer &produce_server_;
};
} // namespace distributed

View File

@ -1,17 +0,0 @@
#>cpp
#pragma once
#include "distributed/transactional_cache_cleaner_rpc_messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "transactions/type.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
;; RPC `wait-on-transaction-end`. The request carries a single
;; tx::TransactionId (`member`, encoded as UInt64 in Cap'n Proto); the response
;; has no fields.
(lcp:define-rpc wait-on-transaction-end
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ()))
(lcp:pop-namespace)

View File

@ -1,116 +0,0 @@
#include <unordered_map>
#include <vector>
#include "distributed/updates_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
namespace {

/// Translates a non-`DONE` `UpdateResult` received from a remote worker into
/// the matching local exception. Does nothing for `UpdateResult::DONE`.
///
/// This helper is shared by edge creation and by vertex/edge removal, so the
/// lock-timeout message is worded for remote updates in general.
void RaiseIfRemoteError(UpdateResult result) {
  switch (result) {
    case UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
      throw query::RemoveAttachedVertexException();
    case UpdateResult::SERIALIZATION_ERROR:
      throw mvcc::SerializationError();
    case UpdateResult::LOCK_TIMEOUT_ERROR:
      throw utils::LockTimeoutException(
          "Remote LockTimeoutError during remote update");
    case UpdateResult::UPDATE_DELETED_ERROR:
      throw RecordDeletedError();
    case UpdateResult::DONE:
      break;
  }
}

}  // namespace
// Sends `delta` to `worker_id` through `UpdateRpc` and returns the result the
// worker reported. Aborts (CHECK) when the RPC yields no response.
UpdateResult UpdatesRpcClients::Update(int worker_id,
                                       const database::StateDelta &delta) {
  auto &&pool = worker_clients_.GetClientPool(worker_id);
  auto response = pool.Call<UpdateRpc>(delta);
  CHECK(response) << "UpdateRpc failed on worker: " << worker_id;
  return response->member;
}
// Asks `worker_id` to create a vertex with the given labels and properties in
// transaction `tx_id` and returns the gid reported back. Aborts (CHECK) when
// the RPC yields no response or the remote result is not `DONE`.
gid::Gid UpdatesRpcClients::CreateVertex(
    int worker_id, tx::TransactionId tx_id,
    const std::vector<storage::Label> &labels,
    const std::unordered_map<storage::Property, query::TypedValue>
        &properties) {
  auto &&pool = worker_clients_.GetClientPool(worker_id);
  auto response = pool.Call<CreateVertexRpc>(
      CreateVertexReqData{tx_id, labels, properties});
  CHECK(response) << "CreateVertexRpc failed on worker: " << worker_id;

  const auto &created = response->member;
  CHECK(created.result == UpdateResult::DONE)
      << "Remote Vertex creation result not UpdateResult::DONE";
  return created.gid;
}
// Creates an edge on the worker that owns `from` (which must be remote) and
// returns the new edge's address on that worker. Throws via
// `RaiseIfRemoteError` when the worker reports a failure.
storage::EdgeAddress UpdatesRpcClients::CreateEdge(
    tx::TransactionId tx_id, VertexAccessor &from, VertexAccessor &to,
    storage::EdgeType edge_type) {
  CHECK(from.address().is_remote()) << "In CreateEdge `from` must be remote";

  int owner = from.address().worker_id();
  auto &&pool = worker_clients_.GetClientPool(owner);
  auto response = pool.Call<CreateEdgeRpc>(
      CreateEdgeReqData{from.gid(), to.GlobalAddress(), edge_type, tx_id});
  CHECK(response) << "CreateEdge RPC failed on worker: " << owner;

  RaiseIfRemoteError(response->member.result);
  return {response->member.gid, owner};
}
// Tells the worker that owns `to` to record `edge_address` as an incoming
// edge. Only valid when `to` and the edge are remote and `from` lives on a
// different worker than `to`. Throws via `RaiseIfRemoteError` on remote
// failure.
void UpdatesRpcClients::AddInEdge(tx::TransactionId tx_id,
                                  VertexAccessor &from,
                                  storage::EdgeAddress edge_address,
                                  VertexAccessor &to,
                                  storage::EdgeType edge_type) {
  CHECK(to.address().is_remote() && edge_address.is_remote() &&
        (from.GlobalAddress().worker_id() != to.address().worker_id()))
      << "AddInEdge should only be called when `to` is remote and "
         "`from` is not on the same worker as `to`.";

  auto target_worker = to.GlobalAddress().worker_id();
  auto &&pool = worker_clients_.GetClientPool(target_worker);
  auto response = pool.Call<AddInEdgeRpc>(AddInEdgeReqData{
      from.GlobalAddress(), edge_address, to.gid(), edge_type, tx_id});
  CHECK(response) << "AddInEdge RPC failed on worker: " << target_worker;
  RaiseIfRemoteError(response->member);
}
// Asks `worker_id` to remove vertex `gid` in transaction `tx_id`, forwarding
// `check_empty` unchanged. Throws via `RaiseIfRemoteError` on remote failure.
void UpdatesRpcClients::RemoveVertex(int worker_id, tx::TransactionId tx_id,
                                     gid::Gid gid, bool check_empty) {
  auto &&pool = worker_clients_.GetClientPool(worker_id);
  auto response = pool.Call<RemoveVertexRpc>(
      RemoveVertexReqData{gid, tx_id, check_empty});
  CHECK(response) << "RemoveVertex RPC failed on worker: " << worker_id;
  RaiseIfRemoteError(response->member);
}
// Asks `worker_id` to remove edge `edge_gid` in transaction `tx_id`, also
// passing along the `from` vertex gid and the `to` vertex address. Throws via
// `RaiseIfRemoteError` on remote failure.
void UpdatesRpcClients::RemoveEdge(tx::TransactionId tx_id, int worker_id,
                                   gid::Gid edge_gid, gid::Gid vertex_from_id,
                                   storage::VertexAddress vertex_to_addr) {
  auto &&pool = worker_clients_.GetClientPool(worker_id);
  auto response = pool.Call<RemoveEdgeRpc>(
      RemoveEdgeData{tx_id, edge_gid, vertex_from_id, vertex_to_addr});
  CHECK(response) << "RemoveEdge RPC failed on worker: " << worker_id;
  RaiseIfRemoteError(response->member);
}
// Asks `worker_id` to drop the (remote) edge `edge_address` from the incoming
// edges of vertex `vertex_id`. Throws via `RaiseIfRemoteError` on remote
// failure.
void UpdatesRpcClients::RemoveInEdge(tx::TransactionId tx_id, int worker_id,
                                     gid::Gid vertex_id,
                                     storage::EdgeAddress edge_address) {
  CHECK(edge_address.is_remote()) << "RemoveInEdge edge_address is local.";

  auto &&pool = worker_clients_.GetClientPool(worker_id);
  auto response = pool.Call<RemoveInEdgeRpc>(
      RemoveInEdgeData{tx_id, vertex_id, edge_address});
  CHECK(response) << "RemoveInEdge RPC failed on worker: " << worker_id;
  RaiseIfRemoteError(response->member);
}
// Calls `UpdateApplyRpc` for transaction `tx_id` through
// `RpcWorkerClients::ExecuteOnWorkers`, passing `skip_worker_id` along, and
// returns one future per call. Each future yields the `UpdateResult` that
// worker reported; a missing response aborts (CHECK) and names the worker,
// like the other RPC wrappers in this file.
std::vector<utils::Future<UpdateResult>> UpdatesRpcClients::UpdateApplyAll(
    int skip_worker_id, tx::TransactionId tx_id) {
  return worker_clients_.ExecuteOnWorkers<UpdateResult>(
      skip_worker_id, [tx_id](int worker_id, auto &client) {
        auto res = client.template Call<UpdateApplyRpc>(tx_id);
        CHECK(res) << "UpdateApplyRpc failed on worker: " << worker_id;
        return res->member;
      });
}
} // namespace distributed

View File

@ -1,76 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "database/state_delta.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Exposes the functionality to send updates to other workers (that own the
/// graph element we are updating). Also enables us to call for a worker to
/// apply the accumulated deferred updates, or discard them.
class UpdatesRpcClients {
public:
/// Stores a reference to `clients`; the referenced object must outlive this
/// instance.
explicit UpdatesRpcClients(RpcWorkerClients &clients)
: worker_clients_(clients) {}
/// Sends an update delta to the given worker.
UpdateResult Update(int worker_id, const database::StateDelta &delta);
/// Creates a vertex on the given worker and returns its id.
gid::Gid CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates an edge on the given worker and returns its address. If the `to`
/// vertex is on the same worker as `from`, then all remote CRUD will be
/// handled by a call to this function. Otherwise a separate call to
/// `AddInEdge` might be necessary. Throws all the exceptions that can
/// occur remotely as a result of updating a vertex.
storage::EdgeAddress CreateEdge(tx::TransactionId tx_id,
VertexAccessor &from, VertexAccessor &to,
storage::EdgeType edge_type);
/// Adds the edge with the given address to the `to` vertex as an incoming
/// edge. Only used when `to` is remote and not on the same worker as `from`.
void AddInEdge(tx::TransactionId tx_id, VertexAccessor &from,
storage::EdgeAddress edge_address, VertexAccessor &to,
storage::EdgeType edge_type);
/// Removes a vertex from the other worker.
void RemoveVertex(int worker_id, tx::TransactionId tx_id, gid::Gid gid,
bool check_empty);
/// Removes an edge on another worker. This also handles the `from` vertex
/// outgoing edge, as that vertex is on the same worker as the edge. If the
/// `to` vertex is on the same worker, then that side is handled too by the
/// single RPC call, otherwise a separate call has to be made to
/// RemoveInEdge.
void RemoveEdge(tx::TransactionId tx_id, int worker_id, gid::Gid edge_gid,
gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr);
/// Removes the edge with the given (remote) address from the incoming edges
/// of vertex `vertex_id` on the given worker. This is the separate call
/// mentioned in the `RemoveEdge` documentation for the case where the `to`
/// vertex is not on the same worker as the edge.
void RemoveInEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid vertex_id, storage::EdgeAddress edge_address);
/// Calls for all the workers (except the given one) to apply their updates
/// and returns the future results.
std::vector<utils::Future<UpdateResult>> UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id);
private:
RpcWorkerClients &worker_clients_;
};
} // namespace distributed

View File

@ -1,187 +0,0 @@
#>cpp
#pragma once
#include <unordered_map>
#include "communication/rpc/messages.hpp"
#include "database/state_delta.hpp"
#include "distributed/updates_rpc_messages.capnp.h"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'db "/database/state_delta.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
(lcp:capnp-type-conversion "gid::Gid" "UInt64")
(lcp:capnp-type-conversion "storage::Label" "Storage.Common")
(lcp:capnp-type-conversion "storage::EdgeType" "Storage.Common")
(lcp:capnp-type-conversion "storage::Property" "Storage.Common")
(lcp:capnp-type-conversion "storage::EdgeAddress" "Storage.Address")
(lcp:capnp-type-conversion "storage::VertexAddress" "Storage.Address")
;; Enum shared by every RPC in this file. The C++ code refers to the
;; enumerators as UpdateResult::DONE, UpdateResult::SERIALIZATION_ERROR,
;; UpdateResult::LOCK_TIMEOUT_ERROR, UpdateResult::UPDATE_DELETED_ERROR and
;; UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR.
(lcp:define-enum update-result
(done
serialization-error
lock-timeout-error
update-deleted-error
unable-to-delete-vertex-error)
(:documentation "The result of sending or applying a deferred update to a worker.")
(:serialize))
;; RPC `update`: the request carries one database::StateDelta, the response one
;; UpdateResult. The enum is saved and loaded through the lcp enum helpers, and
;; `:capnp-init nil` is set for it.
(lcp:define-rpc update
(:request ((member "database::StateDelta" :capnp-type "Db.StateDelta")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
;; RPC `update-apply`: the request carries a transaction id, the response the
;; UpdateResult of applying the updates for that transaction.
(lcp:define-rpc update-apply
(:request ((member "tx::TransactionId")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
;; Response payload of the `create-vertex` and `create-edge` RPCs: an
;; UpdateResult plus the gid of the created element.
;; NOTE(review): this is the only struct in the file that requests `:boost`
;; serialization in addition to `:capnp` -- confirm that is still needed.
(lcp:define-struct create-result ()
((result "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult"))
(gid "gid::Gid" :documentation "Only valid if creation was successful."))
(:serialize :boost :capnp))
;; Request payload of the `create-vertex` RPC: transaction id, labels and a
;; property map. The property map has hand-written (de)serialization:
;;   * `:save-fun` / `:load-fun` write and read the element count followed by
;;     (property, typed value) pairs on a stream-style archive;
;;   * `:capnp-save` / `:capnp-load` do the same through utils::SaveMap and
;;     utils::LoadMap.
;; NOTE(review): the struct only requests `(:serialize :capnp)`, so the
;; `:save-fun` / `:load-fun` bodies may be unused -- confirm.
(lcp:define-struct create-vertex-req-data ()
((tx-id "tx::TransactionId")
(labels "std::vector<storage::Label>"
:capnp-save (lcp:capnp-save-vector "storage::capnp::Common" "storage::Label")
:capnp-load (lcp:capnp-load-vector "storage::capnp::Common" "storage::Label"))
(properties "std::unordered_map<storage::Property, query::TypedValue>"
:save-fun
#>cpp
ar << properties.size();
for (auto &kv : properties) {
ar << kv.first;
utils::SaveTypedValue(ar, kv.second);
}
cpp<#
:load-fun
#>cpp
size_t props_size;
ar >> props_size;
for (size_t i = 0; i < props_size; ++i) {
storage::Property p;
ar >> p;
query::TypedValue tv;
utils::LoadTypedValue(ar, tv);
properties.emplace(p, std::move(tv));
}
cpp<#
:capnp-type "Utils.Map(Storage.Common, Dis.TypedValue)"
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<storage::capnp::Common, capnp::TypedValue>(
${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
entry.first.Save(&key_builder);
auto value_builder = builder->initValue();
utils::SaveCapnpTypedValue(entry.second, &value_builder);
});
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<storage::capnp::Common, capnp::TypedValue>(
&${member}, ${reader},
[](const auto &reader) {
storage::Property prop;
prop.Load(reader.getKey());
query::TypedValue value;
utils::LoadCapnpTypedValue(reader.getValue(), &value);
return std::make_pair(prop, value);
});
cpp<#)))
(:serialize :capnp))
;; RPC `create-vertex`: request is a CreateVertexReqData, response a
;; CreateResult.
(lcp:define-rpc create-vertex
(:request ((member "CreateVertexReqData")))
(:response ((member "CreateResult"))))
;; Request payload of the `create-edge` RPC: gid of the `from` vertex, address
;; of the `to` vertex, edge type and transaction id.
(lcp:define-struct create-edge-req-data ()
((from "gid::Gid")
(to "storage::VertexAddress")
(edge-type "storage::EdgeType")
(tx-id "tx::TransactionId"))
(:serialize :capnp))
;; RPC `create-edge`: request is a CreateEdgeReqData, response a CreateResult.
(lcp:define-rpc create-edge
(:request ((member "CreateEdgeReqData")))
(:response ((member "CreateResult"))))
;; Request payload of the `add-in-edge` RPC: address of the `from` vertex,
;; address of the edge, gid of the `to` vertex, edge type and transaction id.
(lcp:define-struct add-in-edge-req-data ()
((from "storage::VertexAddress")
(edge-address "storage::EdgeAddress")
(to "gid::Gid")
(edge-type "storage::EdgeType")
(tx-id "tx::TransactionId"))
(:serialize :capnp))
;; RPC `add-in-edge`: request is an AddInEdgeReqData, response an UpdateResult.
(lcp:define-rpc add-in-edge
(:request ((member "AddInEdgeReqData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
;; Request payload of the `remove-vertex` RPC: vertex gid, transaction id and
;; the `check-empty` flag.
(lcp:define-struct remove-vertex-req-data ()
((gid "gid::Gid")
(tx-id "tx::TransactionId")
(check-empty :bool))
(:serialize :capnp))
;; RPC `remove-vertex`: request is a RemoveVertexReqData, response an
;; UpdateResult.
(lcp:define-rpc remove-vertex
(:request ((member "RemoveVertexReqData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
;; Request payload of the `remove-edge` RPC: transaction id, edge gid, gid of
;; the `from` vertex and address of the `to` vertex.
(lcp:define-struct remove-edge-data ()
((tx-id "tx::TransactionId")
(edge-id "gid::Gid")
(vertex-from-id "gid::Gid")
(vertex-to-address "storage::VertexAddress"))
(:serialize :capnp))
;; RPC `remove-edge`: request is a RemoveEdgeData, response an UpdateResult.
(lcp:define-rpc remove-edge
(:request ((member "RemoveEdgeData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
;; Request payload of the `remove-in-edge` RPC: transaction id, gid of the
;; vertex and address of the edge to drop from its incoming edges.
(lcp:define-struct remove-in-edge-data ()
((tx-id "tx::TransactionId")
(vertex "gid::Gid")
(edge-address "storage::EdgeAddress"))
(:serialize :capnp))
;; RPC `remove-in-edge`: request is a RemoveInEdgeData, response an
;; UpdateResult.
(lcp:define-rpc remove-in-edge
(:request ((member "RemoveInEdgeData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,385 +0,0 @@
#include <utility>
#include "glog/logging.h"
#include "distributed/updates_rpc_server.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
// Queues `delta` for the record it targets. The record is identified by
// `delta.vertex_id` when this instantiation works on vertices and by
// `delta.edge_id` otherwise. The first delta for a record also looks up and
// stores that record's accessor. Always returns `UpdateResult::DONE`.
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Emplace(
    const database::StateDelta &delta) {
  auto record_gid = std::is_same<TRecordAccessor, VertexAccessor>::value
                        ? delta.vertex_id
                        : delta.edge_id;

  std::lock_guard<utils::SpinLock> guard{lock_};
  auto entry = deltas_.find(record_gid);
  if (entry == deltas_.end()) {
    entry = deltas_
                .emplace(record_gid,
                         std::make_pair(FindAccessor(record_gid),
                                        std::vector<database::StateDelta>{}))
                .first;
  }
  entry->second.second.emplace_back(delta);

  // TODO call `RecordAccessor::update` to force serialization errors to
  // fail-fast (as opposed to when all the deltas get applied).
  //
  // This is problematic because `VersionList::update` needs to become
  // thread-safe within the same transaction. Note that the concurrency is
  // possible both between the owner worker interpretation thread and an RPC
  // thread (current thread), as well as multiple RPC threads if this
  // object's lock is released (perhaps desirable).
  //
  // A potential solution *might* be that `LockStore::Lock` returns a `bool`
  // indicating if the caller was the one obtaining the lock (not the same
  // as lock already being held by the same transaction).
  //
  // Another thing that needs to be done (if we do this) is ensuring that
  // `LockStore::Take` is thread-safe when called in parallel in the same
  // transaction. Currently it's thread-safe only when called in parallel
  // from different transactions (only one manages to take the RecordLock).
  //
  // Deferring the implementation of this as it's tricky, and essentially an
  // optimization.
  //
  // try {
  //   entry->second.first.update();
  // } catch (const mvcc::SerializationError &) {
  //   return UpdateResult::SERIALIZATION_ERROR;
  // } catch (const RecordDeletedError &) {
  //   return UpdateResult::UPDATE_DELETED_ERROR;
  // } catch (const utils::LockTimeoutException &) {
  //   return UpdateResult::LOCK_TIMEOUT_ERROR;
  // }
  return UpdateResult::DONE;
}
// Inserts a new vertex through the transaction's accessor, applies the given
// labels and properties to it, records it (with an empty delta list) in
// `deltas_` and returns its gid.
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateVertex(
    const std::vector<storage::Label> &labels,
    const std::unordered_map<storage::Property, query::TypedValue>
        &properties) {
  auto vertex = db_accessor_.InsertVertex();
  for (const auto &label : labels) {
    vertex.add_label(label);
  }
  for (const auto &property : properties) {
    vertex.PropsSet(property.first, property.second);
  }

  std::lock_guard<utils::SpinLock> guard{lock_};
  deltas_.emplace(vertex.gid(),
                  std::make_pair(vertex, std::vector<database::StateDelta>{}));
  return vertex.gid();
}
// Inserts a new edge record between `from` (a gid on this worker) and `to`,
// after passing both endpoint addresses through `LocalizedAddressIfPossible`.
// Records the edge (with an empty delta list) in `deltas_` and returns its
// gid.
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateEdge(
    gid::Gid from, storage::VertexAddress to, storage::EdgeType edge_type) {
  auto &db = db_accessor_.db();
  auto source = db.storage().LocalizedAddressIfPossible(
      storage::VertexAddress(from, db.WorkerId()));
  auto destination = db.storage().LocalizedAddressIfPossible(to);
  auto created = db_accessor_.InsertOnlyEdge(source, destination, edge_type);

  std::lock_guard<utils::SpinLock> guard{lock_};
  deltas_.emplace(created.gid(),
                  std::make_pair(created, std::vector<database::StateDelta>{}));
  return created.gid();
}
// Applies every queued delta to its record, record by record, while holding
// `lock_`. Returns `UpdateResult::DONE` when all deltas were applied.
// Otherwise it returns the error code of the first delta that failed, and the
// deltas after that one are not applied.
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Apply() {
std::lock_guard<utils::SpinLock> guard{lock_};
for (auto &kv : deltas_) {
// kv.second is the (accessor, deltas) pair stored by Emplace/Create*.
auto &record_accessor = kv.second.first;
// We need to reconstruct the record as in the meantime some local
// update might have updated it.
record_accessor.Reconstruct();
for (database::StateDelta &delta : kv.second.second) {
try {
auto &dba = db_accessor_;
switch (delta.type) {
// Delta types that are never valid as a remote record update.
case database::StateDelta::Type::TRANSACTION_BEGIN:
case database::StateDelta::Type::TRANSACTION_COMMIT:
case database::StateDelta::Type::TRANSACTION_ABORT:
case database::StateDelta::Type::CREATE_VERTEX:
case database::StateDelta::Type::CREATE_EDGE:
case database::StateDelta::Type::BUILD_INDEX:
// LOG(FATAL) terminates the process, so there is no real fall-through
// into the next case.
LOG(FATAL) << "Can only apply record update deltas for remote "
"graph element";
case database::StateDelta::Type::REMOVE_VERTEX:
if (!db_accessor().RemoveVertex(
reinterpret_cast<VertexAccessor &>(record_accessor),
delta.check_empty)) {
return UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR;
}
break;
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
case database::StateDelta::Type::SET_PROPERTY_EDGE:
record_accessor.PropsSet(delta.property, delta.value);
break;
case database::StateDelta::Type::ADD_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.add_label(delta.label);
break;
case database::StateDelta::Type::REMOVE_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.remove_label(delta.label);
break;
// The two ADD_*_EDGE cases modify the vertex record directly and then
// hand the delta to the WAL themselves.
case database::StateDelta::Type::ADD_OUT_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.out_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_to_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::ADD_IN_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.in_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_from_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::REMOVE_EDGE:
// We only remove the edge as a result of this StateDelta,
// because the removal of edge from vertex in/out is performed
// in REMOVE_[IN/OUT]_EDGE deltas.
db_accessor_.RemoveEdge(
reinterpret_cast<EdgeAccessor &>(record_accessor), false,
false);
break;
case database::StateDelta::Type::REMOVE_OUT_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveOutEdge(delta.edge_address);
break;
case database::StateDelta::Type::REMOVE_IN_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveInEdge(delta.edge_address);
break;
}
// Storage-level failures are reported to the caller as result codes
// instead of propagating as exceptions.
} catch (const mvcc::SerializationError &) {
return UpdateResult::SERIALIZATION_ERROR;
} catch (const RecordDeletedError &) {
return UpdateResult::UPDATE_DELETED_ERROR;
} catch (const utils::LockTimeoutException &) {
return UpdateResult::LOCK_TIMEOUT_ERROR;
}
}
}
return UpdateResult::DONE;
}
// Stores the database reference and registers one handler per update RPC on
// `server`. Every handler loads the request from its Cap'n Proto reader,
// performs or queues the operation and saves the result into the response
// builder.
UpdatesRpcServer::UpdatesRpcServer(database::GraphDb &db,
                                   communication::rpc::Server &server)
    : db_(db) {
  using DeltaType = database::StateDelta::Type;

  // Generic record update: queue the delta on the vertex or the edge side,
  // depending on its type.
  server.Register<UpdateRpc>([this](const auto &req_reader, auto *res_builder) {
    UpdateReq request;
    request.Load(req_reader);
    auto &delta = request.member;
    switch (delta.type) {
      case DeltaType::SET_PROPERTY_VERTEX:
      case DeltaType::ADD_LABEL:
      case DeltaType::REMOVE_LABEL:
      case DeltaType::REMOVE_OUT_EDGE:
      case DeltaType::REMOVE_IN_EDGE: {
        UpdateRes response(
            GetUpdates(vertex_updates_, delta.transaction_id).Emplace(delta));
        response.Save(res_builder);
        return;
      }
      case DeltaType::SET_PROPERTY_EDGE: {
        UpdateRes response(
            GetUpdates(edge_updates_, delta.transaction_id).Emplace(delta));
        response.Save(res_builder);
        return;
      }
      default:
        LOG(FATAL) << "Can't perform a remote update with delta type: "
                   << static_cast<int>(delta.type);
    }
  });

  // Apply everything queued for the given transaction.
  server.Register<UpdateApplyRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        UpdateApplyReq request;
        request.Load(req_reader);
        UpdateApplyRes response(Apply(request.member));
        response.Save(res_builder);
      });

  server.Register<CreateVertexRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        CreateVertexReq request;
        request.Load(req_reader);
        const auto &payload = request.member;
        gid::Gid created = GetUpdates(vertex_updates_, payload.tx_id)
                               .CreateVertex(payload.labels, payload.properties);
        CreateVertexRes response(CreateResult{UpdateResult::DONE, created});
        response.Save(res_builder);
      });

  server.Register<CreateEdgeRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        CreateEdgeReq request;
        request.Load(req_reader);
        auto data = request.member;
        auto creation_result = CreateEdge(data);

        // If `from` and `to` are both on this worker, we handle it in this
        // RPC call. Do it only if CreateEdge succeeded.
        const bool created = creation_result.result == UpdateResult::DONE;
        if (created && data.to.worker_id() == db_.WorkerId()) {
          auto to_delta = database::StateDelta::AddInEdge(
              data.tx_id, data.to.gid(), {data.from, db_.WorkerId()},
              {creation_result.gid, db_.WorkerId()}, data.edge_type);
          creation_result.result =
              GetUpdates(vertex_updates_, data.tx_id).Emplace(to_delta);
        }

        CreateEdgeRes response(creation_result);
        response.Save(res_builder);
      });

  server.Register<AddInEdgeRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        AddInEdgeReq request;
        request.Load(req_reader);
        const auto &payload = request.member;
        auto to_delta = database::StateDelta::AddInEdge(
            payload.tx_id, payload.to, payload.from, payload.edge_address,
            payload.edge_type);
        auto outcome =
            GetUpdates(vertex_updates_, payload.tx_id).Emplace(to_delta);
        AddInEdgeRes response(outcome);
        response.Save(res_builder);
      });

  server.Register<RemoveVertexRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        RemoveVertexReq request;
        request.Load(req_reader);
        const auto &payload = request.member;
        auto removal_delta = database::StateDelta::RemoveVertex(
            payload.tx_id, payload.gid, payload.check_empty);
        auto outcome =
            GetUpdates(vertex_updates_, payload.tx_id).Emplace(removal_delta);
        RemoveVertexRes response(outcome);
        response.Save(res_builder);
      });

  server.Register<RemoveEdgeRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        RemoveEdgeReq request;
        request.Load(req_reader);
        RemoveEdgeRes response(RemoveEdge(request.member));
        response.Save(res_builder);
      });

  server.Register<RemoveInEdgeRpc>(
      [this](const auto &req_reader, auto *res_builder) {
        RemoveInEdgeReq request;
        request.Load(req_reader);
        auto data = request.member;
        RemoveInEdgeRes response(
            GetUpdates(vertex_updates_, data.tx_id)
                .Emplace(database::StateDelta::RemoveInEdge(
                    data.tx_id, data.vertex, data.edge_address)));
        response.Save(res_builder);
      });
}
// Applies and then discards the queued vertex updates and edge updates of
// transaction `tx_id`. Both sides are always attempted; a vertex-side error
// takes precedence over an edge-side error in the returned result.
UpdateResult UpdatesRpcServer::Apply(tx::TransactionId tx_id) {
  auto apply_pending = [tx_id](auto &updates_by_tx) {
    auto accessor = updates_by_tx.access();
    auto entry = accessor.find(tx_id);
    // Nothing queued for this transaction counts as success.
    if (entry == accessor.end()) return UpdateResult::DONE;
    auto outcome = entry->second.Apply();
    accessor.remove(tx_id);
    return outcome;
  };

  auto vertex_outcome = apply_pending(vertex_updates_);
  auto edge_outcome = apply_pending(edge_updates_);
  return vertex_outcome != UpdateResult::DONE ? vertex_outcome : edge_outcome;
}
// Drops the queued vertex and edge updates of every transaction whose id is
// lower than `oldest_active`.
void UpdatesRpcServer::ClearTransactionalCache(
    tx::TransactionId oldest_active) {
  auto drop_expired = [oldest_active](auto &accessor) {
    for (auto &entry : accessor) {
      if (entry.first < oldest_active) {
        accessor.remove(entry.first);
      }
    }
  };

  auto vertex_access = vertex_updates_.access();
  drop_expired(vertex_access);
  auto edge_access = edge_updates_.access();
  drop_expired(edge_access);
}
// Returns the TransactionUpdates stored in `updates` for `tx_id`, creating it
// from (`db_`, `tx_id`) first when the transaction has no entry yet.
template <typename TAccessor>
UpdatesRpcServer::TransactionUpdates<TAccessor> &UpdatesRpcServer::GetUpdates(
    MapT<TAccessor> &updates, tx::TransactionId tx_id) {
  auto accessor = updates.access();
  auto inserted = accessor.emplace(tx_id, std::make_tuple(tx_id),
                                   std::make_tuple(std::ref(db_), tx_id));
  return inserted.first->second;
}
// Handles an edge-creation request: creates the edge through the
// transaction's edge updates, then buffers an AddOutEdge delta for the `from`
// vertex. Returns the result of buffering that delta together with the gid of
// the new edge.
CreateResult UpdatesRpcServer::CreateEdge(const CreateEdgeReqData &req) {
  auto &tx_edge_updates = GetUpdates(edge_updates_, req.tx_id);
  auto new_edge_gid =
      tx_edge_updates.CreateEdge(req.from, req.to, req.edge_type);

  // The new edge is addressed by its gid plus this worker's id.
  auto out_edge_delta = database::StateDelta::AddOutEdge(
      req.tx_id, req.from, req.to, {new_edge_gid, db_.WorkerId()},
      req.edge_type);

  auto &tx_vertex_updates = GetUpdates(vertex_updates_, req.tx_id);
  auto emplace_result = tx_vertex_updates.Emplace(out_edge_delta);
  return {emplace_result, new_edge_gid};
}
// Handles an edge-removal request by buffering up to three deltas, stopping
// at the first one whose result is not DONE:
//   1. removal of the edge itself,
//   2. removal of the out-edge entry on the `from` vertex,
//   3. removal of the in-edge entry on the `to` vertex, only when that vertex
//      belongs to this worker.
// Returns the result of the last delta that was buffered.
UpdateResult UpdatesRpcServer::RemoveEdge(const RemoveEdgeData &data) {
  // Edge removal.
  auto edge_delta = database::StateDelta::RemoveEdge(data.tx_id, data.edge_id);
  auto status = GetUpdates(edge_updates_, data.tx_id).Emplace(edge_delta);
  if (status != UpdateResult::DONE) return status;

  // Out-edge removal, for sure is local.
  auto out_edge_delta = database::StateDelta::RemoveOutEdge(
      data.tx_id, data.vertex_from_id, {data.edge_id, db_.WorkerId()});
  status = GetUpdates(vertex_updates_, data.tx_id).Emplace(out_edge_delta);
  if (status != UpdateResult::DONE) return status;

  // In-edge removal, might not be local; when it is not, there is nothing
  // more to do here.
  if (data.vertex_to_address.worker_id() != db_.WorkerId()) return status;

  auto in_edge_delta = database::StateDelta::RemoveInEdge(
      data.tx_id, data.vertex_to_address.gid(),
      {data.edge_id, db_.WorkerId()});
  return GetUpdates(vertex_updates_, data.tx_id).Emplace(in_edge_delta);
}
// Vertex specialization of FindAccessor: looks up the vertex with the given
// gid through this transaction's database accessor.
// NOTE(review): the meaning of the `false` argument is defined by
// GraphDbAccessor::FindVertex, which is not visible here -- confirm.
template <>
VertexAccessor
UpdatesRpcServer::TransactionUpdates<VertexAccessor>::FindAccessor(
gid::Gid gid) {
return db_accessor_.FindVertex(gid, false);
}
// Edge specialization of FindAccessor: looks up the edge with the given gid
// through this transaction's database accessor.
// NOTE(review): the meaning of the `false` argument is defined by
// GraphDbAccessor::FindEdge, which is not visible here -- confirm.
template <>
EdgeAccessor UpdatesRpcServer::TransactionUpdates<EdgeAccessor>::FindAccessor(
gid::Gid gid) {
return db_accessor_.FindEdge(gid, false);
}
} // namespace distributed

View File

@ -1,104 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/edge_accessor.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "storage/vertex_accessor.hpp"
#include "transactions/type.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
/// An RPC server that accepts and holds deferred updates (deltas) until it's
/// told to apply or discard them. The updates are organized and applied per
/// transaction in this single updates server.
///
/// Attempts to get serialization and update-after-delete errors to happen as
/// soon as possible during query execution (fail fast).
class UpdatesRpcServer {
// Remote updates for one transaction.
template <typename TRecordAccessor>
class TransactionUpdates {
public:
/// Creates the updates holder for transaction `tx_id`; its database accessor
/// is constructed from `db` and `tx_id`.
TransactionUpdates(database::GraphDb &db, tx::TransactionId tx_id)
: db_accessor_(db, tx_id) {}
/// Adds a delta and returns the result. Does not modify the state (data) of
/// the graph element the update is for, but calls the `update` method to
/// fail-fast on serialization and update-after-delete errors.
UpdateResult Emplace(const database::StateDelta &delta);
/// Creates a new vertex and returns its gid.
gid::Gid CreateVertex(
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates a new edge and returns its gid. Does not update the vertices at
/// the ends of the edge.
gid::Gid CreateEdge(gid::Gid from, storage::VertexAddress to,
storage::EdgeType edge_type);
/// Applies all the deltas on the record.
UpdateResult Apply();
/// Returns the database accessor owned by this transaction's updates.
auto &db_accessor() { return db_accessor_; }
private:
database::GraphDbAccessor db_accessor_;
// Buffered deltas keyed by record gid; each entry pairs the record's
// accessor with the deltas collected for that record.
std::unordered_map<
gid::Gid, std::pair<TRecordAccessor, std::vector<database::StateDelta>>>
deltas_;
// Multiple workers might be sending remote updates concurrently.
utils::SpinLock lock_;
// Helper method specialized for [Vertex|Edge]Accessor.
TRecordAccessor FindAccessor(gid::Gid gid);
};
public:
/// Constructs the updates server for `db`, using the given RPC `server`.
UpdatesRpcServer(database::GraphDb &db, communication::rpc::Server &server);
/// Applies all existing updates for the given transaction ID. If there are
/// no updates for that transaction, nothing happens. Clears the updates cache
/// after applying them, regardless of the result.
UpdateResult Apply(tx::TransactionId tx_id);
/// Clears the cache of local transactions that are completed. The signature
/// of this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
database::GraphDb &db_;
// Concurrent map from a transaction id to that transaction's buffered
// updates.
template <typename TAccessor>
using MapT =
ConcurrentMap<tx::TransactionId, TransactionUpdates<TAccessor>>;
// Buffered vertex updates, per transaction.
MapT<VertexAccessor> vertex_updates_;
// Buffered edge updates, per transaction.
MapT<EdgeAccessor> edge_updates_;
// Gets/creates the TransactionUpdates for the given transaction.
template <typename TAccessor>
TransactionUpdates<TAccessor> &GetUpdates(MapT<TAccessor> &updates,
tx::TransactionId tx_id);
// Performs edge creation for the given request.
CreateResult CreateEdge(const CreateEdgeReqData &req);
// Performs edge removal for the given request.
UpdateResult RemoveEdge(const RemoveEdgeData &data);
};
} // namespace distributed

View File

@ -1,9 +0,0 @@
@0xb3d70bc0576218f3;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("durability::capnp");
# Serialized form of the C++ `RecoveryInfo` struct: two 64-bit transaction
# ids, corresponding to its `snapshot_tx_id` and `max_wal_tx_id` members.
struct RecoveryInfo {
snapshotTxId @0 :UInt64;
maxWalTxId @1 :UInt64;
}

View File

@ -5,7 +5,6 @@
#include "database/graph_db.hpp"
#include "durability/hashed_file_reader.hpp"
#include "durability/recovery.capnp.h"
#include "storage/vertex_accessor.hpp"
#include "transactions/type.hpp"
@ -25,25 +24,6 @@ struct RecoveryInfo {
max_wal_tx_id == other.max_wal_tx_id;
}
bool operator!=(const RecoveryInfo &other) const { return !(*this == other); }
// Writes `snapshot_tx_id` and `max_wal_tx_id` into the given Cap'n Proto
// builder. `builder` is dereferenced unconditionally, so it must not be null.
void Save(capnp::RecoveryInfo::Builder *builder) const {
builder->setSnapshotTxId(snapshot_tx_id);
builder->setMaxWalTxId(max_wal_tx_id);
}
// Overwrites `snapshot_tx_id` and `max_wal_tx_id` with the values read from
// the given Cap'n Proto reader.
void Load(const capnp::RecoveryInfo::Reader &reader) {
snapshot_tx_id = reader.getSnapshotTxId();
max_wal_tx_id = reader.getMaxWalTxId();
}
private:
friend class boost::serialization::access;
// Serialization hook for boost::serialization (see the friend declaration of
// boost::serialization::access). Passes both transaction ids through the
// archive; the unnamed version argument is not used.
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &snapshot_tx_id;
ar &max_wal_tx_id;
}
};
/** Reads snapshot metadata from the end of the file without messing up the

View File

@ -4,29 +4,5 @@ set(io_src_files
network/socket.cpp
network/utils.cpp)
# Use this function to add each capnp file to generation. This way each file is
# standalone and we avoid recompiling everything.
#
# Arguments:
#   capnp_src_file - path of the schema, relative to the current source
#                    directory (e.g. network/endpoint.capnp).
#
# The generated <schema>.c++ and <schema>.h files are placed next to the
# schema in the source tree.
# NOTE: io_src_files and io_capnp_files are globally updated.
# TODO: This is duplicated from src/CMakeLists.txt, find a good way to
# generalize this on per subdirectory basis.
function(add_capnp capnp_src_file)
  set(cpp_file "${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.c++")
  set(h_file "${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.h")
  # VERBATIM keeps the argument escaping identical on every platform and
  # generator.
  add_custom_command(
    OUTPUT ${cpp_file} ${h_file}
    COMMAND ${CAPNP_EXE} compile -o${CAPNP_CXX_EXE} ${capnp_src_file}
            -I ${CMAKE_CURRENT_SOURCE_DIR}
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file} capnproto-proj
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    VERBATIM)
  # Update *global* io_capnp_files
  set(io_capnp_files ${io_capnp_files} ${cpp_file} ${h_file} PARENT_SCOPE)
  # Update *global* io_src_files
  set(io_src_files ${io_src_files} ${cpp_file} PARENT_SCOPE)
endfunction()
# Register the Endpoint schema; this extends io_src_files and io_capnp_files.
add_capnp(network/endpoint.capnp)
# Target that depends on every generated Cap'n Proto file registered above.
add_custom_target(generate_io_capnp DEPENDS ${io_capnp_files})
# Static I/O library built from the listed sources plus the generated ones.
add_library(mg-io STATIC ${io_src_files})
target_link_libraries(mg-io stdc++fs Threads::Threads fmt glog mg-utils)
# Cap'n Proto libraries.
target_link_libraries(mg-io capnp kj)
# Build generate_io_capnp before mg-io.
add_dependencies(mg-io generate_io_capnp)

View File

@ -1,10 +0,0 @@
@0x93c2449a1e02365a;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("io::network::capnp");
# Serialized form of the C++ `io::network::Endpoint` class: its address
# string, port and address family members.
struct Endpoint {
address @0 :Text;
port @1 :UInt16;
family @2 :UInt8;
}

View File

@ -24,18 +24,6 @@ Endpoint::Endpoint(const std::string &address, uint16_t port)
CHECK(family_ != 0) << "Not a valid IPv4 or IPv6 address: " << address;
}
// Writes the address, port and family of this endpoint into the given
// Cap'n Proto builder. `builder` is dereferenced unconditionally, so it must
// not be null.
void Endpoint::Save(capnp::Endpoint::Builder *builder) const {
builder->setAddress(address_);
builder->setPort(port_);
builder->setFamily(family_);
}
// Overwrites the address, port and family of this endpoint with the values
// read from the given Cap'n Proto reader. The loaded values are stored as-is;
// no validation is performed here.
void Endpoint::Load(const capnp::Endpoint::Reader &reader) {
address_ = reader.getAddress();
port_ = reader.getPort();
family_ = reader.getFamily();
}
bool Endpoint::operator==(const Endpoint &other) const {
return address_ == other.address_ && port_ == other.port_ &&
family_ == other.family_;

View File

@ -5,7 +5,6 @@
#include <iostream>
#include <string>
#include "io/network/endpoint.capnp.h"
#include "utils/exceptions.hpp"
namespace io::network {
@ -27,9 +26,6 @@ class Endpoint {
bool operator==(const Endpoint &other) const;
friend std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint);
void Save(capnp::Endpoint::Builder *builder) const;
void Load(const capnp::Endpoint::Reader &reader);
private:
std::string address_;
uint16_t port_{0};

View File

@ -1514,7 +1514,6 @@ code generation."
(when schema (write-line schema out))))))
;; Now generate the save/load C++ code in the cpp file.
(write-line "// Autogenerated Cap'n Proto serialization code" cpp-out)
(write-line "#include \"utils/serialization.hpp\"" cpp-out)
(let (open-namespaces)
(dolist (cpp-class (remove-if (lambda (cpp-type) (not (typep cpp-type 'cpp-class))) cpp-types))
;; Check if we need to open or close namespaces

View File

@ -12,9 +12,9 @@
#include <glog/logging.h>
#include "communication/bolt/v1/session.hpp"
#include "communication/server.hpp"
#include "config.hpp"
#include "database/graph_db.hpp"
#include "stats/stats.hpp"
#include "telemetry/telemetry.hpp"
#include "utils/flag_validation.hpp"
#include "utils/signals.hpp"
@ -103,8 +103,7 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
/// Run the Memgraph server.
///
/// Sets up all the required state before running `memgraph_main` and does any
/// required cleanup afterwards. `get_stats_prefix` is used to obtain the
/// prefix when logging Memgraph's statistics.
/// required cleanup afterwards.
///
/// Command line arguments and configuration files are read before calling any
/// of the supplied functions. Therefore, you should use flags only from those
@ -116,8 +115,7 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
///
/// @code
/// int main(int argc, char *argv[]) {
/// auto get_stats_prefix = []() -> std::string { return "memgraph"; };
/// return WithInit(argc, argv, get_stats_prefix, SingleNodeMain);
/// return WithInit(argc, argv, SingleNodeMain);
/// }
/// @endcode
///
@ -126,8 +124,8 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
/// `InitSignalHandlers` with appropriate function to shutdown the server you
/// started.
int WithInit(int argc, char **argv,
const std::function<std::string()> &get_stats_prefix,
const std::function<void()> &memgraph_main) {
google::SetUsageMessage("Memgraph database server");
gflags::SetVersionString(version_string);
// Load config before parsing arguments, so that flags from the command line
@ -142,9 +140,6 @@ int WithInit(int argc, char **argv,
// Unhandled exception handler init.
std::set_terminate(&utils::TerminateHandler);
stats::InitStatsLogging(get_stats_prefix());
utils::OnScopeExit stop_stats([] { stats::StopStatsLogging(); });
// Initialize the communication library.
communication::Init();
@ -163,7 +158,6 @@ int WithInit(int argc, char **argv,
}
void SingleNodeMain() {
google::SetUsageMessage("Memgraph single-node database server");
database::SingleNode db;
SessionData session_data{db};
@ -206,79 +200,6 @@ void SingleNodeMain() {
// End common stuff for enterprise and community editions
#ifdef MG_COMMUNITY
int main(int argc, char **argv) {
return WithInit(argc, argv, []() { return "memgraph"; }, SingleNodeMain);
return WithInit(argc, argv, SingleNodeMain);
}
#else // enterprise edition
// Distributed flags.
DEFINE_HIDDEN_bool(
master, false,
"If this Memgraph server is the master in a distributed deployment.");
DEFINE_HIDDEN_bool(
worker, false,
"If this Memgraph server is a worker in a distributed deployment.");
DECLARE_int32(worker_id);
/// Entry point for running Memgraph as the master of a distributed
/// deployment: creates the master database, starts the Bolt server and blocks
/// until the server is shut down.
void MasterMain() {
  google::SetUsageMessage("Memgraph distributed master");

  database::Master db;
  SessionData session_data{db};

  // A key/certificate based context and the "BoltS" service name are used
  // only when both the key file and the certificate file are given.
  ServerContext context;
  std::string service_name = "Bolt";
  const bool has_key_and_cert =
      !FLAGS_key_file.empty() && !FLAGS_cert_file.empty();
  if (has_key_and_cert) {
    context = ServerContext(FLAGS_key_file, FLAGS_cert_file);
    service_name = "BoltS";
  }

  ServerT server({FLAGS_interface, static_cast<uint16_t>(FLAGS_port)},
                 session_data, &context, FLAGS_session_inactivity_timeout,
                 service_name, FLAGS_num_workers);

  // Handler for regular termination signals.
  // Server needs to be shutdown first and then the database. This prevents a
  // race condition when a transaction is accepted during server shutdown.
  InitSignalHandlers([&server] { server.Shutdown(); });

  server.AwaitShutdown();
}
/// Entry point for running Memgraph as a worker of a distributed deployment:
/// creates the worker database and blocks in `WaitForShutdown()`.
void WorkerMain() {
google::SetUsageMessage("Memgraph distributed worker");
database::Worker db;
db.WaitForShutdown();
}
/// Enterprise edition entry point. Depending on the `--master` / `--worker`
/// flags it runs the distributed master, a distributed worker or the
/// single-node server, all wrapped in `WithInit`.
int main(int argc, char **argv) {
  // Prefix for statistics logging: "master", "worker-<id>" or "memgraph".
  auto get_stats_prefix = []() -> std::string {
    if (FLAGS_master) return "master";
    if (FLAGS_worker) return fmt::format("worker-{}", FLAGS_worker_id);
    return "memgraph";
  };

  // Picks the main function matching the requested role; the two distributed
  // roles are mutually exclusive.
  auto memgraph_main = []() {
    CHECK(!(FLAGS_master && FLAGS_worker))
        << "Can't run Memgraph as worker and master at the same time";
    if (FLAGS_master) {
      MasterMain();
      return;
    }
    if (FLAGS_worker) {
      WorkerMain();
      return;
    }
    SingleNodeMain();
  };

  return WithInit(argc, argv, get_stats_prefix, memgraph_main);
}
#endif // enterprise edition

Some files were not shown because too many files have changed in this diff Show More