Compare commits

...

1 Commit

Author              SHA1         Message                    Date
Matej Ferencevic    d4315b3242   Prepare release v0.11.0    2018-07-04 23:01:56 +02:00
215 changed files with 144 additions and 24135 deletions

View File

@@ -137,9 +137,6 @@ if (USE_READLINE)
endif()
endif()
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost 1.62 REQUIRED COMPONENTS iostreams serialization)
# OpenSSL
find_package(OpenSSL REQUIRED)
@@ -193,7 +190,6 @@ option(EXPERIMENTAL "Build experimental binaries" OFF)
option(CUSTOMERS "Build customer binaries" OFF)
option(TEST_COVERAGE "Generate coverage reports from running memgraph" OFF)
option(TOOLS "Build tools binaries" ON)
option(MG_COMMUNITY "Build Memgraph Community Edition" OFF)
option(ASAN "Build with Address Sanitizer. To get a reasonable performance option should be used only in Release or RelWithDebInfo build " OFF)
option(TSAN "Build with Thread Sanitizer. To get a reasonable performance option should be used only in Release or RelWithDebInfo build " OFF)
option(UBSAN "Build with Undefined Behaviour Sanitizer" OFF)
@@ -208,10 +204,6 @@ if (TEST_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
if (MG_COMMUNITY)
add_definitions(-DMG_COMMUNITY)
endif()
if (ASAN)
# Enable Address sanitizer and get nicer stack traces in error messages.
# NOTE: AddressSanitizer uses llvm-symbolizer binary from the Clang

View File

@@ -3,7 +3,6 @@
- build_debug/memgraph
- build_release/memgraph
- build_release/tools/src/mg_import_csv
- build_release/tools/src/mg_statsd
- config
filename: binaries.tar.gz

View File

@@ -33,13 +33,8 @@
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1200 make -j$THREADS memgraph tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot
# Generate distributed card fraud dataset.
cd ../tests/distributed/card_fraud
./generate_dataset.sh
cd ../../..
# Checkout to parent commit and initialize.
cd ../parent
cd ../../parent
git checkout HEAD~1
TIMEOUT=1200 ./init
@@ -88,7 +83,3 @@
cd ../../docs/user_technical
# TODO (mferencevic): uncomment this once couscous is replaced with pandoc
#./bundle_community
# Generate distributed card fraud dataset.
cd ../../tests/distributed/card_fraud
./generate_dataset.sh

View File

@@ -13,36 +13,12 @@ set(memgraph_src_files
communication/helpers.cpp
communication/init.cpp
communication/bolt/v1/decoder/decoded_value.cpp
communication/rpc/client.cpp
communication/rpc/protocol.cpp
communication/rpc/server.cpp
data_structures/concurrent/skiplist_gc.cpp
database/config.cpp
database/counters.cpp
database/graph_db.cpp
database/graph_db_accessor.cpp
database/state_delta.cpp
distributed/bfs_rpc_clients.cpp
distributed/bfs_subcursor.cpp
distributed/cluster_discovery_master.cpp
distributed/cluster_discovery_worker.cpp
distributed/coordination.cpp
distributed/coordination_master.cpp
distributed/coordination_worker.cpp
distributed/durability_rpc_clients.cpp
distributed/durability_rpc_server.cpp
distributed/index_rpc_server.cpp
distributed/plan_consumer.cpp
distributed/plan_dispatcher.cpp
distributed/cache.cpp
distributed/data_manager.cpp
distributed/data_rpc_clients.cpp
distributed/data_rpc_server.cpp
distributed/produce_rpc_server.cpp
distributed/pull_rpc_clients.cpp
distributed/serialization.cpp
distributed/updates_rpc_clients.cpp
distributed/updates_rpc_server.cpp
durability/paths.cpp
durability/recovery.cpp
durability/snapshooter.cpp
@@ -61,41 +37,16 @@ set(memgraph_src_files
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/typed_value.cpp
stats/metrics.cpp
stats/stats.cpp
storage/concurrent_id_mapper_master.cpp
storage/concurrent_id_mapper_worker.cpp
storage/dynamic_graph_partitioner/dgp.cpp
storage/dynamic_graph_partitioner/vertex_migrator.cpp
storage/edge_accessor.cpp
storage/locking/record_lock.cpp
storage/property_value.cpp
storage/property_value_store.cpp
storage/record_accessor.cpp
storage/vertex_accessor.cpp
transactions/engine_master.cpp
transactions/engine_single_node.cpp
transactions/engine_worker.cpp
transactions/snapshot.cpp
)
# -----------------------------------------------------------------------------
# Use this function to add each capnp file to generation. This way each file is
# standalone and we avoid recompiling everything.
# NOTE: memgraph_src_files and generated_capnp_files are globally updated.
function(add_capnp capnp_src_file)
set(cpp_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.c++)
set(h_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.h)
add_custom_command(OUTPUT ${cpp_file} ${h_file}
COMMAND ${CAPNP_EXE} compile -o${CAPNP_CXX_EXE} ${capnp_src_file} -I ${CMAKE_CURRENT_SOURCE_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file} capnproto-proj
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# Update *global* generated_capnp_files
set(generated_capnp_files ${generated_capnp_files} ${cpp_file} ${h_file} PARENT_SCOPE)
# Update *global* memgraph_src_files
set(memgraph_src_files ${memgraph_src_files} ${cpp_file} PARENT_SCOPE)
endfunction(add_capnp)
# Lisp C++ Preprocessing
set(lcp_exe ${CMAKE_SOURCE_DIR}/tools/lcp)
@@ -135,67 +86,19 @@ function(add_lcp lcp_file)
set(generated_lcp_files ${generated_lcp_files} ${h_file} ${cpp_file} ${capnp_file} PARENT_SCOPE)
endfunction(add_lcp)
add_lcp(database/counters_rpc_messages.lcp CAPNP_SCHEMA @0x95a2c3ea3871e945)
add_capnp(database/counters_rpc_messages.capnp)
add_lcp(database/state_delta.lcp CAPNP_SCHEMA @0xdea01657b3563887)
add_capnp(database/state_delta.capnp)
add_lcp(distributed/bfs_rpc_messages.lcp CAPNP_SCHEMA @0x8e508640b09b6d2a)
add_capnp(distributed/bfs_rpc_messages.capnp)
add_lcp(distributed/coordination_rpc_messages.lcp CAPNP_SCHEMA @0x93df0c4703cf98fb)
add_capnp(distributed/coordination_rpc_messages.capnp)
add_lcp(distributed/data_rpc_messages.lcp CAPNP_SCHEMA @0xc1c8a341ba37aaf5)
add_capnp(distributed/data_rpc_messages.capnp)
add_lcp(distributed/durability_rpc_messages.lcp CAPNP_SCHEMA @0xf5e53bc271e2163d)
add_capnp(distributed/durability_rpc_messages.capnp)
add_lcp(distributed/index_rpc_messages.lcp CAPNP_SCHEMA @0xa8aab46862945bd6)
add_capnp(distributed/index_rpc_messages.capnp)
add_lcp(distributed/plan_rpc_messages.lcp CAPNP_SCHEMA @0xfcbc48dc9f106d28)
add_capnp(distributed/plan_rpc_messages.capnp)
add_lcp(distributed/pull_produce_rpc_messages.lcp CAPNP_SCHEMA @0xa78a9254a73685bd)
add_capnp(distributed/pull_produce_rpc_messages.capnp)
add_lcp(distributed/storage_gc_rpc_messages.lcp CAPNP_SCHEMA @0xd705663dfe36cf81)
add_capnp(distributed/storage_gc_rpc_messages.capnp)
add_lcp(distributed/token_sharing_rpc_messages.lcp CAPNP_SCHEMA @0x8f295db54ec4caec)
add_capnp(distributed/token_sharing_rpc_messages.capnp)
add_lcp(distributed/transactional_cache_cleaner_rpc_messages.lcp CAPNP_SCHEMA @0xe2be6183a1ff9e11)
add_capnp(distributed/transactional_cache_cleaner_rpc_messages.capnp)
add_lcp(distributed/updates_rpc_messages.lcp CAPNP_SCHEMA @0x82d5f38d73c7b53a)
add_capnp(distributed/updates_rpc_messages.capnp)
add_lcp(query/plan/operator.lcp CAPNP_SCHEMA @0xe5cae8d045d30c42)
add_capnp(query/plan/operator.capnp)
add_lcp(stats/stats_rpc_messages.lcp CAPNP_SCHEMA @0xc19a87c81b9b4512)
add_capnp(stats/stats_rpc_messages.capnp)
add_lcp(storage/concurrent_id_mapper_rpc_messages.lcp CAPNP_SCHEMA @0xa6068dae93d225dd)
add_capnp(storage/concurrent_id_mapper_rpc_messages.capnp)
add_lcp(transactions/engine_rpc_messages.lcp CAPNP_SCHEMA @0xde02b7c49180cad5)
add_capnp(transactions/engine_rpc_messages.capnp)
add_custom_target(generate_lcp DEPENDS ${generated_lcp_files})
# Registering capnp must come after registering lcp files.
add_capnp(communication/rpc/messages.capnp)
add_capnp(distributed/serialization.capnp)
add_capnp(durability/recovery.capnp)
add_capnp(query/common.capnp)
add_capnp(query/frontend/ast/ast.capnp)
add_capnp(query/frontend/semantic/symbol.capnp)
add_capnp(storage/serialization.capnp)
add_capnp(transactions/common.capnp)
add_capnp(utils/serialization.capnp)
add_custom_target(generate_capnp DEPENDS generate_lcp ${generated_capnp_files})
# -----------------------------------------------------------------------------
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
# memgraph_lib depends on these libraries
set(MEMGRAPH_ALL_LIBS stdc++fs Threads::Threads fmt cppitertools
antlr_opencypher_parser_lib dl glog gflags capnp kj
antlr_opencypher_parser_lib dl glog gflags
${OPENSSL_LIBRARIES}
${Boost_IOSTREAMS_LIBRARY_RELEASE}
${Boost_SERIALIZATION_LIBRARY_RELEASE}
mg-utils mg-io)
if (USE_LTALLOC)
@@ -214,7 +117,6 @@ target_link_libraries(memgraph_lib ${MEMGRAPH_ALL_LIBS})
target_include_directories(memgraph_lib PRIVATE ${OPENSSL_INCLUDE_DIR})
add_dependencies(memgraph_lib generate_opencypher_parser)
add_dependencies(memgraph_lib generate_lcp)
add_dependencies(memgraph_lib generate_capnp)
# STATIC library used to store key-value pairs
add_library(kvstore_lib STATIC storage/kvstore.cpp)

View File

@@ -7,6 +7,8 @@
#include "communication/bolt/v1/encoder/chunked_encoder_buffer.hpp"
#include "communication/bolt/v1/encoder/client_encoder.hpp"
#include "communication/client.hpp"
#include "query/typed_value.hpp"
#include "utils/exceptions.hpp"

View File

@@ -10,7 +10,6 @@
#include "communication/bolt/v1/decoder/decoded_value.hpp"
#include "communication/bolt/v1/state.hpp"
#include "database/graph_db.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "query/typed_value.hpp"
#include "utils/exceptions.hpp"
@@ -122,13 +121,6 @@ State HandleRun(TSession &session, State state, Marker marker) {
return State::Result;
}
session.db_accessor_->AdvanceCommand();
if (session.db_.type() == database::GraphDb::Type::DISTRIBUTED_MASTER) {
auto tx_id = session.db_accessor_->transaction_id();
auto futures =
session.db_.pull_clients().NotifyAllTransactionCommandAdvanced(
tx_id);
for (auto &future : futures) future.wait();
}
}
auto &params_map = params.ValueMap();

View File

@@ -1,23 +0,0 @@
#pragma once
#include "communication/rpc/messages.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft {
enum class RpcType { REQUEST_VOTE, APPEND_ENTRIES };
template <class State>
struct PeerRpcRequest {
RpcType type;
RequestVoteRequest request_vote;
AppendEntriesRequest<State> append_entries;
};
struct PeerRpcReply {
RpcType type;
RequestVoteReply request_vote;
AppendEntriesReply append_entries;
};
} // namespace communication::raft
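PeerRpcRequest and PeerRpcReply above carry both possible payloads plus a type tag; the receiving side is meant to switch on `type` and fill only the matching reply field, which is what the commented-out handler in the RPC network implementation further down does. A small free-standing sketch of that dispatch (the `TMember` parameter is only a stand-in for anything exposing OnRequestVote/OnAppendEntries, such as RaftMember<State>):

// Dispatch sketch for the wrappers above: mirror the request's type tag and
// fill only the matching payload field of the reply.
#include "communication/raft/network_common.hpp"

template <class State, class TMember>
communication::raft::PeerRpcReply HandlePeerRpc(
    const communication::raft::PeerRpcRequest<State> &request,
    TMember &member) {
  using communication::raft::RpcType;
  communication::raft::PeerRpcReply reply;
  reply.type = request.type;
  switch (request.type) {
    case RpcType::REQUEST_VOTE:
      reply.request_vote = member.OnRequestVote(request.request_vote);
      break;
    case RpcType::APPEND_ENTRIES:
      reply.append_entries = member.OnAppendEntries(request.append_entries);
      break;
  }
  return reply;
}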

View File

@@ -1,699 +0,0 @@
#pragma once
#include <algorithm>
#include "fmt/format.h"
#include "glog/logging.h"
namespace communication::raft {
namespace impl {
template <class State>
RaftMemberImpl<State>::RaftMemberImpl(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id,
const RaftConfig &config)
: network_(network), storage_(storage), id_(id), config_(config) {
std::lock_guard<std::mutex> lock(mutex_);
tie(term_, voted_for_) = storage_.GetTermAndVotedFor();
for (const auto &peer_id : config_.members) {
peer_states_[peer_id] = std::make_unique<RaftPeerState>();
}
SetElectionTimer();
}
template <class State>
RaftMemberImpl<State>::~RaftMemberImpl() {
Stop();
}
template <class State>
void RaftMemberImpl<State>::Stop() {
{
std::lock_guard<std::mutex> lock(mutex_);
if (!exiting_) {
LogInfo("Stopping...");
exiting_ = true;
}
}
state_changed_.notify_all();
}
template <class State>
template <class... Args>
void RaftMemberImpl<State>::LogInfo(const std::string &format,
Args &&... args) {
LOG(INFO) << fmt::format("[id = {}, term = {}] {}", id_, term_,
fmt::format(format, std::forward<Args>(args)...))
<< std::endl;
}
template <class State>
void RaftMemberImpl<State>::TimerThreadMain() {
std::unique_lock<std::mutex> lock(mutex_);
while (!exiting_) {
if (Clock::now() >= next_election_time_) {
StartNewElection();
}
state_changed_.wait_until(lock, next_election_time_);
}
}
template <class State>
void RaftMemberImpl<State>::PeerThreadMain(std::string peer_id) {
RaftPeerState &peer_state = *peer_states_[peer_id];
LogInfo("Peer thread started for {}", peer_id);
std::unique_lock<std::mutex> lock(mutex_);
/* This loop will either call a function that issues an RPC or wait on the
* condition variable. It must not do both! Lock on `mutex_` is released while
* waiting for RPC response, which might cause us to miss a notification on
* `state_changed_` conditional variable and wait indefinitely. The safest
* thing to do is to assume some important part of state was modified while we
* were waiting for the response and loop around to check. */
while (!exiting_) {
TimePoint now = Clock::now();
TimePoint wait_until;
if (mode_ != RaftMode::FOLLOWER && peer_state.backoff_until > now) {
wait_until = peer_state.backoff_until;
} else {
switch (mode_) {
case RaftMode::FOLLOWER:
wait_until = TimePoint::max();
break;
case RaftMode::CANDIDATE:
if (!peer_state.request_vote_done) {
RequestVote(peer_id, peer_state, lock);
continue;
}
break;
case RaftMode::LEADER:
if (peer_state.next_index <= storage_.GetLastLogIndex() ||
now >= peer_state.next_heartbeat_time) {
AppendEntries(peer_id, peer_state, lock);
continue;
} else {
wait_until = peer_state.next_heartbeat_time;
}
break;
}
}
state_changed_.wait_until(lock, wait_until);
}
LogInfo("Peer thread exiting for {}", peer_id);
}
template <class State>
void RaftMemberImpl<State>::CandidateOrLeaderTransitionToFollower() {
DCHECK(mode_ != RaftMode::FOLLOWER)
<< "`CandidateOrLeaderTransitionToFollower` called from follower mode";
mode_ = RaftMode::FOLLOWER;
leader_ = {};
SetElectionTimer();
}
template <class State>
void RaftMemberImpl<State>::CandidateTransitionToLeader() {
DCHECK(mode_ == RaftMode::CANDIDATE)
<< "`CandidateTransitionToLeader` called while not in candidate mode";
mode_ = RaftMode::LEADER;
leader_ = id_;
/* We don't want to trigger elections while in leader mode. */
next_election_time_ = TimePoint::max();
/* [Raft thesis, Section 6.4]
* "The Leader Completeness Property guarantees that a leader has all
* committed entries, but at the start of its term, it may not know which
* those are. To find out, it needs to commit an entry from its term. Raft
* handles this by having each leader commit a blank no-op entry into the log
* at the start of its term. As soon as this no-op entry is committed, the
* leader's commit index will be at least as large as any other servers'
* during its term." */
LogEntry<State> entry;
entry.term = term_;
entry.command = std::experimental::nullopt;
storage_.AppendLogEntry(entry);
}
template <class State>
bool RaftMemberImpl<State>::CandidateOrLeaderNoteTerm(const TermId new_term) {
DCHECK(mode_ != RaftMode::FOLLOWER)
<< "`CandidateOrLeaderNoteTerm` called from follower mode";
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (term_ < new_term) {
UpdateTermAndVotedFor(new_term, {});
CandidateOrLeaderTransitionToFollower();
return true;
}
return false;
}
template <class State>
void RaftMemberImpl<State>::UpdateTermAndVotedFor(
const TermId new_term,
const std::experimental::optional<MemberId> &new_voted_for) {
term_ = new_term;
voted_for_ = new_voted_for;
leader_ = {};
storage_.WriteTermAndVotedFor(term_, voted_for_);
}
template <class State>
void RaftMemberImpl<State>::SetElectionTimer() {
/* [Raft thesis, section 3.4]
* "Raft uses randomized election timeouts to ensure that split votes are rare
* and that they are resolved quickly. To prevent split votes in the first
* place, election timeouts are chosen randomly from a fixed interval (e.g.,
* 150-300 ms)." */
std::uniform_int_distribution<uint64_t> distribution(
config_.leader_timeout_min.count(), config_.leader_timeout_max.count());
Clock::duration wait_interval = std::chrono::milliseconds(distribution(rng_));
next_election_time_ = Clock::now() + wait_interval;
}
template <class State>
void RaftMemberImpl<State>::StartNewElection() {
LogInfo("Starting new election");
/* [Raft thesis, section 3.4]
* "To begin an election, a follower increments its current term and
* transitions to candidate state. It then votes for itself and issues
* RequestVote RPCs in parallel to each of the other servers in the cluster."
*/
UpdateTermAndVotedFor(term_ + 1, id_);
mode_ = RaftMode::CANDIDATE;
/* [Raft thesis, section 3.4]
* "Each candidate restarts its randomized election timeout at the start of an
* election, and it waits for that timeout to elapse before starting the next
* election; this reduces the likelihood of another split vote in the new
* election." */
SetElectionTimer();
for (const auto &peer_id : config_.members) {
if (peer_id == id_) {
continue;
}
auto &peer_state = peer_states_[peer_id];
peer_state->request_vote_done = false;
peer_state->voted_for_me = false;
peer_state->match_index = 0;
peer_state->next_index = storage_.GetLastLogIndex() + 1;
/* [Raft thesis, section 3.5]
* "Until the leader has discovered where it and the follower's logs match,
* the leader can send AppendEntries with no entries (like heartbeats) to
* save bandwidth. Then, once the matchIndex immediately precedes the
* nextIndex, the leader should begin to send the actual entries." */
peer_state->suppress_log_entries = true;
/* [Raft thesis, section 3.4]
* "Once a candidate wins an election, it becomes leader. It then sends
* heartbeat messages to all of the other servers to establish its authority
* and prevent new elections."
*
* This will make newly elected leader send heartbeats immediately.
*/
peer_state->next_heartbeat_time = TimePoint::min();
peer_state->backoff_until = TimePoint::min();
}
// We already have the majority if we're in a single node cluster.
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
/* Notify peer threads to start issuing RequestVote RPCs. */
state_changed_.notify_all();
}
template <class State>
bool RaftMemberImpl<State>::CountVotes() {
DCHECK(mode_ == RaftMode::CANDIDATE)
<< "`CountVotes` should only be called from candidate mode";
int num_votes = 0;
for (const auto &peer_id : config_.members) {
if (peer_id == id_ || peer_states_[peer_id]->voted_for_me) {
num_votes++;
}
}
return 2 * num_votes > config_.members.size();
}
template <class State>
void RaftMemberImpl<State>::RequestVote(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Requesting vote from {}", peer_id);
RequestVoteRequest request;
request.candidate_term = term_;
request.candidate_id = id_;
request.last_log_index = storage_.GetLastLogIndex();
request.last_log_term = storage_.GetLogTerm(request.last_log_index);
RequestVoteReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendRequestVote(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
if (term_ != request.candidate_term || mode_ != RaftMode::CANDIDATE ||
exiting_) {
LogInfo("Ignoring RequestVote RPC reply from {}", peer_id);
return;
}
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(reply.term == term_) << "Stale RequestVote RPC reply";
peer_state.request_vote_done = true;
if (reply.vote_granted) {
peer_state.voted_for_me = true;
LogInfo("Got vote from {}", peer_id);
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
} else {
LogInfo("Vote denied from {}", peer_id);
}
state_changed_.notify_all();
}
template <class State>
void RaftMemberImpl<State>::AdvanceCommitIndex() {
DCHECK(mode_ == RaftMode::LEADER)
<< "`AdvanceCommitIndex` can only be called from leader mode";
std::vector<LogIndex> match_indices;
for (const auto &peer : peer_states_) {
match_indices.push_back(peer.second->match_index);
}
match_indices.push_back(storage_.GetLastLogIndex());
std::sort(match_indices.begin(), match_indices.end(),
std::greater<LogIndex>());
LogIndex new_commit_index_ = match_indices[(config_.members.size() - 1) / 2];
LogInfo("Trying to advance commit index {} to {}", commit_index_,
new_commit_index_);
/* This can happen because we reset `match_index` to 0 for every peer when
* elected. */
if (commit_index_ >= new_commit_index_) {
return;
}
/* [Raft thesis, section 3.6.2]
* (...) Raft never commits log entries from previous terms by counting
* replicas. Only log entries from the leader's current term are committed by
* counting replicas; once an entry from the current term has been committed
* in this way, then all prior entries are committed indirectly because of the
* Log Matching Property." */
if (storage_.GetLogTerm(new_commit_index_) != term_) {
LogInfo("Cannot commit log entry from previous term");
return;
}
commit_index_ = std::max(commit_index_, new_commit_index_);
}
template <class State>
void RaftMemberImpl<State>::AppendEntries(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Appending entries to {}", peer_id);
AppendEntriesRequest<State> request;
request.leader_term = term_;
request.leader_id = id_;
request.prev_log_index = peer_state.next_index - 1;
request.prev_log_term = storage_.GetLogTerm(peer_state.next_index - 1);
if (!peer_state.suppress_log_entries &&
peer_state.next_index <= storage_.GetLastLogIndex()) {
request.entries = storage_.GetLogSuffix(peer_state.next_index);
} else {
request.entries = {};
}
request.leader_commit = commit_index_;
AppendEntriesReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendAppendEntries(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
/* There is probably something wrong with this peer, let's avoid sending log
* entries. */
peer_state.suppress_log_entries = true;
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
if (term_ != request.leader_term || exiting_) {
return;
}
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(mode_ == RaftMode::LEADER)
<< "Elected leader for term should never change";
DCHECK(reply.term == term_) << "Got stale AppendEntries reply";
if (reply.success) {
/* We've found a match, we can start sending log entries. */
peer_state.suppress_log_entries = false;
LogIndex new_match_index = request.prev_log_index + request.entries.size();
DCHECK(peer_state.match_index <= new_match_index)
<< "`match_index` should increase monotonically within a term";
peer_state.match_index = new_match_index;
AdvanceCommitIndex();
peer_state.next_index = peer_state.match_index + 1;
peer_state.next_heartbeat_time = Clock::now() + config_.heartbeat_interval;
} else {
DCHECK(peer_state.next_index > 1)
<< "Log replication should not fail for first log entry.";
--peer_state.next_index;
}
state_changed_.notify_all();
}
template <class State>
RequestVoteReply RaftMemberImpl<State>::OnRequestVote(
const RequestVoteRequest &request) {
std::lock_guard<std::mutex> lock(mutex_);
LogInfo("RequestVote RPC request from {}", request.candidate_id);
RequestVoteReply reply;
/* [Raft thesis, Section 3.3]
* "If a server receives a request with a stale term number, it rejects the
* request." */
if (request.candidate_term < term_) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (request.candidate_term > term_) {
if (mode_ != RaftMode::FOLLOWER) {
CandidateOrLeaderTransitionToFollower();
}
UpdateTermAndVotedFor(request.candidate_term, {});
}
/* [Raft thesis, Section 3.6.1]
* "Raft uses the voting process to prevent a candidate from winning an
* election unless its log contains all committed entries. (...) The
* RequestVote RPC implements this restriction: the RPC includes information
* about the candidate's log, and the voter denies its vote if its own log is
* more up-to-date than that of the candidate. Raft determines which of two
* logs is more up-to-date by comparing the index and term of the last entries
* in the logs. If the logs have last entries with different terms, then the
* log with the later term is more up-to-date. If the logs end with the same
* term, then whichever log is longer is more up-to-date." */
LogIndex my_last_log_index = storage_.GetLastLogIndex();
TermId my_last_log_term = storage_.GetLogTerm(my_last_log_index);
if (my_last_log_term > request.last_log_term ||
(my_last_log_term == request.last_log_term &&
my_last_log_index > request.last_log_index)) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.4]
* "Each server will vote for at most one candidate in a given term, on a
* first-come-first-served basis."
*/
/* We voted for someone else in this term. */
if (request.candidate_term == term_ && voted_for_ &&
*voted_for_ != request.candidate_id) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* Now we know we will vote for this candidate, because its term is at least
* as big as ours and we haven't voted for anyone else. */
UpdateTermAndVotedFor(request.candidate_term, request.candidate_id);
/* [Raft thesis, Section 3.4]
* A server remains in follower state as long as it receives valid RPCs from a
* leader or candidate. */
SetElectionTimer();
state_changed_.notify_all();
reply.term = request.candidate_term;
reply.vote_granted = true;
return reply;
}
template <class State>
AppendEntriesReply RaftMemberImpl<State>::OnAppendEntries(
const AppendEntriesRequest<State> &request) {
std::lock_guard<std::mutex> lock(mutex_);
LogInfo("AppendEntries RPC request from {}", request.leader_id);
AppendEntriesReply reply;
/* [Raft thesis, Section 3.3]
* "If a server receives a request with a stale term number, it rejects the
* request." */
if (request.leader_term < term_) {
reply.term = term_;
reply.success = false;
return reply;
}
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (request.leader_term > term_) {
if (mode_ != RaftMode::FOLLOWER) {
CandidateOrLeaderTransitionToFollower();
}
UpdateTermAndVotedFor(request.leader_term, {});
}
/* [Raft thesis, Section 3.4]
* "While waiting for votes, a candidate may receive an AppendEntries RPC from
* another server claiming to be leader. If the leader's term (included in its
* RPC) is at least as large as the candidate's current term, then the
* candidate recognizes the leader as legitimate and returns to follower
* state." */
if (mode_ == RaftMode::CANDIDATE && request.leader_term == term_) {
CandidateOrLeaderTransitionToFollower();
}
DCHECK(mode_ != RaftMode::LEADER)
<< "Leader cannot accept `AppendEntries` RPC";
DCHECK(term_ == request.leader_term) << "Term should be equal to request "
"term when accepting `AppendEntries` "
"RPC";
leader_ = request.leader_id;
/* [Raft thesis, Section 3.4]
* A server remains in follower state as long as it receives valid RPCs from a
* leader or candidate. */
SetElectionTimer();
state_changed_.notify_all();
/* [Raft thesis, Section 3.5]
* "When sending an AppendEntries RPC, the leader includes the index and term
* of the entry in its log that immediately precedes the new entries. If the
* follower does not find an entry in its log with the same index and term,
* then it refuses the new entries." */
if (request.prev_log_index > storage_.GetLastLogIndex() ||
storage_.GetLogTerm(request.prev_log_index) != request.prev_log_term) {
reply.term = term_;
reply.success = false;
return reply;
}
/* [Raft thesis, Section 3.5]
* "To bring a follower's log into consistency with its own, the leader must
* find the latest log entry where the two logs agree, delete any entries in
* the follower's log after that point, and send the follower all of the
* leader's entries after that point." */
/* Entry at `request.prev_log_index` is the last entry where ours and leader's
* logs agree. It's time to replace the tail of the log with new entries from
* the leader. We have to be careful here as duplicated AppendEntries RPCs
* could cause data loss.
*
* There is a possibility that an old AppendEntries RPC is duplicated and
* received after processing newer one. For example, leader appends entry 3
* and then entry 4, but follower receives entry 3, then entry 4, and then
* entry 3 again. We have to be careful not to delete entry 4 from log when
* processing the last RPC. */
LogIndex index = request.prev_log_index;
auto it = request.entries.begin();
for (; it != request.entries.end(); ++it) {
++index;
if (index > storage_.GetLastLogIndex()) {
break;
}
if (storage_.GetLogTerm(index) != it->term) {
LogInfo("Truncating log suffix from index {}", index);
DCHECK(commit_index_ < index)
<< "Committed entries should never be truncated from the log";
storage_.TruncateLogSuffix(index);
break;
}
}
LogInfo("Appending {} out of {} logs from {}.", request.entries.end() - it,
request.entries.size(), request.leader_id);
for (; it != request.entries.end(); ++it) {
storage_.AppendLogEntry(*it);
}
commit_index_ = std::max(commit_index_, request.leader_commit);
/* Let's bump election timer once again, we don't want to take down the leader
* because of our long disk writes. */
SetElectionTimer();
state_changed_.notify_all();
reply.term = term_;
reply.success = true;
return reply;
}
template <class State>
ClientResult RaftMemberImpl<State>::AddCommand(
const typename State::Change &command, bool blocking) {
std::unique_lock<std::mutex> lock(mutex_);
if (mode_ != RaftMode::LEADER) {
return ClientResult::NOT_LEADER;
}
LogEntry<State> entry;
entry.term = term_;
entry.command = command;
storage_.AppendLogEntry(entry);
// Entry is already replicated if this is a single node cluster.
AdvanceCommitIndex();
state_changed_.notify_all();
if (!blocking) {
return ClientResult::OK;
}
LogIndex index = storage_.GetLastLogIndex();
while (!exiting_ && term_ == entry.term) {
if (commit_index_ >= index) {
return ClientResult::OK;
}
state_changed_.wait(lock);
}
return ClientResult::NOT_LEADER;
}
} // namespace impl
template <class State>
RaftMember<State>::RaftMember(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id, const RaftConfig &config)
: network_(network), impl_(network, storage, id, config) {
timer_thread_ =
std::thread(&impl::RaftMemberImpl<State>::TimerThreadMain, &impl_);
for (const auto &peer_id : config.members) {
if (peer_id != id) {
peer_threads_.emplace_back(&impl::RaftMemberImpl<State>::PeerThreadMain,
&impl_, peer_id);
}
}
network_.Start(*this);
}
template <class State>
RaftMember<State>::~RaftMember() {
impl_.Stop();
timer_thread_.join();
for (auto &peer_thread : peer_threads_) {
peer_thread.join();
}
}
template <class State>
ClientResult RaftMember<State>::AddCommand(
const typename State::Change &command, bool blocking) {
return impl_.AddCommand(command, blocking);
}
template <class State>
RequestVoteReply RaftMember<State>::OnRequestVote(
const RequestVoteRequest &request) {
return impl_.OnRequestVote(request);
}
template <class State>
AppendEntriesReply RaftMember<State>::OnAppendEntries(
const AppendEntriesRequest<State> &request) {
return impl_.OnAppendEntries(request);
}
} // namespace communication::raft
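The quorum arithmetic in CountVotes and AdvanceCommitIndex above is compact and easy to misread: a strict majority is `2 * votes > members`, and the new commit index is the ((members - 1) / 2)-th largest of the match indices, where that vector holds one entry per configured member (the leader's own peer entry stays at 0) plus the leader's last log index. A self-contained sketch with made-up values, independent of the removed classes:

// Illustration of the majority and commit-index rules; the values are
// invented, the formulas mirror CountVotes() and AdvanceCommitIndex() above.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

using LogIndex = uint64_t;

// Strict majority, as in CountVotes(): 2 * votes > cluster size.
bool HasMajority(size_t num_votes, size_t cluster_size) {
  return 2 * num_votes > cluster_size;
}

// Commit index selection, as in AdvanceCommitIndex(): sort descending and
// take the ((cluster_size - 1) / 2)-th element.
LogIndex QuorumCommitIndex(std::vector<LogIndex> match_indices,
                           size_t cluster_size) {
  std::sort(match_indices.begin(), match_indices.end(),
            std::greater<LogIndex>());
  return match_indices[(cluster_size - 1) / 2];
}

int main() {
  // Three-node cluster: the candidate's own vote plus one peer is enough.
  assert(HasMajority(2, 3) && !HasMajority(1, 3));
  // Leader "a" with last log index 7; follower "b" acknowledged 7, "c" only 4.
  // The vector holds the per-member match indices (a's own entry is 0) plus
  // the leader's last log index, so index 7 is on a majority and commits.
  assert(QuorumCommitIndex({0, 7, 4, 7}, 3) == 7);
  return 0;
}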

View File

@@ -1,277 +0,0 @@
#pragma once
#include <chrono>
#include <condition_variable>
#include <experimental/optional>
#include <map>
#include <mutex>
#include <random>
#include <set>
#include <thread>
#include <vector>
#include "boost/serialization/vector.hpp"
#include "glog/logging.h"
#include "utils/serialization.hpp"
namespace communication::raft {
template <class State>
class RaftMember;
enum class ClientResult { NOT_LEADER, OK };
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;
using MemberId = std::string;
using TermId = uint64_t;
using ClientId = uint64_t;
using CommandId = uint64_t;
using LogIndex = uint64_t;
template <class State>
struct LogEntry {
int term;
std::experimental::optional<typename State::Change> command;
bool operator==(const LogEntry &rhs) const {
return term == rhs.term && command == rhs.command;
}
bool operator!=(const LogEntry &rhs) const { return !(*this == rhs); }
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &command;
}
};
/* Raft RPC requests and replies as described in [Raft thesis, Figure 3.1]. */
struct RequestVoteRequest {
TermId candidate_term;
MemberId candidate_id;
LogIndex last_log_index;
TermId last_log_term;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &candidate_term;
ar &candidate_id;
ar &last_log_index;
ar &last_log_term;
}
};
struct RequestVoteReply {
TermId term;
bool vote_granted;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &vote_granted;
}
};
template <class State>
struct AppendEntriesRequest {
TermId leader_term;
MemberId leader_id;
LogIndex prev_log_index;
TermId prev_log_term;
std::vector<LogEntry<State>> entries;
LogIndex leader_commit;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &leader_term;
ar &leader_id;
ar &prev_log_index;
ar &prev_log_term;
ar &entries;
ar &leader_commit;
}
};
struct AppendEntriesReply {
TermId term;
bool success;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &success;
}
};
template <class State>
class RaftNetworkInterface {
public:
virtual ~RaftNetworkInterface() = default;
/* These functions return false if RPC failed for some reason (e.g. cannot
* establish connection or request cancelled). Otherwise
* `reply` contains response from peer. */
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) = 0;
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) = 0;
/* This will be called once the RaftMember is ready to start receiving RPCs.
*/
virtual void Start(RaftMember<State> &member) = 0;
};
template <class State>
class RaftStorageInterface {
public:
virtual ~RaftStorageInterface() = default;
virtual void WriteTermAndVotedFor(
const TermId term,
const std::experimental::optional<std::string> &voted_for) = 0;
virtual std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() = 0;
virtual void AppendLogEntry(const LogEntry<State> &entry) = 0;
virtual TermId GetLogTerm(const LogIndex index) = 0;
virtual LogEntry<State> GetLogEntry(const LogIndex index) = 0;
virtual std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) = 0;
virtual LogIndex GetLastLogIndex() = 0;
virtual void TruncateLogSuffix(const LogIndex index) = 0;
};
struct RaftConfig {
std::vector<MemberId> members;
std::chrono::milliseconds leader_timeout_min;
std::chrono::milliseconds leader_timeout_max;
std::chrono::milliseconds heartbeat_interval;
std::chrono::milliseconds rpc_backoff;
};
namespace impl {
enum class RaftMode { FOLLOWER, CANDIDATE, LEADER };
struct RaftPeerState {
bool request_vote_done;
bool voted_for_me;
LogIndex match_index;
LogIndex next_index;
bool suppress_log_entries;
Clock::time_point next_heartbeat_time;
Clock::time_point backoff_until;
};
template <class State>
class RaftMemberImpl {
public:
explicit RaftMemberImpl(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id, const RaftConfig &config);
~RaftMemberImpl();
void Stop();
void TimerThreadMain();
void PeerThreadMain(std::string peer_id);
void UpdateTermAndVotedFor(
const TermId new_term,
const std::experimental::optional<MemberId> &new_voted_for);
void CandidateOrLeaderTransitionToFollower();
void CandidateTransitionToLeader();
bool CandidateOrLeaderNoteTerm(const TermId new_term);
void StartNewElection();
void SetElectionTimer();
bool CountVotes();
void RequestVote(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
void AdvanceCommitIndex();
void AppendEntries(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
ClientResult AddCommand(const typename State::Change &command, bool blocking);
template <class... Args>
void LogInfo(const std::string &, Args &&...);
RaftNetworkInterface<State> &network_;
RaftStorageInterface<State> &storage_;
MemberId id_;
RaftConfig config_;
TermId term_;
RaftMode mode_ = RaftMode::FOLLOWER;
std::experimental::optional<MemberId> voted_for_ = std::experimental::nullopt;
std::experimental::optional<MemberId> leader_ = std::experimental::nullopt;
TimePoint next_election_time_;
LogIndex commit_index_ = 0;
bool exiting_ = false;
std::map<std::string, std::unique_ptr<RaftPeerState>> peer_states_;
/* This mutex protects all of the internal state. */
std::mutex mutex_;
/* Used to notify waiting threads that some of the internal state has changed.
* It is notified when the following events occur:
* - mode change
* - election start
* - `next_election_time_` update on RPC from leader or candidate
* - destructor is called
* - `commit_index_` is advanced
*/
std::condition_variable state_changed_;
std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}());
};
} // namespace impl
template <class State>
class RaftMember final {
public:
explicit RaftMember(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage, const MemberId &id,
const RaftConfig &config);
~RaftMember();
ClientResult AddCommand(const typename State::Change &command, bool blocking);
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
private:
RaftNetworkInterface<State> &network_;
impl::RaftMemberImpl<State> impl_;
/* Timer thread for triggering elections. */
std::thread timer_thread_;
/* One thread per peer for outgoing RPCs. */
std::vector<std::thread> peer_threads_;
};
} // namespace communication::raft
#include "raft-inl.hpp"

View File

@@ -1,120 +0,0 @@
#pragma once
#include <unordered_map>
#include "glog/logging.h"
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
#include "communication/rpc/client.hpp"
#include "communication/rpc/server.hpp"
#include "io/network/endpoint.hpp"
/* Implementation of `RaftNetworkInterface` using RPC. Raft RPC requests and
* responses are wrapped in `PeerRpcRequest` and `PeerRpcReply`. */
// TODO(mtomic): Unwrap RPCs and use separate request-response protocols instead
// of `PeerProtocol`, or at least use a union to avoid sending unnecessary data
// over the wire.
namespace communication::raft {
template <class State>
using PeerProtocol = rpc::RequestResponse<PeerRpcRequest<State>, PeerRpcReply>;
template <class State>
class RpcNetwork : public RaftNetworkInterface<State> {
public:
RpcNetwork(rpc::Server &server,
std::unordered_map<std::string, io::network::Endpoint> directory)
: server_(server), directory_(std::move(directory)) {}
virtual void Start(RaftMember<State> &member) override {
// TODO: Serialize RPC via Cap'n Proto
// server_.Register<PeerProtocol<State>>(
// [&member](const auto &req_reader, auto *res_builder) {
// PeerRpcRequest<State> request;
// request.Load(req_reader);
// PeerRpcReply reply;
// reply.type = request.type;
// switch (request.type) {
// case RpcType::REQUEST_VOTE:
// reply.request_vote = member.OnRequestVote(request.request_vote);
// break;
// case RpcType::APPEND_ENTRIES:
// reply.append_entries =
// member.OnAppendEntries(request.append_entries);
// break;
// default:
// LOG(ERROR) << "Unknown RPC type: "
// << static_cast<int>(request.type);
// }
// reply.Save(res_builder);
// });
}
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::REQUEST_VOTE;
req.request_vote = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.request_vote;
return true;
}
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::APPEND_ENTRIES;
req.append_entries = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.append_entries;
return true;
}
private:
bool SendRpc(const MemberId &recipient, const PeerRpcRequest<State> &request,
PeerRpcReply &reply) {
auto &client = GetClient(recipient);
auto response = client.template Call<PeerProtocol<State>>(request);
if (!response) {
return false;
}
reply = *response;
return true;
}
rpc::Client &GetClient(const MemberId &id) {
auto it = clients_.find(id);
if (it == clients_.end()) {
auto ne = directory_[id];
it = clients_.try_emplace(id, ne).first;
}
return it->second;
}
rpc::Server &server_;
// TODO(mtomic): how to update and distribute this?
std::unordered_map<MemberId, io::network::Endpoint> directory_;
std::unordered_map<MemberId, rpc::Client> clients_;
};
} // namespace communication::raft

View File

@@ -1,239 +0,0 @@
/**
* @file
*
* Raft log is stored inside a folder. Each log entry is stored in a file named
* by its index. There is a special file named "metadata" which stores Raft
* metadata and also the last log index, which is used on startup to identify
* which log entry files are valid.
*/
#pragma once
#include <fcntl.h>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/iostreams/device/file_descriptor.hpp"
#include "boost/iostreams/stream.hpp"
#include "communication/raft/raft.hpp"
#include "communication/raft/storage/memory.hpp"
#include "utils/file.hpp"
namespace communication::raft {
struct SimpleFileStorageMetadata {
TermId term;
std::experimental::optional<MemberId> voted_for;
LogIndex last_log_index;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term &voted_for &last_log_index;
}
};
template <class State>
class SimpleFileStorage : public RaftStorageInterface<State> {
public:
explicit SimpleFileStorage(const fs::path &parent_dir) : memory_storage_() {
try {
dir_ = utils::OpenDir(parent_dir);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Error opening log directory: {}", e.what());
}
auto md = utils::TryOpenFile(dir_, "metadata", O_RDONLY);
if (!md) {
LOG(WARNING) << fmt::format("No metadata file found in directory '{}'",
parent_dir);
return;
}
boost::iostreams::file_descriptor_source src(
md->Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(src);
boost::archive::binary_iarchive iar(is);
SimpleFileStorageMetadata metadata;
try {
iar >> metadata;
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Failed to deserialize Raft metadata: " << e.what();
}
LOG(INFO) << fmt::format(
"Read term = {} and voted_for = {} from storage", metadata.term,
metadata.voted_for ? *metadata.voted_for : "(none)");
memory_storage_.term_ = metadata.term;
memory_storage_.voted_for_ = metadata.voted_for;
memory_storage_.log_.reserve(metadata.last_log_index);
for (LogIndex idx = 1; idx <= metadata.last_log_index; ++idx) {
utils::File entry_file;
try {
entry_file = utils::OpenFile(dir_, fmt::format("{}", idx), O_RDONLY);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open entry file {}: {}", idx,
e.what());
}
boost::iostreams::file_descriptor_source src(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(
src);
boost::archive::binary_iarchive iar(is);
LogEntry<State> entry;
try {
iar >> entry;
memory_storage_.log_.emplace_back(std::move(entry));
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to deserialize log entry {}: {}", idx,
e.what());
}
}
LOG(INFO) << fmt::format("Read {} log entries", metadata.last_log_index);
}
void WriteTermAndVotedFor(
TermId term,
const std::experimental::optional<MemberId> &voted_for) override {
memory_storage_.WriteTermAndVotedFor(term, voted_for);
WriteMetadata();
// Metadata file might be newly created so we have to fsync the directory.
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
std::pair<TermId, std::experimental::optional<MemberId>> GetTermAndVotedFor()
override {
return memory_storage_.GetTermAndVotedFor();
}
void AppendLogEntry(const LogEntry<State> &entry) override {
memory_storage_.AppendLogEntry(entry);
utils::File entry_file;
try {
entry_file = utils::OpenFile(
dir_, fmt::format("{}", memory_storage_.GetLastLogIndex()),
O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open log entry file: {}", e.what());
}
boost::iostreams::file_descriptor_sink sink(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << entry;
os.flush();
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to serialize log entry: {}", e.what());
}
try {
utils::Fsync(entry_file);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to write log entry file to disk: {}",
e.what());
}
// We update the metadata only after the log entry file is written to
// disk. This ensures that no file in range [1, last_log_index] is
// corrupted.
WriteMetadata();
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
TermId GetLogTerm(const LogIndex index) override {
return memory_storage_.GetLogTerm(index);
}
LogEntry<State> GetLogEntry(const LogIndex index) override {
return memory_storage_.GetLogEntry(index);
}
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) override {
return memory_storage_.GetLogSuffix(index);
}
LogIndex GetLastLogIndex() override {
return memory_storage_.GetLastLogIndex();
}
void TruncateLogSuffix(const LogIndex index) override {
return memory_storage_.TruncateLogSuffix(index);
}
private:
InMemoryStorage<State> memory_storage_;
utils::File dir_;
void WriteMetadata() {
// We first write data to a temporary file, ensure data is safely written
// to disk, and then rename the file. Since rename is an atomic operation,
// "metadata" file won't get corrupted in case of program crash.
utils::File md_tmp;
try {
md_tmp =
OpenFile(dir_, "metadata.new", O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open temporary metadata file: {}",
e.what());
}
boost::iostreams::file_descriptor_sink sink(
md_tmp.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << SimpleFileStorageMetadata{
memory_storage_.GetTermAndVotedFor().first,
memory_storage_.GetTermAndVotedFor().second,
memory_storage_.GetLastLogIndex()};
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Error serializing Raft metadata";
}
os.flush();
try {
utils::Fsync(md_tmp);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format(
"Failed to write temporary metadata file to disk: {}", e.what());
}
try {
utils::Rename(dir_, "metadata.new", dir_, "metadata");
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to move temporary metadata file: {}",
e.what());
}
}
};
} // namespace communication::raft
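WriteMetadata above relies on the standard write-temporary, fsync, rename, fsync-the-directory sequence so that a crash can never leave a partially written `metadata` file behind. A minimal POSIX-only sketch of the same recipe, independent of the removed utils::File helpers and with error handling reduced to early returns:

// Crash-safe replacement of a small file, mirroring the ordering used in
// SimpleFileStorage::WriteMetadata(): write a temporary, fsync it, rename it
// over the target, then fsync the directory so the rename itself is durable.
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <string>

bool AtomicWriteFile(const std::string &dir, const std::string &name,
                     const std::string &contents) {
  const std::string tmp_path = dir + "/" + name + ".new";
  const std::string final_path = dir + "/" + name;
  int fd = open(tmp_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
  if (fd == -1) return false;
  const char *data = contents.data();
  size_t left = contents.size();
  while (left > 0) {
    ssize_t written = write(fd, data, left);
    if (written == -1) { close(fd); return false; }
    data += written;
    left -= static_cast<size_t>(written);
  }
  // The data must be on disk before the rename makes it visible under `name`.
  if (fsync(fd) == -1) { close(fd); return false; }
  close(fd);
  if (rename(tmp_path.c_str(), final_path.c_str()) == -1) return false;
  // The rename is a directory update; fsync the directory to persist it.
  int dir_fd = open(dir.c_str(), O_RDONLY | O_DIRECTORY);
  if (dir_fd == -1) return false;
  bool ok = fsync(dir_fd) == 0;
  close(dir_fd);
  return ok;
}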

View File

@@ -1,63 +0,0 @@
#pragma once
#include "communication/raft/raft.hpp"
namespace communication::raft {
template <class State>
class InMemoryStorage : public RaftStorageInterface<State> {
public:
InMemoryStorage()
: term_(0), voted_for_(std::experimental::nullopt), log_() {}
InMemoryStorage(const TermId term,
const std::experimental::optional<std::string> &voted_for,
const std::vector<LogEntry<State>> log)
: term_(term), voted_for_(voted_for), log_(log) {}
void WriteTermAndVotedFor(
const TermId term,
const std::experimental::optional<std::string> &voted_for) {
term_ = term;
voted_for_ = voted_for;
}
std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() {
return {term_, voted_for_};
}
void AppendLogEntry(const LogEntry<State> &entry) { log_.push_back(entry); }
TermId GetLogTerm(const LogIndex index) {
CHECK(0 <= index && index <= log_.size())
<< "Trying to read nonexistent log entry";
return index > 0 ? log_[index - 1].term : 0;
}
LogEntry<State> GetLogEntry(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to get nonexistent log entry";
return log_[index - 1];
}
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to get nonexistent log entries";
return std::vector<LogEntry<State>>(log_.begin() + index - 1, log_.end());
}
LogIndex GetLastLogIndex(void) { return log_.size(); }
void TruncateLogSuffix(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to remove nonexistent log entries";
log_.erase(log_.begin() + index - 1, log_.end());
}
TermId term_;
std::experimental::optional<MemberId> voted_for_;
std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft
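InMemoryStorage also pins down the 1-based log indexing convention the rest of the code assumes: index 0 is a sentinel whose term is 0, and the first appended entry lives at index 1. A small usage sketch; `TrivialState` is defined here only for the example:

// Usage sketch for InMemoryStorage and the 1-based indexing convention.
#include <cassert>
#include "communication/raft/storage/memory.hpp"

struct TrivialState {
  struct Change {
    bool operator==(const Change &) const { return true; }
    bool operator!=(const Change &) const { return false; }
  };
};

int main() {
  using namespace communication::raft;
  InMemoryStorage<TrivialState> storage;
  assert(storage.GetLastLogIndex() == 0);
  assert(storage.GetLogTerm(0) == 0);  // sentinel term before the first entry

  LogEntry<TrivialState> entry;
  entry.term = 1;
  entry.command = TrivialState::Change{};
  storage.AppendLogEntry(entry);

  assert(storage.GetLastLogIndex() == 1);
  assert(storage.GetLogTerm(1) == 1);
  assert(storage.GetLogEntry(1) == entry);
  return 0;
}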

View File

@@ -1,141 +0,0 @@
#include <functional>
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft::test_utils {
struct DummyState {
struct Change {
bool operator==(const Change &) const { return true; }
bool operator!=(const Change &) const { return false; }
template <class TArchive>
void serialize(TArchive &, unsigned int) {}
};
template <class TArchive>
void serialize(TArchive &, unsigned int) {}
};
struct IntState {
int x;
struct Change {
enum Type { ADD, SUB, SET };
Type t;
int d;
bool operator==(const Change &rhs) const {
return t == rhs.t && d == rhs.d;
}
bool operator!=(const Change &rhs) const { return !(*this == rhs); };
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &t;
ar &d;
}
};
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &x;
}
};
/* Implementations of `RaftNetworkInterface` for simpler unit testing. */
/* `NoOpNetworkInterface` doesn't do anything -- it's like a server disconnected
* from the network. */
template <class State>
class NoOpNetworkInterface : public RaftNetworkInterface<State> {
public:
~NoOpNetworkInterface() {}
virtual bool SendRequestVote(const MemberId &, const RequestVoteRequest &,
RequestVoteReply &) override {
return false;
}
virtual bool SendAppendEntries(const MemberId &,
const AppendEntriesRequest<State> &,
AppendEntriesReply &) override {
return false;
}
virtual void Start(RaftMember<State> &) override {}
};
/* `NextReplyNetworkInterface` has two fields: `on_request_` and `next_reply_`
* which is optional. `on_request_` is a callback that will be called before
* processing requests. If `next_reply_` is not set, `Send*` functions will
* return false, otherwise they return that reply. */
template <class State>
class NextReplyNetworkInterface : public RaftNetworkInterface<State> {
public:
~NextReplyNetworkInterface() {}
virtual bool SendRequestVote(const MemberId &,
const RequestVoteRequest &request,
RequestVoteReply &reply) override {
PeerRpcRequest<State> req;
req.type = RpcType::REQUEST_VOTE;
req.request_vote = request;
on_request_(req);
if (!next_reply_) {
return false;
}
DCHECK(next_reply_->type == RpcType::REQUEST_VOTE)
<< "`next_reply_` type doesn't match the request type";
reply = next_reply_->request_vote;
return true;
}
virtual bool SendAppendEntries(const MemberId &,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) override {
PeerRpcRequest<State> req;
req.type = RpcType::APPEND_ENTRIES;
req.append_entries = request;
on_request_(req);
if (!next_reply_) {
return false;
}
DCHECK(next_reply_->type == RpcType::APPEND_ENTRIES)
<< "`next_reply_` type doesn't match the request type";
reply = next_reply_->append_entries;
return true;
}
virtual void Start(RaftMember<State> &) override {}
std::function<void(const PeerRpcRequest<State> &)> on_request_;
std::experimental::optional<PeerRpcReply> next_reply_;
};
template <class State>
class NoOpStorageInterface : public RaftStorageInterface<State> {
public:
NoOpStorageInterface() {}
void WriteTermAndVotedFor(const TermId,
const std::experimental::optional<std::string> &) {}
std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() {
return {0, {}};
}
void AppendLogEntry(const LogEntry<State> &) {}
TermId GetLogTerm(const LogIndex) { return 0; }
LogEntry<State> GetLogEntry(const LogIndex) { LOG(FATAL) << "Not implemented"; }
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex) { return {}; }
LogIndex GetLastLogIndex() { return 0; }
void TruncateLogSuffix(const LogIndex) {}
TermId term_;
std::experimental::optional<MemberId> voted_for_;
std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft::test_utils

View File

@ -1,100 +0,0 @@
#include <chrono>
#include <thread>
#include "gflags/gflags.h"
#include "communication/rpc/client.hpp"
DEFINE_HIDDEN_bool(rpc_random_latency, false,
"If a random wait should happen on each RPC call, to "
"simulate network latency.");
namespace communication::rpc {
Client::Client(const io::network::Endpoint &endpoint) : endpoint_(endpoint) {}
std::experimental::optional<::capnp::FlatArrayMessageReader> Client::Send(
::capnp::MessageBuilder *message) {
std::lock_guard<std::mutex> guard(mutex_);
if (FLAGS_rpc_random_latency) {
auto microseconds = (int)(1000 * rand_(gen_));
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
}
// Check if the connection is broken (if we haven't used the client for a
// long time the server could have died).
if (client_ && client_->ErrorStatus()) {
client_ = std::experimental::nullopt;
}
// Connect to the remote server.
if (!client_) {
client_.emplace(&context_);
if (!client_->Connect(endpoint_)) {
LOG(ERROR) << "Couldn't connect to remote address " << endpoint_;
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
}
// Serialize and send request.
auto request_words = ::capnp::messageToFlatArray(*message);
auto request_bytes = request_words.asBytes();
CHECK(request_bytes.size() <= std::numeric_limits<MessageSize>::max())
<< fmt::format(
"Trying to send message of size {}, max message size is {}",
request_bytes.size(), std::numeric_limits<MessageSize>::max());
MessageSize request_data_size = request_bytes.size();
if (!client_->Write(reinterpret_cast<uint8_t *>(&request_data_size),
sizeof(MessageSize), true)) {
LOG(ERROR) << "Couldn't send request size to " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
if (!client_->Write(request_bytes.begin(), request_bytes.size())) {
LOG(ERROR) << "Couldn't send request data to " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
// Receive response data size.
if (!client_->Read(sizeof(MessageSize))) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
MessageSize response_data_size =
*reinterpret_cast<MessageSize *>(client_->GetData());
client_->ShiftData(sizeof(MessageSize));
// Receive response data.
if (!client_->Read(response_data_size)) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
// Read the response message.
auto data = ::kj::arrayPtr(client_->GetData(), response_data_size);
// Our data is word aligned and padded to 64bit because we use regular
// (non-packed) serialization of Cap'n Proto. So we can use reinterpret_cast.
auto data_words =
::kj::arrayPtr(reinterpret_cast<::capnp::word *>(data.begin()),
reinterpret_cast<::capnp::word *>(data.end()));
::capnp::FlatArrayMessageReader response_message(data_words.asConst());
client_->ShiftData(response_data_size);
return std::experimental::make_optional(std::move(response_message));
}
void Client::Abort() {
if (!client_) return;
// We need to call Shutdown on the client to abort any pending read or
// write operations.
client_->Shutdown();
client_ = std::experimental::nullopt;
}
} // namespace communication::rpc

View File

@ -1,101 +0,0 @@
#pragma once
#include <experimental/optional>
#include <memory>
#include <mutex>
#include <random>
#include <capnp/message.h>
#include <capnp/serialize.h>
#include <glog/logging.h>
#include "communication/client.hpp"
#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
/// Client is thread safe, but it is recommended to use thread_local clients.
class Client {
public:
explicit Client(const io::network::Endpoint &endpoint);
/// The Call function can initiate only one request at a time. The function
/// blocks until there is a response. If an error occurs, nullopt is returned.
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> Call(
Args &&... args) {
return CallWithLoad<TRequestResponse>(
[](const auto &reader) {
typename TRequestResponse::Response response;
response.Load(reader);
return response;
},
std::forward<Args>(args)...);
}
/// Same as `Call` but the first argument is a response loading function.
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> CallWithLoad(
std::function<typename TRequestResponse::Response(
const typename TRequestResponse::Response::Capnp::Reader &)>
load,
Args &&... args) {
typename TRequestResponse::Request request(std::forward<Args>(args)...);
auto req_type = TRequestResponse::Request::TypeInfo;
VLOG(12) << "[RpcClient] sent " << req_type.name;
::capnp::MallocMessageBuilder req_msg;
{
auto builder = req_msg.initRoot<capnp::Message>();
builder.setTypeId(req_type.id);
auto data_builder = builder.initData();
auto req_builder =
data_builder
.template initAs<typename TRequestResponse::Request::Capnp>();
request.Save(&req_builder);
}
auto maybe_response = Send(&req_msg);
if (!maybe_response) {
return std::experimental::nullopt;
}
auto res_msg = maybe_response->getRoot<capnp::Message>();
auto res_type = TRequestResponse::Response::TypeInfo;
if (res_msg.getTypeId() != res_type.id) {
// The response type doesn't match the request we sent, which means
// something is very wrong (probably on the server side).
LOG(ERROR) << "Message response was of unexpected type";
client_ = std::experimental::nullopt;
return std::experimental::nullopt;
}
VLOG(12) << "[RpcClient] received " << res_type.name;
auto data_reader =
res_msg.getData()
.template getAs<typename TRequestResponse::Response::Capnp>();
return std::experimental::make_optional(load(data_reader));
}
/// Call this function from another thread to abort a pending RPC call.
void Abort();
private:
std::experimental::optional<::capnp::FlatArrayMessageReader> Send(
::capnp::MessageBuilder *message);
io::network::Endpoint endpoint_;
// TODO (mferencevic): currently the RPC client is hardcoded not to use SSL
communication::ClientContext context_;
std::experimental::optional<communication::Client> client_;
std::mutex mutex_;
// Random generator for simulated network latency (enable with a flag).
// Distribution parameters are chosen by rule of thumb.
std::mt19937 gen_{std::random_device{}()};
std::lognormal_distribution<> rand_{0.0, 1.11};
};
} // namespace communication::rpc
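// Usage sketch (added for illustration, not part of the original file):
// issuing a blocking RPC and aborting it from another thread. `EchoRpc` is a
// hypothetical RequestResponse pair; real pairs are generated from LCP
// definitions. The endpoint value and the extra <thread>/<chrono> includes
// are assumptions of this sketch.
//
//   communication::rpc::Client client({"127.0.0.1", 10000});
//   std::thread watchdog([&client] {
//     std::this_thread::sleep_for(std::chrono::seconds(1));
//     client.Abort();  // unblocks the pending Call below
//   });
//   auto reply = client.Call<EchoRpc>("hello");
//   if (!reply) LOG(WARNING) << "RPC failed or was aborted";
//   watchdog.join();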

View File

@ -1,68 +0,0 @@
#pragma once
#include <mutex>
#include <stack>
#include "communication/rpc/client.hpp"
namespace communication::rpc {
/**
* A simple client pool that creates new RPC clients on demand. Useful when you
* want to send RPCs to the same server from multiple threads without them
* blocking each other.
*/
class ClientPool {
public:
explicit ClientPool(const io::network::Endpoint &endpoint)
: endpoint_(endpoint) {}
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> Call(
Args &&... args) {
return WithUnusedClient([&](const auto &client) {
return client->template Call<TRequestResponse>(
std::forward<Args>(args)...);
});
};
template <class TRequestResponse, class... Args>
std::experimental::optional<typename TRequestResponse::Response> CallWithLoad(
std::function<typename TRequestResponse::Response(
const typename TRequestResponse::Response::Capnp::Reader &)>
load,
Args &&... args) {
return WithUnusedClient([&](const auto &client) {
return client->template CallWithLoad<TRequestResponse>(
load, std::forward<Args>(args)...);
});
};
private:
template <class TFun>
auto WithUnusedClient(const TFun &fun) {
std::unique_ptr<Client> client;
std::unique_lock<std::mutex> lock(mutex_);
if (unused_clients_.empty()) {
client = std::make_unique<Client>(endpoint_);
} else {
client = std::move(unused_clients_.top());
unused_clients_.pop();
}
lock.unlock();
auto res = fun(client);
lock.lock();
unused_clients_.push(std::move(client));
return res;
}
io::network::Endpoint endpoint_;
std::mutex mutex_;
std::stack<std::unique_ptr<Client>> unused_clients_;
};
} // namespace communication::rpc
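// Usage sketch (added for illustration, not part of the original file):
// several threads sharing one pool, each concurrent Call getting its own
// underlying Client so the calls don't serialize on a single connection.
// `CountersSetRpc` (defined in database/counters_rpc_messages.lcp) is used
// only as an example; the endpoint value and <thread>/<vector> includes are
// assumed.
//
//   communication::rpc::ClientPool pool({"127.0.0.1", 10000});
//   std::vector<std::thread> threads;
//   for (int i = 0; i < 4; ++i)
//     threads.emplace_back(
//         [&pool, i] { pool.Call<CountersSetRpc>("counter", int64_t(i)); });
//   for (auto &t : threads) t.join();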

View File

@ -1,9 +0,0 @@
@0xd3832c9a1a3d8ec7;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("communication::rpc::capnp");
struct Message {
typeId @0 :UInt64;
data @1 :AnyPointer;
}

View File

@ -1,54 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
namespace communication::rpc {
using MessageSize = uint32_t;
/// Type information on an RPC message.
/// Each message should have a static member `TypeInfo` with this information.
struct MessageType {
/// Unique ID for a message.
uint64_t id;
/// Pretty name of the type.
std::string name;
};
inline bool operator==(const MessageType &a, const MessageType &b) {
return a.id == b.id;
}
inline bool operator!=(const MessageType &a, const MessageType &b) {
return a.id != b.id;
}
inline bool operator<(const MessageType &a, const MessageType &b) {
return a.id < b.id;
}
inline bool operator<=(const MessageType &a, const MessageType &b) {
return a.id <= b.id;
}
inline bool operator>(const MessageType &a, const MessageType &b) {
return a.id > b.id;
}
inline bool operator>=(const MessageType &a, const MessageType &b) {
return a.id >= b.id;
}
/// Each RPC is defined via this struct.
///
/// `TRequest` and `TResponse` are required to be classes which have a static
/// member `TypeInfo` of `MessageType` type. This is used for proper
/// registration and deserialization of RPC types. Additionally, both `TRequest`
/// and `TResponse` are required to define a nested `Capnp` type, which
/// corresponds to the Cap'n Proto schema type, as well as define the following
/// serialization functions:
/// * void Save(Capnp::Builder *, ...) const
/// * void Load(const Capnp::Reader &, ...)
template <typename TRequest, typename TResponse>
struct RequestResponse {
using Request = TRequest;
using Response = TResponse;
};
} // namespace communication::rpc
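// Illustration (not part of the original file) of the shape such message
// types take. In the real code they are generated from LCP definitions along
// with their Cap'n Proto schema, so `capnp::EchoReq`, `EchoRes` and the
// `TypeInfo` value below are hypothetical.
//
//   struct EchoReq {
//     using Capnp = capnp::EchoReq;  // generated schema type (hypothetical)
//     static const communication::rpc::MessageType TypeInfo;  // {id, "EchoReq"}
//     EchoReq() = default;
//     explicit EchoReq(std::string data) : data(std::move(data)) {}
//     void Save(Capnp::Builder *builder) const { builder->setData(data); }
//     void Load(const Capnp::Reader &reader) { data = reader.getData(); }
//     std::string data;
//   };
//   // EchoRes follows the same pattern; the RPC pair is then simply:
//   using EchoRpc = communication::rpc::RequestResponse<EchoReq, EchoRes>;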

View File

@ -1,77 +0,0 @@
#include <sstream>
#include "capnp/message.h"
#include "capnp/serialize.h"
#include "fmt/format.h"
#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/rpc/server.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
Session::Session(Server &server, communication::InputStream &input_stream,
communication::OutputStream &output_stream)
: server_(server),
input_stream_(input_stream),
output_stream_(output_stream) {}
void Session::Execute() {
if (input_stream_.size() < sizeof(MessageSize)) return;
MessageSize request_len =
*reinterpret_cast<MessageSize *>(input_stream_.data());
uint64_t request_size = sizeof(MessageSize) + request_len;
input_stream_.Resize(request_size);
if (input_stream_.size() < request_size) return;
// Read the request message.
auto data =
::kj::arrayPtr(input_stream_.data() + sizeof(request_len), request_len);
// Our data is word aligned and padded to 64bit because we use regular
// (non-packed) serialization of Cap'n Proto. So we can use reinterpret_cast.
auto data_words =
::kj::arrayPtr(reinterpret_cast<::capnp::word *>(data.begin()),
reinterpret_cast<::capnp::word *>(data.end()));
::capnp::FlatArrayMessageReader request_message(data_words.asConst());
auto request = request_message.getRoot<capnp::Message>();
input_stream_.Shift(sizeof(MessageSize) + request_len);
auto callbacks_accessor = server_.callbacks_.access();
auto it = callbacks_accessor.find(request.getTypeId());
if (it == callbacks_accessor.end()) {
// Throw exception to close the socket and cleanup the session.
throw SessionException(
"Session trying to execute an unregistered RPC call!");
}
VLOG(12) << "[RpcServer] received " << it->second.req_type.name;
::capnp::MallocMessageBuilder response_message;
// callback fills the message data
auto response_builder = response_message.initRoot<capnp::Message>();
it->second.callback(request, &response_builder);
// Serialize and send response
auto response_words = ::capnp::messageToFlatArray(response_message);
auto response_bytes = response_words.asBytes();
if (response_bytes.size() > std::numeric_limits<MessageSize>::max()) {
throw SessionException(fmt::format(
"Trying to send response of size {}, max response size is {}",
response_bytes.size(), std::numeric_limits<MessageSize>::max()));
}
MessageSize input_stream_size = response_bytes.size();
if (!output_stream_.Write(reinterpret_cast<uint8_t *>(&input_stream_size),
sizeof(MessageSize), true)) {
throw SessionException("Couldn't send response size!");
}
if (!output_stream_.Write(response_bytes.begin(), response_bytes.size())) {
throw SessionException("Couldn't send response data!");
}
VLOG(12) << "[RpcServer] sent " << it->second.res_type.name;
}
} // namespace communication::rpc

View File

@ -1,55 +0,0 @@
#pragma once
#include <chrono>
#include <cstdint>
#include <memory>
#include "communication/rpc/messages.hpp"
#include "communication/session.hpp"
/**
* @brief Protocol
*
* Has classes and functions that implement the server side of our
* RPC protocol.
*
* Message layout: MessageSize message_size,
* message_size bytes serialized_message
*/
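// Framing sketch (added for illustration, not part of the original file):
// how a message is laid out on the wire. `payload` stands for the flattened
// Cap'n Proto bytes; the <vector>, <cstring> and <limits> includes are
// assumed.
//
//   std::vector<uint8_t> Frame(const std::vector<uint8_t> &payload) {
//     using communication::rpc::MessageSize;
//     // The payload must fit into MessageSize (the real client and server
//     // enforce this with CHECK / SessionException).
//     MessageSize size = payload.size();
//     std::vector<uint8_t> wire(sizeof(MessageSize) + payload.size());
//     std::memcpy(wire.data(), &size, sizeof(MessageSize));
//     std::memcpy(wire.data() + sizeof(MessageSize), payload.data(),
//                 payload.size());
//     return wire;  // Session::Execute below parses exactly this layout
//   }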
namespace communication::rpc {
// Forward declaration of class Server
class Server;
/**
* This class is thrown when the Session wants to indicate that a fatal error
* occurred during execution.
*/
class SessionException : public utils::BasicException {
using utils::BasicException::BasicException;
};
/**
* Distributed Protocol Session
*
* This class is responsible for handling a single client connection.
*/
class Session {
public:
Session(Server &server, communication::InputStream &input_stream,
communication::OutputStream &output_stream);
/**
* Executes the protocol after data has been read into the stream.
* Goes through the protocol states in order to execute commands from the
* client.
*/
void Execute();
private:
Server &server_;
communication::InputStream &input_stream_;
communication::OutputStream &output_stream_;
};
} // namespace communication::rpc

View File

@ -1,17 +0,0 @@
#include "communication/rpc/server.hpp"
namespace communication::rpc {
Server::Server(const io::network::Endpoint &endpoint,
size_t workers_count)
: server_(endpoint, *this, &context_, -1, "RPC", workers_count) {}
void Server::StopProcessingCalls() {
server_.Shutdown();
server_.AwaitShutdown();
}
const io::network::Endpoint &Server::endpoint() const {
return server_.endpoint();
}
} // namespace communication::rpc

View File

@ -1,86 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "capnp/any.h"
#include "communication/rpc/messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "data_structures/queue.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
class Server {
public:
Server(const io::network::Endpoint &endpoint,
size_t workers_count = std::thread::hardware_concurrency());
Server(const Server &) = delete;
Server(Server &&) = delete;
Server &operator=(const Server &) = delete;
Server &operator=(Server &&) = delete;
void StopProcessingCalls();
const io::network::Endpoint &endpoint() const;
template <class TRequestResponse>
void Register(std::function<
void(const typename TRequestResponse::Request::Capnp::Reader &,
typename TRequestResponse::Response::Capnp::Builder *)>
callback) {
RpcCallback rpc;
rpc.req_type = TRequestResponse::Request::TypeInfo;
rpc.res_type = TRequestResponse::Response::TypeInfo;
rpc.callback = [callback = callback](const auto &reader, auto *builder) {
auto req_data =
reader.getData()
.template getAs<typename TRequestResponse::Request::Capnp>();
builder->setTypeId(TRequestResponse::Response::TypeInfo.id);
auto data_builder = builder->initData();
auto res_builder =
data_builder
.template initAs<typename TRequestResponse::Response::Capnp>();
callback(req_data, &res_builder);
};
auto callbacks_accessor = callbacks_.access();
auto got =
callbacks_accessor.insert(TRequestResponse::Request::TypeInfo.id, rpc);
CHECK(got.second) << "Callback for that message type already registered";
VLOG(12) << "[RpcServer] register " << rpc.req_type.name << " -> "
<< rpc.res_type.name;
}
template <typename TRequestResponse>
void UnRegister() {
const MessageType &type = TRequestResponse::Request::TypeInfo;
auto callbacks_accessor = callbacks_.access();
auto deleted = callbacks_accessor.remove(type.id);
CHECK(deleted) << "Trying to remove unknown message type callback";
}
private:
friend class Session;
struct RpcCallback {
MessageType req_type;
std::function<void(const capnp::Message::Reader &,
capnp::Message::Builder *)>
callback;
MessageType res_type;
};
ConcurrentMap<uint64_t, RpcCallback> callbacks_;
std::mutex mutex_;
// TODO (mferencevic): currently the RPC server is hardcoded not to use SSL
communication::ServerContext context_;
communication::Server<Session, Server> server_;
};
} // namespace communication::rpc
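// Usage sketch (added for illustration, not part of the original file):
// registering a handler. `EchoRpc` is a hypothetical RequestResponse pair
// whose generated Cap'n Proto types expose getData()/setData(); compare the
// counter RPC registrations in database/counters.cpp. The endpoint value is
// a placeholder.
//
//   communication::rpc::Server server({"127.0.0.1", 10000});
//   server.Register<EchoRpc>([](const auto &req_reader, auto *res_builder) {
//     res_builder->setData(req_reader.getData());  // echo the payload back
//   });
//   // ... serve until shutdown ...
//   server.StopProcessingCalls();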

View File

@ -32,41 +32,6 @@ DEFINE_string(properties_on_disk, "",
"Property names of properties which will be stored on available " "Property names of properties which will be stored on available "
"disk. Property names have to be separated with comma (,)."); "disk. Property names have to be separated with comma (,).");
#ifndef MG_COMMUNITY
// Distributed master/worker flags.
DEFINE_VALIDATED_HIDDEN_int32(worker_id, 0,
"ID of a worker in a distributed system. Igored "
"in single-node.",
FLAG_IN_RANGE(0, 1 << gid::kWorkerIdSize));
DEFINE_HIDDEN_string(master_host, "0.0.0.0",
"For master node indicates the host served on. For worker "
"node indicates the master location.");
DEFINE_VALIDATED_HIDDEN_int32(
master_port, 0,
"For master node the port on which to serve. For "
"worker node indicates the master's port.",
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_HIDDEN_string(worker_host, "0.0.0.0",
"For worker node indicates the host served on. For master "
"node this flag is not used.");
DEFINE_VALIDATED_HIDDEN_int32(
worker_port, 0,
"For master node it's unused. For worker node "
"indicates the port on which to serve. If zero (default value), a port is "
"chosen at random. Sent to the master when registring worker node.",
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_VALIDATED_HIDDEN_int32(rpc_num_workers,
std::max(std::thread::hardware_concurrency(), 1U),
"Number of workers (RPC)",
FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(recovering_cluster_size, 0,
"Number of workers (including master) in the "
"previously snapshooted/wal cluster.",
FLAG_IN_RANGE(0, INT32_MAX));
DEFINE_bool(dynamic_graph_partitioner_enabled, false,
"If the dynamic graph partitioner should be enabled.");
#endif
// clang-format off // clang-format off
database::Config::Config() database::Config::Config()
// Durability flags. // Durability flags.
@ -81,17 +46,5 @@ database::Config::Config()
query_execution_time_sec{FLAGS_query_execution_time_sec}, query_execution_time_sec{FLAGS_query_execution_time_sec},
// Data location. // Data location.
properties_on_disk(utils::Split(FLAGS_properties_on_disk, ",")) properties_on_disk(utils::Split(FLAGS_properties_on_disk, ","))
#ifndef MG_COMMUNITY
,
// Distributed flags.
dynamic_graph_partitioner_enabled{FLAGS_dynamic_graph_partitioner_enabled},
rpc_num_workers{FLAGS_rpc_num_workers},
worker_id{FLAGS_worker_id},
master_endpoint{FLAGS_master_host,
static_cast<uint16_t>(FLAGS_master_port)},
worker_endpoint{FLAGS_worker_host,
static_cast<uint16_t>(FLAGS_worker_port)},
recovering_cluster_size{FLAGS_recovering_cluster_size}
#endif
{} {}
// clang-format on // clang-format on

View File

@ -1,7 +1,5 @@
#include "database/counters.hpp" #include "database/counters.hpp"
#include "database/counters_rpc_messages.hpp"
namespace database { namespace database {
int64_t SingleNodeCounters::Get(const std::string &name) { int64_t SingleNodeCounters::Get(const std::string &name) {
@ -16,33 +14,4 @@ void SingleNodeCounters::Set(const std::string &name, int64_t value) {
if (!name_counter_pair.second) name_counter_pair.first->second.store(value); if (!name_counter_pair.second) name_counter_pair.first->second.store(value);
} }
MasterCounters::MasterCounters(communication::rpc::Server &server)
: rpc_server_(server) {
rpc_server_.Register<CountersGetRpc>(
[this](const auto &req_reader, auto *res_builder) {
CountersGetRes res(Get(req_reader.getName()));
res.Save(res_builder);
});
rpc_server_.Register<CountersSetRpc>(
[this](const auto &req_reader, auto *res_builder) {
Set(req_reader.getName(), req_reader.getValue());
return std::make_unique<CountersSetRes>();
});
}
WorkerCounters::WorkerCounters(
communication::rpc::ClientPool &master_client_pool)
: master_client_pool_(master_client_pool) {}
int64_t WorkerCounters::Get(const std::string &name) {
auto response = master_client_pool_.Call<CountersGetRpc>(name);
CHECK(response) << "CountersGetRpc failed";
return response->value;
}
void WorkerCounters::Set(const std::string &name, int64_t value) {
auto response = master_client_pool_.Call<CountersSetRpc>(name, value);
CHECK(response) << "CountersSetRpc failed";
}
} // namespace database } // namespace database

View File

@ -4,8 +4,6 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp" #include "data_structures/concurrent/concurrent_map.hpp"
namespace database { namespace database {
@ -41,25 +39,4 @@ class SingleNodeCounters : public Counters {
ConcurrentMap<std::string, std::atomic<int64_t>> counters_; ConcurrentMap<std::string, std::atomic<int64_t>> counters_;
}; };
/** Implementation for distributed master. */
class MasterCounters : public SingleNodeCounters {
public:
explicit MasterCounters(communication::rpc::Server &server);
private:
communication::rpc::Server &rpc_server_;
};
/** Implementation for distributed worker. */
class WorkerCounters : public Counters {
public:
explicit WorkerCounters(communication::rpc::ClientPool &master_client_pool);
int64_t Get(const std::string &name) override;
void Set(const std::string &name, int64_t value) override;
private:
communication::rpc::ClientPool &master_client_pool_;
};
} // namespace database } // namespace database

View File

@ -1,23 +0,0 @@
#>cpp
#pragma once
#include <string>
#include "communication/rpc/messages.hpp"
#include "database/counters_rpc_messages.capnp.h"
cpp<#
(lcp:namespace database)
(lcp:capnp-namespace "database")
(lcp:define-rpc counters-get
(:request ((name "std::string")))
(:response ((value :int64_t))))
(lcp:define-rpc counters-set
(:request ((name "std::string")
(value :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; database

View File

@ -2,41 +2,14 @@
#include "glog/logging.h" #include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp" #include "database/graph_db.hpp"
#include "database/storage_gc_master.hpp" #include "database/graph_db_accessor.hpp"
#include "database/storage_gc_single_node.hpp" #include "database/storage_gc_single_node.hpp"
#include "database/storage_gc_worker.hpp"
#include "distributed/bfs_rpc_clients.hpp"
#include "distributed/bfs_rpc_server.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_worker.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_server.hpp"
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "distributed/durability_rpc_server.hpp"
#include "distributed/index_rpc_server.hpp"
#include "distributed/plan_consumer.hpp"
#include "distributed/plan_dispatcher.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/token_sharing_rpc_server.hpp"
#include "distributed/transactional_cache_cleaner.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "durability/paths.hpp" #include "durability/paths.hpp"
#include "durability/recovery.hpp" #include "durability/recovery.hpp"
#include "durability/snapshooter.hpp" #include "durability/snapshooter.hpp"
#include "storage/concurrent_id_mapper_master.hpp"
#include "storage/concurrent_id_mapper_single_node.hpp" #include "storage/concurrent_id_mapper_single_node.hpp"
#include "storage/concurrent_id_mapper_worker.hpp"
#include "transactions/engine_master.hpp"
#include "transactions/engine_single_node.hpp" #include "transactions/engine_single_node.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/file.hpp" #include "utils/file.hpp"
#include "utils/flag_validation.hpp" #include "utils/flag_validation.hpp"
@ -44,6 +17,7 @@ using namespace std::literals::chrono_literals;
using namespace storage; using namespace storage;
namespace database { namespace database {
namespace impl { namespace impl {
class PrivateBase : public GraphDb { class PrivateBase : public GraphDb {
@ -76,22 +50,6 @@ class PrivateBase : public GraphDb {
std::make_unique<Storage>(WorkerId(), config_.properties_on_disk); std::make_unique<Storage>(WorkerId(), config_.properties_on_disk);
} }
distributed::PullRpcClients &pull_clients() override {
LOG(FATAL) << "Remote pull clients only available in master.";
}
distributed::ProduceRpcServer &produce_server() override {
LOG(FATAL) << "Remote produce server only available in worker.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan consumer only available in distributed worker.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan dispatcher only available in distributed master.";
}
distributed::IndexRpcClients &index_rpc_clients() override {
LOG(FATAL) << "Index RPC clients only available in distributed master.";
}
protected: protected:
std::unique_ptr<Storage> storage_ = std::unique_ptr<Storage> storage_ =
std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk); std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk);
@ -128,7 +86,6 @@ struct TypemapPack {
class SingleNode : public PrivateBase { class SingleNode : public PrivateBase {
public: public:
explicit SingleNode(const Config &config) : PrivateBase(config) {} explicit SingleNode(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override { return GraphDb::Type::SINGLE_NODE; }
IMPL_GETTERS IMPL_GETTERS
tx::SingleNodeEngine tx_engine_{&wal_}; tx::SingleNodeEngine tx_engine_{&wal_};
@ -139,33 +96,6 @@ class SingleNode : public PrivateBase {
storage_->PropertiesOnDisk()}; storage_->PropertiesOnDisk()};
database::SingleNodeCounters counters_; database::SingleNodeCounters counters_;
std::vector<int> GetWorkerIds() const override { return {0}; } std::vector<int> GetWorkerIds() const override { return {0}; }
distributed::BfsRpcServer &bfs_subcursor_server() override {
LOG(FATAL) << "Subcursor server not available in single-node.";
}
distributed::BfsRpcClients &bfs_subcursor_clients() override {
LOG(FATAL) << "Subcursor clients not available in single-node.";
}
distributed::DataRpcServer &data_server() override {
LOG(FATAL) << "Remote data server not available in single-node.";
}
distributed::DataRpcClients &data_clients() override {
LOG(FATAL) << "Remote data clients not available in single-node.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan Dispatcher not available in single-node.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan Consumer not available in single-node.";
}
distributed::UpdatesRpcServer &updates_server() override {
LOG(FATAL) << "Remote updates server not available in single-node.";
}
distributed::UpdatesRpcClients &updates_clients() override {
LOG(FATAL) << "Remote updates clients not available in single-node.";
}
distributed::DataManager &data_manager() override {
LOG(FATAL) << "Remote data manager not available in single-node.";
}
void ReinitializeStorage() override { void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage // Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr; storage_gc_ = nullptr;
@ -175,170 +105,6 @@ class SingleNode : public PrivateBase {
} }
}; };
#define IMPL_DISTRIBUTED_GETTERS \
std::vector<int> GetWorkerIds() const override { \
return coordination_.GetWorkerIds(); \
} \
distributed::BfsRpcServer &bfs_subcursor_server() override { \
return bfs_subcursor_server_; \
} \
distributed::BfsRpcClients &bfs_subcursor_clients() override { \
return bfs_subcursor_clients_; \
} \
distributed::DataRpcServer &data_server() override { return data_server_; } \
distributed::DataRpcClients &data_clients() override { \
return data_clients_; \
} \
distributed::UpdatesRpcServer &updates_server() override { \
return updates_server_; \
} \
distributed::UpdatesRpcClients &updates_clients() override { \
return updates_clients_; \
} \
distributed::DataManager &data_manager() override { return data_manager_; }
class Master : public PrivateBase {
public:
explicit Master(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_MASTER;
}
// Makes a local snapshot and forces the workers to do the same. Snapshot is
// written here only if the workers successfully created their own snapshots.
bool MakeSnapshot(GraphDbAccessor &accessor) override {
auto workers_snapshot =
durability_rpc_clients_.MakeSnapshot(accessor.transaction_id());
if (!workers_snapshot.get()) return false;
// This could be further optimized by creating the master snapshot at the
// same time as the worker snapshots, but that would force us to delete the
// master snapshot if it succeeded while the workers somehow failed, because
// we assume that every snapshot that exists on the master with some tx_id
// visibility also exists on the workers.
return PrivateBase::MakeSnapshot(accessor);
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanDispatcher &plan_dispatcher() override {
return plan_dispatcher_;
}
distributed::PullRpcClients &pull_clients() override { return pull_clients_; }
distributed::IndexRpcClients &index_rpc_clients() override {
return index_rpc_clients_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
storage_gc_ = std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
}
communication::rpc::Server server_{
config_.master_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
tx::MasterEngine tx_engine_{server_, rpc_worker_clients_, &wal_};
distributed::MasterCoordination coordination_{server_.endpoint()};
std::unique_ptr<StorageGcMaster> storage_gc_ =
std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
TypemapPack<MasterConcurrentIdMapper> typemap_pack_{server_};
database::MasterCounters counters_{server_};
distributed::BfsSubcursorStorage subcursor_storage_{this};
distributed::BfsRpcServer bfs_subcursor_server_{this, &server_,
&subcursor_storage_};
distributed::BfsRpcClients bfs_subcursor_clients_{this, &subcursor_storage_,
&rpc_worker_clients_};
distributed::DurabilityRpcClients durability_rpc_clients_{
rpc_worker_clients_};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanDispatcher plan_dispatcher_{rpc_worker_clients_};
distributed::PullRpcClients pull_clients_{rpc_worker_clients_};
distributed::IndexRpcClients index_rpc_clients_{rpc_worker_clients_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::TransactionalCacheCleaner cache_cleaner_{
tx_engine_, updates_server_, data_manager_};
distributed::ClusterDiscoveryMaster cluster_discovery_{server_, coordination_,
rpc_worker_clients_};
distributed::TokenSharingRpcClients token_sharing_clients_{
&rpc_worker_clients_};
distributed::TokenSharingRpcServer token_sharing_server_{
this, config_.worker_id, &coordination_, &server_,
&token_sharing_clients_};
};
class Worker : public PrivateBase {
public:
explicit Worker(const Config &config) : PrivateBase(config) {
cluster_discovery_.RegisterWorker(config.worker_id);
}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_WORKER;
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanConsumer &plan_consumer() override { return plan_consumer_; }
distributed::ProduceRpcServer &produce_server() override {
return produce_server_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
storage_gc_ = std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
}
communication::rpc::Server server_{
config_.worker_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
distributed::WorkerCoordination coordination_{server_,
config_.master_endpoint};
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
tx::WorkerEngine tx_engine_{rpc_worker_clients_.GetClientPool(0)};
std::unique_ptr<StorageGcWorker> storage_gc_ =
std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
TypemapPack<WorkerConcurrentIdMapper> typemap_pack_{
rpc_worker_clients_.GetClientPool(0)};
database::WorkerCounters counters_{rpc_worker_clients_.GetClientPool(0)};
distributed::BfsSubcursorStorage subcursor_storage_{this};
distributed::BfsRpcServer bfs_subcursor_server_{this, &server_,
&subcursor_storage_};
distributed::BfsRpcClients bfs_subcursor_clients_{this, &subcursor_storage_,
&rpc_worker_clients_};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanConsumer plan_consumer_{server_};
distributed::ProduceRpcServer produce_server_{*this, tx_engine_, server_,
plan_consumer_};
distributed::IndexRpcServer index_rpc_server_{*this, server_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::WorkerTransactionalCacheCleaner cache_cleaner_{
tx_engine_, server_, produce_server_, updates_server_, data_manager_};
distributed::DurabilityRpcServer durability_rpc_server_{*this, server_};
distributed::ClusterDiscoveryWorker cluster_discovery_{
server_, coordination_, rpc_worker_clients_.GetClientPool(0)};
distributed::TokenSharingRpcClients token_sharing_clients_{
&rpc_worker_clients_};
distributed::TokenSharingRpcServer token_sharing_server_{
this, config_.worker_id, &coordination_, &server_,
&token_sharing_clients_};
};
#undef IMPL_GETTERS
PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl) PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
: impl_(std::move(impl)) { : impl_(std::move(impl)) {
if (impl_->config_.durability_enabled) if (impl_->config_.durability_enabled)
@ -346,61 +112,18 @@ PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
// Durability recovery. // Durability recovery.
{ {
auto db_type = impl_->type();
// What we should recover. // What we should recover.
std::experimental::optional<durability::RecoveryInfo> std::experimental::optional<durability::RecoveryInfo>
required_recovery_info; required_recovery_info;
if (db_type == Type::DISTRIBUTED_WORKER) {
required_recovery_info = dynamic_cast<impl::Worker *>(impl_.get())
->cluster_discovery_.recovery_info();
}
// What we recover. // What we recover.
std::experimental::optional<durability::RecoveryInfo> recovery_info; std::experimental::optional<durability::RecoveryInfo> recovery_info;
// Recover only if necessary. // Recover only if necessary.
if ((db_type != Type::DISTRIBUTED_WORKER && if (impl_->config_.db_recover_on_startup) {
impl_->config_.db_recover_on_startup) ||
(db_type == Type::DISTRIBUTED_WORKER && required_recovery_info)) {
recovery_info = durability::Recover(impl_->config_.durability_directory, recovery_info = durability::Recover(impl_->config_.durability_directory,
*impl_, required_recovery_info); *impl_, required_recovery_info);
} }
// Post-recovery setup and checking.
switch (db_type) {
case Type::DISTRIBUTED_MASTER:
dynamic_cast<impl::Master *>(impl_.get())
->coordination_.SetRecoveryInfo(recovery_info);
if (recovery_info) {
CHECK(impl_->config_.recovering_cluster_size > 0)
<< "Invalid cluster recovery size flag. Recovered cluster size "
"should be at least 1";
while (dynamic_cast<impl::Master *>(impl_.get())
->coordination_.CountRecoveredWorkers() !=
impl_->config_.recovering_cluster_size - 1) {
LOG(INFO) << "Waiting for workers to finish recovering..";
std::this_thread::sleep_for(2s);
}
}
// Start the dynamic graph partitioner inside token sharing server
if (impl_->config_.dynamic_graph_partitioner_enabled) {
dynamic_cast<impl::Master *>(impl_.get())
->token_sharing_server_.StartTokenSharing();
}
break;
case Type::DISTRIBUTED_WORKER:
if (required_recovery_info != recovery_info)
LOG(FATAL) << "Memgraph worker failed to recover the database state "
"recovered on the master";
dynamic_cast<impl::Worker *>(impl_.get())
->cluster_discovery_.NotifyWorkerRecovered();
break;
case Type::SINGLE_NODE:
break;
}
} }
if (impl_->config_.durability_enabled) { if (impl_->config_.durability_enabled) {
@ -434,14 +157,12 @@ PublicBase::~PublicBase() {
// If we are not a worker we can do a snapshot on exit if it's enabled. Doing // If we are not a worker we can do a snapshot on exit if it's enabled. Doing
// this on the master forces workers to do the same through rpcs // this on the master forces workers to do the same through rpcs
if (impl_->config_.snapshot_on_exit && if (impl_->config_.snapshot_on_exit) {
impl_->type() != Type::DISTRIBUTED_WORKER) {
GraphDbAccessor dba(*this); GraphDbAccessor dba(*this);
MakeSnapshot(dba); MakeSnapshot(dba);
} }
} }
GraphDb::Type PublicBase::type() const { return impl_->type(); }
Storage &PublicBase::storage() { return impl_->storage(); } Storage &PublicBase::storage() { return impl_->storage(); }
durability::WriteAheadLog &PublicBase::wal() { return impl_->wal(); } durability::WriteAheadLog &PublicBase::wal() { return impl_->wal(); }
tx::Engine &PublicBase::tx_engine() { return impl_->tx_engine(); } tx::Engine &PublicBase::tx_engine() { return impl_->tx_engine(); }
@ -460,42 +181,6 @@ int PublicBase::WorkerId() const { return impl_->WorkerId(); }
std::vector<int> PublicBase::GetWorkerIds() const { std::vector<int> PublicBase::GetWorkerIds() const {
return impl_->GetWorkerIds(); return impl_->GetWorkerIds();
} }
distributed::BfsRpcServer &PublicBase::bfs_subcursor_server() {
return impl_->bfs_subcursor_server();
}
distributed::BfsRpcClients &PublicBase::bfs_subcursor_clients() {
return impl_->bfs_subcursor_clients();
}
distributed::DataRpcServer &PublicBase::data_server() {
return impl_->data_server();
}
distributed::DataRpcClients &PublicBase::data_clients() {
return impl_->data_clients();
}
distributed::PlanDispatcher &PublicBase::plan_dispatcher() {
return impl_->plan_dispatcher();
}
distributed::IndexRpcClients &PublicBase::index_rpc_clients() {
return impl_->index_rpc_clients();
}
distributed::PlanConsumer &PublicBase::plan_consumer() {
return impl_->plan_consumer();
}
distributed::PullRpcClients &PublicBase::pull_clients() {
return impl_->pull_clients();
}
distributed::ProduceRpcServer &PublicBase::produce_server() {
return impl_->produce_server();
}
distributed::UpdatesRpcServer &PublicBase::updates_server() {
return impl_->updates_server();
}
distributed::UpdatesRpcClients &PublicBase::updates_clients() {
return impl_->updates_clients();
}
distributed::DataManager &PublicBase::data_manager() {
return impl_->data_manager();
}
bool PublicBase::MakeSnapshot(GraphDbAccessor &accessor) { bool PublicBase::MakeSnapshot(GraphDbAccessor &accessor) {
return impl_->MakeSnapshot(accessor); return impl_->MakeSnapshot(accessor);
@ -524,31 +209,4 @@ MasterBase::~MasterBase() { snapshot_creator_ = nullptr; }
SingleNode::SingleNode(Config config) SingleNode::SingleNode(Config config)
: MasterBase(std::make_unique<impl::SingleNode>(config)) {} : MasterBase(std::make_unique<impl::SingleNode>(config)) {}
Master::Master(Config config)
: MasterBase(std::make_unique<impl::Master>(config)) {}
io::network::Endpoint Master::endpoint() const {
return dynamic_cast<impl::Master *>(impl_.get())->server_.endpoint();
}
io::network::Endpoint Master::GetEndpoint(int worker_id) {
return dynamic_cast<impl::Master *>(impl_.get())
->coordination_.GetEndpoint(worker_id);
}
Worker::Worker(Config config)
: PublicBase(std::make_unique<impl::Worker>(config)) {}
io::network::Endpoint Worker::endpoint() const {
return dynamic_cast<impl::Worker *>(impl_.get())->server_.endpoint();
}
io::network::Endpoint Worker::GetEndpoint(int worker_id) {
return dynamic_cast<impl::Worker *>(impl_.get())
->coordination_.GetEndpoint(worker_id);
}
void Worker::WaitForShutdown() {
dynamic_cast<impl::Worker *>(impl_.get())->coordination_.WaitForShutdown();
}
} // namespace database } // namespace database

View File

@ -14,21 +14,6 @@
#include "transactions/engine.hpp" #include "transactions/engine.hpp"
#include "utils/scheduler.hpp" #include "utils/scheduler.hpp"
namespace distributed {
class BfsRpcServer;
class BfsRpcClients;
class DataRpcServer;
class DataRpcClients;
class PlanDispatcher;
class PlanConsumer;
class PullRpcClients;
class ProduceRpcServer;
class UpdatesRpcServer;
class UpdatesRpcClients;
class DataManager;
class IndexRpcClients;
} // namespace distributed
namespace database { namespace database {
/// Database configuration. Initialized from flags, but modifiable. /// Database configuration. Initialized from flags, but modifiable.
@ -84,12 +69,9 @@ struct Config {
*/ */
class GraphDb { class GraphDb {
public: public:
enum class Type { SINGLE_NODE, DISTRIBUTED_MASTER, DISTRIBUTED_WORKER };
GraphDb() {} GraphDb() {}
virtual ~GraphDb() {} virtual ~GraphDb() {}
virtual Type type() const = 0;
virtual Storage &storage() = 0; virtual Storage &storage() = 0;
virtual durability::WriteAheadLog &wal() = 0; virtual durability::WriteAheadLog &wal() = 0;
virtual tx::Engine &tx_engine() = 0; virtual tx::Engine &tx_engine() = 0;
@ -102,25 +84,6 @@ class GraphDb {
virtual int WorkerId() const = 0; virtual int WorkerId() const = 0;
virtual std::vector<int> GetWorkerIds() const = 0; virtual std::vector<int> GetWorkerIds() const = 0;
// Supported only in distributed master and worker, not in single-node.
virtual distributed::BfsRpcServer &bfs_subcursor_server() = 0;
virtual distributed::BfsRpcClients &bfs_subcursor_clients() = 0;
virtual distributed::DataRpcServer &data_server() = 0;
virtual distributed::DataRpcClients &data_clients() = 0;
virtual distributed::UpdatesRpcServer &updates_server() = 0;
virtual distributed::UpdatesRpcClients &updates_clients() = 0;
virtual distributed::DataManager &data_manager() = 0;
// Supported only in distributed master.
virtual distributed::PullRpcClients &pull_clients() = 0;
virtual distributed::PlanDispatcher &plan_dispatcher() = 0;
virtual distributed::IndexRpcClients &index_rpc_clients() = 0;
// Supported only in distributed worker.
// TODO remove once end2end testing is possible.
virtual distributed::ProduceRpcServer &produce_server() = 0;
virtual distributed::PlanConsumer &plan_consumer() = 0;
// Makes a snapshot from the visibility of the given accessor // Makes a snapshot from the visibility of the given accessor
virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0; virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0;
@ -146,7 +109,6 @@ class PrivateBase;
// initialization and cleanup. // initialization and cleanup.
class PublicBase : public GraphDb { class PublicBase : public GraphDb {
public: public:
Type type() const override;
Storage &storage() override; Storage &storage() override;
durability::WriteAheadLog &wal() override; durability::WriteAheadLog &wal() override;
tx::Engine &tx_engine() override; tx::Engine &tx_engine() override;
@ -157,18 +119,6 @@ class PublicBase : public GraphDb {
void CollectGarbage() override; void CollectGarbage() override;
int WorkerId() const override; int WorkerId() const override;
std::vector<int> GetWorkerIds() const override; std::vector<int> GetWorkerIds() const override;
distributed::BfsRpcServer &bfs_subcursor_server() override;
distributed::BfsRpcClients &bfs_subcursor_clients() override;
distributed::DataRpcServer &data_server() override;
distributed::DataRpcClients &data_clients() override;
distributed::PlanDispatcher &plan_dispatcher() override;
distributed::IndexRpcClients &index_rpc_clients() override;
distributed::PlanConsumer &plan_consumer() override;
distributed::PullRpcClients &pull_clients() override;
distributed::ProduceRpcServer &produce_server() override;
distributed::UpdatesRpcServer &updates_server() override;
distributed::UpdatesRpcClients &updates_clients() override;
distributed::DataManager &data_manager() override;
bool is_accepting_transactions() const { return is_accepting_transactions_; } bool is_accepting_transactions() const { return is_accepting_transactions_; }
bool MakeSnapshot(GraphDbAccessor &accessor) override; bool MakeSnapshot(GraphDbAccessor &accessor) override;
@ -201,24 +151,4 @@ class SingleNode : public MasterBase {
explicit SingleNode(Config config = Config()); explicit SingleNode(Config config = Config());
}; };
class Master : public MasterBase {
public:
explicit Master(Config config = Config());
/** Gets this master's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
};
class Worker : public impl::PublicBase {
public:
explicit Worker(Config config = Config());
/** Gets this worker's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
void WaitForShutdown();
};
} // namespace database } // namespace database

View File

@ -4,9 +4,6 @@
#include "database/graph_db_accessor.hpp" #include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp" #include "database/state_delta.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "storage/address_types.hpp" #include "storage/address_types.hpp"
#include "storage/edge.hpp" #include "storage/edge.hpp"
#include "storage/edge_accessor.hpp" #include "storage/edge_accessor.hpp"
@ -92,26 +89,6 @@ VertexAccessor GraphDbAccessor::InsertVertex(
return va; return va;
} }
VertexAccessor GraphDbAccessor::InsertVertexIntoRemote(
int worker_id, const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
CHECK(worker_id != db().WorkerId())
<< "Not allowed to call InsertVertexIntoRemote for local worker";
gid::Gid gid = db().updates_clients().CreateVertex(
worker_id, transaction_id(), labels, properties);
auto vertex = std::make_unique<Vertex>();
vertex->labels_ = labels;
for (auto &kv : properties) vertex->properties_.set(kv.first, kv.second);
db().data_manager()
.Elements<Vertex>(transaction_id())
.emplace(gid, nullptr, std::move(vertex));
return VertexAccessor({gid, worker_id}, *this);
}
std::experimental::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional( std::experimental::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
gid::Gid gid, bool current_state) { gid::Gid gid, bool current_state) {
VertexAccessor record_accessor(db_.storage().LocalAddress<Vertex>(gid), VertexAccessor record_accessor(db_.storage().LocalAddress<Vertex>(gid),
@ -144,8 +121,6 @@ EdgeAccessor GraphDbAccessor::FindEdge(gid::Gid gid, bool current_state) {
void GraphDbAccessor::BuildIndex(storage::Label label, void GraphDbAccessor::BuildIndex(storage::Label label,
storage::Property property) { storage::Property property) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
DCHECK(db_.type() != GraphDb::Type::DISTRIBUTED_WORKER)
<< "BuildIndex invoked on worker";
db_.storage().index_build_tx_in_progress_.access().insert(transaction_.id_); db_.storage().index_build_tx_in_progress_.access().insert(transaction_.id_);
@ -192,13 +167,6 @@ void GraphDbAccessor::BuildIndex(storage::Label label,
std::experimental::optional<std::vector<utils::Future<bool>>> std::experimental::optional<std::vector<utils::Future<bool>>>
index_rpc_completions; index_rpc_completions;
// Notify all workers to start building an index if we are the master since
// they don't have to wait anymore
if (db_.type() == GraphDb::Type::DISTRIBUTED_MASTER) {
index_rpc_completions.emplace(db_.index_rpc_clients().GetBuildIndexFutures(
label, property, transaction_id(), this->db_.WorkerId()));
}
// Add transaction to the build_tx_in_progress as this transaction doesn't // Add transaction to the build_tx_in_progress as this transaction doesn't
// change data and shouldn't block other parallel index creations // change data and shouldn't block other parallel index creations
auto read_transaction_id = dba.transaction().id_; auto read_transaction_id = dba.transaction().id_;
@ -352,14 +320,6 @@ bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
bool check_empty) { bool check_empty) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (!vertex_accessor.is_local()) {
auto address = vertex_accessor.address();
db().updates_clients().RemoveVertex(address.worker_id(), transaction_id(),
address.gid(), check_empty);
// We can't know if we are going to be able to remove vertex until deferred
// updates on a remote worker are executed
return true;
}
vertex_accessor.SwitchNew(); vertex_accessor.SwitchNew();
// it's possible the vertex was removed already in this transaction // it's possible the vertex was removed already in this transaction
// due to it getting matched multiple times by some patterns // due to it getting matched multiple times by some patterns
@ -402,59 +362,33 @@ EdgeAccessor GraphDbAccessor::InsertEdge(
storage::EdgeAddress edge_address; storage::EdgeAddress edge_address;
Vertex *from_updated; Vertex *from_updated;
if (from.is_local()) {
auto edge_accessor =
InsertOnlyEdge(from.address(), to.address(), edge_type, requested_gid);
edge_address = edge_accessor.address(),
from.SwitchNew(); auto edge_accessor =
from_updated = &from.update(); InsertOnlyEdge(from.address(), to.address(), edge_type, requested_gid);
edge_address = edge_accessor.address(),
// TODO when preparing WAL for distributed, most likely never use from.SwitchNew();
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion, from_updated = &from.update();
// in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, edge_accessor.gid(), from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
} else { // TODO when preparing WAL for distributed, most likely never use
edge_address = db().updates_clients().CreateEdge(transaction_id(), from, to, // `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
edge_type); // in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, edge_accessor.gid(), from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
from_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(from.gid());
// Create an Edge and insert it into the Cache so we see it locally.
db().data_manager()
.Elements<Edge>(transaction_id())
.emplace(
edge_address.gid(), nullptr,
std::make_unique<Edge>(from.address(), to.address(), edge_type));
}
from_updated->out_.emplace( from_updated->out_.emplace(
db_.storage().LocalizedAddressIfPossible(to.address()), edge_address, db_.storage().LocalizedAddressIfPossible(to.address()), edge_address,
edge_type); edge_type);
Vertex *to_updated; Vertex *to_updated;
if (to.is_local()) {
// ensure that the "to" accessor has the latest version (Switch new) // ensure that the "to" accessor has the latest version (Switch new)
// WARNING: must do that after the above "from.update()" for cases when // WARNING: must do that after the above "from.update()" for cases when
// we are creating a cycle and "from" and "to" are the same vlist // we are creating a cycle and "from" and "to" are the same vlist
to.SwitchNew(); to.SwitchNew();
to_updated = &to.update(); to_updated = &to.update();
} else {
// The RPC call for the `to` side is already handled if `from` is not local.
if (from.is_local() ||
from.address().worker_id() != to.address().worker_id()) {
db().updates_clients().AddInEdge(
transaction_id(), from,
db().storage().GlobalizedAddress(edge_address), to, edge_type);
}
to_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(to.gid());
}
to_updated->in_.emplace( to_updated->in_.emplace(
db_.storage().LocalizedAddressIfPossible(from.address()), edge_address, db_.storage().LocalizedAddressIfPossible(from.address()), edge_address,
edge_type); edge_type);
@ -492,35 +426,17 @@ int64_t GraphDbAccessor::EdgesCount() const {
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge, void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
bool remove_in_edge) { bool remove_in_edge) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (edge.is_local()) {
// it's possible the edge was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
edge.SwitchNew();
if (edge.current().is_expired_by(transaction_)) return;
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
edge.address().local()->remove(edge.current_, transaction_); // it's possible the edge was removed already in this transaction
wal().Emplace( // due to it getting matched multiple times by some patterns
database::StateDelta::RemoveEdge(transaction_.id_, edge.gid())); // we can only delete it once, so check if it's already deleted
} else { edge.SwitchNew();
auto edge_addr = edge.GlobalAddress(); if (edge.current().is_expired_by(transaction_)) return;
auto from_addr = db().storage().GlobalizedAddress(edge.from_addr()); if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
CHECK(edge_addr.worker_id() == from_addr.worker_id()) if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
<< "Edge and it's 'from' vertex not on the same worker";
auto to_addr = db().storage().GlobalizedAddress(edge.to_addr());
db().updates_clients().RemoveEdge(transaction_id(), edge_addr.worker_id(),
edge_addr.gid(), from_addr.gid(),
to_addr);
// Another RPC is necessary only if the first did not handle vertices on edge.address().local()->remove(edge.current_, transaction_);
// both sides. wal().Emplace(database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
if (edge_addr.worker_id() != to_addr.worker_id()) {
db().updates_clients().RemoveInEdge(transaction_id(), to_addr.worker_id(),
to_addr.gid(), edge_addr);
}
}
} }
storage::Label GraphDbAccessor::Label(const std::string &label_name) { storage::Label GraphDbAccessor::Label(const std::string &label_name) {

View File

@ -9,7 +9,6 @@
#include "glog/logging.h" #include "glog/logging.h"
#include "database/graph_db.hpp" #include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "query/typed_value.hpp" #include "query/typed_value.hpp"
#include "storage/address_types.hpp" #include "storage/address_types.hpp"
#include "storage/edge_accessor.hpp" #include "storage/edge_accessor.hpp"
@ -78,13 +77,6 @@ class GraphDbAccessor {
VertexAccessor InsertVertex(std::experimental::optional<gid::Gid> VertexAccessor InsertVertex(std::experimental::optional<gid::Gid>
requested_gid = std::experimental::nullopt); requested_gid = std::experimental::nullopt);
/** Creates a new Vertex on the given worker. It is NOT allowed to call this
* function with this worker's id. */
VertexAccessor InsertVertexIntoRemote(
int worker_id, const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/** /**
* Removes the vertex of the given accessor. If the vertex has any outgoing or * Removes the vertex of the given accessor. If the vertex has any outgoing or
* incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to * incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to

View File

@ -3,22 +3,17 @@
#include "communication/bolt/v1/decoder/decoder.hpp" #include "communication/bolt/v1/decoder/decoder.hpp"
#include "communication/bolt/v1/encoder/primitive_encoder.hpp" #include "communication/bolt/v1/encoder/primitive_encoder.hpp"
#include "database/state_delta.capnp.h"
#include "durability/hashed_file_reader.hpp" #include "durability/hashed_file_reader.hpp"
#include "durability/hashed_file_writer.hpp" #include "durability/hashed_file_writer.hpp"
#include "storage/address_types.hpp" #include "storage/address_types.hpp"
#include "storage/gid.hpp" #include "storage/gid.hpp"
#include "storage/property_value.hpp" #include "storage/property_value.hpp"
#include "utils/serialization.hpp"
cpp<# cpp<#
(lcp:namespace database) (lcp:namespace database)
(lcp:capnp-namespace "database") (lcp:capnp-namespace "database")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64") (lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
(lcp:capnp-type-conversion "gid::Gid" "UInt64") (lcp:capnp-type-conversion "gid::Gid" "UInt64")
(lcp:capnp-type-conversion "storage::Label" "Storage.Common") (lcp:capnp-type-conversion "storage::Label" "Storage.Common")
@ -108,7 +103,7 @@ in StateDeltas.")
"Defines StateDelta type. For each type the comment indicates which values "Defines StateDelta type. For each type the comment indicates which values
need to be stored. All deltas have the transaction_id member, so that's need to be stored. All deltas have the transaction_id member, so that's
omitted in the comment.") omitted in the comment.")
(:serialize :capnp)) (:serialize))
#>cpp #>cpp
StateDelta() = default; StateDelta() = default;
StateDelta(const enum Type &type, tx::TransactionId tx_id) StateDelta(const enum Type &type, tx::TransactionId tx_id)
@ -174,6 +169,6 @@ omitted in the comment.")
/// Applies CRUD delta to database accessor. Fails on other types of deltas /// Applies CRUD delta to database accessor. Fails on other types of deltas
void Apply(GraphDbAccessor &dba) const; void Apply(GraphDbAccessor &dba) const;
cpp<#) cpp<#)
(:serialize :capnp)) (:serialize))
(lcp:pop-namespace) ;; database (lcp:pop-namespace) ;; database

View File

@ -6,7 +6,6 @@
#include "data_structures/concurrent/concurrent_map.hpp" #include "data_structures/concurrent/concurrent_map.hpp"
#include "database/storage.hpp" #include "database/storage.hpp"
#include "mvcc/version_list.hpp" #include "mvcc/version_list.hpp"
#include "stats/metrics.hpp"
#include "storage/deferred_deleter.hpp" #include "storage/deferred_deleter.hpp"
#include "storage/edge.hpp" #include "storage/edge.hpp"
#include "storage/garbage_collector.hpp" #include "storage/garbage_collector.hpp"

View File

@ -1,68 +0,0 @@
#pragma once
#include <mutex>
#include "database/storage_gc.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
namespace database {
class StorageGcMaster : public StorageGc {
public:
using StorageGc::StorageGc;
StorageGcMaster(Storage &storage, tx::Engine &tx_engine, int pause_sec,
communication::rpc::Server &rpc_server,
distributed::MasterCoordination &coordination)
: StorageGc(storage, tx_engine, pause_sec),
rpc_server_(rpc_server),
coordination_(coordination) {
rpc_server_.Register<distributed::RanLocalGcRpc>(
[this](const auto &req_reader, auto *res_builder) {
distributed::RanLocalGcReq req;
req.Load(req_reader);
std::unique_lock<std::mutex> lock(worker_safe_transaction_mutex_);
worker_safe_transaction_[req.worker_id] = req.local_oldest_active;
});
}
~StorageGcMaster() {
// We have to stop the scheduler before destroying this class because
// otherwise a task might try to use methods of this class, which would cause
// a pure virtual method call since they are not implemented in the base class.
scheduler_.Stop();
rpc_server_.UnRegister<distributed::RanLocalGcRpc>();
}
void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
// Workers report when it is safe, from their perspective, to delete every
// transaction older than oldest_active: no transaction with an id greater
// than or equal to oldest_active will appear in the future and query the
// commit log about the state of the transactions we are deleting.
auto safe_transaction = GetClogSafeTransaction(oldest_active);
if (safe_transaction) {
tx::TransactionId min_safe = *safe_transaction;
{
std::unique_lock<std::mutex> lock(worker_safe_transaction_mutex_);
for (auto worker_id : coordination_.GetWorkerIds()) {
// Skip itself
if (worker_id == 0) continue;
min_safe = std::min(min_safe, worker_safe_transaction_[worker_id]);
}
}
// All workers reported back at least once
if (min_safe > 0) {
tx_engine_.GarbageCollectCommitLog(min_safe);
LOG(INFO) << "Clearing master commit log with tx: " << min_safe;
}
}
}
communication::rpc::Server &rpc_server_;
distributed::MasterCoordination &coordination_;
// Mapping of worker ids and oldest active transaction which is safe for
// deletion from worker perspective
std::unordered_map<int, tx::TransactionId> worker_safe_transaction_;
std::mutex worker_safe_transaction_mutex_;
};
} // namespace database
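The garbage-collection rule that CollectCommitLogGarbage implements above can be summarized in a standalone sketch. None of the names below come from the removed file: SafeForCommitLogGc, the literal transaction ids and the main() driver are illustrative only. The sketch assumes the map holds one entry per worker, with the initial value 0 meaning "this worker has not reported yet", matching the convention used above.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <unordered_map>

using TransactionId = std::uint64_t;

// The master may only clear commit-log entries older than the smallest
// "safe" transaction id reported by the workers, and only once every
// worker has reported at least once.
TransactionId SafeForCommitLogGc(
    TransactionId master_safe,
    const std::unordered_map<int, TransactionId> &worker_safe) {
  TransactionId min_safe = master_safe;
  for (const auto &entry : worker_safe) {
    if (entry.first == 0) continue;  // skip the master itself
    min_safe = std::min(min_safe, entry.second);
  }
  // A result of 0 means some worker's entry is still at its initial value,
  // i.e. it has not reported yet, so nothing may be cleared.
  return min_safe;
}

int main() {
  std::unordered_map<int, TransactionId> reports{{1, 17}, {2, 25}};
  // Prints 17: worker 1 still needs transactions with id >= 17 resolvable.
  std::cout << SafeForCommitLogGc(30, reports) << std::endl;
  return 0;
}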

View File

@ -1,46 +0,0 @@
#pragma once
#include "communication/rpc/client_pool.hpp"
#include "database/storage_gc.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
#include "transactions/engine_worker.hpp"
#include "transactions/transaction.hpp"
namespace database {
class StorageGcWorker : public StorageGc {
public:
StorageGcWorker(Storage &storage, tx::Engine &tx_engine, int pause_sec,
communication::rpc::ClientPool &master_client_pool,
int worker_id)
: StorageGc(storage, tx_engine, pause_sec),
master_client_pool_(master_client_pool),
worker_id_(worker_id) {}
~StorageGcWorker() {
// We have to stop the scheduler before destroying this class because
// otherwise a task might try to use methods of this class, which would cause
// a pure virtual method call since they are not implemented in the base class.
scheduler_.Stop();
}
void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
// We first need to delete transactions that we can delete to be sure that
// the locks are released as well. Otherwise some new transaction might
// try to acquire a lock which hasn't been released (if the transaction
// cache cleaner was not scheduled at this time), and take a look into the
// commit log which no longer contains that transaction id.
dynamic_cast<tx::WorkerEngine &>(tx_engine_)
.ClearTransactionalCache(oldest_active);
auto safe_to_delete = GetClogSafeTransaction(oldest_active);
if (safe_to_delete) {
master_client_pool_.Call<distributed::RanLocalGcRpc>(*safe_to_delete,
worker_id_);
tx_engine_.GarbageCollectCommitLog(*safe_to_delete);
}
}
communication::rpc::ClientPool &master_client_pool_;
int worker_id_;
};
} // namespace database

View File

@ -1,178 +0,0 @@
#include "distributed/bfs_rpc_messages.hpp"
#include "distributed/data_manager.hpp"
#include "bfs_rpc_clients.hpp"
namespace distributed {
BfsRpcClients::BfsRpcClients(
database::GraphDb *db, distributed::BfsSubcursorStorage *subcursor_storage,
distributed::RpcWorkerClients *clients)
: db_(db), subcursor_storage_(subcursor_storage), clients_(clients) {}
std::unordered_map<int16_t, int64_t> BfsRpcClients::CreateBfsSubcursors(
tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
const std::vector<storage::EdgeType> &edge_types,
query::GraphView graph_view) {
auto futures = clients_->ExecuteOnWorkers<std::pair<int16_t, int64_t>>(
db_->WorkerId(),
[tx_id, direction, &edge_types, graph_view](int worker_id, auto &client) {
auto res = client.template Call<CreateBfsSubcursorRpc>(
tx_id, direction, edge_types, graph_view);
CHECK(res) << "CreateBfsSubcursor RPC failed!";
return std::make_pair(worker_id, res->member);
});
std::unordered_map<int16_t, int64_t> subcursor_ids;
subcursor_ids.emplace(
db_->WorkerId(),
subcursor_storage_->Create(tx_id, direction, edge_types, graph_view));
for (auto &future : futures) {
auto got = subcursor_ids.emplace(future.get());
CHECK(got.second) << "CreateBfsSubcursors failed: duplicate worker id";
}
return subcursor_ids;
}
void BfsRpcClients::RegisterSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
auto futures = clients_->ExecuteOnWorkers<void>(
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
auto res = client.template Call<RegisterSubcursorsRpc>(subcursor_ids);
CHECK(res) << "RegisterSubcursors RPC failed!";
});
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
->RegisterSubcursors(subcursor_ids);
}
void BfsRpcClients::RemoveBfsSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
auto futures = clients_->ExecuteOnWorkers<void>(
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
auto res = client.template Call<RemoveBfsSubcursorRpc>(
subcursor_ids.at(worker_id));
CHECK(res) << "RemoveBfsSubcursor RPC failed!";
});
subcursor_storage_->Erase(subcursor_ids.at(db_->WorkerId()));
}
std::experimental::optional<VertexAccessor> BfsRpcClients::Pull(
int16_t worker_id, int64_t subcursor_id, database::GraphDbAccessor *dba) {
if (worker_id == db_->WorkerId()) {
return subcursor_storage_->Get(subcursor_id)->Pull();
}
auto res =
clients_->GetClientPool(worker_id).Call<SubcursorPullRpc>(subcursor_id);
CHECK(res) << "SubcursorPull RPC failed!";
if (!res->vertex) return std::experimental::nullopt;
db_->data_manager()
.Elements<Vertex>(dba->transaction_id())
.emplace(res->vertex->global_address.gid(),
std::move(res->vertex->old_element_output),
std::move(res->vertex->new_element_output));
return VertexAccessor(res->vertex->global_address, *dba);
}
bool BfsRpcClients::ExpandLevel(
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
auto futures = clients_->ExecuteOnWorkers<bool>(
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
auto res =
client.template Call<ExpandLevelRpc>(subcursor_ids.at(worker_id));
CHECK(res) << "ExpandLevel RPC failed!";
return res->member;
});
bool expanded =
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))->ExpandLevel();
for (auto &future : futures) {
expanded |= future.get();
}
return expanded;
}
void BfsRpcClients::SetSource(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress source_address) {
CHECK(source_address.is_remote())
<< "SetSource should be called with global address";
int worker_id = source_address.worker_id();
if (worker_id == db_->WorkerId()) {
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
->SetSource(source_address);
} else {
auto res = clients_->GetClientPool(worker_id).Call<SetSourceRpc>(
subcursor_ids.at(worker_id), source_address);
CHECK(res) << "SetSourceRpc failed!";
}
}
bool BfsRpcClients::ExpandToRemoteVertex(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
EdgeAccessor edge, VertexAccessor vertex) {
CHECK(!vertex.is_local())
<< "ExpandToRemoteVertex should not be called with local vertex";
int worker_id = vertex.address().worker_id();
auto res = clients_->GetClientPool(worker_id).Call<ExpandToRemoteVertexRpc>(
subcursor_ids.at(worker_id), edge.GlobalAddress(),
vertex.GlobalAddress());
CHECK(res) << "ExpandToRemoteVertex RPC failed!";
return res->member;
}
PathSegment BuildPathSegment(ReconstructPathRes *res,
database::GraphDbAccessor *dba) {
std::vector<EdgeAccessor> edges;
for (auto &edge : res->edges) {
dba->db()
.data_manager()
.Elements<Edge>(dba->transaction_id())
.emplace(edge.global_address.gid(), std::move(edge.old_element_output),
std::move(edge.new_element_output));
edges.emplace_back(edge.global_address, *dba);
}
return PathSegment{edges, res->next_vertex, res->next_edge};
}
PathSegment BfsRpcClients::ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress vertex, database::GraphDbAccessor *dba) {
int worker_id = vertex.worker_id();
if (worker_id == db_->WorkerId()) {
return subcursor_storage_->Get(subcursor_ids.at(worker_id))
->ReconstructPath(vertex);
}
auto res = clients_->GetClientPool(worker_id).Call<ReconstructPathRpc>(
subcursor_ids.at(worker_id), vertex);
return BuildPathSegment(&res.value(), dba);
}
PathSegment BfsRpcClients::ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::EdgeAddress edge, database::GraphDbAccessor *dba) {
int worker_id = edge.worker_id();
if (worker_id == db_->WorkerId()) {
return subcursor_storage_->Get(subcursor_ids.at(worker_id))
->ReconstructPath(edge);
}
auto res = clients_->GetClientPool(worker_id).Call<ReconstructPathRpc>(
subcursor_ids.at(worker_id), edge);
return BuildPathSegment(&res.value(), dba);
}
void BfsRpcClients::PrepareForExpand(
const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear) {
auto res = clients_->ExecuteOnWorkers<void>(
db_->WorkerId(), [clear, &subcursor_ids](int worker_id, auto &client) {
auto res = client.template Call<PrepareForExpandRpc>(
subcursor_ids.at(worker_id), clear);
CHECK(res) << "PrepareForExpand RPC failed!";
});
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
->PrepareForExpand(clear);
}
} // namespace distributed

View File

@ -1,62 +0,0 @@
/// @file
#pragma once
#include "distributed/bfs_subcursor.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "transactions/transaction.hpp"
namespace distributed {
/// Along with `BfsRpcServer`, this class is used to expose `BfsSubcursor`
/// interface over the network so that subcursors can communicate during the
/// traversal. It is just a thin wrapper making RPC calls that also takes
/// care for storing remote data into cache upon receival. Special care is taken
/// to avoid sending local RPCs. Instead, subcursor storage is accessed
/// directly.
class BfsRpcClients {
public:
BfsRpcClients(database::GraphDb *db,
distributed::BfsSubcursorStorage *subcursor_storage,
distributed::RpcWorkerClients *clients);
std::unordered_map<int16_t, int64_t> CreateBfsSubcursors(
tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
const std::vector<storage::EdgeType> &edge_types,
query::GraphView graph_view);
void RegisterSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
void RemoveBfsSubcursors(
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
std::experimental::optional<VertexAccessor> Pull(
int16_t worker_id, int64_t subcursor_id, database::GraphDbAccessor *dba);
bool ExpandLevel(const std::unordered_map<int16_t, int64_t> &subcursor_ids);
void SetSource(const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress source_address);
bool ExpandToRemoteVertex(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
EdgeAccessor edge, VertexAccessor vertex);
PathSegment ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::EdgeAddress edge, database::GraphDbAccessor *dba);
PathSegment ReconstructPath(
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
storage::VertexAddress vertex, database::GraphDbAccessor *dba);
void PrepareForExpand(
const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear);
private:
database::GraphDb *db_;
distributed::BfsSubcursorStorage *subcursor_storage_;
distributed::RpcWorkerClients *clients_;
};
} // namespace distributed
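To show how the calls declared above fit together, here is a condensed, hypothetical driver for a single distributed BFS traversal. It is not the real query operator (that logic lives in the BFS expand cursor and also handles filtering and result construction); it assumes the removed headers are still available and that the accessor, the source vertex and the BfsRpcClients instance are already wired up.

void RunDistributedBfs(database::GraphDbAccessor *dba, VertexAccessor source,
                       distributed::BfsRpcClients *clients) {
  auto subcursors = clients->CreateBfsSubcursors(
      dba->transaction_id(), query::EdgeAtom::Direction::BOTH,
      /* edge_types */ {}, query::GraphView::OLD);
  clients->RegisterSubcursors(subcursors);
  clients->PrepareForExpand(subcursors, /* clear */ true);
  clients->SetSource(subcursors, source.GlobalAddress());
  while (true) {
    // Drain the vertices discovered by the previous expansion on each worker.
    for (const auto &kv : subcursors) {
      while (auto vertex = clients->Pull(kv.first, kv.second, dba)) {
        // ... use *vertex (filtering, result construction, ...) ...
      }
    }
    // Swap the freshly discovered frontier in and expand it once more;
    // stop when no subcursor managed to reach a new vertex.
    clients->PrepareForExpand(subcursors, /* clear */ false);
    if (!clients->ExpandLevel(subcursors)) break;
  }
  clients->RemoveBfsSubcursors(subcursors);
}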

View File

@ -1,280 +0,0 @@
#>cpp
#pragma once
#include <tuple>
#include "communication/rpc/messages.hpp"
#include "distributed/bfs_rpc_messages.capnp.h"
#include "distributed/bfs_subcursor.hpp"
#include "query/plan/operator.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'ast "/query/frontend/ast/ast.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'query "/query/common.capnp")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "storage::EdgeAddress" "Storage.Address")
(lcp:capnp-type-conversion "storage::VertexAddress" "Storage.Address")
(defun save-element (builder member)
#>cpp
if (${member}) {
if constexpr (std::is_same<TElement, Vertex>::value) {
auto builder = ${builder}.initVertex();
SaveVertex(*${member}, &builder, worker_id);
} else {
auto builder = ${builder}.initEdge();
SaveEdge(*${member}, &builder, worker_id);
}
} else {
${builder}.setNull();
}
cpp<#)
(defun load-element (reader member)
(let ((output-member (cl-ppcre:regex-replace "input$" member "output")))
#>cpp
if (!${reader}.isNull()) {
if constexpr (std::is_same<TElement, Vertex>::value) {
const auto reader = ${reader}.getVertex();
${output-member} = LoadVertex(reader);
} else {
const auto reader = ${reader}.getEdge();
${output-member} = LoadEdge(reader);
}
}
cpp<#))
(lcp:define-struct (serialized-graph-element t-element) ()
((global-address "storage::Address<mvcc::VersionList<TElement>>"
:capnp-type "Storage.Address")
(old-element-input "TElement *"
:save-fun
"if (old_element_input) {
ar << true;
SaveElement(ar, *old_element_input, worker_id);
} else {
ar << false;
}"
:load-fun ""
:capnp-type '((null "Void") (vertex "Dis.Vertex") (edge "Dis.Edge"))
:capnp-save #'save-element :capnp-load #'load-element)
(old-element-output "std::unique_ptr<TElement>"
:save-fun ""
:load-fun
"bool has_old;
ar >> has_old;
if (has_old) {
if constexpr (std::is_same<TElement, Vertex>::value) {
old_element_output = std::move(LoadVertex(ar));
} else {
old_element_output = std::move(LoadEdge(ar));
}
}"
:capnp-save :dont-save)
(new-element-input "TElement *"
:save-fun
"if (new_element_input) {
ar << true;
SaveElement(ar, *new_element_input, worker_id);
} else {
ar << false;
}"
:load-fun ""
:capnp-type '((null "Void") (vertex "Dis.Vertex") (edge "Dis.Edge"))
:capnp-save #'save-element :capnp-load #'load-element)
(new-element-output "std::unique_ptr<TElement>"
:save-fun ""
:load-fun
"bool has_new;
ar >> has_new;
if (has_new) {
if constexpr (std::is_same<TElement, Vertex>::value) {
new_element_output = std::move(LoadVertex(ar));
} else {
new_element_output = std::move(LoadEdge(ar));
}
}"
:capnp-save :dont-save)
(worker-id :int16_t :save-fun "" :load-fun "" :capnp-save :dont-save))
(:public
#>cpp
SerializedGraphElement(storage::Address<mvcc::VersionList<TElement>> global_address,
TElement *old_element_input, TElement *new_element_input,
int16_t worker_id)
: global_address(global_address),
old_element_input(old_element_input),
old_element_output(nullptr),
new_element_input(new_element_input),
new_element_output(nullptr),
worker_id(worker_id) {
CHECK(global_address.is_remote())
<< "Only global addresses should be used with SerializedGraphElement";
}
SerializedGraphElement(const RecordAccessor<TElement> &accessor)
: SerializedGraphElement(accessor.GlobalAddress(), accessor.GetOld(),
accessor.GetNew(),
accessor.db_accessor().db().WorkerId()) {}
SerializedGraphElement() {}
cpp<#)
(:serialize :capnp :type-args '(vertex edge)))
#>cpp
using SerializedVertex = SerializedGraphElement<Vertex>;
using SerializedEdge = SerializedGraphElement<Edge>;
cpp<#
(lcp:define-rpc create-bfs-subcursor
(:request
((tx-id "tx::TransactionId" :capnp-type "UInt64")
(direction "query::EdgeAtom::Direction"
:capnp-type "Ast.EdgeAtom.Direction" :capnp-init nil
:capnp-save (lcp:capnp-save-enum "::query::capnp::EdgeAtom::Direction"
"query::EdgeAtom::Direction"
'(in out both))
:capnp-load (lcp:capnp-load-enum "::query::capnp::EdgeAtom::Direction"
"query::EdgeAtom::Direction"
'(in out both)))
;; TODO(mtomic): Why isn't edge-types serialized?
(edge-types "std::vector<storage::EdgeType>"
:save-fun "" :load-fun "" :capnp-save :dont-save)
(graph-view "query::GraphView"
:capnp-type "Query.GraphView" :capnp-init nil
:capnp-save (lcp:capnp-save-enum "::query::capnp::GraphView"
"query::GraphView"
'(old new))
:capnp-load (lcp:capnp-load-enum "::query::capnp::GraphView"
"query::GraphView"
'(old new)))))
(:response ((member :int64_t))))
(lcp:define-rpc register-subcursors
(:request ((subcursor-ids "std::unordered_map<int16_t, int64_t>"
:capnp-type "Utils.Map(Utils.BoxInt16, Utils.BoxInt64)"
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<utils::capnp::BoxInt16, utils::capnp::BoxInt64>(
${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
key_builder.setValue(entry.first);
auto value_builder = builder->initValue();
value_builder.setValue(entry.second);
});
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<utils::capnp::BoxInt16, utils::capnp::BoxInt64>(
&${member}, ${reader},
[](const auto &reader) {
int16_t key = reader.getKey().getValue();
int64_t value = reader.getValue().getValue();
return std::make_pair(key, value);
});
cpp<#))))
(:response ()))
(lcp:define-rpc remove-bfs-subcursor
(:request ((member :int64_t)))
(:response ()))
(lcp:define-rpc expand-level
(:request ((member :int64_t)))
(:response ((member :bool))))
(lcp:define-rpc subcursor-pull
(:request ((member :int64_t)))
(:response ((vertex "std::experimental::optional<SerializedVertex>" :initarg :move
:capnp-type "Utils.Optional(SerializedGraphElement)"
:capnp-save (lcp:capnp-save-optional "capnp::SerializedGraphElement" "SerializedVertex")
:capnp-load (lcp:capnp-load-optional "capnp::SerializedGraphElement" "SerializedVertex")))))
(lcp:define-rpc set-source
(:request
((subcursor-id :int64_t)
(source "storage::VertexAddress")))
(:response ()))
(lcp:define-rpc expand-to-remote-vertex
(:request
((subcursor-id :int64_t)
(edge "storage::EdgeAddress")
(vertex "storage::VertexAddress")))
(:response ((member :bool))))
(lcp:define-rpc reconstruct-path
(:request
((subcursor-id :int64_t)
(vertex "std::experimental::optional<storage::VertexAddress>"
:capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::VertexAddress")
:capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::VertexAddress"))
(edge "std::experimental::optional<storage::EdgeAddress>"
:capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::EdgeAddress")
:capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::EdgeAddress")))
(:public
#>cpp
using Capnp = capnp::ReconstructPathReq;
static const communication::rpc::MessageType TypeInfo;
ReconstructPathReq() {}
ReconstructPathReq(int64_t subcursor_id, storage::VertexAddress vertex)
: subcursor_id(subcursor_id),
vertex(vertex),
edge(std::experimental::nullopt) {}
ReconstructPathReq(int64_t subcursor_id, storage::EdgeAddress edge)
: subcursor_id(subcursor_id),
vertex(std::experimental::nullopt),
edge(edge) {}
cpp<#))
(:response
((subcursor-id :int64_t ;; TODO(mtomic): Unused?
:save-fun "" :load-fun "" :capnp-save :dont-save)
(edges "std::vector<SerializedEdge>" :capnp-type "List(SerializedGraphElement)"
:capnp-save (lcp:capnp-save-vector "capnp::SerializedGraphElement" "SerializedEdge")
:capnp-load (lcp:capnp-load-vector "capnp::SerializedGraphElement" "SerializedEdge"))
(next-vertex "std::experimental::optional<storage::VertexAddress>"
:capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::VertexAddress")
:capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::VertexAddress"))
(next-edge "std::experimental::optional<storage::EdgeAddress>"
:capnp-save (lcp:capnp-save-optional "storage::capnp::Address" "storage::EdgeAddress")
:capnp-load (lcp:capnp-load-optional "storage::capnp::Address" "storage::EdgeAddress")))
(:public
#>cpp
using Capnp = capnp::ReconstructPathRes;
static const communication::rpc::MessageType TypeInfo;
ReconstructPathRes() {}
ReconstructPathRes(
const std::vector<EdgeAccessor> &edge_accessors,
std::experimental::optional<storage::VertexAddress> next_vertex,
std::experimental::optional<storage::EdgeAddress> next_edge)
: next_vertex(std::move(next_vertex)), next_edge(std::move(next_edge)) {
CHECK(!static_cast<bool>(next_vertex) || !static_cast<bool>(next_edge))
<< "At most one of `next_vertex` and `next_edge` should be set";
for (const auto &edge : edge_accessors) {
edges.emplace_back(edge);
}
}
cpp<#)))
(lcp:define-rpc prepare-for-expand
(:request
((subcursor-id :int64_t)
(clear :bool)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,126 +0,0 @@
/// @file
#pragma once
#include <map>
#include "communication/rpc/server.hpp"
#include "distributed/bfs_rpc_messages.hpp"
#include "distributed/bfs_subcursor.hpp"
namespace distributed {
/// Along with `BfsRpcClients`, this class is used to expose `BfsSubcursor`
/// interface over the network so that subcursors can communicate during the
/// traversal. It is just a thin wrapper forwarding RPC calls to subcursors in
/// subcursor storage.
class BfsRpcServer {
public:
BfsRpcServer(database::GraphDb *db, communication::rpc::Server *server,
BfsSubcursorStorage *subcursor_storage)
: db_(db), server_(server), subcursor_storage_(subcursor_storage) {
server_->Register<CreateBfsSubcursorRpc>(
[this](const auto &req_reader, auto *res_builder) {
CreateBfsSubcursorReq req;
req.Load(req_reader);
CreateBfsSubcursorRes res(subcursor_storage_->Create(
req.tx_id, req.direction, req.edge_types, req.graph_view));
res.Save(res_builder);
});
server_->Register<RegisterSubcursorsRpc>(
[this](const auto &req_reader, auto *res_builder) {
RegisterSubcursorsReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_ids.at(db_->WorkerId()))
->RegisterSubcursors(req.subcursor_ids);
RegisterSubcursorsRes res;
res.Save(res_builder);
});
server_->Register<RemoveBfsSubcursorRpc>(
[this](const auto &req_reader, auto *res_builder) {
RemoveBfsSubcursorReq req;
req.Load(req_reader);
subcursor_storage_->Erase(req.member);
RemoveBfsSubcursorRes res;
res.Save(res_builder);
});
server_->Register<SetSourceRpc>(
[this](const auto &req_reader, auto *res_builder) {
SetSourceReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_id)->SetSource(req.source);
SetSourceRes res;
res.Save(res_builder);
});
server_->Register<ExpandLevelRpc>([this](const auto &req_reader,
auto *res_builder) {
ExpandLevelReq req;
req.Load(req_reader);
ExpandLevelRes res(subcursor_storage_->Get(req.member)->ExpandLevel());
res.Save(res_builder);
});
server_->Register<SubcursorPullRpc>(
[this](const auto &req_reader, auto *res_builder) {
SubcursorPullReq req;
req.Load(req_reader);
auto vertex = subcursor_storage_->Get(req.member)->Pull();
if (!vertex) {
SubcursorPullRes res;
res.Save(res_builder);
return;
}
SubcursorPullRes res(*vertex);
res.Save(res_builder);
});
server_->Register<ExpandToRemoteVertexRpc>(
[this](const auto &req_reader, auto *res_builder) {
ExpandToRemoteVertexReq req;
req.Load(req_reader);
ExpandToRemoteVertexRes res(
subcursor_storage_->Get(req.subcursor_id)
->ExpandToLocalVertex(req.edge, req.vertex));
res.Save(res_builder);
});
server_->Register<ReconstructPathRpc>([this](const auto &req_reader,
auto *res_builder) {
ReconstructPathReq req;
req.Load(req_reader);
auto subcursor = subcursor_storage_->Get(req.subcursor_id);
PathSegment result;
if (req.vertex) {
result = subcursor->ReconstructPath(*req.vertex);
} else if (req.edge) {
result = subcursor->ReconstructPath(*req.edge);
} else {
LOG(FATAL) << "`edge` or `vertex` should be set in ReconstructPathReq";
}
ReconstructPathRes res(result.edges, result.next_vertex,
result.next_edge);
res.Save(res_builder);
});
server_->Register<PrepareForExpandRpc>([this](const auto &req_reader,
auto *res_builder) {
PrepareForExpandReq req;
req.Load(req_reader);
subcursor_storage_->Get(req.subcursor_id)->PrepareForExpand(req.clear);
PrepareForExpandRes res;
res.Save(res_builder);
});
}
private:
database::GraphDb *db_;
communication::rpc::Server *server_;
BfsSubcursorStorage *subcursor_storage_;
};
} // namespace distributed

View File

@ -1,196 +0,0 @@
#include <unordered_map>
#include "distributed/bfs_rpc_clients.hpp"
#include "query/plan/operator.hpp"
#include "storage/address_types.hpp"
#include "storage/vertex_accessor.hpp"
#include "bfs_subcursor.hpp"
namespace distributed {
using query::TypedValue;
ExpandBfsSubcursor::ExpandBfsSubcursor(
database::GraphDb *db, tx::TransactionId tx_id,
query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types, query::GraphView graph_view)
: dba_(*db, tx_id),
direction_(direction),
edge_types_(std::move(edge_types)),
graph_view_(graph_view) {
Reset();
}
void ExpandBfsSubcursor::Reset() {
pull_index_ = 0;
processed_.clear();
to_visit_current_.clear();
to_visit_next_.clear();
}
void ExpandBfsSubcursor::SetSource(storage::VertexAddress source_address) {
Reset();
auto source = VertexAccessor(source_address, dba_);
SwitchAccessor(source, graph_view_);
processed_.emplace(source, std::experimental::nullopt);
ExpandFromVertex(source);
}
void ExpandBfsSubcursor::PrepareForExpand(bool clear) {
if (clear) {
Reset();
} else {
std::swap(to_visit_current_, to_visit_next_);
to_visit_next_.clear();
}
}
bool ExpandBfsSubcursor::ExpandLevel() {
bool expanded = false;
for (const auto &expansion : to_visit_current_) {
expanded |= ExpandFromVertex(expansion.second);
}
pull_index_ = 0;
return expanded;
}
std::experimental::optional<VertexAccessor> ExpandBfsSubcursor::Pull() {
return pull_index_ < to_visit_next_.size()
? std::experimental::make_optional(
to_visit_next_[pull_index_++].second)
: std::experimental::nullopt;
}
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
VertexAccessor vertex) {
CHECK(vertex.address().is_local())
<< "ExpandToLocalVertex called with remote vertex";
edge = dba_.db().storage().LocalizedAddressIfPossible(edge);
SwitchAccessor(vertex, graph_view_);
std::lock_guard<std::mutex> lock(mutex_);
auto got = processed_.emplace(vertex, edge);
if (got.second) {
to_visit_next_.emplace_back(edge, vertex);
}
return got.second;
}
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
storage::VertexAddress vertex) {
auto vertex_accessor = VertexAccessor(vertex, dba_);
  return ExpandToLocalVertex(edge, vertex_accessor);
}
PathSegment ExpandBfsSubcursor::ReconstructPath(
storage::EdgeAddress edge_address) {
EdgeAccessor edge(edge_address, dba_);
CHECK(edge.address().is_local()) << "ReconstructPath called with remote edge";
DCHECK(edge.from_addr().is_local()) << "`from` vertex should always be local";
DCHECK(!edge.to_addr().is_local()) << "`to` vertex should be remote when "
"calling ReconstructPath with edge";
PathSegment result;
result.edges.emplace_back(edge);
ReconstructPathHelper(edge.from(), &result);
return result;
}
PathSegment ExpandBfsSubcursor::ReconstructPath(
storage::VertexAddress vertex_addr) {
VertexAccessor vertex(vertex_addr, dba_);
CHECK(vertex.address().is_local())
<< "ReconstructPath called with remote vertex";
PathSegment result;
ReconstructPathHelper(vertex, &result);
return result;
}
void ExpandBfsSubcursor::ReconstructPathHelper(VertexAccessor vertex,
PathSegment *result) {
auto it = processed_.find(vertex);
CHECK(it != processed_.end())
<< "ReconstructPath called with unvisited vertex";
auto in_edge_address = it->second;
while (in_edge_address) {
// In-edge is stored on another worker. It should be returned to master from
// that worker, and path reconstruction should be continued there.
if (in_edge_address->is_remote()) {
result->next_edge = in_edge_address;
break;
}
result->edges.emplace_back(*in_edge_address, dba_);
auto &in_edge = result->edges.back();
auto next_vertex_address =
in_edge.from_is(vertex) ? in_edge.to_addr() : in_edge.from_addr();
// We own the in-edge, but the next vertex on the path is stored on another
// worker.
if (next_vertex_address.is_remote()) {
result->next_vertex = next_vertex_address;
break;
}
vertex = VertexAccessor(next_vertex_address, dba_);
in_edge_address = processed_[vertex];
}
}
bool ExpandBfsSubcursor::ExpandToVertex(EdgeAccessor edge,
VertexAccessor vertex) {
// TODO(mtomic): lambda filtering in distributed
return vertex.is_local()
? ExpandToLocalVertex(edge.address(), vertex)
: dba_.db().bfs_subcursor_clients().ExpandToRemoteVertex(
subcursor_ids_, edge, vertex);
}
bool ExpandBfsSubcursor::ExpandFromVertex(VertexAccessor vertex) {
bool expanded = false;
if (direction_ != query::EdgeAtom::Direction::IN) {
for (const EdgeAccessor &edge : vertex.out(&edge_types_))
expanded |= ExpandToVertex(edge, edge.to());
}
if (direction_ != query::EdgeAtom::Direction::OUT) {
for (const EdgeAccessor &edge : vertex.in(&edge_types_))
expanded |= ExpandToVertex(edge, edge.from());
}
return expanded;
}
BfsSubcursorStorage::BfsSubcursorStorage(database::GraphDb *db) : db_(db) {}
int64_t BfsSubcursorStorage::Create(tx::TransactionId tx_id,
query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types,
query::GraphView graph_view) {
std::lock_guard<std::mutex> lock(mutex_);
int64_t id = next_subcursor_id_++;
auto got = storage_.emplace(
id, std::make_unique<ExpandBfsSubcursor>(
db_, tx_id, direction, std::move(edge_types), graph_view));
CHECK(got.second) << "Subcursor with ID " << id << " already exists";
return id;
}
void BfsSubcursorStorage::Erase(int64_t subcursor_id) {
std::lock_guard<std::mutex> lock(mutex_);
auto removed = storage_.erase(subcursor_id);
CHECK(removed == 1) << "Subcursor with ID " << subcursor_id << " not found";
}
ExpandBfsSubcursor *BfsSubcursorStorage::Get(int64_t subcursor_id) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = storage_.find(subcursor_id);
CHECK(it != storage_.end())
<< "Subcursor with ID " << subcursor_id << " not found";
return it->second.get();
}
} // namespace distributed

View File

@ -1,141 +0,0 @@
/// @file
#pragma once
#include <map>
#include <memory>
#include <unordered_map>
#include "glog/logging.h"
#include "query/plan/operator.hpp"
namespace database {
class GraphDb;
}
namespace distributed {
/// Path from BFS source to a vertex might span multiple workers. This struct
/// stores information describing the segment of a path stored on a worker and
/// information necessary to continue path reconstruction on another worker.
struct PathSegment {
std::vector<EdgeAccessor> edges;
std::experimental::optional<storage::VertexAddress> next_vertex;
std::experimental::optional<storage::EdgeAddress> next_edge;
};
/// Class storing the worker-local state of distributed BFS traversal. For each
/// traversal (uniquely identified by cursor id), there is one instance of this
/// class per worker, and those instances communicate via RPC calls.
class ExpandBfsSubcursor {
public:
ExpandBfsSubcursor(database::GraphDb *db, tx::TransactionId tx_id,
query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types,
query::GraphView graph_view);
// Stores subcursor ids of other workers.
void RegisterSubcursors(std::unordered_map<int16_t, int64_t> subcursor_ids) {
subcursor_ids_ = std::move(subcursor_ids);
}
/// Sets the source to be used for new expansion.
void SetSource(storage::VertexAddress source_address);
/// Notifies the subcursor that a new expansion should take place.
/// `to_visit_next_` must be moved to `to_visit_current_` synchronously for
/// all subcursors participating in expansion to avoid race condition with
/// `ExpandToRemoteVertex` RPC requests. Also used before setting new source
/// with `clear` set to true, to avoid a race condition similar to one
/// described above.
///
/// @param clear if set to true, `Reset` will be called instead of moving
/// `to_visit_next_`
void PrepareForExpand(bool clear);
/// Expands the BFS frontier once. Returns true if there was a successful
/// expansion.
bool ExpandLevel();
/// Pulls the next vertex in the current BFS frontier, if there is one.
std::experimental::optional<VertexAccessor> Pull();
/// Expands to a local vertex, if it wasn't already visited. Returns true if
/// expansion was successful.
bool ExpandToLocalVertex(storage::EdgeAddress edge, VertexAccessor vertex);
bool ExpandToLocalVertex(storage::EdgeAddress edge,
storage::VertexAddress vertex);
/// Reconstruct the part of path ending with given edge, stored on this
/// worker.
PathSegment ReconstructPath(storage::EdgeAddress edge_address);
/// Reconstruct the part of path to given vertex stored on this worker.
PathSegment ReconstructPath(storage::VertexAddress vertex_addr);
private:
/// Used to reset subcursor state before starting expansion from new source.
void Reset();
/// Expands to a local or remote vertex, returns true if expansion was
/// successful.
bool ExpandToVertex(EdgeAccessor edge, VertexAccessor vertex);
/// Tries to expand to all vertices connected to the given one and returns true if
/// any of them was successful.
bool ExpandFromVertex(VertexAccessor vertex);
/// Helper for path reconstruction doing the actual work.
void ReconstructPathHelper(VertexAccessor vertex, PathSegment *result);
database::GraphDbAccessor dba_;
/// IDs of subcursors on other workers, used when sending RPCs.
std::unordered_map<int16_t, int64_t> subcursor_ids_;
query::EdgeAtom::Direction direction_;
std::vector<storage::EdgeType> edge_types_;
query::GraphView graph_view_;
/// Mutex protecting `to_visit_next_` and `processed_`, because there is a
/// race between expansions done locally using `ExpandToLocalVertex` and
/// incoming `ExpandToRemoteVertex` RPCs.
std::mutex mutex_;
/// List of visited vertices and their incoming edges. Local address is stored
/// for local edges, global address for remote edges.
std::unordered_map<VertexAccessor,
std::experimental::optional<storage::EdgeAddress>>
processed_;
/// List of vertices at the current expansion level.
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>>
to_visit_current_;
/// List of unvisited vertices reachable from current expansion level.
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>> to_visit_next_;
/// Index of the vertex from `to_visit_next_` to return on next pull.
size_t pull_index_;
};
/// Thread-safe storage for BFS subcursors.
class BfsSubcursorStorage {
public:
explicit BfsSubcursorStorage(database::GraphDb *db);
int64_t Create(tx::TransactionId tx_id, query::EdgeAtom::Direction direction,
std::vector<storage::EdgeType> edge_types,
query::GraphView graph_view);
void Erase(int64_t subcursor_id);
ExpandBfsSubcursor *Get(int64_t subcursor_id);
private:
database::GraphDb *db_;
std::mutex mutex_;
std::map<int64_t, std::unique_ptr<ExpandBfsSubcursor>> storage_;
int64_t next_subcursor_id_{0};
};
} // namespace distributed
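A PathSegment covers only the part of a path owned by one worker; the full path is recovered by repeatedly following next_vertex / next_edge to whichever worker owns the continuation. A rough illustration of that stitching loop follows, assuming a BfsRpcClients instance, the subcursor id map from the traversal and the removed headers above; the function name and the ordering comment are not from the removed code.

// Gather all edges of the path ending at `target`, hopping from worker to
// worker while the returned segment points at a continuation.
std::vector<EdgeAccessor> StitchPath(
    distributed::BfsRpcClients *clients,
    const std::unordered_map<int16_t, int64_t> &subcursor_ids,
    storage::VertexAddress target, database::GraphDbAccessor *dba) {
  std::vector<EdgeAccessor> path;
  auto segment = clients->ReconstructPath(subcursor_ids, target, dba);
  while (true) {
    path.insert(path.end(), segment.edges.begin(), segment.edges.end());
    if (segment.next_vertex) {
      segment = clients->ReconstructPath(subcursor_ids, *segment.next_vertex, dba);
    } else if (segment.next_edge) {
      segment = clients->ReconstructPath(subcursor_ids, *segment.next_edge, dba);
    } else {
      break;  // this segment ended at the BFS source
    }
  }
  return path;  // edges ordered from `target` back towards the source
}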

View File

@ -1,99 +0,0 @@
#include "glog/logging.h"
#include "database/storage.hpp"
#include "distributed/cache.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
template <typename TRecord>
TRecord *Cache<TRecord>::FindNew(gid::Gid gid) {
std::lock_guard<std::mutex> guard{lock_};
auto found = cache_.find(gid);
DCHECK(found != cache_.end())
<< "FindNew for uninitialized remote Vertex/Edge";
auto &pair = found->second;
if (!pair.second) {
pair.second = std::unique_ptr<TRecord>(pair.first->CloneData());
}
return pair.second.get();
}
template <typename TRecord>
void Cache<TRecord>::FindSetOldNew(tx::TransactionId tx_id, int worker_id,
gid::Gid gid, TRecord *&old_record,
TRecord *&new_record) {
{
std::lock_guard<std::mutex> guard(lock_);
auto found = cache_.find(gid);
if (found != cache_.end()) {
old_record = found->second.first.get();
new_record = found->second.second.get();
return;
}
}
auto remote = data_clients_.RemoteElement<TRecord>(worker_id, tx_id, gid);
LocalizeAddresses(*remote);
// This logic is a bit strange: we need to make sure that nobody else got a
// response and updated the cache before we did, which requires the lock, but
// we also need to return whatever ended up in the cache - otherwise we could
// get inconsistent results for remote FindSetOldNew.
std::lock_guard<std::mutex> guard(lock_);
auto it_pair = cache_.emplace(
gid, std::make_pair<rec_uptr, rec_uptr>(std::move(remote), nullptr));
old_record = it_pair.first->second.first.get();
new_record = it_pair.first->second.second.get();
}
template <typename TRecord>
void Cache<TRecord>::emplace(gid::Gid gid, rec_uptr old_record,
rec_uptr new_record) {
if (old_record) LocalizeAddresses(*old_record);
if (new_record) LocalizeAddresses(*new_record);
std::lock_guard<std::mutex> guard{lock_};
// We can't replace existing data because some accessors might be using
// it.
// TODO - consider if it's necessary and OK to copy just the data content.
auto found = cache_.find(gid);
if (found != cache_.end())
return;
else
cache_[gid] = std::make_pair(std::move(old_record), std::move(new_record));
}
template <typename TRecord>
void Cache<TRecord>::ClearCache() {
std::lock_guard<std::mutex> guard{lock_};
cache_.clear();
}
template <>
void Cache<Vertex>::LocalizeAddresses(Vertex &vertex) {
auto localize_edges = [this](auto &edges) {
for (auto &element : edges) {
element.vertex = storage_.LocalizedAddressIfPossible(element.vertex);
element.edge = storage_.LocalizedAddressIfPossible(element.edge);
}
};
localize_edges(vertex.in_.storage());
localize_edges(vertex.out_.storage());
}
template <>
void Cache<Edge>::LocalizeAddresses(Edge &edge) {
edge.from_ = storage_.LocalizedAddressIfPossible(edge.from_);
edge.to_ = storage_.LocalizedAddressIfPossible(edge.to_);
}
template class Cache<Vertex>;
template class Cache<Edge>;
} // namespace distributed
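The comment inside FindSetOldNew above describes a check, fetch, re-check pattern: look up the cache under the lock, do the remote fetch without holding the lock, then insert under the lock again and return whatever copy ended up in the map. A stripped-down, standalone illustration of that pattern follows; Key, Record, RemoteCache and FetchRemotely are placeholders invented for the example, not Memgraph code.

#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>

using Key = std::uint64_t;
struct Record { std::string data; };

class RemoteCache {
 public:
  // Returns the cached record for `key`, fetching it at most once per key
  // even when several threads miss the cache at the same time.
  const Record *Find(Key key) {
    {
      std::lock_guard<std::mutex> guard(lock_);
      auto found = cache_.find(key);
      if (found != cache_.end()) return found->second.get();
    }
    // Fetch outside the lock: the remote call may take a while.
    auto fetched = FetchRemotely(key);
    std::lock_guard<std::mutex> guard(lock_);
    // Someone may have inserted the record meanwhile; emplace keeps the
    // existing entry in that case, so every caller sees a single copy.
    auto inserted = cache_.emplace(key, std::move(fetched));
    return inserted.first->second.get();
  }

 private:
  std::unique_ptr<Record> FetchRemotely(Key key) {
    // Placeholder for the RPC done by DataRpcClients::RemoteElement.
    return std::make_unique<Record>(Record{"record " + std::to_string(key)});
  }

  std::mutex lock_;
  std::unordered_map<Key, std::unique_ptr<Record>> cache_;
};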

View File

@ -1,62 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "distributed/data_rpc_clients.hpp"
#include "storage/gid.hpp"
namespace database {
class Storage;
}
namespace distributed {
/**
* Used for caching Vertices and Edges that are stored on another worker in a
* distributed system. Maps global IDs to (old, new) Vertex/Edge pointer
* pairs. It is possible that either "old" or "new" is a nullptr, but at
* least one must be non-null. The Cache is the owner of the TRecord
* objects it points to.
*
* @tparam TRecord - Edge or Vertex
*/
template <typename TRecord>
class Cache {
using rec_uptr = std::unique_ptr<TRecord>;
public:
Cache(database::Storage &storage, distributed::DataRpcClients &data_clients)
: storage_(storage), data_clients_(data_clients) {}
/// Returns the new data for the given ID. Creates it (as copy of old) if
/// necessary.
TRecord *FindNew(gid::Gid gid);
/// For the Vertex/Edge with the given global ID, looks for the data visible
/// from the given transaction's ID and command ID, and caches it. Sets the
/// given pointers to point to the fetched data. Analogue to
/// mvcc::VersionList::find_set_old_new.
void FindSetOldNew(tx::TransactionId tx_id, int worker_id, gid::Gid gid,
TRecord *&old_record, TRecord *&new_record);
/// Sets the given records as (new, old) data for the given gid.
void emplace(gid::Gid gid, rec_uptr old_record, rec_uptr new_record);
/// Removes all the data from the cache.
void ClearCache();
private:
database::Storage &storage_;
std::mutex lock_;
distributed::DataRpcClients &data_clients_;
// TODO it'd be better if we had VertexData and EdgeData in here, as opposed
// to Vertex and Edge.
std::unordered_map<gid::Gid, std::pair<rec_uptr, rec_uptr>> cache_;
// Localizes all the addresses in the record.
void LocalizeAddresses(TRecord &record);
};
} // namespace distributed
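A hypothetical call site, only to show the intended order of the two lookups declared above; the function name, the `owner_worker_id` parameter and the surrounding transaction are assumptions, not code from this file. FindSetOldNew materializes the remote record as a read-only "old" version, and FindNew is called later only if a writable "new" version is actually needed.

void ReadThenMaybeWrite(distributed::Cache<Vertex> &cache,
                        database::GraphDbAccessor &dba, int owner_worker_id,
                        gid::Gid gid) {
  Vertex *old_rec = nullptr;
  Vertex *new_rec = nullptr;  // stays null until a writable copy is needed
  cache.FindSetOldNew(dba.transaction_id(), owner_worker_id, gid, old_rec,
                      new_rec);
  // ... read through old_rec ...
  Vertex *writable = cache.FindNew(gid);  // clones the cached "old" record
  (void)writable;  // ... apply the update through `writable` ...
}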

View File

@ -1,42 +0,0 @@
#include "communication/rpc/client_pool.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
ClusterDiscoveryMaster::ClusterDiscoveryMaster(
Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients)
: server_(server),
coordination_(coordination),
rpc_worker_clients_(rpc_worker_clients) {
server_.Register<RegisterWorkerRpc>([this](const auto &req_reader,
auto *res_builder) {
RegisterWorkerReq req;
req.Load(req_reader);
bool registration_successful =
this->coordination_.RegisterWorker(req.desired_worker_id, req.endpoint);
if (registration_successful) {
rpc_worker_clients_.ExecuteOnWorkers<void>(
0, [req](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<ClusterDiscoveryRpc>(
req.desired_worker_id, req.endpoint);
CHECK(result) << "ClusterDiscoveryRpc failed";
});
}
RegisterWorkerRes res(registration_successful,
this->coordination_.RecoveryInfo(),
this->coordination_.GetWorkers());
res.Save(res_builder);
});
server_.Register<NotifyWorkerRecoveredRpc>(
[this](const auto &req_reader, auto *res_builder) {
this->coordination_.WorkerRecovered(req_reader.getMember());
});
}
} // namespace distributed

View File

@ -1,27 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/rpc_worker_clients.hpp"
namespace distributed {
using Server = communication::rpc::Server;
/** Handle cluster discovery on master.
*
* Cluster discovery on master handles worker registration and broadcasts new
* worker information to already registered workers, and already registered
* worker information to the new worker.
*/
class ClusterDiscoveryMaster final {
public:
ClusterDiscoveryMaster(Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients);
private:
Server &server_;
MasterCoordination &coordination_;
RpcWorkerClients &rpc_worker_clients_;
};
} // namespace distributed

View File

@ -1,41 +0,0 @@
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
ClusterDiscoveryWorker::ClusterDiscoveryWorker(
Server &server, WorkerCoordination &coordination,
communication::rpc::ClientPool &client_pool)
: server_(server), coordination_(coordination), client_pool_(client_pool) {
server_.Register<ClusterDiscoveryRpc>(
[this](const auto &req_reader, auto *res_builder) {
ClusterDiscoveryReq req;
req.Load(req_reader);
this->coordination_.RegisterWorker(req.worker_id, req.endpoint);
});
}
void ClusterDiscoveryWorker::RegisterWorker(int worker_id) {
auto result =
client_pool_.Call<RegisterWorkerRpc>(worker_id, server_.endpoint());
CHECK(result) << "RegisterWorkerRpc failed";
CHECK(result->registration_successful)
<< "Unable to assign requested ID (" << worker_id << ") to worker!";
worker_id_ = worker_id;
for (auto &kv : result->workers) {
coordination_.RegisterWorker(kv.first, kv.second);
}
recovery_info_ = result->recovery_info;
}
void ClusterDiscoveryWorker::NotifyWorkerRecovered() {
CHECK(worker_id_ >= 0)
      << "Worker id is not yet assigned, perform registration before "
         "notifying that the recovery finished";
auto result = client_pool_.Call<NotifyWorkerRecoveredRpc>(worker_id_);
CHECK(result) << "NotifyWorkerRecoveredRpc failed";
}
} // namespace distributed

View File

@ -1,50 +0,0 @@
#pragma once
#include <experimental/optional>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/server.hpp"
#include "distributed/coordination_worker.hpp"
#include "durability/recovery.hpp"
namespace distributed {
using Server = communication::rpc::Server;
using ClientPool = communication::rpc::ClientPool;
/** Handle cluster discovery on worker.
*
* Cluster discovery on worker handles worker registration by sending an RPC
* request to the master and processing the received RPC response, which
* carries information about the other workers.
*/
class ClusterDiscoveryWorker final {
public:
ClusterDiscoveryWorker(Server &server, WorkerCoordination &coordination,
ClientPool &client_pool);
/**
* Registers a worker with the master.
*
* @param worker_id - Desired ID. If master can't assign the desired worker
* id, worker will exit.
*/
void RegisterWorker(int worker_id);
/**
* Notifies the master that the worker finished recovering. Assumes that the
* worker was already registered with master.
*/
void NotifyWorkerRecovered();
/** Returns the recovery info. Valid only after registration. */
auto recovery_info() const { return recovery_info_; }
private:
int worker_id_{-1};
Server &server_;
WorkerCoordination &coordination_;
communication::rpc::ClientPool &client_pool_;
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed
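Putting the calls above together, a worker's startup roughly follows a register / recover / notify / wait sequence. A condensed, hypothetical sketch of that flow (the durability call is a placeholder and the actual wiring lives in the worker's database implementation, which this section does not show):

void StartWorker(int desired_worker_id,
                 distributed::ClusterDiscoveryWorker *discovery,
                 distributed::WorkerCoordination *coordination) {
  // 1. Register with the master; CHECK-fails if the desired id is taken.
  discovery->RegisterWorker(desired_worker_id);
  // 2. Recover durability files up to the point the master recovered;
  //    the info is available only after successful registration.
  auto recovery_info = discovery->recovery_info();
  if (recovery_info) {
    // RecoverFromDurability(*recovery_info);  // placeholder, not a real call
  }
  // 3. Tell the master that recovery is done.
  discovery->NotifyWorkerRecovered();
  // 4. Serve queries until the master sends StopWorkerRpc.
  coordination->WaitForShutdown();
}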

View File

@ -1,34 +0,0 @@
#include "glog/logging.h"
#include "distributed/coordination.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
Coordination::Coordination(const Endpoint &master_endpoint) {
// The master is always worker 0.
workers_.emplace(0, master_endpoint);
}
Endpoint Coordination::GetEndpoint(int worker_id) {
auto found = workers_.find(worker_id);
CHECK(found != workers_.end()) << "No endpoint registered for worker id: "
<< worker_id;
return found->second;
}
std::vector<int> Coordination::GetWorkerIds() const {
std::vector<int> worker_ids;
for (auto worker : workers_) worker_ids.push_back(worker.first);
return worker_ids;
}
void Coordination::AddWorker(int worker_id, Endpoint endpoint) {
workers_.emplace(worker_id, endpoint);
}
std::unordered_map<int, Endpoint> Coordination::GetWorkers() {
return workers_;
}
} // namespace distributed

View File

@ -1,36 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "io/network/endpoint.hpp"
namespace distributed {
/** Coordination base class. This class is not thread safe. */
class Coordination {
public:
explicit Coordination(const io::network::Endpoint &master_endpoint);
/** Gets the endpoint for the given worker ID from the master. */
io::network::Endpoint GetEndpoint(int worker_id);
/** Returns all worker ids; this includes the master id (0). */
std::vector<int> GetWorkerIds() const;
/** Gets the mapping of worker id to worker endpoint including master (worker
* id = 0).
*/
std::unordered_map<int, io::network::Endpoint> GetWorkers();
protected:
~Coordination() {}
/** Adds a worker to coordination. */
void AddWorker(int worker_id, io::network::Endpoint endpoint);
private:
std::unordered_map<int, io::network::Endpoint> workers_;
};
} // namespace distributed
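A small illustrative helper, not part of the removed file, showing how the accessors above are typically combined: iterate every registered worker but skip id 0, which is always the master.

#include <functional>

void ForEachWorkerEndpoint(
    distributed::Coordination &coordination,
    const std::function<void(int, const io::network::Endpoint &)> &visit) {
  for (int worker_id : coordination.GetWorkerIds()) {
    if (worker_id == 0) continue;  // worker 0 is always the master itself
    visit(worker_id, coordination.GetEndpoint(worker_id));
  }
}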

View File

@ -1,92 +0,0 @@
#include <chrono>
#include <thread>
#include "glog/logging.h"
#include "communication/rpc/client.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
#include "io/network/utils.hpp"
namespace distributed {
MasterCoordination::MasterCoordination(const Endpoint &master_endpoint)
: Coordination(master_endpoint) {}
bool MasterCoordination::RegisterWorker(int desired_worker_id,
Endpoint endpoint) {
// Workers can't register before the recovery phase on the master is done, to
// ensure the whole cluster is in a consistent state.
while (true) {
{
std::lock_guard<std::mutex> guard(lock_);
if (recovery_done_) break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
std::lock_guard<std::mutex> guard(lock_);
auto workers = GetWorkers();
// Check if the desired worker id already exists.
if (workers.find(desired_worker_id) != workers.end()) {
LOG(WARNING) << "Unable to assign requested ID (" << desired_worker_id
<< ") to worker at: " << endpoint;
// If the desired worker ID is already assigned, return false and don't add
// that worker to master coordination.
return false;
}
AddWorker(desired_worker_id, endpoint);
return true;
}
void MasterCoordination::WorkerRecovered(int worker_id) {
CHECK(recovered_workers_.insert(worker_id).second)
<< "Worker already notified about finishing recovery";
}
Endpoint MasterCoordination::GetEndpoint(int worker_id) {
std::lock_guard<std::mutex> guard(lock_);
return Coordination::GetEndpoint(worker_id);
}
MasterCoordination::~MasterCoordination() {
using namespace std::chrono_literals;
std::lock_guard<std::mutex> guard(lock_);
auto workers = GetWorkers();
for (const auto &kv : workers) {
// Skip master (self).
if (kv.first == 0) continue;
communication::rpc::Client client(kv.second);
auto result = client.Call<StopWorkerRpc>();
CHECK(result) << "StopWorkerRpc failed for worker: " << kv.first;
}
// Make sure all workers have died.
for (const auto &kv : workers) {
// Skip master (self).
if (kv.first == 0) continue;
while (io::network::CanEstablishConnection(kv.second))
std::this_thread::sleep_for(0.5s);
}
}
void MasterCoordination::SetRecoveryInfo(
std::experimental::optional<durability::RecoveryInfo> info) {
std::lock_guard<std::mutex> guard(lock_);
recovery_done_ = true;
recovery_info_ = info;
}
int MasterCoordination::CountRecoveredWorkers() const {
return recovered_workers_.size();
}
std::experimental::optional<durability::RecoveryInfo>
MasterCoordination::RecoveryInfo() const {
std::lock_guard<std::mutex> guard(lock_);
CHECK(recovery_done_) << "RecoveryInfo requested before it's available";
return recovery_info_;
}
} // namespace distributed

View File

@ -1,61 +0,0 @@
#pragma once
#include <experimental/optional>
#include <mutex>
#include <set>
#include <unordered_map>
#include "distributed/coordination.hpp"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
/** Handles worker registration, getting of other workers' endpoints and
* coordinated shutdown in a distributed memgraph. Master side. */
class MasterCoordination final : public Coordination {
public:
explicit MasterCoordination(const Endpoint &master_endpoint);
/** Shuts down all the workers and this master server. */
~MasterCoordination();
/** Registers a new worker with this master coordination.
*
* @param desired_worker_id - The ID the worker would like to have.
* @return True if the desired ID for the worker is available, or false
* if the desired ID is already taken.
*/
bool RegisterWorker(int desired_worker_id, Endpoint endpoint);
/*
* Worker `worker_id` has finished recovering; adds it to the set of
* recovered workers.
*/
void WorkerRecovered(int worker_id);
Endpoint GetEndpoint(int worker_id);
/// Sets the recovery info. nullopt indicates nothing was recovered.
void SetRecoveryInfo(
std::experimental::optional<durability::RecoveryInfo> info);
std::experimental::optional<durability::RecoveryInfo> RecoveryInfo() const;
int CountRecoveredWorkers() const;
private:
// Most master functions aren't thread-safe.
mutable std::mutex lock_;
/// Durability recovery info.
/// Indicates if the recovery phase is done.
bool recovery_done_{false};
/// Set of workers that successfully finished recovering
std::set<int> recovered_workers_;
/// If nullopt nothing was recovered.
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed
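Because RegisterWorker above spins until SetRecoveryInfo has been called, the master-side startup order matters. A condensed, hypothetical sketch of that order (the recovery call and the `expected_workers` count are placeholders, and <chrono>, <thread> and <experimental/optional> are assumed to be included):

void StartMaster(distributed::MasterCoordination *coordination,
                 int expected_workers) {
  // 1. Recover the master's own durability files (placeholder call).
  std::experimental::optional<durability::RecoveryInfo> info;
  // info = RecoverFromDurability();  // not a real call, for illustration only
  // 2. Publish the result; from this point RegisterWorker stops spinning and
  //    workers can join with a consistent view of what was recovered.
  coordination->SetRecoveryInfo(info);
  // 3. Optionally wait until every worker has reported that it recovered.
  while (coordination->CountRecoveredWorkers() < expected_workers) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }
}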

View File

@ -1,72 +0,0 @@
#>cpp
#pragma once
#include <experimental/optional>
#include <unordered_map>
#include "communication/rpc/messages.hpp"
#include "distributed/coordination_rpc_messages.capnp.h"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'dur "/durability/recovery.capnp")
(lcp:capnp-import 'io "/io/network/endpoint.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:define-rpc register-worker
(:request
((desired-worker-id :int16_t)
(endpoint "io::network::Endpoint" :capnp-type "Io.Endpoint")))
(:response
((registration-successful :bool)
(recovery-info "std::experimental::optional<durability::RecoveryInfo>"
:capnp-type "Utils.Optional(Dur.RecoveryInfo)"
:capnp-save (lcp:capnp-save-optional "durability::capnp::RecoveryInfo"
"durability::RecoveryInfo")
:capnp-load (lcp:capnp-load-optional "durability::capnp::RecoveryInfo"
"durability::RecoveryInfo"))
(workers "std::unordered_map<int, io::network::Endpoint>"
:capnp-type "Utils.Map(Utils.BoxInt16, Io.Endpoint)"
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<utils::capnp::BoxInt16, io::network::capnp::Endpoint>(${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
key_builder.setValue(entry.first);
auto value_builder = builder->initValue();
entry.second.Save(&value_builder);
});
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<utils::capnp::BoxInt16, io::network::capnp::Endpoint>(&${member}, ${reader},
[](const auto &reader) {
io::network::Endpoint value;
value.Load(reader.getValue());
return std::make_pair(reader.getKey().getValue(), value);
});
cpp<#)))))
(lcp:define-rpc cluster-discovery
(:request
((worker-id :int16_t)
(endpoint "io::network::Endpoint" :capnp-type "Io.Endpoint")))
(:response ()))
(lcp:define-rpc stop-worker
(:request ())
(:response ()))
(lcp:define-rpc notify-worker-recovered
(:request ((member :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed
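
The classes generated from this definition are used through the same ClientPool::Call<> pattern that appears in the other files of this diff. A hedged sketch of the worker-to-master registration call follows; RegisterWorkerRpc and the response field names are inferred from the LCP definition above and may differ slightly in the generated code.

#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination_rpc_messages.hpp"

// Hypothetical worker-side registration call against the definition above.
bool RegisterWithMaster(communication::rpc::ClientPool &master_pool,
                        int16_t desired_worker_id,
                        const io::network::Endpoint &my_endpoint) {
  auto res = master_pool.Call<distributed::RegisterWorkerRpc>(
      desired_worker_id, my_endpoint);
  if (!res) return false;  // the RPC itself failed
  // On success the response also carries `recovery_info` and the `workers`
  // map of already registered workers (see the fields above).
  return res->registration_successful;
}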

View File

@ -1,46 +0,0 @@
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "glog/logging.h"
#include "distributed/coordination_rpc_messages.hpp"
#include "distributed/coordination_worker.hpp"
namespace distributed {
using namespace std::literals::chrono_literals;
WorkerCoordination::WorkerCoordination(communication::rpc::Server &server,
const Endpoint &master_endpoint)
: Coordination(master_endpoint), server_(server) {}
void WorkerCoordination::RegisterWorker(int worker_id, Endpoint endpoint) {
std::lock_guard<std::mutex> guard(lock_);
AddWorker(worker_id, endpoint);
}
void WorkerCoordination::WaitForShutdown() {
using namespace std::chrono_literals;
std::mutex mutex;
std::condition_variable cv;
bool shutdown = false;
server_.Register<StopWorkerRpc>([&](const auto &req_reader, auto *res_builder) {
std::unique_lock<std::mutex> lk(mutex);
shutdown = true;
lk.unlock();
cv.notify_one();
});
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&shutdown] { return shutdown; });
}
io::network::Endpoint WorkerCoordination::GetEndpoint(int worker_id) {
std::lock_guard<std::mutex> guard(lock_);
return Coordination::GetEndpoint(worker_id);
}
} // namespace distributed

View File

@ -1,33 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "communication/rpc/server.hpp"
#include "distributed/coordination.hpp"
namespace distributed {
/** Handles worker registration, retrieval of other workers' endpoints and
 * coordinated shutdown in a distributed memgraph. Worker side. */
class WorkerCoordination final : public Coordination {
using Endpoint = io::network::Endpoint;
public:
WorkerCoordination(communication::rpc::Server &server,
const Endpoint &master_endpoint);
/** Registers the worker with the given endpoint. */
void RegisterWorker(int worker_id, Endpoint endpoint);
/** Starts listening for a remote shutdown command (issued by the master).
* Blocks the calling thread until that has finished. */
void WaitForShutdown();
Endpoint GetEndpoint(int worker_id);
private:
communication::rpc::Server &server_;
mutable std::mutex lock_;
};
} // namespace distributed
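
The worker-side counterpart above is driven in the same way; a small sketch, assuming an already constructed communication::rpc::Server and the same assumed Endpoint constructor:

#include "communication/rpc/server.hpp"
#include "distributed/coordination_worker.hpp"

// Hypothetical worker-side wiring of the class declared above.
void WorkerCoordinationSketch(communication::rpc::Server &server,
                              const io::network::Endpoint &master_endpoint) {
  distributed::WorkerCoordination coordination{server, master_endpoint};

  // Other workers' endpoints get added as the master announces them; here
  // worker 2 is registered manually (Endpoint ctor assumed).
  coordination.RegisterWorker(2, io::network::Endpoint{"127.0.0.1", 10002});

  // Block this thread until the master sends StopWorkerRpc.
  coordination.WaitForShutdown();
}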

View File

@ -1,54 +0,0 @@
#include "database/storage.hpp"
#include "distributed/data_manager.hpp"
namespace distributed {
template <typename TRecord>
Cache<TRecord> &DataManager::GetCache(CacheT<TRecord> &collection,
tx::TransactionId tx_id) {
auto access = collection.access();
auto found = access.find(tx_id);
if (found != access.end()) return found->second;
return access
.emplace(
tx_id, std::make_tuple(tx_id),
std::make_tuple(std::ref(db_.storage()), std::ref(data_clients_)))
.first->second;
}
template <>
Cache<Vertex> &DataManager::Elements<Vertex>(tx::TransactionId tx_id) {
return GetCache(vertices_caches_, tx_id);
}
template <>
Cache<Edge> &DataManager::Elements<Edge>(tx::TransactionId tx_id) {
return GetCache(edges_caches_, tx_id);
}
DataManager::DataManager(database::GraphDb &db,
distributed::DataRpcClients &data_clients)
: db_(db), data_clients_(data_clients) {}
void DataManager::ClearCacheForSingleTransaction(tx::TransactionId tx_id) {
Elements<Vertex>(tx_id).ClearCache();
Elements<Edge>(tx_id).ClearCache();
}
void DataManager::ClearTransactionalCache(tx::TransactionId oldest_active) {
auto vertex_access = vertices_caches_.access();
for (auto &kv : vertex_access) {
if (kv.first < oldest_active) {
vertex_access.remove(kv.first);
}
}
auto edge_access = edges_caches_.access();
for (auto &kv : edge_access) {
if (kv.first < oldest_active) {
edge_access.remove(kv.first);
}
}
}
} // namespace distributed

View File

@ -1,45 +0,0 @@
#pragma once
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "transactions/type.hpp"
class Vertex;
class Edge;
namespace distributed {
/// Handles remote data caches for edges and vertices, per transaction.
class DataManager {
template <typename TRecord>
using CacheT = ConcurrentMap<tx::TransactionId, Cache<TRecord>>;
// Helper, gets or inserts a data cache for the given transaction.
template <typename TRecord>
Cache<TRecord> &GetCache(CacheT<TRecord> &collection,
tx::TransactionId tx_id);
public:
DataManager(database::GraphDb &db, distributed::DataRpcClients &data_clients);
/// Gets or creates the remote vertex/edge cache for the given transaction.
template <typename TRecord>
Cache<TRecord> &Elements(tx::TransactionId tx_id);
/// Removes all the caches for a single transaction.
void ClearCacheForSingleTransaction(tx::TransactionId tx_id);
/// Clears the cache of local transactions that have expired. The signature of
/// this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
database::GraphDb &db_;
DataRpcClients &data_clients_;
CacheT<Vertex> vertices_caches_;
CacheT<Edge> edges_caches_;
};
} // namespace distributed
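
A short sketch of the per-transaction cache lifecycle implied by the interface above (only calls declared in this header are used; the Cache API itself lives elsewhere):

#include "distributed/data_manager.hpp"

// Hypothetical per-transaction cache handling on top of DataManager.
void CacheLifecycleSketch(distributed::DataManager &data_manager,
                          tx::TransactionId tx_id,
                          tx::TransactionId oldest_active) {
  // Get (or lazily create) the remote-vertex cache for this transaction.
  auto &vertex_cache = data_manager.Elements<Vertex>(tx_id);
  (void)vertex_cache;  // records fetched over RPC would be emplaced here

  // When the transaction's command advances, its caches must be dropped.
  data_manager.ClearCacheForSingleTransaction(tx_id);

  // Periodically, caches of transactions older than `oldest_active` expire.
  data_manager.ClearTransactionalCache(oldest_active);
}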

View File

@ -1,49 +0,0 @@
#include <unordered_map>
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_messages.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
template <>
std::unique_ptr<Edge> DataRpcClients::RemoteElement(int worker_id,
tx::TransactionId tx_id,
gid::Gid gid) {
auto response =
clients_.GetClientPool(worker_id).Call<EdgeRpc>(TxGidPair{tx_id, gid});
CHECK(response) << "EdgeRpc failed";
return std::move(response->edge_output);
}
template <>
std::unique_ptr<Vertex> DataRpcClients::RemoteElement(int worker_id,
tx::TransactionId tx_id,
gid::Gid gid) {
auto response =
clients_.GetClientPool(worker_id).Call<VertexRpc>(TxGidPair{tx_id, gid});
CHECK(response) << "VertexRpc failed";
return std::move(response->vertex_output);
}
std::unordered_map<int, int64_t> DataRpcClients::VertexCounts(
tx::TransactionId tx_id) {
auto future_results = clients_.ExecuteOnWorkers<std::pair<int, int64_t>>(
-1, [tx_id](int worker_id, communication::rpc::ClientPool &client_pool) {
auto response = client_pool.Call<VertexCountRpc>(tx_id);
CHECK(response) << "VertexCountRpc failed";
return std::make_pair(worker_id, response->member);
});
std::unordered_map<int, int64_t> results;
for (auto &result : future_results) {
auto result_pair = result.get();
int worker = result_pair.first;
int vertex_count = result_pair.second;
results[worker] = vertex_count;
}
return results;
}
} // namespace distributed

View File

@ -1,31 +0,0 @@
#pragma once
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides access to other workers' data.
class DataRpcClients {
public:
DataRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Returns a remote worker's record (vertex/edge) data for the given params.
/// That worker must own the vertex/edge for the given id, and that vertex/edge
/// must be visible in the given transaction.
template <typename TRecord>
std::unique_ptr<TRecord> RemoteElement(int worker_id, tx::TransactionId tx_id,
gid::Gid gid);
/// Returns a (worker_id, vertex_count) pair for each worker, i.e. the number
/// of vertices on that worker from the perspective of transaction `tx_id`.
std::unordered_map<int, int64_t> VertexCounts(tx::TransactionId tx_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
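
For example, the cluster-wide vertex count visible to a transaction can be summed from the per-worker counts returned above; a minimal sketch:

#include <cstdint>

#include "distributed/data_rpc_clients.hpp"

// Hypothetical helper summing per-worker vertex counts.
int64_t ClusterVertexCount(distributed::DataRpcClients &data_clients,
                           tx::TransactionId tx_id) {
  int64_t total = 0;
  // VertexCounts issues VertexCountRpc to every worker and collects results.
  for (const auto &kv : data_clients.VertexCounts(tx_id)) total += kv.second;
  return total;
}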

View File

@ -1,76 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/data_rpc_messages.capnp.h"
#include "distributed/serialization.hpp"
#include "storage/edge.hpp"
#include "storage/gid.hpp"
#include "storage/vertex.hpp"
#include "transactions/type.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-import 'dist "/distributed/serialization.capnp")
(lcp:define-struct tx-gid-pair ()
((tx-id "tx::TransactionId" :capnp-type "UInt64")
(gid "gid::Gid" :capnp-type "UInt64"))
(:serialize :capnp))
(lcp:define-rpc vertex
(:request ((member "TxGidPair")))
(:response
((vertex-input "const Vertex *"
:save-fun "SaveVertex(ar, *vertex_input, worker_id);" :load-fun ""
:capnp-type "Dist.Vertex"
:capnp-save
(lambda (builder member)
#>cpp
SaveVertex(*${member}, &${builder}, worker_id);
cpp<#)
:capnp-load
(lambda (reader member)
(declare (ignore member))
#>cpp
vertex_output = LoadVertex<const capnp::Vertex::Reader>(${reader});
cpp<#))
(worker-id :int64_t :save-fun "" :load-fun "" :capnp-save :dont-save)
(vertex-output "std::unique_ptr<Vertex>" :initarg nil
:save-fun "" :load-fun "vertex_output = LoadVertex(ar);"
:capnp-save :dont-save))))
(lcp:define-rpc edge
(:request ((member "TxGidPair")))
(:response
((edge-input "const Edge *"
:save-fun "SaveEdge(ar, *edge_input, worker_id);" :load-fun ""
:capnp-type "Dist.Edge"
:capnp-save
(lambda (builder member)
#>cpp
SaveEdge(*${member}, &${builder}, worker_id);
cpp<#)
:capnp-load
(lambda (reader member)
(declare (ignore member))
#>cpp
edge_output = LoadEdge<const capnp::Edge::Reader>(${reader});
cpp<#))
(worker-id :int64_t :save-fun "" :load-fun "" :capnp-save :dont-save)
(edge-output "std::unique_ptr<Edge>" :initarg nil
:save-fun "" :load-fun "edge_output = LoadEdge(ar);"
:capnp-save :dont-save))))
(lcp:define-rpc vertex-count
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ((member :int64_t))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,43 +0,0 @@
#include <memory>
#include "data_rpc_server.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/data_rpc_messages.hpp"
namespace distributed {
DataRpcServer::DataRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<VertexRpc>(
[this](const auto &req_reader, auto *res_builder) {
database::GraphDbAccessor dba(db_, req_reader.getMember().getTxId());
auto vertex = dba.FindVertex(req_reader.getMember().getGid(), false);
CHECK(vertex.GetOld())
<< "Old record must exist when sending vertex by RPC";
VertexRes response(vertex.GetOld(), db_.WorkerId());
response.Save(res_builder);
});
rpc_server_.Register<EdgeRpc>([this](const auto &req_reader,
auto *res_builder) {
database::GraphDbAccessor dba(db_, req_reader.getMember().getTxId());
auto edge = dba.FindEdge(req_reader.getMember().getGid(), false);
CHECK(edge.GetOld()) << "Old record must exist when sending edge by RPC";
EdgeRes response(edge.GetOld(), db_.WorkerId());
response.Save(res_builder);
});
rpc_server_.Register<VertexCountRpc>(
[this](const auto &req_reader, auto *res_builder) {
VertexCountReq req;
req.Load(req_reader);
database::GraphDbAccessor dba(db_, req.member);
int64_t size = 0;
for (auto vertex : dba.Vertices(false)) ++size;
VertexCountRes res(size);
res.Save(res_builder);
});
}
} // namespace distributed

View File

@ -1,17 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
namespace distributed {
/// Serves this worker's data to others.
class DataRpcServer {
public:
DataRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,25 +0,0 @@
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"
namespace distributed {
utils::Future<bool> DurabilityRpcClients::MakeSnapshot(tx::TransactionId tx) {
return utils::make_future(std::async(std::launch::async, [this, tx] {
auto futures = clients_.ExecuteOnWorkers<bool>(
0, [tx](int worker_id, communication::rpc::ClientPool &client_pool) {
auto res = client_pool.Call<MakeSnapshotRpc>(tx);
if (!res) return false;
return res->member;
});
bool created = true;
for (auto &future : futures) {
created &= future.get();
}
return created;
}));
}
} // namespace distributed

View File

@ -1,28 +0,0 @@
#pragma once
#include <future>
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides the ability to trigger snapshotting on other workers.
class DurabilityRpcClients {
public:
DurabilityRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
// Sends a snapshot request to all workers and returns a future which becomes
// true if all workers successfully completed their snapshot creation, false
// otherwise.
// @param tx - transaction from which to take db snapshot
utils::Future<bool> MakeSnapshot(tx::TransactionId tx);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
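
On the master this boils down to a single blocking call; a sketch assuming the snapshot transaction id is already known:

#include "distributed/durability_rpc_clients.hpp"

// Hypothetical distributed snapshot trigger built on MakeSnapshot above.
bool SnapshotCluster(distributed::DurabilityRpcClients &durability_clients,
                     tx::TransactionId snapshot_tx) {
  // MakeSnapshot fans MakeSnapshotRpc out to all workers asynchronously.
  auto snapshot_done = durability_clients.MakeSnapshot(snapshot_tx);
  // Block until every worker has answered; true only if all succeeded.
  return snapshot_done.get();
}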

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "distributed/durability_rpc_messages.capnp.h"
#include "transactions/transaction.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:define-rpc make-snapshot
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ((member :bool))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,20 +0,0 @@
#include "distributed/durability_rpc_server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/durability_rpc_messages.hpp"
namespace distributed {
DurabilityRpcServer::DurabilityRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<MakeSnapshotRpc>(
[this](const auto &req_reader, auto *res_builder) {
database::GraphDbAccessor dba(this->db_, req_reader.getMember());
MakeSnapshotRes res(this->db_.MakeSnapshot(dba));
res.Save(res_builder);
});
}
} // namespace distributed

View File

@ -1,21 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
namespace database {
class GraphDb;
};
namespace distributed {
class DurabilityRpcServer {
public:
DurabilityRpcServer(database::GraphDb &db,
communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,25 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "distributed/index_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:define-rpc build-index
(:request
((label "storage::Label" :capnp-type "Storage.Common")
(property "storage::Property" :capnp-type "Storage.Common")
(tx-id "tx::TransactionId" :capnp-type "UInt64")))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,33 +0,0 @@
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/index_rpc_server.hpp"
namespace distributed {
IndexRpcServer::IndexRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<BuildIndexRpc>(
[this](const auto &req_reader, auto *res_builder) {
BuildIndexReq req;
req.Load(req_reader);
database::LabelPropertyIndex::Key key{req.label, req.property};
database::GraphDbAccessor dba(db_, req.tx_id);
if (db_.storage().label_property_index_.CreateIndex(key) == false) {
// If we are a distributed worker we just have to wait until the index
// (which should be in the process of being created) is created, so that
// our return guarantees that the index has been built - this assumes
// that no worker thread that is creating an index will fail.
while (!dba.LabelPropertyIndexExists(key.label_, key.property_)) {
// TODO reconsider this constant, currently rule-of-thumb chosen
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
} else {
dba.PopulateIndex(key);
dba.EnableIndex(key);
}
});
}
} // namespace distributed

View File

@ -1,22 +0,0 @@
#pragma once
namespace communication::rpc {
class Server;
}
namespace database {
class GraphDb;
}
namespace distributed {
class IndexRpcServer {
public:
IndexRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,41 +0,0 @@
#include "distributed/plan_consumer.hpp"
namespace distributed {
PlanConsumer::PlanConsumer(communication::rpc::Server &server)
: server_(server) {
server_.Register<DispatchPlanRpc>(
[this](const auto &req_reader, auto *res_builder) {
DispatchPlanReq req;
req.Load(req_reader);
plan_cache_.access().insert(
req.plan_id, std::make_unique<PlanPack>(req.plan, req.symbol_table,
std::move(req.storage)));
DispatchPlanRes res;
res.Save(res_builder);
});
server_.Register<RemovePlanRpc>(
[this](const auto &req_reader, auto *res_builder) {
plan_cache_.access().remove(req_reader.getMember());
});
}
PlanConsumer::PlanPack &PlanConsumer::PlanForId(int64_t plan_id) const {
auto accessor = plan_cache_.access();
auto found = accessor.find(plan_id);
CHECK(found != accessor.end())
<< "Missing plan and symbol table for plan id: " << plan_id;
return *found->second;
}
std::vector<int64_t> PlanConsumer::CachedPlanIds() const {
std::vector<int64_t> plan_ids;
auto access = plan_cache_.access();
plan_ids.reserve(access.size());
for (auto &kv : access) plan_ids.emplace_back(kv.first);
return plan_ids;
}
} // namespace distributed

View File

@ -1,44 +0,0 @@
#pragma once
#include <vector>
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan consumption from master. Creates and holds a local cache of
* plans. Worker side. */
class PlanConsumer {
public:
struct PlanPack {
PlanPack(std::shared_ptr<query::plan::LogicalOperator> plan,
query::SymbolTable symbol_table, query::AstStorage storage)
: plan(plan),
symbol_table(std::move(symbol_table)),
storage(std::move(storage)) {}
std::shared_ptr<query::plan::LogicalOperator> plan;
query::SymbolTable symbol_table;
const query::AstStorage storage;
};
explicit PlanConsumer(communication::rpc::Server &server);
/** Return cached plan and symbol table for a given plan id. */
PlanPack &PlanForId(int64_t plan_id) const;
/** Return the ids of all the cached plans. For testing. */
std::vector<int64_t> CachedPlanIds() const;
private:
communication::rpc::Server &server_;
// TODO remove unique_ptr. This is to get it to work, emplacing into a
// ConcurrentMap is tricky.
mutable ConcurrentMap<int64_t, std::unique_ptr<PlanPack>> plan_cache_;
};
} // namespace distributed
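
A worker-side sketch of the lookup path above, assuming the master has already dispatched a plan with the given id:

#include <cstdint>

#include "distributed/plan_consumer.hpp"

// Hypothetical lookup of a previously dispatched plan on the worker.
void UseCachedPlanSketch(communication::rpc::Server &server, int64_t plan_id) {
  // Registers the DispatchPlanRpc / RemovePlanRpc handlers on construction.
  distributed::PlanConsumer plan_consumer{server};

  // ... the master dispatches plan `plan_id` at some point ...
  auto &pack = plan_consumer.PlanForId(plan_id);  // CHECK-fails if missing
  (void)pack.plan;          // shared_ptr to the root logical operator
  (void)pack.symbol_table;  // symbols referenced by that plan
}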

View File

@ -1,35 +0,0 @@
#include <distributed/plan_dispatcher.hpp>
namespace distributed {
PlanDispatcher::PlanDispatcher(RpcWorkerClients &clients) : clients_(clients) {}
void PlanDispatcher::DispatchPlan(
int64_t plan_id, std::shared_ptr<query::plan::LogicalOperator> plan,
const query::SymbolTable &symbol_table) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [plan_id, plan, symbol_table](
int worker_id, communication::rpc::ClientPool &client_pool) {
auto result =
client_pool.Call<DispatchPlanRpc>(plan_id, plan, symbol_table);
CHECK(result) << "DispatchPlanRpc failed";
});
for (auto &future : futures) {
future.wait();
}
}
void PlanDispatcher::RemovePlan(int64_t plan_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [plan_id](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<RemovePlanRpc>(plan_id);
CHECK(result) << "Failed to remove plan from worker";
});
for (auto &future : futures) {
future.wait();
}
}
} // namespace distributed

View File

@ -1,30 +0,0 @@
#pragma once
#include "distributed/coordination.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan dispatching to all workers. Uses MasterCoordination to
* accomplish that. Master side.
*/
class PlanDispatcher {
public:
explicit PlanDispatcher(RpcWorkerClients &clients);
/** Dispatch a plan to all workers and wait for their acknowledgement. */
void DispatchPlan(int64_t plan_id,
std::shared_ptr<query::plan::LogicalOperator> plan,
const query::SymbolTable &symbol_table);
/** Remove a plan from all workers and wait for their acknowledgement. */
void RemovePlan(int64_t plan_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
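
And the matching master-side sketch, assuming the planner has already produced the operator tree and symbol table:

#include <cstdint>
#include <memory>

#include "distributed/plan_dispatcher.hpp"

// Hypothetical master-side dispatch and removal of a plan on all workers.
void DispatchPlanSketch(distributed::PlanDispatcher &dispatcher,
                        int64_t plan_id,
                        std::shared_ptr<query::plan::LogicalOperator> plan,
                        const query::SymbolTable &symbol_table) {
  // Blocks until every worker has acknowledged DispatchPlanRpc.
  dispatcher.DispatchPlan(plan_id, plan, symbol_table);

  // ... remote pulls against `plan_id` happen here ...

  // Blocks until every worker has acknowledged RemovePlanRpc.
  dispatcher.RemovePlan(plan_id);
}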

View File

@ -1,59 +0,0 @@
#>cpp
#pragma once
#include "communication/rpc/messages.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
#include "distributed/plan_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-import 'plan "/query/plan/operator.capnp")
(lcp:capnp-import 'sem "/query/frontend/semantic/symbol.capnp")
(defun load-plan (reader member)
#>cpp
query::plan::LogicalOperator::LoadHelper helper;
${member} = utils::LoadSharedPtr<query::plan::capnp::LogicalOperator, query::plan::LogicalOperator>(
${reader}, [&helper](const auto &reader) {
auto op = query::plan::LogicalOperator::Construct(reader);
op->Load(reader, &helper);
return op.release();
}, &helper.loaded_ops);
storage = std::move(helper.ast_storage);
cpp<#)
(defun save-plan (builder member)
#>cpp
query::plan::LogicalOperator::SaveHelper helper;
utils::SaveSharedPtr<query::plan::capnp::LogicalOperator, query::plan::LogicalOperator>(
${member}, &${builder},
[&helper](auto *builder, const auto &val) {
val.Save(builder, &helper);
}, &helper.saved_ops);
cpp<#)
(lcp:define-rpc dispatch-plan
(:request
((plan-id :int64_t)
(plan "std::shared_ptr<query::plan::LogicalOperator>"
:capnp-type "Utils.SharedPtr(Plan.LogicalOperator)"
:capnp-save #'save-plan :capnp-load #'load-plan)
(symbol-table "query::SymbolTable" :capnp-type "Sem.SymbolTable")
(storage "query::AstStorage" :initarg nil
:save-fun ""
:load-fun "storage = std::move(ar.template get_helper<query::AstStorage>(query::AstStorage::kHelperId));"
:capnp-save :dont-save)))
(:response ()))
(lcp:define-rpc remove-plan
(:request ((member :int64_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,176 +0,0 @@
#include "distributed/produce_rpc_server.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "transactions/engine_worker.hpp"
namespace distributed {
ProduceRpcServer::OngoingProduce::OngoingProduce(
database::GraphDb &db, tx::TransactionId tx_id,
std::shared_ptr<query::plan::LogicalOperator> op,
query::SymbolTable symbol_table, Parameters parameters,
std::vector<query::Symbol> pull_symbols)
: dba_{db, tx_id},
context_(dba_),
pull_symbols_(std::move(pull_symbols)),
frame_(symbol_table.max_position()),
cursor_(op->MakeCursor(dba_)) {
context_.symbol_table_ = std::move(symbol_table);
context_.parameters_ = std::move(parameters);
}
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::Pull() {
if (!accumulation_.empty()) {
auto results = std::move(accumulation_.back());
accumulation_.pop_back();
for (auto &element : results) {
try {
query::ReconstructTypedValue(element);
} catch (query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
return std::make_pair(std::move(results), cursor_state_);
}
}
return std::make_pair(std::move(results), PullState::CURSOR_IN_PROGRESS);
}
return PullOneFromCursor();
}
PullState ProduceRpcServer::OngoingProduce::Accumulate() {
while (true) {
auto result = PullOneFromCursor();
if (result.second != PullState::CURSOR_IN_PROGRESS)
return result.second;
else
accumulation_.emplace_back(std::move(result.first));
}
}
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::PullOneFromCursor() {
std::vector<query::TypedValue> results;
// Check if we already exhausted this cursor (or it entered an error
// state). This happens when we accumulate before normal pull.
if (cursor_state_ != PullState::CURSOR_IN_PROGRESS) {
return std::make_pair(results, cursor_state_);
}
try {
if (cursor_->Pull(frame_, context_)) {
results.reserve(pull_symbols_.size());
for (const auto &symbol : pull_symbols_) {
results.emplace_back(std::move(frame_[symbol]));
}
} else {
cursor_state_ = PullState::CURSOR_EXHAUSTED;
}
} catch (const mvcc::SerializationError &) {
cursor_state_ = PullState::SERIALIZATION_ERROR;
} catch (const utils::LockTimeoutException &) {
cursor_state_ = PullState::LOCK_TIMEOUT_ERROR;
} catch (const RecordDeletedError &) {
cursor_state_ = PullState::UPDATE_DELETED_ERROR;
} catch (const query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
} catch (const query::RemoveAttachedVertexException &) {
cursor_state_ = PullState::UNABLE_TO_DELETE_VERTEX_ERROR;
} catch (const query::QueryRuntimeException &) {
cursor_state_ = PullState::QUERY_ERROR;
} catch (const query::HintedAbortError &) {
cursor_state_ = PullState::HINTED_ABORT_ERROR;
}
return std::make_pair(std::move(results), cursor_state_);
}
ProduceRpcServer::ProduceRpcServer(
database::GraphDb &db, tx::Engine &tx_engine,
communication::rpc::Server &server,
const distributed::PlanConsumer &plan_consumer)
: db_(db),
produce_rpc_server_(server),
plan_consumer_(plan_consumer),
tx_engine_(tx_engine) {
produce_rpc_server_.Register<PullRpc>(
[this](const auto &req_reader, auto *res_builder) {
PullReq req;
req.Load(req_reader);
PullRes res(Pull(req));
res.Save(res_builder);
});
produce_rpc_server_.Register<TransactionCommandAdvancedRpc>(
[this](const auto &req_reader, auto *res_builder) {
TransactionCommandAdvancedReq req;
req.Load(req_reader);
tx_engine_.UpdateCommand(req.member);
db_.data_manager().ClearCacheForSingleTransaction(req.member);
TransactionCommandAdvancedRes res;
res.Save(res_builder);
});
}
void ProduceRpcServer::FinishAndClearOngoingProducePlans(
tx::TransactionId tx_id) {
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
for (auto it = ongoing_produces_.begin(); it != ongoing_produces_.end();) {
if (std::get<0>(it->first) == tx_id) {
it = ongoing_produces_.erase(it);
} else {
++it;
}
}
}
ProduceRpcServer::OngoingProduce &ProduceRpcServer::GetOngoingProduce(
const PullReq &req) {
auto key_tuple = std::make_tuple(req.tx_id, req.command_id, req.plan_id);
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
auto found = ongoing_produces_.find(key_tuple);
if (found != ongoing_produces_.end()) {
return found->second;
}
if (db_.type() == database::GraphDb::Type::DISTRIBUTED_WORKER) {
// On the worker, cache the snapshot to save one RPC.
dynamic_cast<tx::WorkerEngine &>(tx_engine_)
.RunningTransaction(req.tx_id, req.tx_snapshot);
}
auto &plan_pack = plan_consumer_.PlanForId(req.plan_id);
return ongoing_produces_
.emplace(std::piecewise_construct, std::forward_as_tuple(key_tuple),
std::forward_as_tuple(db_, req.tx_id, plan_pack.plan,
plan_pack.symbol_table, req.params,
req.symbols))
.first->second;
}
PullResData ProduceRpcServer::Pull(const PullReq &req) {
auto &ongoing_produce = GetOngoingProduce(req);
PullResData result(db_.WorkerId(), req.send_old, req.send_new);
result.pull_state = PullState::CURSOR_IN_PROGRESS;
if (req.accumulate) {
result.pull_state = ongoing_produce.Accumulate();
// If an error occurred, we need to return that error.
if (result.pull_state != PullState::CURSOR_EXHAUSTED) {
return result;
}
}
for (int i = 0; i < req.batch_size; ++i) {
auto pull_result = ongoing_produce.Pull();
result.pull_state = pull_result.second;
if (pull_result.second != PullState::CURSOR_IN_PROGRESS) break;
result.frames.emplace_back(std::move(pull_result.first));
}
return result;
}
} // namespace distributed

View File

@ -1,92 +0,0 @@
#pragma once
#include <cstdint>
#include <map>
#include <mutex>
#include <utility>
#include <vector>
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/plan_consumer.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/frame.hpp"
#include "query/parameters.hpp"
#include "query/plan/operator.hpp"
#include "query/typed_value.hpp"
#include "transactions/engine.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Handles the execution of a plan on the worker, requested by the remote
/// master. Assumes that (tx_id, plan_id) uniquely identifies an execution, and
/// that there will never be parallel requests for the same execution thus
/// identified.
class ProduceRpcServer {
/// Encapsulates a Cursor execution in progress. Can be used for pulling a
/// single result from the execution, or pulling all and accumulating the
/// results. Accumulations are used for synchronizing updates in distributed
/// MG (see query::plan::Synchronize).
class OngoingProduce {
public:
OngoingProduce(database::GraphDb &db, tx::TransactionId tx_id,
std::shared_ptr<query::plan::LogicalOperator> op,
query::SymbolTable symbol_table, Parameters parameters,
std::vector<query::Symbol> pull_symbols);
/// Returns a vector of typed values (one for each `pull_symbol`), and an
/// indication of the pull result. The result data is valid only if the
/// returned state is CURSOR_IN_PROGRESS.
std::pair<std::vector<query::TypedValue>, PullState> Pull();
/// Accumulates all the frames pulled from the cursor and returns
/// CURSOR_EXHAUSTED. If an error occurs, an appropriate value is returned.
PullState Accumulate();
private:
database::GraphDbAccessor dba_;
query::Context context_;
std::vector<query::Symbol> pull_symbols_;
query::Frame frame_;
PullState cursor_state_{PullState::CURSOR_IN_PROGRESS};
std::vector<std::vector<query::TypedValue>> accumulation_;
std::unique_ptr<query::plan::Cursor> cursor_;
/// Pulls and returns a single result from the cursor.
std::pair<std::vector<query::TypedValue>, PullState> PullOneFromCursor();
};
public:
ProduceRpcServer(database::GraphDb &db, tx::Engine &tx_engine,
communication::rpc::Server &server,
const distributed::PlanConsumer &plan_consumer);
/// Finish and clear ongoing produces for all plans that are tied to a
/// transaction with tx_id.
void FinishAndClearOngoingProducePlans(tx::TransactionId tx_id);
private:
std::mutex ongoing_produces_lock_;
/// Mapping of (tx id, command id, plan id) to OngoingProduce.
/// The command_id should be the command_id at the initialization of a cursor
/// that can call ProduceRpcServer.
std::map<std::tuple<tx::TransactionId, tx::CommandId, int64_t>,
OngoingProduce>
ongoing_produces_;
database::GraphDb &db_;
communication::rpc::Server &produce_rpc_server_;
const distributed::PlanConsumer &plan_consumer_;
tx::Engine &tx_engine_;
/// Gets an ongoing produce for the given pull request. Creates a new one if
/// there is none currently existing.
OngoingProduce &GetOngoingProduce(const PullReq &req);
/// Performs a single remote pull for the given request.
PullResData Pull(const PullReq &req);
};
} // namespace distributed

View File

@ -1,547 +0,0 @@
#>cpp
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/pull_produce_rpc_messages.capnp.h"
#include "distributed/serialization.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "storage/address_types.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:in-impl
#>cpp
#include "database/graph_db_accessor.hpp"
#include "distributed/data_manager.hpp"
cpp<#)
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'sem "/query/frontend/semantic/symbol.capnp")
(lcp:capnp-import 'tx "/transactions/common.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "tx::CommandId" "UInt32")
(lcp:capnp-type-conversion "tx::Snapshot" "Tx.Snapshot")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
#>cpp
/// The default number of results returned via RPC from remote execution to the
/// master that requested it.
constexpr int kDefaultBatchSize = 20;
cpp<#
(lcp:define-enum pull-state
(cursor-exhausted
cursor-in-progress
serialization-error
lock-timeout-error
update-deleted-error
reconstruction-error
unable-to-delete-vertex-error
hinted-abort-error
query-error)
(:documentation "Returned along with a batch of results in the remote-pull
RPC. Indicates the state of execution on the worker.")
(:serialize))
(lcp:define-struct pull-data ()
((pull-state "PullState")
(frames "std::vector<std::vector<query::TypedValue>>"))
(:documentation
"The data returned to the end consumer (the Pull operator). Contains only
the relevant parts of the response, ready for use."))
(lcp:define-struct pull-res-data ()
((pull-state "PullState"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::PullState" "PullState")
:capnp-load (lcp:capnp-load-enum "capnp::PullState" "PullState"))
(frames "std::vector<std::vector<query::TypedValue>>"
:capnp-type "List(List(Dis.TypedValue))"
:capnp-save
(lambda (builder member)
#>cpp
for (size_t frame_i = 0; frame_i < ${member}.size(); ++frame_i) {
const auto &frame = ${member}[frame_i];
auto frame_builder = ${builder}.init(frame_i, frame.size());
for (size_t val_i = 0; val_i < frame.size(); ++val_i) {
const auto &value = frame[val_i];
auto value_builder = frame_builder[val_i];
utils::SaveCapnpTypedValue(
value, &value_builder,
[this](const auto &value, auto *builder) {
this->SaveGraphElement(value, builder);
});
}
}
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
${member}.reserve(${reader}.size());
for (const auto &frame_reader : ${reader}) {
std::vector<query::TypedValue> current_frame;
current_frame.reserve(frame_reader.size());
for (const auto &value_reader : frame_reader) {
query::TypedValue value;
utils::LoadCapnpTypedValue(
value_reader, &value,
[this, dba](const auto &reader, auto *value) {
this->LoadGraphElement(dba, reader, value);
});
current_frame.emplace_back(value);
}
${member}.emplace_back(current_frame);
}
cpp<#))
(worker-id :int16_t :capnp-save :dont-save
:documentation
"Id of the worker on which the response is created, used for
serializing vertices (converting local to global addresses). Indicates which
of (old, new) records of a graph element should be sent.")
(send-old :bool :capnp-save :dont-save)
(send-new :bool :capnp-save :dont-save)
;; Temporary caches used between deserialization and post-processing
;; (transferring the ownership of this data to a Cache).
(vertices "std::vector<GraphElementData<Vertex>>" :capnp-save :dont-save)
(edges "std::vector<GraphElementData<Edge>>" :capnp-save :dont-save)
(paths "std::vector<PathData>" :capnp-save :dont-save))
(:documentation
"The data of the remote pull response. Post-processing is required after
deserialization to initialize Vertex/Edge typed values in the frames (possibly
encapsulated in lists/maps) to their proper values. This requires a
GraphDbAccessor and therefore can't be done as part of deserialization.
TODO - make it possible to inject a &GraphDbAccessor from the Pull layer all
the way into RPC data deserialization to remove the requirement for
post-processing. The current approach of holding references to parts of the
frame (potentially embedded in lists/maps) is too error-prone.")
(:public
#>cpp
private:
cpp<#
(lcp:define-struct (graph-element-data t-record) ()
((global-address "storage::Address<mvcc::VersionList<TRecord>>")
(old-record "std::unique_ptr<TRecord>")
(new-record "std::unique_ptr<TRecord>")
(element-in-frame
"query::TypedValue *"
:documentation
"The position in frame is optional. This same structure is used for
deserializing path elements, in which case the vertex/edge in question is not
directly part of the frame."))
(:documentation
"Temp cache for deserialized vertices and edges. These objects are
created during deserialization. They are used immediately afterwards during
post-processing. The vertex/edge data ownership gets transferred to the Cache,
and the `element_in_frame` reference is used to set the appropriate accessor
to the appropriate value. Not used on side that generates the response.")
(:public
#>cpp
GraphElementData(storage::Address<mvcc::VersionList<TRecord>> address,
std::unique_ptr<TRecord> old_record, std::unique_ptr<TRecord> new_record,
query::TypedValue *element_in_frame)
: global_address(address),
old_record(std::move(old_record)),
new_record(std::move(new_record)),
element_in_frame(element_in_frame) {}
cpp<#))
(lcp:define-struct path-data ()
((vertices "std::vector<GraphElementData<Vertex>>")
(edges "std::vector<GraphElementData<Edge>>")
(path-in-frame "query::TypedValue *"))
(:public
#>cpp
PathData(query::TypedValue *path_in_frame) : path_in_frame(path_in_frame) {}
cpp<#)
(:documentation "Same like `GraphElementData`, but for paths."))
#>cpp
public:
PullResData() {} // Default constructor required for serialization.
PullResData(int worker_id, bool send_old, bool send_new)
: worker_id(worker_id), send_old(send_old), send_new(send_new) {}
PullResData(const PullResData &) = delete;
PullResData &operator=(const PullResData &) = delete;
PullResData(PullResData &&) = default;
PullResData &operator=(PullResData &&) = default;
/// Saves a typed value that is a vertex/edge/path.
template <class TArchive>
void SaveGraphElement(TArchive &ar, const query::TypedValue &value) const {
// Helper template function for storing a vertex or an edge.
auto save_element = [&ar, this](auto element_accessor) {
ar << element_accessor.GlobalAddress().raw();
// If both old and new are null, we need to reconstruct.
if (!(element_accessor.GetOld() || element_accessor.GetNew())) {
bool result = element_accessor.Reconstruct();
CHECK(result) << "Attempting to serialize an element not visible to "
"current transaction.";
}
auto *old_rec = element_accessor.GetOld();
if (send_old && old_rec) {
ar << true;
distributed::SaveElement(ar, *old_rec, worker_id);
} else {
ar << false;
}
if (send_new) {
// Must call SwitchNew as that will trigger a potentially necessary
// Reconstruct.
element_accessor.SwitchNew();
auto *new_rec = element_accessor.GetNew();
if (new_rec) {
ar << true;
distributed::SaveElement(ar, *new_rec, worker_id);
} else {
ar << false;
}
} else {
ar << false;
}
};
switch (value.type()) {
case query::TypedValue::Type::Vertex:
save_element(value.ValueVertex());
break;
case query::TypedValue::Type::Edge:
save_element(value.ValueEdge());
break;
case query::TypedValue::Type::Path: {
auto &path = value.ValuePath();
ar << path.size();
save_element(path.vertices()[0]);
for (size_t i = 0; i < path.size(); ++i) {
save_element(path.edges()[i]);
save_element(path.vertices()[i + 1]);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << value.type();
}
}
/// Loads a typed value that is a vertex/edge/path. Part of the
/// deserialization process, populates the temporary data caches which are
/// processed later.
template <class TArchive>
void LoadGraphElement(TArchive &ar, query::TypedValue::Type type,
query::TypedValue &value) {
auto load_edge = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadEdge(ar) : nullptr;
};
auto load_vertex = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadVertex(ar) : nullptr;
};
switch (type) {
case query::TypedValue::Type::Vertex: {
storage::VertexAddress::StorageT address;
ar >> address;
vertices.emplace_back(storage::VertexAddress(address), load_vertex(ar),
load_vertex(ar), &value);
break;
}
case query::TypedValue::Type::Edge: {
storage::VertexAddress::StorageT address;
ar >> address;
edges.emplace_back(storage::EdgeAddress(address), load_edge(ar),
load_edge(ar), &value);
break;
}
case query::TypedValue::Type::Path: {
size_t path_size;
ar >> path_size;
paths.emplace_back(&value);
auto &path_data = paths.back();
storage::VertexAddress::StorageT vertex_address;
storage::EdgeAddress::StorageT edge_address;
ar >> vertex_address;
path_data.vertices.emplace_back(storage::VertexAddress(vertex_address),
load_vertex(ar), load_vertex(ar),
nullptr);
for (size_t i = 0; i < path_size; ++i) {
ar >> edge_address;
path_data.edges.emplace_back(storage::EdgeAddress(edge_address),
load_edge(ar), load_edge(ar), nullptr);
ar >> vertex_address;
path_data.vertices.emplace_back(
storage::VertexAddress(vertex_address), load_vertex(ar),
load_vertex(ar), nullptr);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << type;
}
}
cpp<#)
(:private
#>cpp
void SaveGraphElement(const query::TypedValue &,
distributed::capnp::TypedValue::Builder *) const;
void LoadGraphElement(database::GraphDbAccessor *,
const distributed::capnp::TypedValue::Reader &,
query::TypedValue *);
cpp<#)
(:serialize :capnp :load-args '((dba "database::GraphDbAccessor *"))))
(lcp:in-impl
#>cpp
void PullResData::SaveGraphElement(
const query::TypedValue &value,
distributed::capnp::TypedValue::Builder *builder) const {
auto save_element = [this](auto accessor, auto *builder) {
builder->setAddress(accessor.GlobalAddress().raw());
// If both old and new are null, we need to reconstruct
if (!(accessor.GetOld() || accessor.GetNew())) {
bool result = accessor.Reconstruct();
CHECK(result) << "Attempting to serialize an element not visible to "
"current transaction.";
}
auto *old_rec = accessor.GetOld();
if (send_old && old_rec) {
auto old_builder = builder->initOld();
distributed::SaveElement(*old_rec, &old_builder, worker_id);
}
if (send_new) {
// Must call SwitchNew as that will trigger a potentially necessary
// Reconstruct.
accessor.SwitchNew();
auto *new_rec = accessor.GetNew();
if (new_rec) {
auto new_builder = builder->initNew();
distributed::SaveElement(*new_rec, &new_builder, worker_id);
}
}
};
switch (value.type()) {
case query::TypedValue::Type::Vertex: {
auto vertex_builder = builder->initVertex();
save_element(value.ValueVertex(), &vertex_builder);
break;
}
case query::TypedValue::Type::Edge: {
auto edge_builder = builder->initEdge();
save_element(value.ValueEdge(), &edge_builder);
break;
}
case query::TypedValue::Type::Path: {
const auto &path = value.ValuePath();
auto path_builder = builder->initPath();
auto vertices_builder = path_builder.initVertices(path.vertices().size());
for (size_t i = 0; i < path.vertices().size(); ++i) {
auto vertex_builder = vertices_builder[i];
save_element(path.vertices()[i], &vertex_builder);
}
auto edges_builder = path_builder.initEdges(path.edges().size());
for (size_t i = 0; i < path.edges().size(); ++i) {
auto edge_builder = edges_builder[i];
save_element(path.edges()[i], &edge_builder);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << value.type();
}
}
void PullResData::LoadGraphElement(
database::GraphDbAccessor *dba,
const distributed::capnp::TypedValue::Reader &reader,
query::TypedValue *value) {
auto load_vertex = [dba](const auto &vertex_reader) {
storage::VertexAddress global_address(vertex_reader.getAddress());
auto old_record =
vertex_reader.hasOld()
? distributed::LoadVertex<const distributed::capnp::Vertex::Reader>(
vertex_reader.getOld())
: nullptr;
auto new_record =
vertex_reader.hasNew()
? distributed::LoadVertex<const distributed::capnp::Vertex::Reader>(
vertex_reader.getNew())
: nullptr;
dba->db()
.data_manager()
.Elements<Vertex>(dba->transaction_id())
.emplace(global_address.gid(), std::move(old_record),
std::move(new_record));
return VertexAccessor(global_address, *dba);
};
auto load_edge = [dba](const auto &edge_reader) {
storage::EdgeAddress global_address(edge_reader.getAddress());
auto old_record =
edge_reader.hasOld()
? distributed::LoadEdge<const distributed::capnp::Edge::Reader>(
edge_reader.getOld())
: nullptr;
auto new_record =
edge_reader.hasNew()
? distributed::LoadEdge<const distributed::capnp::Edge::Reader>(
edge_reader.getNew())
: nullptr;
dba->db()
.data_manager()
.Elements<Edge>(dba->transaction_id())
.emplace(global_address.gid(), std::move(old_record),
std::move(new_record));
return EdgeAccessor(global_address, *dba);
};
switch (reader.which()) {
case distributed::capnp::TypedValue::VERTEX:
*value = load_vertex(reader.getVertex());
break;
case distributed::capnp::TypedValue::EDGE:
*value = load_edge(reader.getEdge());
break;
case distributed::capnp::TypedValue::PATH: {
auto vertices_reader = reader.getPath().getVertices();
auto edges_reader = reader.getPath().getEdges();
query::Path path(load_vertex(vertices_reader[0]));
for (size_t i = 0; i < edges_reader.size(); ++i) {
path.Expand(load_edge(edges_reader[i]));
path.Expand(load_vertex(vertices_reader[i + 1]));
}
*value = path;
break;
}
default:
LOG(FATAL) << "Unsupported graph element type.";
}
}
cpp<#)
(lcp:define-rpc pull
(:request
((tx-id "tx::TransactionId")
(tx-snapshot "tx::Snapshot")
(plan-id :int64_t)
(command-id "tx::CommandId")
(params "Parameters"
:save-fun
"
ar << params.size();
for (auto &kv : params) {
ar << kv.first;
// Params never contain a vertex/edge, so save plain TypedValue.
utils::SaveTypedValue(ar, kv.second);
}
"
:load-fun
"
size_t params_size;
ar >> params_size;
for (size_t i = 0; i < params_size; ++i) {
int token_pos;
ar >> token_pos;
query::TypedValue param;
// Params never contain a vertex/edge, so load plain TypedValue.
utils::LoadTypedValue(ar, param);
params.Add(token_pos, param);
}
"
:capnp-type "Utils.Map(Utils.BoxInt64, Dis.TypedValue)"
:capnp-save
(lambda (builder member)
#>cpp
auto entries_builder = ${builder}.initEntries(${member}.size());
size_t i = 0;
for (auto &entry : params) {
auto builder = entries_builder[i];
auto key_builder = builder.initKey();
key_builder.setValue(entry.first);
auto value_builder = builder.initValue();
utils::SaveCapnpTypedValue(entry.second, &value_builder);
++i;
}
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
for (const auto &entry_reader : ${reader}.getEntries()) {
query::TypedValue value;
utils::LoadCapnpTypedValue(entry_reader.getValue(), &value);
${member}.Add(entry_reader.getKey().getValue(), value);
}
cpp<#))
(symbols "std::vector<query::Symbol>"
:capnp-type "List(Sem.Symbol)"
:capnp-save (lcp:capnp-save-vector "query::capnp::Symbol" "query::Symbol")
:capnp-load (lcp:capnp-load-vector "query::capnp::Symbol" "query::Symbol"))
(accumulate :bool)
(batch-size :int64_t)
;; Indicates which of (old, new) records of a graph element should be sent.
(send-old :bool)
(send-new :bool)))
(:response
((data "PullResData" :initarg :move
:save-fun
"
ar << data.pull_state;
ar << data.frames.size();
// We need to indicate how many values are in each frame.
// Assume all the frames have an equal number of elements.
ar << (data.frames.size() == 0 ? 0 : data.frames[0].size());
for (const auto &frame : data.frames) {
for (const auto &value : frame) {
utils::SaveTypedValue<TArchive>(
ar, value, [this](TArchive &ar, const query::TypedValue &value) {
data.SaveGraphElement(ar, value);
});
}
}
"
:load-fun
"
ar >> data.pull_state;
size_t frame_count;
ar >> frame_count;
data.frames.reserve(frame_count);
size_t frame_size;
ar >> frame_size;
for (size_t i = 0; i < frame_count; ++i) {
data.frames.emplace_back();
auto &current_frame = data.frames.back();
current_frame.reserve(frame_size);
for (size_t j = 0; j < frame_size; ++j) {
current_frame.emplace_back();
utils::LoadTypedValue<TArchive>(
ar, current_frame.back(),
[this](TArchive &ar, query::TypedValue::TypedValue::Type type,
query::TypedValue &value) {
data.LoadGraphElement(ar, type, value);
});
}
}
"))
(:serialize :capnp :base t :load-args '((dba "database::GraphDbAccessor *")))))
;; TODO make a separate RPC for the continuation of an existing pull, as an
;; optimization not to have to send the full PullReqData pack every time.
(lcp:define-rpc transaction-command-advanced
(:request ((member "tx::TransactionId")))
(:response ()))
(lcp:pop-namespace) ;; distributed
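
Every remote batch carries one of the PullState values defined above, so the master has to decide which states mean "keep pulling", which mean "done", and which should fail the query. A small hypothetical classification helper; the enumerator names follow the usage in the ProduceRpcServer code earlier in this diff.

#include "distributed/pull_produce_rpc_messages.hpp"

// Hypothetical classification of the PullState values defined above.
bool IsPullError(distributed::PullState state) {
  switch (state) {
    case distributed::PullState::CURSOR_EXHAUSTED:
    case distributed::PullState::CURSOR_IN_PROGRESS:
      return false;  // normal termination / more batches to come
    case distributed::PullState::SERIALIZATION_ERROR:
    case distributed::PullState::LOCK_TIMEOUT_ERROR:
    case distributed::PullState::UPDATE_DELETED_ERROR:
    case distributed::PullState::RECONSTRUCTION_ERROR:
    case distributed::PullState::UNABLE_TO_DELETE_VERTEX_ERROR:
    case distributed::PullState::HINTED_ABORT_ERROR:
    case distributed::PullState::QUERY_ERROR:
      return true;  // remote execution failed; the master should raise
  }
  return true;  // unreachable; keeps non-exhaustiveness warnings quiet
}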

View File

@ -1,41 +0,0 @@
#include <functional>
#include "distributed/data_manager.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
utils::Future<PullData> PullRpcClients::Pull(
database::GraphDbAccessor &dba, int worker_id, int64_t plan_id,
tx::CommandId command_id, const Parameters &params,
const std::vector<query::Symbol> &symbols, bool accumulate,
int batch_size) {
return clients_.ExecuteOnWorker<
PullData>(worker_id, [&dba, plan_id, command_id, params, symbols,
accumulate, batch_size](int worker_id,
ClientPool &client_pool) {
auto load_pull_res = [&dba](const auto &res_reader) {
PullRes res;
res.Load(res_reader, &dba);
return res;
};
auto result = client_pool.CallWithLoad<PullRpc>(
load_pull_res, dba.transaction_id(), dba.transaction().snapshot(),
plan_id, command_id, params, symbols, accumulate, batch_size, true,
true);
return PullData{result->data.pull_state, std::move(result->data.frames)};
});
}
std::vector<utils::Future<void>>
PullRpcClients::NotifyAllTransactionCommandAdvanced(tx::TransactionId tx_id) {
return clients_.ExecuteOnWorkers<void>(
0, [tx_id](int worker_id, auto &client) {
auto res = client.template Call<TransactionCommandAdvancedRpc>(tx_id);
CHECK(res) << "TransactionCommandAdvanceRpc failed";
});
}
} // namespace distributed

View File

@ -1,48 +0,0 @@
#pragma once
#include <vector>
#include "database/graph_db_accessor.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Provides means of calling for the execution of a plan on some remote worker,
/// and getting the results of that execution. The results are returned in
/// batches and are therefore accompanied by an enum indicator of the state of
/// remote execution.
class PullRpcClients {
using ClientPool = communication::rpc::ClientPool;
public:
PullRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Calls a remote pull asynchronously. IMPORTANT: take care not to call this
/// function for the same (tx_id, worker_id, plan_id, command_id) before the
/// previous call has ended.
///
/// @todo: it might be cleaner to split Pull into {InitRemoteCursor,
/// Pull, RemoteAccumulate}, but that's a lot of refactoring and more
/// RPC calls.
utils::Future<PullData> Pull(database::GraphDbAccessor &dba, int worker_id,
int64_t plan_id, tx::CommandId command_id,
const Parameters &params,
const std::vector<query::Symbol> &symbols,
bool accumulate,
int batch_size = kDefaultBatchSize);
auto GetWorkerIds() { return clients_.GetWorkerIds(); }
std::vector<utils::Future<void>> NotifyAllTransactionCommandAdvanced(
tx::TransactionId tx_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
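
The batching contract above translates into a simple loop on the master: wait on each Pull future before issuing the next one for the same execution, and stop once the remote cursor reports anything other than CURSOR_IN_PROGRESS. A hedged sketch:

#include <cstdint>
#include <utility>
#include <vector>

#include "distributed/pull_rpc_clients.hpp"

// Hypothetical master-side batch loop over the Pull interface above.
std::vector<std::vector<query::TypedValue>> PullAllFromWorker(
    distributed::PullRpcClients &pull_clients, database::GraphDbAccessor &dba,
    int worker_id, int64_t plan_id, tx::CommandId command_id,
    const Parameters &params, const std::vector<query::Symbol> &symbols) {
  std::vector<std::vector<query::TypedValue>> frames;
  while (true) {
    // Only one outstanding Pull per (tx, worker, plan, command) at a time,
    // so wait on the future before issuing the next request.
    auto batch = pull_clients
                     .Pull(dba, worker_id, plan_id, command_id, params,
                           symbols, /* accumulate */ false)
                     .get();
    for (auto &frame : batch.frames) frames.emplace_back(std::move(frame));
    // Anything other than CURSOR_IN_PROGRESS is either exhaustion or an error.
    if (batch.pull_state != distributed::PullState::CURSOR_IN_PROGRESS) break;
  }
  return frames;
}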

View File

@ -1,154 +0,0 @@
#pragma once
#include <functional>
#include <type_traits>
#include <unordered_map>
#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination.hpp"
#include "distributed/index_rpc_messages.hpp"
#include "distributed/token_sharing_rpc_messages.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "storage/types.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"
#include "utils/thread.hpp"
namespace distributed {
/** A cache of RPC clients (of the given name/kind) per MG distributed worker.
* Thread safe. */
class RpcWorkerClients {
public:
explicit RpcWorkerClients(Coordination &coordination)
: coordination_(coordination),
thread_pool_(std::thread::hardware_concurrency()) {}
RpcWorkerClients(const RpcWorkerClients &) = delete;
RpcWorkerClients(RpcWorkerClients &&) = delete;
RpcWorkerClients &operator=(const RpcWorkerClients &) = delete;
RpcWorkerClients &operator=(RpcWorkerClients &&) = delete;
auto &GetClientPool(int worker_id) {
std::lock_guard<std::mutex> guard{lock_};
auto found = client_pools_.find(worker_id);
if (found != client_pools_.end()) return found->second;
return client_pools_
.emplace(std::piecewise_construct, std::forward_as_tuple(worker_id),
std::forward_as_tuple(coordination_.GetEndpoint(worker_id)))
.first->second;
}
auto GetWorkerIds() { return coordination_.GetWorkerIds(); }
/** Asynchronously executes the given function on the RPC client for the
* given worker id. Returns a `utils::Future` of the given `execute`
* function's return type. */
template <typename TResult>
auto ExecuteOnWorker(
int worker_id,
std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
execute) {
auto &client_pool = GetClientPool(worker_id);
return thread_pool_.Run(execute, worker_id, std::ref(client_pool));
}
/** Asynchronously executes the `execute` function on all worker RPC clients
* except the one whose id is `skip_worker_id`. Returns a vector of futures
* containing the results of the `execute` function. */
template <typename TResult>
auto ExecuteOnWorkers(
int skip_worker_id,
std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
execute) {
std::vector<utils::Future<TResult>> futures;
for (auto &worker_id : coordination_.GetWorkerIds()) {
if (worker_id == skip_worker_id) continue;
futures.emplace_back(std::move(ExecuteOnWorker(worker_id, execute)));
}
return futures;
}
private:
// TODO make Coordination const, it's member GetEndpoint must be const too.
Coordination &coordination_;
std::unordered_map<int, communication::rpc::ClientPool> client_pools_;
std::mutex lock_;
utils::ThreadPool thread_pool_;
};
/** Wrapper class around an RPC call to build indices.
*/
class IndexRpcClients {
public:
explicit IndexRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
auto GetBuildIndexFutures(const storage::Label &label,
const storage::Property &property,
tx::TransactionId transaction_id, int worker_id) {
return clients_.ExecuteOnWorkers<bool>(
worker_id,
[label, property, transaction_id](
int worker_id, communication::rpc::ClientPool &client_pool) {
return static_cast<bool>(
client_pool.Call<BuildIndexRpc>(label, property, transaction_id));
});
}
private:
RpcWorkerClients &clients_;
};
/** Wrapper class around an RPC call to share a token between workers.
*/
class TokenSharingRpcClients {
public:
explicit TokenSharingRpcClients(RpcWorkerClients *clients)
: clients_(clients) {}
auto TransferToken(int worker_id) {
return clients_->ExecuteOnWorker<void>(
worker_id,
[](int worker_id, communication::rpc::ClientPool &client_pool) {
CHECK(client_pool.Call<TokenTransferRpc>())
<< "Unable to transfer token";
});
}
private:
RpcWorkerClients *clients_;
};
/** Join ongoing produces on all workers.
*
* Sends an RPC request to all workers when a transaction is ending, notifying
* them to end all ongoing produces tied to that transaction.
*/
class OngoingProduceJoinerRpcClients {
public:
OngoingProduceJoinerRpcClients(RpcWorkerClients &clients)
: clients_(clients) {}
void JoinOngoingProduces(tx::TransactionId tx_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [tx_id](int worker_id, communication::rpc::ClientPool &client_pool) {
auto result =
client_pool.Call<distributed::WaitOnTransactionEndRpc>(tx_id);
CHECK(result)
<< "[WaitOnTransactionEndRpc] failed to notify that transaction "
<< tx_id << " ended";
});
// We need to wait for all workers to destroy pending futures to avoid
// using already destroyed (released) transaction objects.
for (auto &future : futures) {
future.wait();
}
}
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
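
Finally, a sketch of the generic fan-out helper above with a no-op payload; a real caller's lambda would issue client_pool.Call<SomeRpc>(...) just as the wrapper classes above do.

#include <vector>

#include "distributed/rpc_worker_clients.hpp"

// Hypothetical fan-out over all workers; the lambda does no real RPC and
// simply returns the worker id it was run for.
std::vector<int> TouchAllWorkers(distributed::RpcWorkerClients &clients) {
  auto futures = clients.ExecuteOnWorkers<int>(
      0 /* skip worker 0, conventionally the master */,
      [](int worker_id, communication::rpc::ClientPool &) {
        // A real caller would issue client_pool.Call<SomeRpc>(...) here.
        return worker_id;
      });
  std::vector<int> ids;
  for (auto &future : futures) ids.emplace_back(future.get());
  return ids;
}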

View File

@ -1,71 +0,0 @@
@0xccb448f0b998d9c8;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("distributed::capnp");
struct Address {
gid @0 :UInt64;
workerId @1 :Int16;
}
struct PropertyValue {
id @0 :UInt16;
value @1 :TypedValue;
}
struct Edge {
from @0 :Address;
to @1 :Address;
typeId @2 :UInt16;
properties @3 :List(PropertyValue);
}
struct Vertex {
outEdges @0 :List(EdgeEntry);
inEdges @1 :List(EdgeEntry);
labelIds @2 :List(UInt16);
properties @3 :List(PropertyValue);
struct EdgeEntry {
vertexAddress @0 :Address;
edgeAddress @1 :Address;
edgeTypeId @2 :UInt16;
}
}
struct TypedValue {
union {
nullType @0 :Void;
bool @1 :Bool;
integer @2 :Int64;
double @3 :Float64;
string @4 :Text;
list @5 :List(TypedValue);
map @6 :List(Entry);
vertex @7 :VertexAccessor;
edge @8 :EdgeAccessor;
path @9 :Path;
}
struct Entry {
key @0 :Text;
value @1 :TypedValue;
}
struct VertexAccessor {
address @0 :UInt64;
old @1 :Vertex;
new @2: Vertex;
}
struct EdgeAccessor {
address @0 :UInt64;
old @1 :Edge;
new @2: Edge;
}
struct Path {
vertices @0 :List(VertexAccessor);
edges @1 :List(EdgeAccessor);
}
}
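For orientation, a small C++ sketch of building one of the generated types above (the function wrapper is an assumption; the include path follows the surrounding sources, which use "distributed/serialization.capnp.h"):

  #include <capnp/message.h>

  #include "distributed/serialization.capnp.h"

  // Builds a TypedValue message holding an integer; setInteger() selects the
  // `integer` variant of the anonymous union declared in the schema above.
  void BuildIntegerTypedValue() {
    ::capnp::MallocMessageBuilder message;
    auto value = message.initRoot<distributed::capnp::TypedValue>();
    value.setInteger(42);
  }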

View File

@ -1,120 +0,0 @@
#include "distributed/serialization.hpp"
namespace {
template <class TAddress>
void SaveAddress(TAddress address,
distributed::capnp::Address::Builder *builder,
int16_t worker_id) {
builder->setGid(address.is_local() ? address.local()->gid_ : address.gid());
builder->setWorkerId(address.is_local() ? worker_id : address.worker_id());
}
storage::VertexAddress LoadVertexAddress(
const distributed::capnp::Address::Reader &reader) {
return {reader.getGid(), reader.getWorkerId()};
}
storage::EdgeAddress LoadEdgeAddress(
const distributed::capnp::Address::Reader &reader) {
return {reader.getGid(), reader.getWorkerId()};
}
void SaveProperties(
const PropertyValueStore &props,
::capnp::List<distributed::capnp::PropertyValue>::Builder *builder) {
int64_t i = 0;
for (const auto &kv : props) {
auto prop_builder = (*builder)[i];
prop_builder.setId(kv.first.Id());
auto value_builder = prop_builder.initValue();
utils::SaveCapnpTypedValue(kv.second, &value_builder);
++i;
}
}
PropertyValueStore LoadProperties(
const ::capnp::List<distributed::capnp::PropertyValue>::Reader &reader) {
PropertyValueStore props;
for (const auto &prop_reader : reader) {
query::TypedValue value;
utils::LoadCapnpTypedValue(prop_reader.getValue(), &value);
props.set(storage::Property(prop_reader.getId()), value);
}
return props;
}
} // namespace
namespace distributed {
void SaveVertex(const Vertex &vertex, capnp::Vertex::Builder *builder,
int16_t worker_id) {
auto save_edges = [worker_id](const auto &edges, auto *edges_builder) {
int64_t i = 0;
for (const auto &edge : edges) {
auto edge_builder = (*edges_builder)[i];
auto vertex_addr_builder = edge_builder.initVertexAddress();
SaveAddress(edge.vertex, &vertex_addr_builder, worker_id);
auto edge_addr_builder = edge_builder.initEdgeAddress();
SaveAddress(edge.edge, &edge_addr_builder, worker_id);
edge_builder.setEdgeTypeId(edge.edge_type.Id());
++i;
}
};
auto out_builder = builder->initOutEdges(vertex.out_.size());
save_edges(vertex.out_, &out_builder);
auto in_builder = builder->initInEdges(vertex.in_.size());
save_edges(vertex.in_, &in_builder);
auto labels_builder = builder->initLabelIds(vertex.labels_.size());
for (size_t i = 0; i < vertex.labels_.size(); ++i) {
labels_builder.set(i, vertex.labels_[i].Id());
}
auto properties_builder = builder->initProperties(vertex.properties_.size());
SaveProperties(vertex.properties_, &properties_builder);
}
template <>
std::unique_ptr<Vertex> LoadVertex(const capnp::Vertex::Reader &reader) {
auto vertex = std::make_unique<Vertex>();
auto load_edges = [](const auto &edges_reader) {
Edges edges;
for (const auto &edge_reader : edges_reader) {
auto vertex_address = LoadVertexAddress(edge_reader.getVertexAddress());
auto edge_address = LoadEdgeAddress(edge_reader.getEdgeAddress());
storage::EdgeType edge_type(edge_reader.getEdgeTypeId());
edges.emplace(vertex_address, edge_address, edge_type);
}
return edges;
};
vertex->out_ = load_edges(reader.getOutEdges());
vertex->in_ = load_edges(reader.getInEdges());
for (const auto &label_id : reader.getLabelIds()) {
vertex->labels_.emplace_back(label_id);
}
vertex->properties_ = LoadProperties(reader.getProperties());
return vertex;
}
void SaveEdge(const Edge &edge, capnp::Edge::Builder *builder,
int16_t worker_id) {
auto from_builder = builder->initFrom();
SaveAddress(edge.from_, &from_builder, worker_id);
auto to_builder = builder->initTo();
SaveAddress(edge.to_, &to_builder, worker_id);
builder->setTypeId(edge.edge_type_.Id());
auto properties_builder = builder->initProperties(edge.properties_.size());
SaveProperties(edge.properties_, &properties_builder);
}
template <>
std::unique_ptr<Edge> LoadEdge(const capnp::Edge::Reader &reader) {
auto from = LoadVertexAddress(reader.getFrom());
auto to = LoadVertexAddress(reader.getTo());
auto edge =
std::make_unique<Edge>(from, to, storage::EdgeType{reader.getTypeId()});
edge->properties_ = LoadProperties(reader.getProperties());
return edge;
}
} // namespace distributed

View File

@ -1,209 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "distributed/serialization.capnp.h"
#include "storage/address_types.hpp"
#include "storage/edge.hpp"
#include "storage/types.hpp"
#include "storage/vertex.hpp"
#include "utils/serialization.hpp"
namespace distributed {
namespace impl {
// Saves the given address into the given archive. Converts a local address to a
// global one, using the given worker_id.
template <typename TArchive, typename TAddress>
void SaveAddress(TArchive &ar, TAddress address, int worker_id) {
if (address.is_local()) {
ar << address.local()->gid_;
ar << worker_id;
} else {
ar << address.gid();
ar << address.worker_id();
}
};
// Saves the given properties into the given archive.
template <typename TArchive>
void SaveProperties(TArchive &ar, const PropertyValueStore &props) {
ar << props.size();
for (auto &kv : props) {
ar << kv.first.Id();
utils::SaveTypedValue(ar, kv.second);
}
}
} // namespace impl
void SaveVertex(const Vertex &vertex, capnp::Vertex::Builder *builder,
int16_t worker_id);
/**
* Saves the given vertex into the given Boost archive.
*
* @param ar - Archive into which to serialize.
* @param vertex - Getting serialized.
* @param worker_id - ID of the worker this is happening on. Necessary for local
* to global address conversion.
* @tparam TArchive - type of archive.
*/
template <typename TArchive>
void SaveVertex(TArchive &ar, const Vertex &vertex, int worker_id) {
auto save_edges = [&ar, worker_id](auto &edges) {
ar << edges.size();
for (auto &edge_struct : edges) {
impl::SaveAddress(ar, edge_struct.vertex, worker_id);
impl::SaveAddress(ar, edge_struct.edge, worker_id);
ar << edge_struct.edge_type.Id();
}
};
save_edges(vertex.out_);
save_edges(vertex.in_);
ar << vertex.labels_.size();
for (auto &label : vertex.labels_) {
ar << label.Id();
}
impl::SaveProperties(ar, vertex.properties_);
}
void SaveEdge(const Edge &edge, capnp::Edge::Builder *builder,
int16_t worker_id);
/**
* Saves the given edge into the given Boost archive.
*
 * @param ar - Archive into which to serialize.
* @param edge - Getting serialized.
* @param worker_id - ID of the worker this is happening on. Necessary for local
* to global address conversion.
* @tparam TArchive - type of archive.
*/
template <typename TArchive>
void SaveEdge(TArchive &ar, const Edge &edge, int worker_id) {
impl::SaveAddress(ar, edge.from_, worker_id);
impl::SaveAddress(ar, edge.to_, worker_id);
ar << edge.edge_type_.Id();
impl::SaveProperties(ar, edge.properties_);
}
/// Alias for `SaveEdge` allowing for param type resolution.
inline void SaveElement(const Edge &record, capnp::Edge::Builder *builder,
int16_t worker_id) {
return SaveEdge(record, builder, worker_id);
}
/// Alias for `SaveVertex` allowing for param type resolution.
inline void SaveElement(const Vertex &record, capnp::Vertex::Builder *builder,
int16_t worker_id) {
return SaveVertex(record, builder, worker_id);
}
/// Alias for `SaveEdge` allowing for param type resolution.
template <typename TArchive>
void SaveElement(TArchive &ar, const Edge &record, int worker_id) {
return SaveEdge(ar, record, worker_id);
}
/// Alias for `SaveVertex` allowing for param type resolution.
template <typename TArchive>
void SaveElement(TArchive &ar, const Vertex &record, int worker_id) {
return SaveVertex(ar, record, worker_id);
}
namespace impl {
template <typename TArchive>
storage::VertexAddress LoadVertexAddress(TArchive &ar) {
gid::Gid vertex_id;
ar >> vertex_id;
int worker_id;
ar >> worker_id;
return {vertex_id, worker_id};
}
template <typename TArchive>
void LoadProperties(TArchive &ar, PropertyValueStore &store) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Property::IdT prop;
ar >> prop;
query::TypedValue value;
utils::LoadTypedValue(ar, value);
store.set(storage::Property(prop), static_cast<PropertyValue>(value));
}
}
} // namespace impl
/**
* Loads a Vertex from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Vertex> LoadVertex(TArchive &ar) {
auto vertex = std::make_unique<Vertex>();
auto decode_edges = [&ar](Edges &edges) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
auto vertex_address = impl::LoadVertexAddress(ar);
storage::EdgeType::IdT edge_type;
gid::Gid edge_id;
ar >> edge_id;
int edge_worker_id;
ar >> edge_worker_id;
ar >> edge_type;
edges.emplace(vertex_address, {edge_id, edge_worker_id},
storage::EdgeType(edge_type));
}
};
decode_edges(vertex->out_);
decode_edges(vertex->in_);
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Label::IdT label;
ar >> label;
vertex->labels_.emplace_back(label);
}
impl::LoadProperties(ar, vertex->properties_);
return vertex;
}
template <>
std::unique_ptr<Vertex> LoadVertex(const capnp::Vertex::Reader &reader);
/**
* Loads an Edge from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Edge> LoadEdge(TArchive &ar) {
auto from = impl::LoadVertexAddress(ar);
auto to = impl::LoadVertexAddress(ar);
storage::EdgeType::IdT edge_type;
ar >> edge_type;
auto edge = std::make_unique<Edge>(from, to, storage::EdgeType{edge_type});
impl::LoadProperties(ar, edge->properties_);
return edge;
}
template <>
std::unique_ptr<Edge> LoadEdge(const capnp::Edge::Reader &reader);
} // namespace distributed
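A brief sketch of how the `SaveElement` overloads above resolve (hypothetical call sites, assuming already-initialized Cap'n Proto builders): the same call name dispatches to `SaveVertex` or `SaveEdge` purely on the record type.

  // Given capnp::Vertex::Builder vertex_builder and capnp::Edge::Builder edge_builder:
  distributed::SaveElement(vertex, &vertex_builder, worker_id);  // resolves to SaveVertex
  distributed::SaveElement(edge, &edge_builder, worker_id);      // resolves to SaveEdge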

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include "communication/rpc/messages.hpp"
#include "distributed/storage_gc_rpc_messages.capnp.h"
#include "io/network/endpoint.hpp"
#include "transactions/transaction.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:define-rpc ran-local-gc
(:request
((local-oldest-active "tx::TransactionId" :capnp-type "UInt64")
(worker-id :int16_t)))
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,20 +0,0 @@
#>cpp
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "distributed/token_sharing_rpc_messages.capnp.h"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:define-rpc token-transfer
(:request ())
(:response ()))
(lcp:pop-namespace) ;; distributed

View File

@ -1,100 +0,0 @@
#pragma once
#include "distributed/rpc_worker_clients.hpp"
#include "storage/dynamic_graph_partitioner/dgp.hpp"
namespace communication::rpc {
class Server;
}
namespace database {
class GraphDb;
};
namespace distributed {
/// Shares the token between dynamic graph partitioner instances across workers
/// by passing the token from one worker to another, in a circular fashion. This
/// guarantees that no two workers will execute the dynamic graph partitioner
/// step at the same time.
class TokenSharingRpcServer {
public:
TokenSharingRpcServer(database::GraphDb *db, int worker_id,
distributed::Coordination *coordination,
communication::rpc::Server *server,
distributed::TokenSharingRpcClients *clients)
: worker_id_(worker_id),
coordination_(coordination),
server_(server),
clients_(clients),
dgp_(db) {
server_->Register<distributed::TokenTransferRpc>(
[this](const auto &req_reader, auto *res_builder) { token_ = true; });
runner_ = std::thread([this]() {
while (true) {
// Wait till we get the token
while (!token_) {
if (shutting_down_) break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
if (shutting_down_) break;
token_ = false;
dgp_.Run();
// Transfer token to next
auto workers = coordination_->GetWorkerIds();
sort(workers.begin(), workers.end());
int next_worker = -1;
auto pos = std::upper_bound(workers.begin(), workers.end(), worker_id_);
if (pos != workers.end()) {
next_worker = *pos;
} else {
next_worker = workers[0];
}
clients_->TransferToken(next_worker);
}
});
}
/// Starts the token sharing server which in turn starts the dynamic graph
/// partitioner.
void StartTokenSharing() {
started_ = true;
token_ = true;
}
~TokenSharingRpcServer() {
shutting_down_ = true;
if (runner_.joinable()) runner_.join();
if (started_ && worker_id_ == 0) {
// Wait till we get the token back; otherwise some worker might try to
// migrate part of the graph to another worker while that worker is shutting
// down, or something else bad might happen
// TODO(dgleich): Solve this better in the future since this blocks
// shutting down until spinner steps complete
while (!token_) {
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
}
private:
int worker_id_;
distributed::Coordination *coordination_;
communication::rpc::Server *server_;
distributed::TokenSharingRpcClients *clients_;
std::atomic<bool> started_{false};
std::atomic<bool> token_{false};
std::atomic<bool> shutting_down_{false};
std::thread runner_;
DynamicGraphPartitioner dgp_;
};
} // namespace distributed
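The hand-off above always passes the token to the next-larger worker id, wrapping around to the smallest one. A self-contained sketch of just that selection step (illustrative only, not part of the original file):

  #include <algorithm>
  #include <vector>

  // Returns the id of the worker that should receive the token next.
  int NextWorker(std::vector<int> workers, int worker_id) {
    std::sort(workers.begin(), workers.end());
    auto pos = std::upper_bound(workers.begin(), workers.end(), worker_id);
    return pos != workers.end() ? *pos : workers[0];
  }

  // With workers {0, 1, 2}: NextWorker({0, 1, 2}, 0) == 1, NextWorker({0, 1, 2}, 2) == 0.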

View File

@ -1,86 +0,0 @@
#pragma once
#include <functional>
#include <vector>
#include "communication/rpc/server.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "transactions/engine.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/scheduler.hpp"
namespace distributed {
/// Periodically calls `ClearTransactionalCache(oldest_transaction)` on all
/// registered objects.
class TransactionalCacheCleaner {
/// The wait time between two releases of local transaction objects that have
/// expired on the master.
static constexpr std::chrono::seconds kCacheReleasePeriod{1};
public:
template <typename... T>
TransactionalCacheCleaner(tx::Engine &tx_engine, T &... caches)
: tx_engine_(tx_engine) {
Register(caches...);
cache_clearing_scheduler_.Run(
"DistrTxCacheGc", kCacheReleasePeriod,
[this]() { this->Clear(tx_engine_.GlobalGcSnapshot().back()); });
}
protected:
/// Registers the given object for transactional cleaning. The object will
/// periodically get its `ClearTransactionalCache(tx::TransactionId)` method called
/// with the oldest active transaction id. Note that the ONLY guarantee for
/// the call param is that there are no transactions alive that have an id
/// lower than it.
template <typename TCache>
void Register(TCache &cache) {
functions_.emplace_back([&cache](tx::TransactionId oldest_active) {
cache.ClearTransactionalCache(oldest_active);
});
}
private:
template <typename TCache, typename... T>
void Register(TCache &cache, T &... caches) {
Register(cache);
Register(caches...);
}
void Clear(tx::TransactionId oldest_active) {
for (auto &f : functions_) f(oldest_active);
}
tx::Engine &tx_engine_;
std::vector<std::function<void(tx::TransactionId &oldest_active)>> functions_;
utils::Scheduler cache_clearing_scheduler_;
};
/// Registers an RPC server that listens for `WaitOnTransactionEnd` requests
/// that require all ongoing produces to finish. It also periodically calls
/// `ClearTransactionalCache` on all registered objects.
class WorkerTransactionalCacheCleaner : public TransactionalCacheCleaner {
public:
template <class... T>
WorkerTransactionalCacheCleaner(tx::WorkerEngine &tx_engine,
communication::rpc::Server &server,
ProduceRpcServer &produce_server,
T &... caches)
: TransactionalCacheCleaner(tx_engine, caches...),
rpc_server_(server),
produce_server_(produce_server) {
Register(tx_engine);
rpc_server_.Register<WaitOnTransactionEndRpc>([this](const auto &req_reader,
auto *res_builder) {
produce_server_.FinishAndClearOngoingProducePlans(req_reader.getMember());
});
}
private:
communication::rpc::Server &rpc_server_;
ProduceRpcServer &produce_server_;
};
} // namespace distributed
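A minimal wiring sketch (the engine and cache names are invented for illustration): every registered object gets `ClearTransactionalCache(oldest_active)` invoked roughly once per `kCacheReleasePeriod`.

  // Assuming `data_cache` and `plan_cache` both expose
  // ClearTransactionalCache(tx::TransactionId):
  distributed::TransactionalCacheCleaner cleaner(tx_engine, data_cache, plan_cache);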

View File

@ -1,17 +0,0 @@
#>cpp
#pragma once
#include "distributed/transactional_cache_cleaner_rpc_messages.capnp.h"
#include "communication/rpc/messages.hpp"
#include "transactions/type.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:define-rpc wait-on-transaction-end
(:request ((member "tx::TransactionId" :capnp-type "UInt64")))
(:response ()))
(lcp:pop-namespace)

View File

@ -1,116 +0,0 @@
#include <unordered_map>
#include <vector>
#include "distributed/updates_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
namespace {
void RaiseIfRemoteError(UpdateResult result) {
switch (result) {
case UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
throw query::RemoveAttachedVertexException();
case UpdateResult::SERIALIZATION_ERROR:
throw mvcc::SerializationError();
case UpdateResult::LOCK_TIMEOUT_ERROR:
throw utils::LockTimeoutException(
"Remote LockTimeoutError during edge creation");
case UpdateResult::UPDATE_DELETED_ERROR:
throw RecordDeletedError();
case UpdateResult::DONE:
break;
}
}
}
UpdateResult UpdatesRpcClients::Update(int worker_id,
const database::StateDelta &delta) {
auto res = worker_clients_.GetClientPool(worker_id).Call<UpdateRpc>(delta);
CHECK(res) << "UpdateRpc failed on worker: " << worker_id;
return res->member;
}
gid::Gid UpdatesRpcClients::CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
auto res = worker_clients_.GetClientPool(worker_id).Call<CreateVertexRpc>(
CreateVertexReqData{tx_id, labels, properties});
CHECK(res) << "CreateVertexRpc failed on worker: " << worker_id;
CHECK(res->member.result == UpdateResult::DONE)
<< "Remote Vertex creation result not UpdateResult::DONE";
return res->member.gid;
}
storage::EdgeAddress UpdatesRpcClients::CreateEdge(
tx::TransactionId tx_id, VertexAccessor &from, VertexAccessor &to,
storage::EdgeType edge_type) {
CHECK(from.address().is_remote()) << "In CreateEdge `from` must be remote";
int from_worker = from.address().worker_id();
auto res = worker_clients_.GetClientPool(from_worker)
.Call<CreateEdgeRpc>(CreateEdgeReqData{
from.gid(), to.GlobalAddress(), edge_type, tx_id});
CHECK(res) << "CreateEdge RPC failed on worker: " << from_worker;
RaiseIfRemoteError(res->member.result);
return {res->member.gid, from_worker};
}
void UpdatesRpcClients::AddInEdge(tx::TransactionId tx_id,
VertexAccessor &from,
storage::EdgeAddress edge_address,
VertexAccessor &to,
storage::EdgeType edge_type) {
CHECK(to.address().is_remote() && edge_address.is_remote() &&
(from.GlobalAddress().worker_id() != to.address().worker_id()))
<< "AddInEdge should only be called when `to` is remote and "
"`from` is not on the same worker as `to`.";
auto worker_id = to.GlobalAddress().worker_id();
auto res = worker_clients_.GetClientPool(worker_id).Call<AddInEdgeRpc>(
AddInEdgeReqData{from.GlobalAddress(), edge_address, to.gid(), edge_type,
tx_id});
CHECK(res) << "AddInEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveVertex(int worker_id, tx::TransactionId tx_id,
gid::Gid gid, bool check_empty) {
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveVertexRpc>(
RemoveVertexReqData{gid, tx_id, check_empty});
CHECK(res) << "RemoveVertex RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid edge_gid, gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr) {
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveEdgeRpc>(
RemoveEdgeData{tx_id, edge_gid, vertex_from_id, vertex_to_addr});
CHECK(res) << "RemoveEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveInEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid vertex_id,
storage::EdgeAddress edge_address) {
CHECK(edge_address.is_remote()) << "RemoveInEdge edge_address is local.";
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveInEdgeRpc>(
RemoveInEdgeData{tx_id, vertex_id, edge_address});
CHECK(res) << "RemoveInEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
std::vector<utils::Future<UpdateResult>> UpdatesRpcClients::UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id) {
return worker_clients_.ExecuteOnWorkers<UpdateResult>(
skip_worker_id, [tx_id](int worker_id, auto &client) {
auto res = client.template Call<UpdateApplyRpc>(tx_id);
CHECK(res) << "UpdateApplyRpc failed";
return res->member;
});
}
} // namespace distributed

View File

@ -1,76 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "database/state_delta.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Exposes the functionality to send updates to other workers (that own the
/// graph element we are updating). Also enables us to call for a worker to
/// apply the accumulated deferred updates, or discard them.
class UpdatesRpcClients {
public:
explicit UpdatesRpcClients(RpcWorkerClients &clients)
: worker_clients_(clients) {}
/// Sends an update delta to the given worker.
UpdateResult Update(int worker_id, const database::StateDelta &delta);
/// Creates a vertex on the given worker and returns its id.
gid::Gid CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates an edge on the given worker and returns its address. If the `to`
/// vertex is on the same worker as `from`, then all remote CRUD will be
/// handled by a call to this function. Otherwise a separate call to
/// `AddInEdge` might be necessary. Throws all the exceptions that can
/// occur remotely as a result of updating a vertex.
storage::EdgeAddress CreateEdge(tx::TransactionId tx_id,
VertexAccessor &from, VertexAccessor &to,
storage::EdgeType edge_type);
/// Adds the edge with the given address to the `to` vertex as an incoming
/// edge. Only used when `to` is remote and not on the same worker as `from`.
void AddInEdge(tx::TransactionId tx_id, VertexAccessor &from,
storage::EdgeAddress edge_address, VertexAccessor &to,
storage::EdgeType edge_type);
/// Removes a vertex from the other worker.
void RemoveVertex(int worker_id, tx::TransactionId tx_id, gid::Gid gid,
bool check_empty);
/// Removes an edge on another worker. This also handles the `from` vertex
/// outgoing edge, as that vertex is on the same worker as the edge. If the
/// `to` vertex is on the same worker, then that side is handled too by the
/// single RPC call, otherwise a separate call has to be made to
/// RemoveInEdge.
void RemoveEdge(tx::TransactionId tx_id, int worker_id, gid::Gid edge_gid,
gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr);
void RemoveInEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid vertex_id, storage::EdgeAddress edge_address);
/// Calls for all the workers (except the given one) to apply their updates
/// and returns the future results.
std::vector<utils::Future<UpdateResult>> UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id);
private:
RpcWorkerClients &worker_clients_;
};
} // namespace distributed
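A sketch of how the apply step might be driven at commit time (the surrounding names and error handling are assumptions, not from the original sources): collect the futures from `UpdateApplyAll` and treat any non-`DONE` result as a failed remote update.

  auto futures = updates_clients.UpdateApplyAll(db.WorkerId(), tx_id);
  for (auto &future : futures) {
    // Assuming utils::Future exposes a blocking get(), like std::future.
    if (future.get() != distributed::UpdateResult::DONE) {
      // A remote deferred update failed; the transaction should be aborted.
    }
  }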

View File

@ -1,187 +0,0 @@
#>cpp
#pragma once
#include <unordered_map>
#include "communication/rpc/messages.hpp"
#include "database/state_delta.hpp"
#include "distributed/updates_rpc_messages.capnp.h"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
cpp<#
(lcp:namespace distributed)
(lcp:capnp-namespace "distributed")
(lcp:capnp-import 'db "/database/state_delta.capnp")
(lcp:capnp-import 'dis "/distributed/serialization.capnp")
(lcp:capnp-import 'storage "/storage/serialization.capnp")
(lcp:capnp-import 'utils "/utils/serialization.capnp")
(lcp:capnp-type-conversion "tx::TransactionId" "UInt64")
(lcp:capnp-type-conversion "gid::Gid" "UInt64")
(lcp:capnp-type-conversion "storage::Label" "Storage.Common")
(lcp:capnp-type-conversion "storage::EdgeType" "Storage.Common")
(lcp:capnp-type-conversion "storage::Property" "Storage.Common")
(lcp:capnp-type-conversion "storage::EdgeAddress" "Storage.Address")
(lcp:capnp-type-conversion "storage::VertexAddress" "Storage.Address")
(lcp:define-enum update-result
(done
serialization-error
lock-timeout-error
update-deleted-error
unable-to-delete-vertex-error)
(:documentation "The result of sending or applying a deferred update to a worker.")
(:serialize))
(lcp:define-rpc update
(:request ((member "database::StateDelta" :capnp-type "Db.StateDelta")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:define-rpc update-apply
(:request ((member "tx::TransactionId")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:define-struct create-result ()
((result "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult"))
(gid "gid::Gid" :documentation "Only valid if creation was successful."))
(:serialize :boost :capnp))
(lcp:define-struct create-vertex-req-data ()
((tx-id "tx::TransactionId")
(labels "std::vector<storage::Label>"
:capnp-save (lcp:capnp-save-vector "storage::capnp::Common" "storage::Label")
:capnp-load (lcp:capnp-load-vector "storage::capnp::Common" "storage::Label"))
(properties "std::unordered_map<storage::Property, query::TypedValue>"
:save-fun
#>cpp
ar << properties.size();
for (auto &kv : properties) {
ar << kv.first;
utils::SaveTypedValue(ar, kv.second);
}
cpp<#
:load-fun
#>cpp
size_t props_size;
ar >> props_size;
for (size_t i = 0; i < props_size; ++i) {
storage::Property p;
ar >> p;
query::TypedValue tv;
utils::LoadTypedValue(ar, tv);
properties.emplace(p, std::move(tv));
}
cpp<#
:capnp-type "Utils.Map(Storage.Common, Dis.TypedValue)"
:capnp-save
(lambda (builder member)
#>cpp
utils::SaveMap<storage::capnp::Common, capnp::TypedValue>(
${member}, &${builder},
[](auto *builder, const auto &entry) {
auto key_builder = builder->initKey();
entry.first.Save(&key_builder);
auto value_builder = builder->initValue();
utils::SaveCapnpTypedValue(entry.second, &value_builder);
});
cpp<#)
:capnp-load
(lambda (reader member)
#>cpp
utils::LoadMap<storage::capnp::Common, capnp::TypedValue>(
&${member}, ${reader},
[](const auto &reader) {
storage::Property prop;
prop.Load(reader.getKey());
query::TypedValue value;
utils::LoadCapnpTypedValue(reader.getValue(), &value);
return std::make_pair(prop, value);
});
cpp<#)))
(:serialize :capnp))
(lcp:define-rpc create-vertex
(:request ((member "CreateVertexReqData")))
(:response ((member "CreateResult"))))
(lcp:define-struct create-edge-req-data ()
((from "gid::Gid")
(to "storage::VertexAddress")
(edge-type "storage::EdgeType")
(tx-id "tx::TransactionId"))
(:serialize :capnp))
(lcp:define-rpc create-edge
(:request ((member "CreateEdgeReqData")))
(:response ((member "CreateResult"))))
(lcp:define-struct add-in-edge-req-data ()
((from "storage::VertexAddress")
(edge-address "storage::EdgeAddress")
(to "gid::Gid")
(edge-type "storage::EdgeType")
(tx-id "tx::TransactionId"))
(:serialize :capnp))
(lcp:define-rpc add-in-edge
(:request ((member "AddInEdgeReqData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:define-struct remove-vertex-req-data ()
((gid "gid::Gid")
(tx-id "tx::TransactionId")
(check-empty :bool))
(:serialize :capnp))
(lcp:define-rpc remove-vertex
(:request ((member "RemoveVertexReqData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:define-struct remove-edge-data ()
((tx-id "tx::TransactionId")
(edge-id "gid::Gid")
(vertex-from-id "gid::Gid")
(vertex-to-address "storage::VertexAddress"))
(:serialize :capnp))
(lcp:define-rpc remove-edge
(:request ((member "RemoveEdgeData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:define-struct remove-in-edge-data ()
((tx-id "tx::TransactionId")
(vertex "gid::Gid")
(edge-address "storage::EdgeAddress"))
(:serialize :capnp))
(lcp:define-rpc remove-in-edge
(:request ((member "RemoveInEdgeData")))
(:response ((member "UpdateResult"
:capnp-init nil
:capnp-save (lcp:capnp-save-enum "capnp::UpdateResult" "UpdateResult")
:capnp-load (lcp:capnp-load-enum "capnp::UpdateResult" "UpdateResult")))))
(lcp:pop-namespace) ;; distributed

View File

@ -1,385 +0,0 @@
#include <utility>
#include "glog/logging.h"
#include "distributed/updates_rpc_server.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Emplace(
const database::StateDelta &delta) {
auto gid = std::is_same<TRecordAccessor, VertexAccessor>::value
? delta.vertex_id
: delta.edge_id;
std::lock_guard<utils::SpinLock> guard{lock_};
auto found = deltas_.find(gid);
if (found == deltas_.end()) {
found =
deltas_
.emplace(gid, std::make_pair(FindAccessor(gid),
std::vector<database::StateDelta>{}))
.first;
}
found->second.second.emplace_back(delta);
// TODO call `RecordAccessor::update` to force serialization errors to
// fail-fast (as opposed to when all the deltas get applied).
//
// This is problematic because `VersionList::update` needs to become
// thread-safe within the same transaction. Note that the concurrency is
// possible both between the owner worker interpretation thread and an RPC
// thread (current thread), as well as multiple RPC threads if this
// object's lock is released (perhaps desirable).
//
// A potential solution *might* be that `LockStore::Lock` returns a `bool`
// indicating if the caller was the one obtaining the lock (not the same
// as lock already being held by the same transaction).
//
// Another thing that needs to be done (if we do this) is ensuring that
// `LockStore::Take` is thread-safe when called in parallel in the same
// transaction. Currently it's thread-safe only when called in parallel
// from different transactions (only one manages to take the RecordLock).
//
// Deferring the implementation of this as it's tricky, and essentially an
// optimization.
//
// try {
// found->second.first.update();
// } catch (const mvcc::SerializationError &) {
// return UpdateResult::SERIALIZATION_ERROR;
// } catch (const RecordDeletedError &) {
// return UpdateResult::UPDATE_DELETED_ERROR;
// } catch (const utils::LockTimeoutException &) {
// return UpdateResult::LOCK_TIMEOUT_ERROR;
// }
return UpdateResult::DONE;
}
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateVertex(
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
auto result = db_accessor_.InsertVertex();
for (auto &label : labels) result.add_label(label);
for (auto &kv : properties) result.PropsSet(kv.first, kv.second);
std::lock_guard<utils::SpinLock> guard{lock_};
deltas_.emplace(result.gid(),
std::make_pair(result, std::vector<database::StateDelta>{}));
return result.gid();
}
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateEdge(
gid::Gid from, storage::VertexAddress to, storage::EdgeType edge_type) {
auto &db = db_accessor_.db();
auto from_addr = db.storage().LocalizedAddressIfPossible(
storage::VertexAddress(from, db.WorkerId()));
auto to_addr = db.storage().LocalizedAddressIfPossible(to);
auto edge = db_accessor_.InsertOnlyEdge(from_addr, to_addr, edge_type);
std::lock_guard<utils::SpinLock> guard{lock_};
deltas_.emplace(edge.gid(),
std::make_pair(edge, std::vector<database::StateDelta>{}));
return edge.gid();
}
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Apply() {
std::lock_guard<utils::SpinLock> guard{lock_};
for (auto &kv : deltas_) {
auto &record_accessor = kv.second.first;
// We need to reconstruct the record as in the meantime some local
// update might have updated it.
record_accessor.Reconstruct();
for (database::StateDelta &delta : kv.second.second) {
try {
auto &dba = db_accessor_;
switch (delta.type) {
case database::StateDelta::Type::TRANSACTION_BEGIN:
case database::StateDelta::Type::TRANSACTION_COMMIT:
case database::StateDelta::Type::TRANSACTION_ABORT:
case database::StateDelta::Type::CREATE_VERTEX:
case database::StateDelta::Type::CREATE_EDGE:
case database::StateDelta::Type::BUILD_INDEX:
LOG(FATAL) << "Can only apply record update deltas for remote "
"graph element";
case database::StateDelta::Type::REMOVE_VERTEX:
if (!db_accessor().RemoveVertex(
reinterpret_cast<VertexAccessor &>(record_accessor),
delta.check_empty)) {
return UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR;
}
break;
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
case database::StateDelta::Type::SET_PROPERTY_EDGE:
record_accessor.PropsSet(delta.property, delta.value);
break;
case database::StateDelta::Type::ADD_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.add_label(delta.label);
break;
case database::StateDelta::Type::REMOVE_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.remove_label(delta.label);
break;
case database::StateDelta::Type::ADD_OUT_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.out_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_to_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::ADD_IN_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.in_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_from_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::REMOVE_EDGE:
// We only remove the edge as a result of this StateDelta,
// because the removal of edge from vertex in/out is performed
// in REMOVE_[IN/OUT]_EDGE deltas.
db_accessor_.RemoveEdge(
reinterpret_cast<EdgeAccessor &>(record_accessor), false,
false);
break;
case database::StateDelta::Type::REMOVE_OUT_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveOutEdge(delta.edge_address);
break;
case database::StateDelta::Type::REMOVE_IN_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveInEdge(delta.edge_address);
break;
}
} catch (const mvcc::SerializationError &) {
return UpdateResult::SERIALIZATION_ERROR;
} catch (const RecordDeletedError &) {
return UpdateResult::UPDATE_DELETED_ERROR;
} catch (const utils::LockTimeoutException &) {
return UpdateResult::LOCK_TIMEOUT_ERROR;
}
}
}
return UpdateResult::DONE;
}
UpdatesRpcServer::UpdatesRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db) {
server.Register<UpdateRpc>([this](const auto &req_reader, auto *res_builder) {
UpdateReq req;
req.Load(req_reader);
using DeltaType = database::StateDelta::Type;
auto &delta = req.member;
switch (delta.type) {
case DeltaType::SET_PROPERTY_VERTEX:
case DeltaType::ADD_LABEL:
case DeltaType::REMOVE_LABEL:
case database::StateDelta::Type::REMOVE_OUT_EDGE:
case database::StateDelta::Type::REMOVE_IN_EDGE: {
UpdateRes res(
GetUpdates(vertex_updates_, delta.transaction_id).Emplace(delta));
res.Save(res_builder);
return;
}
case DeltaType::SET_PROPERTY_EDGE: {
UpdateRes res(
GetUpdates(edge_updates_, delta.transaction_id).Emplace(delta));
res.Save(res_builder);
return;
}
default:
LOG(FATAL) << "Can't perform a remote update with delta type: "
<< static_cast<int>(req.member.type);
}
});
server.Register<UpdateApplyRpc>(
[this](const auto &req_reader, auto *res_builder) {
UpdateApplyReq req;
req.Load(req_reader);
UpdateApplyRes res(Apply(req.member));
res.Save(res_builder);
});
server.Register<CreateVertexRpc>([this](const auto &req_reader,
auto *res_builder) {
CreateVertexReq req;
req.Load(req_reader);
gid::Gid gid = GetUpdates(vertex_updates_, req.member.tx_id)
.CreateVertex(req.member.labels, req.member.properties);
CreateVertexRes res(CreateResult{UpdateResult::DONE, gid});
res.Save(res_builder);
});
server.Register<CreateEdgeRpc>(
[this](const auto &req_reader, auto *res_builder) {
CreateEdgeReq req;
req.Load(req_reader);
auto data = req.member;
auto creation_result = CreateEdge(data);
// If `from` and `to` are both on this worker, we handle it in this
// RPC call. Do it only if CreateEdge succeeded.
if (creation_result.result == UpdateResult::DONE &&
data.to.worker_id() == db_.WorkerId()) {
auto to_delta = database::StateDelta::AddInEdge(
data.tx_id, data.to.gid(), {data.from, db_.WorkerId()},
{creation_result.gid, db_.WorkerId()}, data.edge_type);
creation_result.result =
GetUpdates(vertex_updates_, data.tx_id).Emplace(to_delta);
}
CreateEdgeRes res(creation_result);
res.Save(res_builder);
});
server.Register<AddInEdgeRpc>(
[this](const auto &req_reader, auto *res_builder) {
AddInEdgeReq req;
req.Load(req_reader);
auto to_delta = database::StateDelta::AddInEdge(
req.member.tx_id, req.member.to, req.member.from,
req.member.edge_address, req.member.edge_type);
auto result =
GetUpdates(vertex_updates_, req.member.tx_id).Emplace(to_delta);
AddInEdgeRes res(result);
res.Save(res_builder);
});
server.Register<RemoveVertexRpc>(
[this](const auto &req_reader, auto *res_builder) {
RemoveVertexReq req;
req.Load(req_reader);
auto to_delta = database::StateDelta::RemoveVertex(
req.member.tx_id, req.member.gid, req.member.check_empty);
auto result =
GetUpdates(vertex_updates_, req.member.tx_id).Emplace(to_delta);
RemoveVertexRes res(result);
res.Save(res_builder);
});
server.Register<RemoveEdgeRpc>(
[this](const auto &req_reader, auto *res_builder) {
RemoveEdgeReq req;
req.Load(req_reader);
RemoveEdgeRes res(RemoveEdge(req.member));
res.Save(res_builder);
});
server.Register<RemoveInEdgeRpc>([this](const auto &req_reader,
auto *res_builder) {
RemoveInEdgeReq req;
req.Load(req_reader);
auto data = req.member;
RemoveInEdgeRes res(GetUpdates(vertex_updates_, data.tx_id)
.Emplace(database::StateDelta::RemoveInEdge(
data.tx_id, data.vertex, data.edge_address)));
res.Save(res_builder);
});
}
UpdateResult UpdatesRpcServer::Apply(tx::TransactionId tx_id) {
auto apply = [tx_id](auto &collection) {
auto access = collection.access();
auto found = access.find(tx_id);
if (found == access.end()) {
return UpdateResult::DONE;
}
auto result = found->second.Apply();
access.remove(tx_id);
return result;
};
auto vertex_result = apply(vertex_updates_);
auto edge_result = apply(edge_updates_);
if (vertex_result != UpdateResult::DONE) return vertex_result;
if (edge_result != UpdateResult::DONE) return edge_result;
return UpdateResult::DONE;
}
void UpdatesRpcServer::ClearTransactionalCache(
tx::TransactionId oldest_active) {
auto vertex_access = vertex_updates_.access();
for (auto &kv : vertex_access) {
if (kv.first < oldest_active) {
vertex_access.remove(kv.first);
}
}
auto edge_access = edge_updates_.access();
for (auto &kv : edge_access) {
if (kv.first < oldest_active) {
edge_access.remove(kv.first);
}
}
}
// Gets/creates the TransactionUpdates for the given transaction.
template <typename TAccessor>
UpdatesRpcServer::TransactionUpdates<TAccessor> &UpdatesRpcServer::GetUpdates(
MapT<TAccessor> &updates, tx::TransactionId tx_id) {
return updates.access()
.emplace(tx_id, std::make_tuple(tx_id),
std::make_tuple(std::ref(db_), tx_id))
.first->second;
}
CreateResult UpdatesRpcServer::CreateEdge(const CreateEdgeReqData &req) {
auto gid = GetUpdates(edge_updates_, req.tx_id)
.CreateEdge(req.from, req.to, req.edge_type);
auto from_delta = database::StateDelta::AddOutEdge(
req.tx_id, req.from, req.to, {gid, db_.WorkerId()}, req.edge_type);
auto result = GetUpdates(vertex_updates_, req.tx_id).Emplace(from_delta);
return {result, gid};
}
UpdateResult UpdatesRpcServer::RemoveEdge(const RemoveEdgeData &data) {
// Edge removal.
auto deletion_delta =
database::StateDelta::RemoveEdge(data.tx_id, data.edge_id);
auto result = GetUpdates(edge_updates_, data.tx_id).Emplace(deletion_delta);
// Out-edge removal, which is certainly local.
if (result == UpdateResult::DONE) {
auto remove_out_delta = database::StateDelta::RemoveOutEdge(
data.tx_id, data.vertex_from_id, {data.edge_id, db_.WorkerId()});
result = GetUpdates(vertex_updates_, data.tx_id).Emplace(remove_out_delta);
}
// In-edge removal, which might not be local.
if (result == UpdateResult::DONE &&
data.vertex_to_address.worker_id() == db_.WorkerId()) {
auto remove_in_delta = database::StateDelta::RemoveInEdge(
data.tx_id, data.vertex_to_address.gid(),
{data.edge_id, db_.WorkerId()});
result = GetUpdates(vertex_updates_, data.tx_id).Emplace(remove_in_delta);
}
return result;
}
template <>
VertexAccessor
UpdatesRpcServer::TransactionUpdates<VertexAccessor>::FindAccessor(
gid::Gid gid) {
return db_accessor_.FindVertex(gid, false);
}
template <>
EdgeAccessor UpdatesRpcServer::TransactionUpdates<EdgeAccessor>::FindAccessor(
gid::Gid gid) {
return db_accessor_.FindEdge(gid, false);
}
} // namespace distributed

View File

@ -1,104 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/edge_accessor.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "storage/vertex_accessor.hpp"
#include "transactions/type.hpp"
#include "utils/thread/sync.hpp"
namespace distributed {
/// An RPC server that accepts and holds deferred updates (deltas) until it's
/// told to apply or discard them. The updates are organized and applied per
/// transaction in this single updates server.
///
/// Attempts to get serialization and update-after-delete errors to happen as
/// soon as possible during query execution (fail fast).
class UpdatesRpcServer {
// Remote updates for one transaction.
template <typename TRecordAccessor>
class TransactionUpdates {
public:
TransactionUpdates(database::GraphDb &db, tx::TransactionId tx_id)
: db_accessor_(db, tx_id) {}
/// Adds a delta and returns the result. Does not modify the state (data) of
/// the graph element the update is for, but calls the `update` method to
/// fail-fast on serialization and update-after-delete errors.
UpdateResult Emplace(const database::StateDelta &delta);
/// Creates a new vertex and returns its gid.
gid::Gid CreateVertex(
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates a new edge and returns its gid. Does not update vertices at the
/// end of the edge.
gid::Gid CreateEdge(gid::Gid from, storage::VertexAddress to,
storage::EdgeType edge_type);
/// Applies all the deltas on the record.
UpdateResult Apply();
auto &db_accessor() { return db_accessor_; }
private:
database::GraphDbAccessor db_accessor_;
std::unordered_map<
gid::Gid, std::pair<TRecordAccessor, std::vector<database::StateDelta>>>
deltas_;
// Multiple workers might be sending remote updates concurrently.
utils::SpinLock lock_;
// Helper method specialized for [Vertex|Edge]Accessor.
TRecordAccessor FindAccessor(gid::Gid gid);
};
public:
UpdatesRpcServer(database::GraphDb &db, communication::rpc::Server &server);
/// Applies all existing updates for the given transaction ID. If there are
/// no updates for that transaction, nothing happens. Clears the updates cache
/// after applying them, regardless of the result.
UpdateResult Apply(tx::TransactionId tx_id);
/// Clears the cache of local transactions that are completed. The signature
/// of this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
database::GraphDb &db_;
template <typename TAccessor>
using MapT =
ConcurrentMap<tx::TransactionId, TransactionUpdates<TAccessor>>;
MapT<VertexAccessor> vertex_updates_;
MapT<EdgeAccessor> edge_updates_;
// Gets/creates the TransactionUpdates for the given transaction.
template <typename TAccessor>
TransactionUpdates<TAccessor> &GetUpdates(MapT<TAccessor> &updates,
tx::TransactionId tx_id);
// Performs edge creation for the given request.
CreateResult CreateEdge(const CreateEdgeReqData &req);
// Performs edge removal for the given request.
UpdateResult RemoveEdge(const RemoveEdgeData &data);
};
} // namespace distributed
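A rough lifecycle sketch for this server (illustrative comments, not from the original sources): deltas arrive over individual update RPCs and are buffered per transaction, and a single apply call then replays them.

  // 1. A remote worker sends UpdateRpc / CreateVertexRpc / ...; the delta is
  //    buffered via TransactionUpdates::Emplace for that transaction.
  // 2. At commit time the coordinating worker sends UpdateApplyRpc.
  // 3. Apply(tx_id) replays every buffered delta against its record and returns
  //    a non-DONE UpdateResult (serialization, lock-timeout, ...) on the first failure.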

View File

@ -1,9 +0,0 @@
@0xb3d70bc0576218f3;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("durability::capnp");
struct RecoveryInfo {
snapshotTxId @0 :UInt64;
maxWalTxId @1 :UInt64;
}

View File

@ -5,7 +5,6 @@
#include "database/graph_db.hpp" #include "database/graph_db.hpp"
#include "durability/hashed_file_reader.hpp" #include "durability/hashed_file_reader.hpp"
#include "durability/recovery.capnp.h"
#include "storage/vertex_accessor.hpp" #include "storage/vertex_accessor.hpp"
#include "transactions/type.hpp" #include "transactions/type.hpp"
@ -25,25 +24,6 @@ struct RecoveryInfo {
max_wal_tx_id == other.max_wal_tx_id; max_wal_tx_id == other.max_wal_tx_id;
} }
bool operator!=(const RecoveryInfo &other) const { return !(*this == other); } bool operator!=(const RecoveryInfo &other) const { return !(*this == other); }
void Save(capnp::RecoveryInfo::Builder *builder) const {
builder->setSnapshotTxId(snapshot_tx_id);
builder->setMaxWalTxId(max_wal_tx_id);
}
void Load(const capnp::RecoveryInfo::Reader &reader) {
snapshot_tx_id = reader.getSnapshotTxId();
max_wal_tx_id = reader.getMaxWalTxId();
}
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &snapshot_tx_id;
ar &max_wal_tx_id;
}
}; };
/** Reads snapshot metadata from the end of the file without messing up the /** Reads snapshot metadata from the end of the file without messing up the

View File

@ -4,29 +4,5 @@ set(io_src_files
network/socket.cpp network/socket.cpp
network/utils.cpp) network/utils.cpp)
# Use this function to add each capnp file to generation. This way each file is
# standalone and we avoid recompiling everything.
# NOTE: io_src_files and io_capnp_files are globally updated.
# TODO: This is duplicated from src/CMakeLists.txt, find a good way to
# generalize this on per subdirectory basis.
function(add_capnp capnp_src_file)
set(cpp_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.c++)
set(h_file ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file}.h)
add_custom_command(OUTPUT ${cpp_file} ${h_file}
COMMAND ${CAPNP_EXE} compile -o${CAPNP_CXX_EXE} ${capnp_src_file} -I ${CMAKE_CURRENT_SOURCE_DIR}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${capnp_src_file} capnproto-proj
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# Update *global* io_capnp_files
set(io_capnp_files ${io_capnp_files} ${cpp_file} ${h_file} PARENT_SCOPE)
# Update *global* io_src_files
set(io_src_files ${io_src_files} ${cpp_file} PARENT_SCOPE)
endfunction(add_capnp)
add_capnp(network/endpoint.capnp)
add_custom_target(generate_io_capnp DEPENDS ${io_capnp_files})
add_library(mg-io STATIC ${io_src_files}) add_library(mg-io STATIC ${io_src_files})
target_link_libraries(mg-io stdc++fs Threads::Threads fmt glog mg-utils) target_link_libraries(mg-io stdc++fs Threads::Threads fmt glog mg-utils)
target_link_libraries(mg-io capnp kj)
add_dependencies(mg-io generate_io_capnp)

View File

@ -1,10 +0,0 @@
@0x93c2449a1e02365a;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("io::network::capnp");
struct Endpoint {
address @0 :Text;
port @1 :UInt16;
family @2 :UInt8;
}

View File

@ -24,18 +24,6 @@ Endpoint::Endpoint(const std::string &address, uint16_t port)
CHECK(family_ != 0) << "Not a valid IPv4 or IPv6 address: " << address; CHECK(family_ != 0) << "Not a valid IPv4 or IPv6 address: " << address;
} }
void Endpoint::Save(capnp::Endpoint::Builder *builder) const {
builder->setAddress(address_);
builder->setPort(port_);
builder->setFamily(family_);
}
void Endpoint::Load(const capnp::Endpoint::Reader &reader) {
address_ = reader.getAddress();
port_ = reader.getPort();
family_ = reader.getFamily();
}
bool Endpoint::operator==(const Endpoint &other) const { bool Endpoint::operator==(const Endpoint &other) const {
return address_ == other.address_ && port_ == other.port_ && return address_ == other.address_ && port_ == other.port_ &&
family_ == other.family_; family_ == other.family_;

View File

@ -5,7 +5,6 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include "io/network/endpoint.capnp.h"
#include "utils/exceptions.hpp" #include "utils/exceptions.hpp"
namespace io::network { namespace io::network {
@ -27,9 +26,6 @@ class Endpoint {
bool operator==(const Endpoint &other) const; bool operator==(const Endpoint &other) const;
friend std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint); friend std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint);
void Save(capnp::Endpoint::Builder *builder) const;
void Load(const capnp::Endpoint::Reader &reader);
private: private:
std::string address_; std::string address_;
uint16_t port_{0}; uint16_t port_{0};

View File

@ -1514,7 +1514,6 @@ code generation."
(when schema (write-line schema out)))))) (when schema (write-line schema out))))))
;; Now generate the save/load C++ code in the cpp file. ;; Now generate the save/load C++ code in the cpp file.
(write-line "// Autogenerated Cap'n Proto serialization code" cpp-out) (write-line "// Autogenerated Cap'n Proto serialization code" cpp-out)
(write-line "#include \"utils/serialization.hpp\"" cpp-out)
(let (open-namespaces) (let (open-namespaces)
(dolist (cpp-class (remove-if (lambda (cpp-type) (not (typep cpp-type 'cpp-class))) cpp-types)) (dolist (cpp-class (remove-if (lambda (cpp-type) (not (typep cpp-type 'cpp-class))) cpp-types))
;; Check if we need to open or close namespaces ;; Check if we need to open or close namespaces

View File

@ -12,9 +12,9 @@
#include <glog/logging.h> #include <glog/logging.h>
#include "communication/bolt/v1/session.hpp" #include "communication/bolt/v1/session.hpp"
#include "communication/server.hpp"
#include "config.hpp" #include "config.hpp"
#include "database/graph_db.hpp" #include "database/graph_db.hpp"
#include "stats/stats.hpp"
#include "telemetry/telemetry.hpp" #include "telemetry/telemetry.hpp"
#include "utils/flag_validation.hpp" #include "utils/flag_validation.hpp"
#include "utils/signals.hpp" #include "utils/signals.hpp"
@ -103,8 +103,7 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
/// Run the Memgraph server. /// Run the Memgraph server.
/// ///
/// Sets up all the required state before running `memgraph_main` and does any /// Sets up all the required state before running `memgraph_main` and does any
/// required cleanup afterwards. `get_stats_prefix` is used to obtain the /// required cleanup afterwards.
/// prefix when logging Memgraph's statistics.
/// ///
/// Command line arguments and configuration files are read before calling any /// Command line arguments and configuration files are read before calling any
/// of the supplied functions. Therefore, you should use flags only from those /// of the supplied functions. Therefore, you should use flags only from those
@ -116,8 +115,7 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
/// ///
/// @code /// @code
/// int main(int argc, char *argv[]) { /// int main(int argc, char *argv[]) {
/// auto get_stats_prefix = []() -> std::string { return "memgraph"; }; /// return WithInit(argc, argv, SingleNodeMain);
/// return WithInit(argc, argv, get_stats_prefix, SingleNodeMain);
/// } /// }
/// @endcode /// @endcode
/// ///
@ -126,8 +124,8 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
/// `InitSignalHandlers` with appropriate function to shutdown the server you /// `InitSignalHandlers` with appropriate function to shutdown the server you
/// started. /// started.
int WithInit(int argc, char **argv, int WithInit(int argc, char **argv,
const std::function<std::string()> &get_stats_prefix,
const std::function<void()> &memgraph_main) { const std::function<void()> &memgraph_main) {
google::SetUsageMessage("Memgraph database server");
gflags::SetVersionString(version_string); gflags::SetVersionString(version_string);
// Load config before parsing arguments, so that flags from the command line // Load config before parsing arguments, so that flags from the command line
@ -142,9 +140,6 @@ int WithInit(int argc, char **argv,
// Unhandled exception handler init. // Unhandled exception handler init.
std::set_terminate(&utils::TerminateHandler); std::set_terminate(&utils::TerminateHandler);
stats::InitStatsLogging(get_stats_prefix());
utils::OnScopeExit stop_stats([] { stats::StopStatsLogging(); });
// Initialize the communication library. // Initialize the communication library.
communication::Init(); communication::Init();
@ -163,7 +158,6 @@ int WithInit(int argc, char **argv,
} }
void SingleNodeMain() { void SingleNodeMain() {
google::SetUsageMessage("Memgraph single-node database server");
database::SingleNode db; database::SingleNode db;
SessionData session_data{db}; SessionData session_data{db};
@ -206,79 +200,6 @@ void SingleNodeMain() {
// End common stuff for enterprise and community editions // End common stuff for enterprise and community editions
#ifdef MG_COMMUNITY
int main(int argc, char **argv) { int main(int argc, char **argv) {
return WithInit(argc, argv, []() { return "memgraph"; }, SingleNodeMain); return WithInit(argc, argv, SingleNodeMain);
} }
#else // enterprise edition
// Distributed flags.
DEFINE_HIDDEN_bool(
master, false,
"If this Memgraph server is the master in a distributed deployment.");
DEFINE_HIDDEN_bool(
worker, false,
"If this Memgraph server is a worker in a distributed deployment.");
DECLARE_int32(worker_id);
void MasterMain() {
google::SetUsageMessage("Memgraph distributed master");
database::Master db;
SessionData session_data{db};
ServerContext context;
std::string service_name = "Bolt";
if (FLAGS_key_file != "" && FLAGS_cert_file != "") {
context = ServerContext(FLAGS_key_file, FLAGS_cert_file);
service_name = "BoltS";
}
ServerT server({FLAGS_interface, static_cast<uint16_t>(FLAGS_port)},
session_data, &context, FLAGS_session_inactivity_timeout,
service_name, FLAGS_num_workers);
// Handler for regular termination signals
auto shutdown = [&server] {
// Server needs to be shutdown first and then the database. This prevents a
// race condition when a transaction is accepted during server shutdown.
server.Shutdown();
};
InitSignalHandlers(shutdown);
server.AwaitShutdown();
}
void WorkerMain() {
google::SetUsageMessage("Memgraph distributed worker");
database::Worker db;
db.WaitForShutdown();
}
int main(int argc, char **argv) {
auto get_stats_prefix = [&]() -> std::string {
if (FLAGS_master) {
return "master";
} else if (FLAGS_worker) {
return fmt::format("worker-{}", FLAGS_worker_id);
}
return "memgraph";
};
auto memgraph_main = [&]() {
CHECK(!(FLAGS_master && FLAGS_worker))
<< "Can't run Memgraph as worker and master at the same time";
if (FLAGS_master)
MasterMain();
else if (FLAGS_worker)
WorkerMain();
else
SingleNodeMain();
};
return WithInit(argc, argv, get_stats_prefix, memgraph_main);
}
#endif // enterprise edition

Some files were not shown because too many files have changed in this diff.