Compare commits

...

1 Commit

Author: Teon Banek
SHA1: 53bca9fa3a
Date: 2018-10-18 10:08:14 +02:00

Message: Prepare release v0.10.0

Reviewers: buda, teon.banek

Subscribers: mtomic, pullbot

Differential Revision: https://phabricator.memgraph.io/D1375
171 changed files with 158 additions and 16592 deletions

View File

@@ -137,9 +137,6 @@ if (USE_READLINE)
endif()
endif()
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost 1.62 REQUIRED COMPONENTS iostreams serialization)
set(libs_dir ${CMAKE_SOURCE_DIR}/libs)
add_subdirectory(libs EXCLUDE_FROM_ALL)
@@ -186,7 +183,6 @@ option(EXPERIMENTAL "Build experimental binaries" OFF)
option(CUSTOMERS "Build customer binaries" OFF)
option(TEST_COVERAGE "Generate coverage reports from running memgraph" OFF)
option(TOOLS "Build tools binaries" ON)
option(MG_COMMUNITY "Build Memgraph Community Edition" OFF)
if (TEST_COVERAGE)
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
@@ -197,10 +193,6 @@ if (TEST_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()
if (MG_COMMUNITY)
add_definitions(-DMG_COMMUNITY)
endif()
# Add subprojects
include_directories(src)
add_subdirectory(src)

View File

@@ -3,7 +3,6 @@
- build_debug/memgraph
- build_release/memgraph
- build_release/tools/src/mg_import_csv
- build_release/tools/src/mg_statsd
- config
filename: binaries.tar.gz

View File

@@ -33,13 +33,8 @@
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1000 make -j$THREADS memgraph tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot
# Generate distributed card fraud dataset.
cd ../tests/distributed/card_fraud
./generate_dataset.sh
cd ../../..
# Checkout to parent commit and initialize.
cd ../parent
cd ../../parent
git checkout HEAD~1
TIMEOUT=600 ./init
@@ -88,7 +83,3 @@
cd ../../docs/user_technical
# TODO (mferencevic): uncomment this once couscous is replaced with pandoc
#./bundle_community
# Generate distributed card fraud dataset.
cd ../../tests/distributed/card_fraud
./generate_dataset.sh

View File

@@ -94,7 +94,7 @@ will load the new dataset.
Use the following command:
```
mg_import_csv --nodes=comment_nodes.csv --nodes=forum_nodes.csv --relationships=relationships.csv
mg_import_csv --overwrite --nodes=comment_nodes.csv --nodes=forum_nodes.csv --relationships=relationships.csv
```
If using Docker, things are a bit more complicated. First you need to move the
@@ -110,6 +110,7 @@ Then, run the importer with the following:
```
docker run -v mg_lib:/var/lib/memgraph -v mg_etc:/etc/memgraph -v mg_import:/import-data \
--entrypoint=mg_import_csv memgraph \
--overwrite \
--nodes=/import-data/comment_nodes.csv --nodes=/import-data/forum_nodes.csv \
--relationships=/import-data/relationships.csv
```

View File

@@ -4,33 +4,12 @@
set(memgraph_src_files
communication/buffer.cpp
communication/bolt/v1/decoder/decoded_value.cpp
communication/rpc/client.cpp
communication/rpc/protocol.cpp
communication/rpc/server.cpp
data_structures/concurrent/skiplist_gc.cpp
database/config.cpp
database/counters.cpp
database/graph_db.cpp
database/graph_db_accessor.cpp
database/state_delta.cpp
distributed/cluster_discovery_master.cpp
distributed/cluster_discovery_worker.cpp
distributed/coordination.cpp
distributed/coordination_master.cpp
distributed/coordination_worker.cpp
distributed/durability_rpc_clients.cpp
distributed/durability_rpc_server.cpp
distributed/index_rpc_server.cpp
distributed/plan_consumer.cpp
distributed/plan_dispatcher.cpp
distributed/cache.cpp
distributed/data_manager.cpp
distributed/data_rpc_clients.cpp
distributed/data_rpc_server.cpp
distributed/produce_rpc_server.cpp
distributed/pull_rpc_clients.cpp
distributed/updates_rpc_clients.cpp
distributed/updates_rpc_server.cpp
durability/paths.cpp
durability/recovery.cpp
durability/snapshooter.cpp
@@ -46,16 +25,11 @@ set(memgraph_src_files
query/frontend/stripped.cpp
query/interpret/awesome_memgraph_functions.cpp
query/interpreter.cpp
query/plan/distributed.cpp
query/plan/operator.cpp
query/plan/preprocess.cpp
query/plan/rule_based_planner.cpp
query/plan/variable_start_planner.cpp
query/typed_value.cpp
stats/metrics.cpp
stats/stats.cpp
storage/concurrent_id_mapper_master.cpp
storage/concurrent_id_mapper_worker.cpp
storage/edge_accessor.cpp
storage/locking/record_lock.cpp
storage/property_value.cpp
@@ -63,9 +37,7 @@ set(memgraph_src_files
storage/vertex_accessor.cpp
threading/sync/rwlock.cpp
threading/thread.cpp
transactions/engine_master.cpp
transactions/engine_single_node.cpp
transactions/engine_worker.cpp
utils/demangle.cpp
utils/file.cpp
utils/network.cpp
@@ -78,9 +50,7 @@ string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
# memgraph_lib depend on these libraries
set(MEMGRAPH_ALL_LIBS stdc++fs Threads::Threads fmt cppitertools
antlr_opencypher_parser_lib dl glog gflags
${Boost_IOSTREAMS_LIBRARY_RELEASE}
${Boost_SERIALIZATION_LIBRARY_RELEASE})
antlr_opencypher_parser_lib dl glog gflags)
if (USE_LTALLOC)
list(APPEND MEMGRAPH_ALL_LIBS ltalloc)

View File

@@ -7,6 +7,8 @@
#include "communication/bolt/v1/encoder/chunked_encoder_buffer.hpp"
#include "communication/bolt/v1/encoder/client_encoder.hpp"
#include "communication/client.hpp"
#include "query/typed_value.hpp"
#include "utils/exceptions.hpp"

View File

@@ -1,48 +0,0 @@
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft {
enum class RpcType { REQUEST_VOTE, APPEND_ENTRIES };
template <class State>
struct PeerRpcRequest : public rpc::Message {
RpcType type;
RequestVoteRequest request_vote;
AppendEntriesRequest<State> append_entries;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<rpc::Message>(*this);
ar &type;
ar &request_vote;
ar &append_entries;
}
};
struct PeerRpcReply : public rpc::Message {
RpcType type;
RequestVoteReply request_vote;
AppendEntriesReply append_entries;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<rpc::Message>(*this);
ar &type;
ar &request_vote;
ar &append_entries;
}
};
} // namespace communication::raft

View File

@@ -1,699 +0,0 @@
#pragma once
#include <algorithm>
#include "fmt/format.h"
#include "glog/logging.h"
namespace communication::raft {
namespace impl {
template <class State>
RaftMemberImpl<State>::RaftMemberImpl(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id,
const RaftConfig &config)
: network_(network), storage_(storage), id_(id), config_(config) {
std::lock_guard<std::mutex> lock(mutex_);
tie(term_, voted_for_) = storage_.GetTermAndVotedFor();
for (const auto &peer_id : config_.members) {
peer_states_[peer_id] = std::make_unique<RaftPeerState>();
}
SetElectionTimer();
}
template <class State>
RaftMemberImpl<State>::~RaftMemberImpl() {
Stop();
}
template <class State>
void RaftMemberImpl<State>::Stop() {
{
std::lock_guard<std::mutex> lock(mutex_);
if (!exiting_) {
LogInfo("Stopping...");
exiting_ = true;
}
}
state_changed_.notify_all();
}
template <class State>
template <class... Args>
void RaftMemberImpl<State>::LogInfo(const std::string &format,
Args &&... args) {
LOG(INFO) << fmt::format("[id = {}, term = {}] {}", id_, term_,
fmt::format(format, std::forward<Args>(args)...))
<< std::endl;
}
template <class State>
void RaftMemberImpl<State>::TimerThreadMain() {
std::unique_lock<std::mutex> lock(mutex_);
while (!exiting_) {
if (Clock::now() >= next_election_time_) {
StartNewElection();
}
state_changed_.wait_until(lock, next_election_time_);
}
}
template <class State>
void RaftMemberImpl<State>::PeerThreadMain(std::string peer_id) {
RaftPeerState &peer_state = *peer_states_[peer_id];
LogInfo("Peer thread started for {}", peer_id);
std::unique_lock<std::mutex> lock(mutex_);
/* This loop will either call a function that issues an RPC or wait on the
* condition variable. It must not do both! Lock on `mutex_` is released while
* waiting for RPC response, which might cause us to miss a notification on
* `state_changed_` condition variable and wait indefinitely. The safest
* thing to do is to assume some important part of state was modified while we
* were waiting for the response and loop around to check. */
while (!exiting_) {
TimePoint now = Clock::now();
TimePoint wait_until;
if (mode_ != RaftMode::FOLLOWER && peer_state.backoff_until > now) {
wait_until = peer_state.backoff_until;
} else {
switch (mode_) {
case RaftMode::FOLLOWER:
wait_until = TimePoint::max();
break;
case RaftMode::CANDIDATE:
if (!peer_state.request_vote_done) {
RequestVote(peer_id, peer_state, lock);
continue;
}
break;
case RaftMode::LEADER:
if (peer_state.next_index <= storage_.GetLastLogIndex() ||
now >= peer_state.next_heartbeat_time) {
AppendEntries(peer_id, peer_state, lock);
continue;
} else {
wait_until = peer_state.next_heartbeat_time;
}
break;
}
}
state_changed_.wait_until(lock, wait_until);
}
LogInfo("Peer thread exiting for {}", peer_id);
}
template <class State>
void RaftMemberImpl<State>::CandidateOrLeaderTransitionToFollower() {
DCHECK(mode_ != RaftMode::FOLLOWER)
<< "`CandidateOrLeaderTransitionToFollower` called from follower mode";
mode_ = RaftMode::FOLLOWER;
leader_ = {};
SetElectionTimer();
}
template <class State>
void RaftMemberImpl<State>::CandidateTransitionToLeader() {
DCHECK(mode_ == RaftMode::CANDIDATE)
<< "`CandidateTransitionToLeader` called while not in candidate mode";
mode_ = RaftMode::LEADER;
leader_ = id_;
/* We don't want to trigger elections while in leader mode. */
next_election_time_ = TimePoint::max();
/* [Raft thesis, Section 6.4]
* "The Leader Completeness Property guarantees that a leader has all
* committed entries, but at the start of its term, it may not know which
* those are. To find out, it needs to commit an entry from its term. Raft
* handles this by having each leader commit a blank no-op entry into the log
* at the start of its term. As soon as this no-op entry is committed, the
* leader's commit index will be at least as large as any other server's
* during its term." */
LogEntry<State> entry;
entry.term = term_;
entry.command = std::experimental::nullopt;
storage_.AppendLogEntry(entry);
}
template <class State>
bool RaftMemberImpl<State>::CandidateOrLeaderNoteTerm(const TermId new_term) {
DCHECK(mode_ != RaftMode::FOLLOWER)
<< "`CandidateOrLeaderNoteTerm` called from follower mode";
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (term_ < new_term) {
UpdateTermAndVotedFor(new_term, {});
CandidateOrLeaderTransitionToFollower();
return true;
}
return false;
}
template <class State>
void RaftMemberImpl<State>::UpdateTermAndVotedFor(
const TermId new_term,
const std::experimental::optional<MemberId> &new_voted_for) {
term_ = new_term;
voted_for_ = new_voted_for;
leader_ = {};
storage_.WriteTermAndVotedFor(term_, voted_for_);
}
template <class State>
void RaftMemberImpl<State>::SetElectionTimer() {
/* [Raft thesis, section 3.4]
* "Raft uses randomized election timeouts to ensure that split votes are rare
* and that they are resolved quickly. To prevent split votes in the first
* place, election timeouts are chosen randomly from a fixed interval (e.g.,
* 150-300 ms)." */
std::uniform_int_distribution<uint64_t> distribution(
config_.leader_timeout_min.count(), config_.leader_timeout_max.count());
Clock::duration wait_interval = std::chrono::milliseconds(distribution(rng_));
next_election_time_ = Clock::now() + wait_interval;
}
template <class State>
void RaftMemberImpl<State>::StartNewElection() {
LogInfo("Starting new election");
/* [Raft thesis, section 3.4]
* "To begin an election, a follower increments its current term and
* transitions to candidate state. It then votes for itself and issues
* RequestVote RPCs in parallel to each of the other servers in the cluster."
*/
UpdateTermAndVotedFor(term_ + 1, id_);
mode_ = RaftMode::CANDIDATE;
/* [Raft thesis, section 3.4]
* "Each candidate restarts its randomized election timeout at the start of an
* election, and it waits for that timeout to elapse before starting the next
* election; this reduces the likelihood of another split vote in the new
* election." */
SetElectionTimer();
for (const auto &peer_id : config_.members) {
if (peer_id == id_) {
continue;
}
auto &peer_state = peer_states_[peer_id];
peer_state->request_vote_done = false;
peer_state->voted_for_me = false;
peer_state->match_index = 0;
peer_state->next_index = storage_.GetLastLogIndex() + 1;
/* [Raft thesis, section 3.5]
* "Until the leader has discovered where it and the follower's logs match,
* the leader can send AppendEntries with no entries (like heartbeats) to
* save bandwidth. Then, once the matchIndex immediately precedes the
* nextIndex, the leader should begin to send the actual entries." */
peer_state->suppress_log_entries = true;
/* [Raft thesis, section 3.4]
* "Once a candidate wins an election, it becomes leader. It then sends
* heartbeat messages to all of the other servers to establish its authority
* and prevent new elections."
*
* This will make newly elected leader send heartbeats immediately.
*/
peer_state->next_heartbeat_time = TimePoint::min();
peer_state->backoff_until = TimePoint::min();
}
// We already have the majority if we're in a single node cluster.
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
/* Notify peer threads to start issuing RequestVote RPCs. */
state_changed_.notify_all();
}
template <class State>
bool RaftMemberImpl<State>::CountVotes() {
DCHECK(mode_ == RaftMode::CANDIDATE)
<< "`CountVotes` should only be called from candidate mode";
int num_votes = 0;
for (const auto &peer_id : config_.members) {
if (peer_id == id_ || peer_states_[peer_id]->voted_for_me) {
num_votes++;
}
}
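/* For example, with 5 members a candidate needs its own vote plus at least
 * 2 peer votes (2 * 3 > 5); with a single member its own vote already
 * suffices (2 * 1 > 1). The numbers are only illustrative. */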
return 2 * num_votes > config_.members.size();
}
template <class State>
void RaftMemberImpl<State>::RequestVote(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Requesting vote from {}", peer_id);
RequestVoteRequest request;
request.candidate_term = term_;
request.candidate_id = id_;
request.last_log_index = storage_.GetLastLogIndex();
request.last_log_term = storage_.GetLogTerm(request.last_log_index);
RequestVoteReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendRequestVote(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
if (term_ != request.candidate_term || mode_ != RaftMode::CANDIDATE ||
exiting_) {
LogInfo("Ignoring RequestVote RPC reply from {}", peer_id);
return;
}
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(reply.term == term_) << "Stale RequestVote RPC reply";
peer_state.request_vote_done = true;
if (reply.vote_granted) {
peer_state.voted_for_me = true;
LogInfo("Got vote from {}", peer_id);
if (CountVotes()) {
LogInfo("Elected as leader.");
CandidateTransitionToLeader();
}
} else {
LogInfo("Vote denied from {}", peer_id);
}
state_changed_.notify_all();
}
template <class State>
void RaftMemberImpl<State>::AdvanceCommitIndex() {
DCHECK(mode_ == RaftMode::LEADER)
<< "`AdvanceCommitIndex` can only be called from leader mode";
std::vector<LogIndex> match_indices;
for (const auto &peer : peer_states_) {
match_indices.push_back(peer.second->match_index);
}
match_indices.push_back(storage_.GetLastLogIndex());
std::sort(match_indices.begin(), match_indices.end(),
std::greater<LogIndex>());
LogIndex new_commit_index_ = match_indices[(config_.members.size() - 1) / 2];
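/* Example: with 5 members, (5 - 1) / 2 = 2, so the third-largest match index
 * is taken as the highest log index considered replicated on a majority. */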
LogInfo("Trying to advance commit index {} to {}", commit_index_,
new_commit_index_);
/* This can happen because we reset `match_index` to 0 for every peer when
* elected. */
if (commit_index_ >= new_commit_index_) {
return;
}
/* [Raft thesis, section 3.6.2]
* (...) Raft never commits log entries from previous terms by counting
* replicas. Only log entries from the leader's current term are committed by
* counting replicas; once an entry from the current term has been committed
* in this way, then all prior entries are committed indirectly because of the
* Log Matching Property." */
if (storage_.GetLogTerm(new_commit_index_) != term_) {
LogInfo("Cannot commit log entry from previous term");
return;
}
commit_index_ = std::max(commit_index_, new_commit_index_);
}
template <class State>
void RaftMemberImpl<State>::AppendEntries(const std::string &peer_id,
RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock) {
LogInfo("Appending entries to {}", peer_id);
AppendEntriesRequest<State> request;
request.leader_term = term_;
request.leader_id = id_;
request.prev_log_index = peer_state.next_index - 1;
request.prev_log_term = storage_.GetLogTerm(peer_state.next_index - 1);
if (!peer_state.suppress_log_entries &&
peer_state.next_index <= storage_.GetLastLogIndex()) {
request.entries = storage_.GetLogSuffix(peer_state.next_index);
} else {
request.entries = {};
}
request.leader_commit = commit_index_;
AppendEntriesReply reply;
/* Release lock before issuing RPC and waiting for response. */
/* TODO(mtomic): Revise how this will work with RPC cancellation. */
lock.unlock();
bool ok = network_.SendAppendEntries(peer_id, request, reply);
lock.lock();
/* TODO(mtomic): Maybe implement exponential backoff. */
if (!ok) {
/* There is probably something wrong with this peer, let's avoid sending log
* entries. */
peer_state.suppress_log_entries = true;
peer_state.backoff_until = Clock::now() + config_.rpc_backoff;
return;
}
if (term_ != request.leader_term || exiting_) {
return;
}
if (CandidateOrLeaderNoteTerm(reply.term)) {
state_changed_.notify_all();
return;
}
DCHECK(mode_ == RaftMode::LEADER)
<< "Elected leader for term should never change";
DCHECK(reply.term == term_) << "Got stale AppendEntries reply";
if (reply.success) {
/* We've found a match, we can start sending log entries. */
peer_state.suppress_log_entries = false;
LogIndex new_match_index = request.prev_log_index + request.entries.size();
DCHECK(peer_state.match_index <= new_match_index)
<< "`match_index` should increase monotonically within a term";
peer_state.match_index = new_match_index;
AdvanceCommitIndex();
peer_state.next_index = peer_state.match_index + 1;
peer_state.next_heartbeat_time = Clock::now() + config_.heartbeat_interval;
} else {
DCHECK(peer_state.next_index > 1)
<< "Log replication should not fail for first log entry.";
--peer_state.next_index;
}
state_changed_.notify_all();
}
template <class State>
RequestVoteReply RaftMemberImpl<State>::OnRequestVote(
const RequestVoteRequest &request) {
std::lock_guard<std::mutex> lock(mutex_);
LogInfo("RequestVote RPC request from {}", request.candidate_id);
RequestVoteReply reply;
/* [Raft thesis, Section 3.3]
* "If a server receives a request with a stale term number, it rejects the
* request." */
if (request.candidate_term < term_) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (request.candidate_term > term_) {
if (mode_ != RaftMode::FOLLOWER) {
CandidateOrLeaderTransitionToFollower();
}
UpdateTermAndVotedFor(request.candidate_term, {});
}
/* [Raft thesis, Section 3.6.1]
* "Raft uses the voting process to prevent a candidate from winning an
* election unless its log contains all committed entries. (...) The
* RequestVote RPC implements this restriction: the RPC includes information
* about the candidate's log, and the voter denies its vote if its own log is
* more up-to-date than that of the candidate. Raft determines which of two
* logs is more up-to-date by comparing the index and term of the last entries
* in the logs. If the logs have last entries with different terms, then the
* log with the later term is more up-to-date. If the logs end with the same
* term, then whichever log is longer is more up-to-date." */
LogIndex my_last_log_index = storage_.GetLastLogIndex();
TermId my_last_log_term = storage_.GetLogTerm(my_last_log_index);
if (my_last_log_term > request.last_log_term ||
(my_last_log_term == request.last_log_term &&
my_last_log_index > request.last_log_index)) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* [Raft thesis, Section 3.4]
* "Each server will vote for at most one candidate in a given term, on a
* first-come-first-served basis."
*/
/* We voted for someone else in this term. */
if (request.candidate_term == term_ && voted_for_ &&
*voted_for_ != request.candidate_id) {
reply.term = term_;
reply.vote_granted = false;
return reply;
}
/* Now we know we will vote for this candidate, because its term is at least
* as big as ours and we haven't voted for anyone else. */
UpdateTermAndVotedFor(request.candidate_term, request.candidate_id);
/* [Raft thesis, Section 3.4]
* A server remains in follower state as long as it receives valid RPCs from a
* leader or candidate. */
SetElectionTimer();
state_changed_.notify_all();
reply.term = request.candidate_term;
reply.vote_granted = true;
return reply;
}
template <class State>
AppendEntriesReply RaftMemberImpl<State>::OnAppendEntries(
const AppendEntriesRequest<State> &request) {
std::lock_guard<std::mutex> lock(mutex_);
LogInfo("AppendEntries RPC request from {}", request.leader_id);
AppendEntriesReply reply;
/* [Raft thesis, Section 3.3]
* "If a server receives a request with a stale term number, it rejects the
* request." */
if (request.leader_term < term_) {
reply.term = term_;
reply.success = false;
return reply;
}
/* [Raft thesis, Section 3.3]
* "Current terms are exchanged whenever servers communicate; if one server's
* current term is smaller than the other's, then it updates its current term
* to the larger value. If a candidate or leader discovers that its term is
* out of date, it immediately reverts to follower state." */
if (request.leader_term > term_) {
if (mode_ != RaftMode::FOLLOWER) {
CandidateOrLeaderTransitionToFollower();
}
UpdateTermAndVotedFor(request.leader_term, {});
}
/* [Raft thesis, Section 3.4]
* "While waiting for votes, a candidate may receive an AppendEntries RPC from
* another server claiming to be leader. If the leader's term (included in its
* RPC) is at least as large as the candidate's current term, then the
* candidate recognizes the leader as legitimate and returns to follower
* state." */
if (mode_ == RaftMode::CANDIDATE && request.leader_term == term_) {
CandidateOrLeaderTransitionToFollower();
}
DCHECK(mode_ != RaftMode::LEADER)
<< "Leader cannot accept `AppendEntries` RPC";
DCHECK(term_ == request.leader_term) << "Term should be equal to request "
"term when accepting `AppendEntries` "
"RPC";
leader_ = request.leader_id;
/* [Raft thesis, Section 3.4]
* A server remains in follower state as long as it receives valid RPCs from a
* leader or candidate. */
SetElectionTimer();
state_changed_.notify_all();
/* [Raft thesis, Section 3.5]
* "When sending an AppendEntries RPC, the leader includes the index and term
* of the entry in its log that immediately precedes the new entries. If the
* follower does not find an entry in its log with the same index and term,
* then it refuses the new entries." */
if (request.prev_log_index > storage_.GetLastLogIndex() ||
storage_.GetLogTerm(request.prev_log_index) != request.prev_log_term) {
reply.term = term_;
reply.success = false;
return reply;
}
/* [Raft thesis, Section 3.5]
* "To bring a follower's log into consistency with its own, the leader must
* find the latest log entry where the two logs agree, delete any entries in
* the follower's log after that point, and send the follower all of the
* leader's entries after that point." */
/* Entry at `request.prev_log_index` is the last entry where ours and leader's
* logs agree. It's time to replace the tail of the log with new entries from
* the leader. We have to be careful here as duplicated AppendEntries RPCs
* could cause data loss.
*
* There is a possibility that an old AppendEntries RPC is duplicated and
* received after processing a newer one. For example, the leader appends entry 3
* and then entry 4, but the follower receives entry 3, then entry 4, and then
* entry 3 again. We have to be careful not to delete entry 4 from log when
* processing the last RPC. */
LogIndex index = request.prev_log_index;
auto it = request.entries.begin();
for (; it != request.entries.end(); ++it) {
++index;
if (index > storage_.GetLastLogIndex()) {
break;
}
if (storage_.GetLogTerm(index) != it->term) {
LogInfo("Truncating log suffix from index {}", index);
DCHECK(commit_index_ < index)
<< "Committed entries should never be truncated from the log";
storage_.TruncateLogSuffix(index);
break;
}
}
LogInfo("Appending {} out of {} logs from {}.", request.entries.end() - it,
request.entries.size(), request.leader_id);
for (; it != request.entries.end(); ++it) {
storage_.AppendLogEntry(*it);
}
commit_index_ = std::max(commit_index_, request.leader_commit);
/* Let's bump election timer once again, we don't want to take down the leader
* because of our long disk writes. */
SetElectionTimer();
state_changed_.notify_all();
reply.term = term_;
reply.success = true;
return reply;
}
template <class State>
ClientResult RaftMemberImpl<State>::AddCommand(
const typename State::Change &command, bool blocking) {
std::unique_lock<std::mutex> lock(mutex_);
if (mode_ != RaftMode::LEADER) {
return ClientResult::NOT_LEADER;
}
LogEntry<State> entry;
entry.term = term_;
entry.command = command;
storage_.AppendLogEntry(entry);
// Entry is already replicated if this is a single node cluster.
AdvanceCommitIndex();
state_changed_.notify_all();
if (!blocking) {
return ClientResult::OK;
}
LogIndex index = storage_.GetLastLogIndex();
while (!exiting_ && term_ == entry.term) {
if (commit_index_ >= index) {
return ClientResult::OK;
}
state_changed_.wait(lock);
}
return ClientResult::NOT_LEADER;
}
} // namespace impl
template <class State>
RaftMember<State>::RaftMember(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id, const RaftConfig &config)
: network_(network), impl_(network, storage, id, config) {
timer_thread_ =
std::thread(&impl::RaftMemberImpl<State>::TimerThreadMain, &impl_);
for (const auto &peer_id : config.members) {
if (peer_id != id) {
peer_threads_.emplace_back(&impl::RaftMemberImpl<State>::PeerThreadMain,
&impl_, peer_id);
}
}
network_.Start(*this);
}
template <class State>
RaftMember<State>::~RaftMember() {
impl_.Stop();
timer_thread_.join();
for (auto &peer_thread : peer_threads_) {
peer_thread.join();
}
}
template <class State>
ClientResult RaftMember<State>::AddCommand(
const typename State::Change &command, bool blocking) {
return impl_.AddCommand(command, blocking);
}
template <class State>
RequestVoteReply RaftMember<State>::OnRequestVote(
const RequestVoteRequest &request) {
return impl_.OnRequestVote(request);
}
template <class State>
AppendEntriesReply RaftMember<State>::OnAppendEntries(
const AppendEntriesRequest<State> &request) {
return impl_.OnAppendEntries(request);
}
} // namespace communication::raft

View File

@@ -1,277 +0,0 @@
#pragma once
#include <chrono>
#include <condition_variable>
#include <experimental/optional>
#include <map>
#include <mutex>
#include <random>
#include <set>
#include <thread>
#include <vector>
#include "boost/serialization/vector.hpp"
#include "glog/logging.h"
#include "utils/serialization.hpp"
namespace communication::raft {
template <class State>
class RaftMember;
enum class ClientResult { NOT_LEADER, OK };
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;
using MemberId = std::string;
using TermId = uint64_t;
using ClientId = uint64_t;
using CommandId = uint64_t;
using LogIndex = uint64_t;
template <class State>
struct LogEntry {
int term;
std::experimental::optional<typename State::Change> command;
bool operator==(const LogEntry &rhs) const {
return term == rhs.term && command == rhs.command;
}
bool operator!=(const LogEntry &rhs) const { return !(*this == rhs); }
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &command;
}
};
/* Raft RPC requests and replies as described in [Raft thesis, Figure 3.1]. */
struct RequestVoteRequest {
TermId candidate_term;
MemberId candidate_id;
LogIndex last_log_index;
TermId last_log_term;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &candidate_term;
ar &candidate_id;
ar &last_log_index;
ar &last_log_term;
}
};
struct RequestVoteReply {
TermId term;
bool vote_granted;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &vote_granted;
}
};
template <class State>
struct AppendEntriesRequest {
TermId leader_term;
MemberId leader_id;
LogIndex prev_log_index;
TermId prev_log_term;
std::vector<LogEntry<State>> entries;
LogIndex leader_commit;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &leader_term;
ar &leader_id;
ar &prev_log_index;
ar &prev_log_term;
ar &entries;
ar &leader_commit;
}
};
struct AppendEntriesReply {
TermId term;
bool success;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term;
ar &success;
}
};
template <class State>
class RaftNetworkInterface {
public:
virtual ~RaftNetworkInterface() = default;
/* These functions return false if the RPC failed for some reason (e.g. cannot
* establish a connection or the request was cancelled). Otherwise
* `reply` contains the response from the peer. */
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) = 0;
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) = 0;
/* This will be called once the RaftMember is ready to start receiving RPCs.
*/
virtual void Start(RaftMember<State> &member) = 0;
};
template <class State>
class RaftStorageInterface {
public:
virtual ~RaftStorageInterface() = default;
virtual void WriteTermAndVotedFor(
const TermId term,
const std::experimental::optional<std::string> &voted_for) = 0;
virtual std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() = 0;
virtual void AppendLogEntry(const LogEntry<State> &entry) = 0;
virtual TermId GetLogTerm(const LogIndex index) = 0;
virtual LogEntry<State> GetLogEntry(const LogIndex index) = 0;
virtual std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) = 0;
virtual LogIndex GetLastLogIndex() = 0;
virtual void TruncateLogSuffix(const LogIndex index) = 0;
};
struct RaftConfig {
std::vector<MemberId> members;
std::chrono::milliseconds leader_timeout_min;
std::chrono::milliseconds leader_timeout_max;
std::chrono::milliseconds heartbeat_interval;
std::chrono::milliseconds rpc_backoff;
};
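/* An illustrative configuration for a 3-member cluster, using the 150-300 ms
 * election timeout range suggested in the Raft thesis (the heartbeat and
 * backoff values below are arbitrary examples):
 *
 *   RaftConfig config{{"a", "b", "c"},
 *                     std::chrono::milliseconds(150),
 *                     std::chrono::milliseconds(300),
 *                     std::chrono::milliseconds(60),
 *                     std::chrono::milliseconds(100)};
 */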
namespace impl {
enum class RaftMode { FOLLOWER, CANDIDATE, LEADER };
struct RaftPeerState {
bool request_vote_done;
bool voted_for_me;
LogIndex match_index;
LogIndex next_index;
bool suppress_log_entries;
Clock::time_point next_heartbeat_time;
Clock::time_point backoff_until;
};
template <class State>
class RaftMemberImpl {
public:
explicit RaftMemberImpl(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage,
const MemberId &id, const RaftConfig &config);
~RaftMemberImpl();
void Stop();
void TimerThreadMain();
void PeerThreadMain(std::string peer_id);
void UpdateTermAndVotedFor(
const TermId new_term,
const std::experimental::optional<MemberId> &new_voted_for);
void CandidateOrLeaderTransitionToFollower();
void CandidateTransitionToLeader();
bool CandidateOrLeaderNoteTerm(const TermId new_term);
void StartNewElection();
void SetElectionTimer();
bool CountVotes();
void RequestVote(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
void AdvanceCommitIndex();
void AppendEntries(const MemberId &peer_id, RaftPeerState &peer_state,
std::unique_lock<std::mutex> &lock);
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
ClientResult AddCommand(const typename State::Change &command, bool blocking);
template <class... Args>
void LogInfo(const std::string &, Args &&...);
RaftNetworkInterface<State> &network_;
RaftStorageInterface<State> &storage_;
MemberId id_;
RaftConfig config_;
TermId term_;
RaftMode mode_ = RaftMode::FOLLOWER;
std::experimental::optional<MemberId> voted_for_ = std::experimental::nullopt;
std::experimental::optional<MemberId> leader_ = std::experimental::nullopt;
TimePoint next_election_time_;
LogIndex commit_index_ = 0;
bool exiting_ = false;
std::map<std::string, std::unique_ptr<RaftPeerState>> peer_states_;
/* This mutex protects all of the internal state. */
std::mutex mutex_;
/* Used to notify waiting threads that some of the internal state has changed.
* It is notified when the following events occur:
* - mode change
* - election start
* - `next_election_time_` update on RPC from leader or candidate
* - destructor is called
* - `commit_index_` is advanced
*/
std::condition_variable state_changed_;
std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}());
};
} // namespace impl
template <class State>
class RaftMember final {
public:
explicit RaftMember(RaftNetworkInterface<State> &network,
RaftStorageInterface<State> &storage, const MemberId &id,
const RaftConfig &config);
~RaftMember();
ClientResult AddCommand(const typename State::Change &command, bool blocking);
RequestVoteReply OnRequestVote(const RequestVoteRequest &request);
AppendEntriesReply OnAppendEntries(
const AppendEntriesRequest<State> &request);
private:
RaftNetworkInterface<State> &network_;
impl::RaftMemberImpl<State> impl_;
/* Timer thread for triggering elections. */
std::thread timer_thread_;
/* One thread per peer for outgoing RPCs. */
std::vector<std::thread> peer_threads_;
};
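/* A rough wiring sketch (the member id "a", the endpoint directory, the log
 * directory and MyState are illustrative; RpcNetwork and SimpleFileStorage
 * are possible implementations from the accompanying headers):
 *
 *   communication::raft::RpcNetwork<MyState> network(rpc_server, directory);
 *   communication::raft::SimpleFileStorage<MyState> storage("raft-log-dir");
 *   communication::raft::RaftMember<MyState> member(network, storage, "a", config);
 *   // Replicate a command and block until it is committed (or leadership is lost).
 *   auto result = member.AddCommand(MyState::Change{}, true);
 */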
} // namespace communication::raft
#include "raft-inl.hpp"

View File

@@ -1,117 +0,0 @@
#pragma once
#include <unordered_map>
#include "glog/logging.h"
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
#include "communication/rpc/client.hpp"
#include "communication/rpc/server.hpp"
#include "io/network/endpoint.hpp"
/* Implementation of `RaftNetworkInterface` using RPC. Raft RPC requests and
* responses are wrapped in `PeerRpcRequest` and `PeerRpcReply`. */
// TODO(mtomic): Unwrap RPCs and use separate request-response protocols instead
// of `PeerProtocol`, or at least use a union to avoid sending unnecessary data
// over the wire.
namespace communication::raft {
template <class State>
using PeerProtocol = rpc::RequestResponse<PeerRpcRequest<State>, PeerRpcReply>;
template <class State>
class RpcNetwork : public RaftNetworkInterface<State> {
public:
RpcNetwork(rpc::Server &server,
std::unordered_map<std::string, io::network::Endpoint> directory)
: server_(server), directory_(std::move(directory)) {}
virtual void Start(RaftMember<State> &member) override {
server_.Register<PeerProtocol<State>>(
[&member](const PeerRpcRequest<State> &request) {
auto reply = std::make_unique<PeerRpcReply>();
reply->type = request.type;
switch (request.type) {
case RpcType::REQUEST_VOTE:
reply->request_vote = member.OnRequestVote(request.request_vote);
break;
case RpcType::APPEND_ENTRIES:
reply->append_entries =
member.OnAppendEntries(request.append_entries);
break;
default:
LOG(ERROR) << "Unknown RPC type: "
<< static_cast<int>(request.type);
}
return reply;
});
}
virtual bool SendRequestVote(const MemberId &recipient,
const RequestVoteRequest &request,
RequestVoteReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::REQUEST_VOTE;
req.request_vote = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.request_vote;
return true;
}
virtual bool SendAppendEntries(const MemberId &recipient,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) override {
PeerRpcRequest<State> req;
PeerRpcReply rep;
req.type = RpcType::APPEND_ENTRIES;
req.append_entries = request;
if (!SendRpc(recipient, req, rep)) {
return false;
}
reply = rep.append_entries;
return true;
}
private:
bool SendRpc(const MemberId &recipient, const PeerRpcRequest<State> &request,
PeerRpcReply &reply) {
auto &client = GetClient(recipient);
auto response = client.template Call<PeerProtocol<State>>(request);
if (!response) {
return false;
}
reply = *response;
return true;
}
rpc::Client &GetClient(const MemberId &id) {
auto it = clients_.find(id);
if (it == clients_.end()) {
auto ne = directory_[id];
it = clients_.try_emplace(id, ne).first;
}
return it->second;
}
rpc::Server &server_;
// TODO(mtomic): how to update and distribute this?
std::unordered_map<MemberId, io::network::Endpoint> directory_;
std::unordered_map<MemberId, rpc::Client> clients_;
};
} // namespace communication::raft

View File

@@ -1,239 +0,0 @@
/**
* @file
*
* Raft log is stored inside a folder. Each log entry is stored in a file named
* by its index. There is a special file named "metadata" which stores Raft
* metadata and also the last log index, which is used on startup to identify
* which log entry files are valid.
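*
* For example, a log directory holding three entries might look like this
* (names are illustrative):
*
*   raft-log/
*     metadata   <- serialized SimpleFileStorageMetadata
*     1          <- serialized LogEntry<State> at index 1
*     2          <- serialized LogEntry<State> at index 2
*     3          <- serialized LogEntry<State> at index 3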
*/
#pragma once
#include <fcntl.h>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/iostreams/device/file_descriptor.hpp"
#include "boost/iostreams/stream.hpp"
#include "communication/raft/raft.hpp"
#include "communication/raft/storage/memory.hpp"
#include "utils/file.hpp"
namespace communication::raft {
struct SimpleFileStorageMetadata {
TermId term;
std::experimental::optional<MemberId> voted_for;
LogIndex last_log_index;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &term &voted_for &last_log_index;
}
};
template <class State>
class SimpleFileStorage : public RaftStorageInterface<State> {
public:
explicit SimpleFileStorage(const fs::path &parent_dir) : memory_storage_() {
try {
dir_ = utils::OpenDir(parent_dir);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Error opening log directory: {}", e.what());
}
auto md = utils::TryOpenFile(dir_, "metadata", O_RDONLY);
if (!md) {
LOG(WARNING) << fmt::format("No metadata file found in directory '{}'",
parent_dir);
return;
}
boost::iostreams::file_descriptor_source src(
md->Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(src);
boost::archive::binary_iarchive iar(is);
SimpleFileStorageMetadata metadata;
try {
iar >> metadata;
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Failed to deserialize Raft metadata: " << e.what();
}
LOG(INFO) << fmt::format(
"Read term = {} and voted_for = {} from storage", metadata.term,
metadata.voted_for ? *metadata.voted_for : "(none)");
memory_storage_.term_ = metadata.term;
memory_storage_.voted_for_ = metadata.voted_for;
memory_storage_.log_.reserve(metadata.last_log_index);
for (LogIndex idx = 1; idx <= metadata.last_log_index; ++idx) {
utils::File entry_file;
try {
entry_file = utils::OpenFile(dir_, fmt::format("{}", idx), O_RDONLY);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open entry file {}: {}", idx,
e.what());
}
boost::iostreams::file_descriptor_source src(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_source> is(
src);
boost::archive::binary_iarchive iar(is);
LogEntry<State> entry;
try {
iar >> entry;
memory_storage_.log_.emplace_back(std::move(entry));
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to deserialize log entry {}: {}", idx,
e.what());
}
}
LOG(INFO) << fmt::format("Read {} log entries", metadata.last_log_index);
}
void WriteTermAndVotedFor(
TermId term,
const std::experimental::optional<MemberId> &voted_for) override {
memory_storage_.WriteTermAndVotedFor(term, voted_for);
WriteMetadata();
// Metadata file might be newly created so we have to fsync the directory.
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
std::pair<TermId, std::experimental::optional<MemberId>> GetTermAndVotedFor()
override {
return memory_storage_.GetTermAndVotedFor();
}
void AppendLogEntry(const LogEntry<State> &entry) override {
memory_storage_.AppendLogEntry(entry);
utils::File entry_file;
try {
entry_file = utils::OpenFile(
dir_, fmt::format("{}", memory_storage_.GetLastLogIndex()),
O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open log entry file: {}", e.what());
}
boost::iostreams::file_descriptor_sink sink(
entry_file.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << entry;
os.flush();
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << fmt::format("Failed to serialize log entry: {}", e.what());
}
try {
utils::Fsync(entry_file);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to write log entry file to disk: {}",
e.what());
}
// We update the metadata only after the log entry file is written to
// disk. This ensures that no file in range [1, last_log_index] is
// corrupted.
WriteMetadata();
try {
utils::Fsync(dir_);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to fsync Raft log directory: {}",
e.what());
}
}
TermId GetLogTerm(const LogIndex index) override {
return memory_storage_.GetLogTerm(index);
}
LogEntry<State> GetLogEntry(const LogIndex index) override {
return memory_storage_.GetLogEntry(index);
}
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) override {
return memory_storage_.GetLogSuffix(index);
}
LogIndex GetLastLogIndex() override {
return memory_storage_.GetLastLogIndex();
}
void TruncateLogSuffix(const LogIndex index) override {
return memory_storage_.TruncateLogSuffix(index);
}
private:
InMemoryStorage<State> memory_storage_;
utils::File dir_;
void WriteMetadata() {
// We first write data to a temporary file, ensure data is safely written
// to disk, and then rename the file. Since rename is an atomic operation,
// "metadata" file won't get corrupted in case of program crash.
utils::File md_tmp;
try {
md_tmp =
OpenFile(dir_, "metadata.new", O_WRONLY | O_CREAT | O_TRUNC, 0644);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to open temporary metadata file: {}",
e.what());
}
boost::iostreams::file_descriptor_sink sink(
md_tmp.Handle(),
boost::iostreams::file_descriptor_flags::never_close_handle);
boost::iostreams::stream<boost::iostreams::file_descriptor_sink> os(sink);
boost::archive::binary_oarchive oar(os);
try {
oar << SimpleFileStorageMetadata{
memory_storage_.GetTermAndVotedFor().first,
memory_storage_.GetTermAndVotedFor().second,
memory_storage_.GetLastLogIndex()};
} catch (boost::archive::archive_exception &e) {
LOG(FATAL) << "Error serializing Raft metadata";
}
os.flush();
try {
utils::Fsync(md_tmp);
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format(
"Failed to write temporary metadata file to disk: {}", e.what());
}
try {
utils::Rename(dir_, "metadata.new", dir_, "metadata");
} catch (std::system_error &e) {
LOG(FATAL) << fmt::format("Failed to move temporary metadata file: {}",
e.what());
}
}
};
} // namespace communication::raft

View File

@@ -1,63 +0,0 @@
#pragma once
#include "communication/raft/raft.hpp"
namespace communication::raft {
template <class State>
class InMemoryStorage : public RaftStorageInterface<State> {
public:
InMemoryStorage()
: term_(0), voted_for_(std::experimental::nullopt), log_() {}
InMemoryStorage(const TermId term,
const std::experimental::optional<std::string> &voted_for,
const std::vector<LogEntry<State>> log)
: term_(term), voted_for_(voted_for), log_(log) {}
void WriteTermAndVotedFor(
const TermId term,
const std::experimental::optional<std::string> &voted_for) {
term_ = term;
voted_for_ = voted_for;
}
std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() {
return {term_, voted_for_};
}
void AppendLogEntry(const LogEntry<State> &entry) { log_.push_back(entry); }
TermId GetLogTerm(const LogIndex index) {
CHECK(0 <= index && index <= log_.size())
<< "Trying to read nonexistent log entry";
return index > 0 ? log_[index - 1].term : 0;
}
LogEntry<State> GetLogEntry(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to get nonexistent log entry";
return log_[index - 1];
}
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to get nonexistent log entries";
return std::vector<LogEntry<State>>(log_.begin() + index - 1, log_.end());
}
LogIndex GetLastLogIndex(void) { return log_.size(); }
void TruncateLogSuffix(const LogIndex index) {
CHECK(1 <= index && index <= log_.size())
<< "Trying to remove nonexistent log entries";
log_.erase(log_.begin() + index - 1, log_.end());
}
TermId term_;
std::experimental::optional<MemberId> voted_for_;
std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft

View File

@@ -1,141 +0,0 @@
#include <functional>
#include "communication/raft/network_common.hpp"
#include "communication/raft/raft.hpp"
namespace communication::raft::test_utils {
struct DummyState {
struct Change {
bool operator==(const Change &) const { return true; }
bool operator!=(const Change &) const { return false; }
template <class TArchive>
void serialize(TArchive &, unsigned int) {}
};
template <class TArchive>
void serialize(TArchive &, unsigned int) {}
};
struct IntState {
int x;
struct Change {
enum Type { ADD, SUB, SET };
Type t;
int d;
bool operator==(const Change &rhs) const {
return t == rhs.t && d == rhs.d;
}
bool operator!=(const Change &rhs) const { return !(*this == rhs); };
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &t;
ar &d;
}
};
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &x;
}
};
/* Implementations of `RaftNetworkInterface` for simpler unit testing. */
/* `NoOpNetworkInterface` doesn't do anything -- it's like a server disconnected
* from the network. */
template <class State>
class NoOpNetworkInterface : public RaftNetworkInterface<State> {
public:
~NoOpNetworkInterface() {}
virtual bool SendRequestVote(const MemberId &, const RequestVoteRequest &,
RequestVoteReply &) override {
return false;
}
virtual bool SendAppendEntries(const MemberId &,
const AppendEntriesRequest<State> &,
AppendEntriesReply &) override {
return false;
}
virtual void Start(RaftMember<State> &) override {}
};
/* `NextReplyNetworkInterface` has two fields: `on_request_` and `next_reply_`,
* the latter being optional. `on_request_` is a callback that will be called
* before processing requests. If `next_reply_` is not set, the `Send*`
* functions return false; otherwise they return that reply. */
template <class State>
class NextReplyNetworkInterface : public RaftNetworkInterface<State> {
public:
~NextReplyNetworkInterface() {}
virtual bool SendRequestVote(const MemberId &,
const RequestVoteRequest &request,
RequestVoteReply &reply) override {
PeerRpcRequest<State> req;
req.type = RpcType::REQUEST_VOTE;
req.request_vote = request;
on_request_(req);
if (!next_reply_) {
return false;
}
DCHECK(next_reply_->type == RpcType::REQUEST_VOTE)
<< "`next_reply_` type doesn't match the request type";
reply = next_reply_->request_vote;
return true;
}
virtual bool SendAppendEntries(const MemberId &,
const AppendEntriesRequest<State> &request,
AppendEntriesReply &reply) override {
PeerRpcRequest<State> req;
req.type = RpcType::APPEND_ENTRIES;
req.append_entries = request;
on_request_(req);
if (!next_reply_) {
return false;
}
DCHECK(next_reply_->type == RpcType::APPEND_ENTRIES)
<< "`next_reply_` type doesn't match the request type";
reply = next_reply_->append_entries;
return true;
}
virtual void Start(RaftMember<State> &) override {}
std::function<void(const PeerRpcRequest<State> &)> on_request_;
std::experimental::optional<PeerRpcReply> next_reply_;
};
template <class State>
class NoOpStorageInterface : public RaftStorageInterface<State> {
public:
NoOpStorageInterface() {}
void WriteTermAndVotedFor(const TermId,
const std::experimental::optional<std::string> &) {}
std::pair<TermId, std::experimental::optional<MemberId>>
GetTermAndVotedFor() {
return {0, {}};
}
void AppendLogEntry(const LogEntry<State> &) {}
TermId GetLogTerm(const LogIndex) { return 0; }
LogEntry<State> GetLogEntry(const LogIndex) { assert(false); }
std::vector<LogEntry<State>> GetLogSuffix(const LogIndex) { return {}; }
LogIndex GetLastLogIndex() { return 0; }
void TruncateLogSuffix(const LogIndex) {}
TermId term_;
std::experimental::optional<MemberId> voted_for_;
std::vector<LogEntry<State>> log_;
};
} // namespace communication::raft::test_utils

View File

@@ -1,116 +0,0 @@
#include <chrono>
#include <thread>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "boost/serialization/export.hpp"
#include "boost/serialization/unique_ptr.hpp"
#include "gflags/gflags.h"
#include "communication/rpc/client.hpp"
DEFINE_HIDDEN_bool(rpc_random_latency, false,
"If a random wait should happen on each RPC call, to "
"simulate network latency.");
namespace communication::rpc {
Client::Client(const io::network::Endpoint &endpoint) : endpoint_(endpoint) {}
std::unique_ptr<Message> Client::Call(const Message &request) {
std::lock_guard<std::mutex> guard(mutex_);
if (FLAGS_rpc_random_latency) {
auto microseconds = (int)(1000 * rand_(gen_));
std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
}
// Check if the connection is broken (if we haven't used the client for a
// long time the server could have died).
if (client_ && client_->ErrorStatus()) {
client_ = std::experimental::nullopt;
}
// Connect to the remote server.
if (!client_) {
client_.emplace();
if (!client_->Connect(endpoint_)) {
LOG(ERROR) << "Couldn't connect to remote address " << endpoint_;
client_ = std::experimental::nullopt;
return nullptr;
}
}
// Serialize and send request.
std::stringstream request_stream(std::ios_base::out | std::ios_base::binary);
{
boost::archive::binary_oarchive request_archive(request_stream);
// Serialize reference as pointer (to serialize the derived class). The
// request is read in protocol.cpp.
request_archive << &request;
// Archive destructor ensures everything is written.
}
const std::string &request_buffer = request_stream.str();
CHECK(request_buffer.size() <= std::numeric_limits<MessageSize>::max())
<< fmt::format(
"Trying to send message of size {}, max message size is {}",
request_buffer.size(), std::numeric_limits<MessageSize>::max());
MessageSize request_data_size = request_buffer.size();
if (!client_->Write(reinterpret_cast<uint8_t *>(&request_data_size),
sizeof(MessageSize), true)) {
LOG(ERROR) << "Couldn't send request size to " << client_->endpoint();
client_ = std::experimental::nullopt;
return nullptr;
}
if (!client_->Write(request_buffer)) {
LOG(ERROR) << "Couldn't send request data to " << client_->endpoint();
client_ = std::experimental::nullopt;
return nullptr;
}
// Receive response data size.
if (!client_->Read(sizeof(MessageSize))) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return nullptr;
}
MessageSize response_data_size =
*reinterpret_cast<MessageSize *>(client_->GetData());
client_->ShiftData(sizeof(MessageSize));
// Receive response data.
if (!client_->Read(response_data_size)) {
LOG(ERROR) << "Couldn't get response from " << client_->endpoint();
client_ = std::experimental::nullopt;
return nullptr;
}
std::unique_ptr<Message> response;
{
std::stringstream response_stream(std::ios_base::in |
std::ios_base::binary);
response_stream.str(std::string(reinterpret_cast<char *>(client_->GetData()),
response_data_size));
boost::archive::binary_iarchive response_archive(response_stream);
response_archive >> response;
}
client_->ShiftData(response_data_size);
return response;
}
void Client::Abort() {
if (!client_) return;
// We need to call Shutdown on the client to abort any pending read or
// write operations.
client_->Shutdown();
client_ = std::experimental::nullopt;
}
} // namespace communication::rpc

View File

@@ -1,76 +0,0 @@
#pragma once
#include <experimental/optional>
#include <memory>
#include <mutex>
#include <random>
#include <glog/logging.h>
#include "communication/client.hpp"
#include "communication/rpc/messages.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
// Client is thread safe, but it is recommended to use thread_local clients.
class Client {
public:
Client(const io::network::Endpoint &endpoint);
// The Call function can initiate only one request at a time. It blocks
// until there is a response. If there was an error, nullptr is returned.
template <typename TRequestResponse, typename... Args>
std::unique_ptr<typename TRequestResponse::Response> Call(Args &&... args) {
using Req = typename TRequestResponse::Request;
using Res = typename TRequestResponse::Response;
static_assert(std::is_base_of<Message, Req>::value,
"TRequestResponse::Request must be derived from Message");
static_assert(std::is_base_of<Message, Res>::value,
"TRequestResponse::Response must be derived from Message");
auto request = Req(std::forward<Args>(args)...);
if (VLOG_IS_ON(12)) {
auto req_type = utils::Demangle(request.type_index().name());
LOG(INFO) << "[RpcClient] sent " << (req_type ? req_type.value() : "");
}
std::unique_ptr<Message> response = Call(request);
auto *real_response = dynamic_cast<Res *>(response.get());
if (!real_response && response) {
// Since message_id was checked in private Call function, this means
// something is very wrong (probably on the server side).
LOG(ERROR) << "Message response was of unexpected type";
client_ = std::experimental::nullopt;
return nullptr;
}
if (VLOG_IS_ON(12) && response) {
auto res_type = utils::Demangle(response->type_index().name());
LOG(INFO) << "[RpcClient] received "
<< (res_type ? res_type.value() : "");
}
response.release();
return std::unique_ptr<Res>(real_response);
}
// Call this function from another thread to abort a pending RPC call.
void Abort();
private:
std::unique_ptr<Message> Call(const Message &request);
io::network::Endpoint endpoint_;
std::experimental::optional<communication::Client> client_;
std::mutex mutex_;
// Random generator for simulated network latency (enable with a flag).
// Distribution parameters are rule-of-thumb chosen.
std::mt19937 gen_{std::random_device{}()};
std::lognormal_distribution<> rand_{0.0, 1.11};
};
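// A minimal usage sketch (EchoRpc is a hypothetical RequestResponse protocol
// whose Request is constructible from a std::string):
//
//   communication::rpc::Client client(endpoint);
//   auto response = client.Call<EchoRpc>("hello");
//   if (!response) {
//     // nullptr signals a connection or protocol error.
//   }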
} // namespace communication::rpc

View File

@@ -1,46 +0,0 @@
#pragma once
#include <mutex>
#include <stack>
#include "communication/rpc/client.hpp"
namespace communication::rpc {
/**
* A simple client pool that creates new RPC clients on demand. Useful when you
* want to send RPCs to the same server from multiple threads without them
* blocking each other.
*/
class ClientPool {
public:
ClientPool(const io::network::Endpoint &endpoint) : endpoint_(endpoint) {}
template <typename TRequestResponse, typename... Args>
std::unique_ptr<typename TRequestResponse::Response> Call(Args &&... args) {
std::unique_ptr<Client> client;
std::unique_lock<std::mutex> lock(mutex_);
if (unused_clients_.empty()) {
client = std::make_unique<Client>(endpoint_);
} else {
client = std::move(unused_clients_.top());
unused_clients_.pop();
}
lock.unlock();
auto resp = client->Call<TRequestResponse>(std::forward<Args>(args)...);
lock.lock();
unused_clients_.push(std::move(client));
return resp;
};
private:
io::network::Endpoint endpoint_;
std::mutex mutex_;
std::stack<std::unique_ptr<Client>> unused_clients_;
};
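// Usage sketch (SumRpc is a hypothetical RequestResponse protocol; the pool
// can be shared by multiple threads, each Call temporarily taking one client):
//
//   communication::rpc::ClientPool pool(endpoint);
//   auto response = pool.Call<SumRpc>(1, 2);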
} // namespace communication::rpc

View File

@@ -1,134 +0,0 @@
#pragma once
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/export.hpp"
#include "database/state_delta.hpp"
#include "distributed/coordination_rpc_messages.hpp"
#include "distributed/data_rpc_messages.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "distributed/index_rpc_messages.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "durability/recovery.hpp"
#include "stats/stats_rpc_messages.hpp"
#include "storage/concurrent_id_mapper_rpc_messages.hpp"
#include "transactions/engine_rpc_messages.hpp"
#define ID_VALUE_EXPORT_BOOST_TYPE(type) \
BOOST_CLASS_EXPORT(storage::type##IdReq); \
BOOST_CLASS_EXPORT(storage::type##IdRes); \
BOOST_CLASS_EXPORT(storage::Id##type##Req); \
BOOST_CLASS_EXPORT(storage::Id##type##Res);
ID_VALUE_EXPORT_BOOST_TYPE(Label)
ID_VALUE_EXPORT_BOOST_TYPE(EdgeType)
ID_VALUE_EXPORT_BOOST_TYPE(Property)
#undef ID_VALUE_EXPORT_BOOST_TYPE
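Spelled out, each invocation of the macro above exports four message types; for `Label` it is equivalent to:

```
// Equivalent expansion of ID_VALUE_EXPORT_BOOST_TYPE(Label).
BOOST_CLASS_EXPORT(storage::LabelIdReq);
BOOST_CLASS_EXPORT(storage::LabelIdRes);
BOOST_CLASS_EXPORT(storage::IdLabelReq);
BOOST_CLASS_EXPORT(storage::IdLabelRes);
```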
// Distributed transaction engine.
BOOST_CLASS_EXPORT(tx::TxAndSnapshot);
BOOST_CLASS_EXPORT(tx::BeginReq);
BOOST_CLASS_EXPORT(tx::BeginRes);
BOOST_CLASS_EXPORT(tx::AdvanceReq);
BOOST_CLASS_EXPORT(tx::AdvanceRes);
BOOST_CLASS_EXPORT(tx::CommitReq);
BOOST_CLASS_EXPORT(tx::CommitRes);
BOOST_CLASS_EXPORT(tx::AbortReq);
BOOST_CLASS_EXPORT(tx::AbortRes);
BOOST_CLASS_EXPORT(tx::SnapshotReq);
BOOST_CLASS_EXPORT(tx::SnapshotRes);
BOOST_CLASS_EXPORT(tx::CommandReq);
BOOST_CLASS_EXPORT(tx::CommandRes);
BOOST_CLASS_EXPORT(tx::GcSnapshotReq);
BOOST_CLASS_EXPORT(tx::ClogInfoReq);
BOOST_CLASS_EXPORT(tx::ClogInfoRes);
BOOST_CLASS_EXPORT(tx::ActiveTransactionsReq);
BOOST_CLASS_EXPORT(tx::EnsureNextIdGreaterReq);
BOOST_CLASS_EXPORT(tx::EnsureNextIdGreaterRes);
BOOST_CLASS_EXPORT(tx::GlobalLastReq);
BOOST_CLASS_EXPORT(tx::GlobalLastRes);
// Distributed coordination.
BOOST_CLASS_EXPORT(durability::RecoveryInfo);
BOOST_CLASS_EXPORT(distributed::RegisterWorkerReq);
BOOST_CLASS_EXPORT(distributed::RegisterWorkerRes);
BOOST_CLASS_EXPORT(distributed::ClusterDiscoveryReq);
BOOST_CLASS_EXPORT(distributed::ClusterDiscoveryRes);
BOOST_CLASS_EXPORT(distributed::StopWorkerReq);
BOOST_CLASS_EXPORT(distributed::StopWorkerRes);
// Distributed data exchange.
BOOST_CLASS_EXPORT(distributed::EdgeReq);
BOOST_CLASS_EXPORT(distributed::EdgeRes);
BOOST_CLASS_EXPORT(distributed::VertexReq);
BOOST_CLASS_EXPORT(distributed::VertexRes);
BOOST_CLASS_EXPORT(distributed::TxGidPair);
// Distributed plan exchange.
BOOST_CLASS_EXPORT(distributed::DispatchPlanReq);
BOOST_CLASS_EXPORT(distributed::DispatchPlanRes);
BOOST_CLASS_EXPORT(distributed::RemovePlanReq);
BOOST_CLASS_EXPORT(distributed::RemovePlanRes);
// Pull.
BOOST_CLASS_EXPORT(distributed::PullReq);
BOOST_CLASS_EXPORT(distributed::PullRes);
BOOST_CLASS_EXPORT(distributed::TransactionCommandAdvancedReq);
BOOST_CLASS_EXPORT(distributed::TransactionCommandAdvancedRes);
// Distributed indexes.
BOOST_CLASS_EXPORT(distributed::BuildIndexReq);
BOOST_CLASS_EXPORT(distributed::BuildIndexRes);
BOOST_CLASS_EXPORT(distributed::IndexLabelPropertyTx);
// Stats.
BOOST_CLASS_EXPORT(stats::StatsReq);
BOOST_CLASS_EXPORT(stats::StatsRes);
BOOST_CLASS_EXPORT(stats::BatchStatsReq);
BOOST_CLASS_EXPORT(stats::BatchStatsRes);
// Updates.
BOOST_CLASS_EXPORT(database::StateDelta);
BOOST_CLASS_EXPORT(distributed::UpdateReq);
BOOST_CLASS_EXPORT(distributed::UpdateRes);
BOOST_CLASS_EXPORT(distributed::UpdateApplyReq);
BOOST_CLASS_EXPORT(distributed::UpdateApplyRes);
// Creates.
BOOST_CLASS_EXPORT(distributed::CreateResult);
BOOST_CLASS_EXPORT(distributed::CreateVertexReq);
BOOST_CLASS_EXPORT(distributed::CreateVertexReqData);
BOOST_CLASS_EXPORT(distributed::CreateVertexRes);
BOOST_CLASS_EXPORT(distributed::CreateEdgeReqData);
BOOST_CLASS_EXPORT(distributed::CreateEdgeReq);
BOOST_CLASS_EXPORT(distributed::CreateEdgeRes);
BOOST_CLASS_EXPORT(distributed::AddInEdgeReqData);
BOOST_CLASS_EXPORT(distributed::AddInEdgeReq);
BOOST_CLASS_EXPORT(distributed::AddInEdgeRes);
// Removes.
BOOST_CLASS_EXPORT(distributed::RemoveVertexReq);
BOOST_CLASS_EXPORT(distributed::RemoveVertexRes);
BOOST_CLASS_EXPORT(distributed::RemoveEdgeReq);
BOOST_CLASS_EXPORT(distributed::RemoveEdgeRes);
BOOST_CLASS_EXPORT(distributed::RemoveInEdgeData);
BOOST_CLASS_EXPORT(distributed::RemoveInEdgeReq);
BOOST_CLASS_EXPORT(distributed::RemoveInEdgeRes);
// Durability
BOOST_CLASS_EXPORT(distributed::MakeSnapshotReq);
BOOST_CLASS_EXPORT(distributed::MakeSnapshotRes);
// Storage Gc.
BOOST_CLASS_EXPORT(distributed::GcClearedStatusReq);
BOOST_CLASS_EXPORT(distributed::GcClearedStatusRes);
// Transactional Cache Cleaner.
BOOST_CLASS_EXPORT(distributed::WaitOnTransactionEndReq);
BOOST_CLASS_EXPORT(distributed::WaitOnTransactionEndRes);

View File

@ -1,74 +0,0 @@
#pragma once
#include <memory>
#include <type_traits>
#include <typeindex>
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
namespace communication::rpc {
using MessageSize = uint32_t;
/**
* Base class for messages.
*/
class Message {
public:
virtual ~Message() {}
/**
* Run-time type identification that is used for callbacks.
*
* Warning: this works because of the virtual destructor, don't remove it from
* this class
*/
std::type_index type_index() const { return typeid(*this); }
private:
friend boost::serialization::access;
template <class TArchive>
void serialize(TArchive &, unsigned int) {}
};
template <typename TRequest, typename TResponse>
struct RequestResponse {
using Request = TRequest;
using Response = TResponse;
};
} // namespace communication::rpc
// RPC Pimp
#define RPC_NO_MEMBER_MESSAGE(name) \
struct name : public communication::rpc::Message { \
name() {} \
\
private: \
friend class boost::serialization::access; \
\
template <class TArchive> \
void serialize(TArchive &ar, unsigned int) { \
ar &boost::serialization::base_object<communication::rpc::Message>( \
*this); \
} \
}
#define RPC_SINGLE_MEMBER_MESSAGE(name, type) \
struct name : public communication::rpc::Message { \
name() {} \
name(const type &member) : member(member) {} \
type member; \
\
private: \
friend class boost::serialization::access; \
\
template <class TArchive> \
void serialize(TArchive &ar, unsigned int) { \
ar &boost::serialization::base_object<communication::rpc::Message>( \
*this); \
ar &member; \
} \
}
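These macros are how concrete RPCs are declared throughout the codebase (the counters RPC further down in this diff is a real example). As a generic sketch, with purely illustrative names:

```
// Illustrative only: a string-echo RPC and a payload-free ping RPC.
RPC_SINGLE_MEMBER_MESSAGE(EchoReq, std::string);
RPC_SINGLE_MEMBER_MESSAGE(EchoRes, std::string);
using EchoRpc = communication::rpc::RequestResponse<EchoReq, EchoRes>;

RPC_NO_MEMBER_MESSAGE(PingReq);
RPC_NO_MEMBER_MESSAGE(PingRes);
using PingRpc = communication::rpc::RequestResponse<PingReq, PingRes>;
```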

View File

@ -1,92 +0,0 @@
#include <sstream>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/unique_ptr.hpp"
#include "fmt/format.h"
#include "communication/rpc/messages-inl.hpp"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/rpc/server.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
Session::Session(Server &server, communication::InputStream &input_stream,
communication::OutputStream &output_stream)
: server_(server),
input_stream_(input_stream),
output_stream_(output_stream) {}
void Session::Execute() {
if (input_stream_.size() < sizeof(MessageSize)) return;
MessageSize request_len =
*reinterpret_cast<MessageSize *>(input_stream_.data());
uint64_t request_size = sizeof(MessageSize) + request_len;
input_stream_.Resize(request_size);
if (input_stream_.size() < request_size) return;
// Read the request message.
std::unique_ptr<Message> request([this, request_len]() {
Message *req_ptr = nullptr;
std::stringstream stream(std::ios_base::in | std::ios_base::binary);
stream.str(std::string(
reinterpret_cast<char *>(input_stream_.data() + sizeof(MessageSize)),
request_len));
boost::archive::binary_iarchive archive(stream);
// Sent from client.cpp
archive >> req_ptr;
return req_ptr;
}());
input_stream_.Shift(sizeof(MessageSize) + request_len);
auto callbacks_accessor = server_.callbacks_.access();
auto it = callbacks_accessor.find(request->type_index());
if (it == callbacks_accessor.end()) {
// Throw an exception to close the socket and clean up the session.
throw SessionException(
"Session trying to execute an unregistered RPC call!");
}
if (VLOG_IS_ON(12)) {
auto req_type = utils::Demangle(request->type_index().name());
LOG(INFO) << "[RpcServer] received " << (req_type ? req_type.value() : "");
}
std::unique_ptr<Message> response = it->second(*(request.get()));
if (!response) {
throw SessionException("Trying to send nullptr instead of message");
}
// Serialize and send response
std::stringstream stream(std::ios_base::out | std::ios_base::binary);
{
boost::archive::binary_oarchive archive(stream);
archive << response;
// Archive destructor ensures everything is written.
}
const std::string &buffer = stream.str();
if (buffer.size() > std::numeric_limits<MessageSize>::max()) {
throw SessionException(fmt::format(
"Trying to send response of size {}, max response size is {}",
buffer.size(), std::numeric_limits<MessageSize>::max()));
}
MessageSize input_stream_size = buffer.size();
if (!output_stream_.Write(reinterpret_cast<uint8_t *>(&input_stream_size),
sizeof(MessageSize), true)) {
throw SessionException("Couldn't send response size!");
}
if (!output_stream_.Write(buffer)) {
throw SessionException("Couldn't send response data!");
}
if (VLOG_IS_ON(12)) {
auto res_type = utils::Demangle(response->type_index().name());
LOG(INFO) << "[RpcServer] sent " << (res_type ? res_type.value() : "");
}
}
} // namespace communication::rpc
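The client side of this exchange is not shown in this hunk, but the framing it must produce follows directly from `Session::Execute`: a `MessageSize` length prefix, then that many bytes of a boost binary archive containing the message written through a base-class pointer. A hedged sketch, assuming the usual boost archive headers are included:

```
// Illustrative framing sketch (not the actual client implementation).
std::string FrameRequest(const communication::rpc::Message &request) {
  std::stringstream stream(std::ios_base::out | std::ios_base::binary);
  {
    boost::archive::binary_oarchive archive(stream);
    const communication::rpc::Message *req_ptr = &request;
    archive << req_ptr;  // Mirrors the `archive >> req_ptr` on the server.
  }
  const std::string payload = stream.str();
  communication::rpc::MessageSize size = payload.size();
  std::string framed(reinterpret_cast<const char *>(&size), sizeof(size));
  framed += payload;  // [MessageSize size][size bytes of serialized message]
  return framed;
}
```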

View File

@ -1,55 +0,0 @@
#pragma once
#include <chrono>
#include <cstdint>
#include <memory>
#include "communication/rpc/messages.hpp"
#include "communication/session.hpp"
/**
* @brief Protocol
*
* Has classes and functions that implement the server side of our
* RPC protocol.
*
* Message layout: MessageSize message_size,
* message_size bytes serialized_message
*/
namespace communication::rpc {
// Forward declaration of class Server
class Server;
/**
* This class is thrown when the Session wants to indicate that a fatal error
* occurred during execution.
*/
class SessionException : public utils::BasicException {
using utils::BasicException::BasicException;
};
/**
* Distributed Protocol Session
*
* This class is responsible for handling a single client connection.
*/
class Session {
public:
Session(Server &server, communication::InputStream &input_stream,
communication::OutputStream &output_stream);
/**
* Executes the protocol after data has been read into the stream.
* Goes through the protocol states in order to execute commands from the
* client.
*/
void Execute();
private:
Server &server_;
communication::InputStream &input_stream_;
communication::OutputStream &output_stream_;
};
} // namespace communication::rpc

View File

@ -1,24 +0,0 @@
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "boost/serialization/export.hpp"
#include "boost/serialization/unique_ptr.hpp"
#include "communication/rpc/server.hpp"
namespace communication::rpc {
Server::Server(const io::network::Endpoint &endpoint,
size_t workers_count)
: server_(endpoint, *this, -1, "RPC", workers_count) {}
void Server::StopProcessingCalls() {
server_.Shutdown();
server_.AwaitShutdown();
}
const io::network::Endpoint &Server::endpoint() const {
return server_.endpoint();
}
} // namespace communication::rpc

View File

@ -1,86 +0,0 @@
#pragma once
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "communication/rpc/messages.hpp"
#include "communication/rpc/protocol.hpp"
#include "communication/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "data_structures/queue.hpp"
#include "io/network/endpoint.hpp"
#include "utils/demangle.hpp"
namespace communication::rpc {
class Server {
public:
Server(const io::network::Endpoint &endpoint,
size_t workers_count = std::thread::hardware_concurrency());
Server(const Server &) = delete;
Server(Server &&) = delete;
Server &operator=(const Server &) = delete;
Server &operator=(Server &&) = delete;
void StopProcessingCalls();
const io::network::Endpoint &endpoint() const;
template <typename TRequestResponse>
void Register(
std::function<std::unique_ptr<typename TRequestResponse::Response>(
const typename TRequestResponse::Request &)>
callback) {
static_assert(
std::is_base_of<Message, typename TRequestResponse::Request>::value,
"TRequestResponse::Request must be derived from Message");
static_assert(
std::is_base_of<Message, typename TRequestResponse::Response>::value,
"TRequestResponse::Response must be derived from Message");
auto callbacks_accessor = callbacks_.access();
auto got = callbacks_accessor.insert(
typeid(typename TRequestResponse::Request),
[callback = callback](const Message &base_message) {
const auto &message =
dynamic_cast<const typename TRequestResponse::Request &>(
base_message);
return callback(message);
});
CHECK(got.second) << "Callback for that message type already registered";
if (VLOG_IS_ON(12)) {
auto req_type =
utils::Demangle(typeid(typename TRequestResponse::Request).name());
auto res_type =
utils::Demangle(typeid(typename TRequestResponse::Response).name());
LOG(INFO) << "[RpcServer] register " << (req_type ? req_type.value() : "")
<< " -> " << (res_type ? res_type.value() : "");
}
}
template <typename TRequestResponse>
void UnRegister() {
static_assert(
std::is_base_of<Message, typename TRequestResponse::Request>::value,
"TRequestResponse::Request must be derived from Message");
static_assert(
std::is_base_of<Message, typename TRequestResponse::Response>::value,
"TRequestResponse::Response must be derived from Message");
auto callbacks_accessor = callbacks_.access();
auto deleted =
callbacks_accessor.remove(typeid(typename TRequestResponse::Request));
CHECK(deleted) << "Trying to remove unknown message type callback";
}
private:
friend class Session;
ConcurrentMap<std::type_index,
std::function<std::unique_ptr<Message>(const Message &)>>
callbacks_;
std::mutex mutex_;
communication::Server<Session, Server> server_;
};
} // namespace communication::rpc
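A concrete registration of this kind appears in `database::MasterCounters` later in this diff; generically, and reusing the hypothetical `EchoRpc` from the sketches above:

```
// Sketch: registering a handler for the hypothetical EchoRpc.
communication::rpc::Server server(io::network::Endpoint("0.0.0.0", 10000));
server.Register<EchoRpc>([](const EchoReq &req) {
  // Echo the request payload back to the caller.
  return std::make_unique<EchoRes>(req.member);
});
```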

View File

@ -27,35 +27,6 @@ DEFINE_int32(gc_cycle_sec, 30,
"Amount of time between starts of two cleaning cycles in seconds. "
"-1 to turn off.");
#ifndef MG_COMMUNITY
// Distributed master/worker flags.
DEFINE_VALIDATED_HIDDEN_int32(worker_id, 0,
"ID of a worker in a distributed system. Igored "
"in single-node.",
FLAG_IN_RANGE(0, 1 << gid::kWorkerIdSize));
DEFINE_HIDDEN_string(master_host, "0.0.0.0",
"For master node indicates the host served on. For worker "
"node indicates the master location.");
DEFINE_VALIDATED_HIDDEN_int32(
master_port, 0,
"For master node the port on which to serve. For "
"worker node indicates the master's port.",
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_HIDDEN_string(worker_host, "0.0.0.0",
"For worker node indicates the host served on. For master "
"node this flag is not used.");
DEFINE_VALIDATED_HIDDEN_int32(
worker_port, 0,
"For master node it's unused. For worker node "
"indicates the port on which to serve. If zero (default value), a port is "
"chosen at random. Sent to the master when registring worker node.",
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_VALIDATED_HIDDEN_int32(rpc_num_workers,
std::max(std::thread::hardware_concurrency(), 1U),
"Number of workers (RPC)",
FLAG_IN_RANGE(1, INT32_MAX));
#endif
// clang-format off
database::Config::Config()
// Durability flags.
@ -68,15 +39,5 @@ database::Config::Config()
// Misc flags.
gc_cycle_sec{FLAGS_gc_cycle_sec},
query_execution_time_sec{FLAGS_query_execution_time_sec}
#ifndef MG_COMMUNITY
,
// Distributed flags.
rpc_num_workers{FLAGS_rpc_num_workers},
worker_id{FLAGS_worker_id},
master_endpoint{FLAGS_master_host,
static_cast<uint16_t>(FLAGS_master_port)},
worker_endpoint{FLAGS_worker_host,
static_cast<uint16_t>(FLAGS_worker_port)}
#endif
{}
// clang-format on

View File

@ -1,23 +1,7 @@
#include "database/counters.hpp"
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/export.hpp"
#include "boost/serialization/utility.hpp"
namespace database {
RPC_SINGLE_MEMBER_MESSAGE(CountersGetReq, std::string);
RPC_SINGLE_MEMBER_MESSAGE(CountersGetRes, int64_t);
using CountersGetRpc =
communication::rpc::RequestResponse<CountersGetReq, CountersGetRes>;
using CountersSetReqData = std::pair<std::string, int64_t>;
RPC_SINGLE_MEMBER_MESSAGE(CountersSetReq, CountersSetReqData);
RPC_NO_MEMBER_MESSAGE(CountersSetRes);
using CountersSetRpc =
communication::rpc::RequestResponse<CountersSetReq, CountersSetRes>;
int64_t SingleNodeCounters::Get(const std::string &name) {
return counters_.access()
.emplace(name, std::make_tuple(name), std::make_tuple(0))
@ -30,36 +14,4 @@ void SingleNodeCounters::Set(const std::string &name, int64_t value) {
if (!name_counter_pair.second) name_counter_pair.first->second.store(value);
}
MasterCounters::MasterCounters(communication::rpc::Server &server)
: rpc_server_(server) {
rpc_server_.Register<CountersGetRpc>([this](const CountersGetReq &req) {
return std::make_unique<CountersGetRes>(Get(req.member));
});
rpc_server_.Register<CountersSetRpc>([this](const CountersSetReq &req) {
Set(req.member.first, req.member.second);
return std::make_unique<CountersSetRes>();
});
}
WorkerCounters::WorkerCounters(
communication::rpc::ClientPool &master_client_pool)
: master_client_pool_(master_client_pool) {}
int64_t WorkerCounters::Get(const std::string &name) {
auto response = master_client_pool_.Call<CountersGetRpc>(name);
CHECK(response) << "CountersGetRpc failed";
return response->member;
}
void WorkerCounters::Set(const std::string &name, int64_t value) {
auto response =
master_client_pool_.Call<CountersSetRpc>(CountersSetReqData{name, value});
CHECK(response) << "CountersSetRpc failed";
}
} // namespace database
BOOST_CLASS_EXPORT(database::CountersGetReq);
BOOST_CLASS_EXPORT(database::CountersGetRes);
BOOST_CLASS_EXPORT(database::CountersSetReq);
BOOST_CLASS_EXPORT(database::CountersSetRes);

View File

@ -4,9 +4,6 @@
#include <cstdint>
#include <string>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/messages.hpp"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
namespace database {
@ -42,25 +39,4 @@ class SingleNodeCounters : public Counters {
ConcurrentMap<std::string, std::atomic<int64_t>> counters_;
};
/** Implementation for distributed master. */
class MasterCounters : public SingleNodeCounters {
public:
MasterCounters(communication::rpc::Server &server);
private:
communication::rpc::Server &rpc_server_;
};
/** Implementation for distributed worker. */
class WorkerCounters : public Counters {
public:
WorkerCounters(communication::rpc::ClientPool &master_client_pool);
int64_t Get(const std::string &name) override;
void Set(const std::string &name, int64_t value) override;
private:
communication::rpc::ClientPool &master_client_pool_;
};
} // namespace database

View File

@ -2,38 +2,14 @@
#include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
#include "database/storage_gc_master.hpp"
#include "database/graph_db_accessor.hpp"
#include "database/storage_gc_single_node.hpp"
#include "database/storage_gc_worker.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_worker.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_server.hpp"
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "distributed/durability_rpc_server.hpp"
#include "distributed/index_rpc_server.hpp"
#include "distributed/plan_consumer.hpp"
#include "distributed/plan_dispatcher.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/transactional_cache_cleaner.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "durability/paths.hpp"
#include "durability/recovery.hpp"
#include "durability/snapshooter.hpp"
#include "storage/concurrent_id_mapper_master.hpp"
#include "storage/concurrent_id_mapper_single_node.hpp"
#include "storage/concurrent_id_mapper_worker.hpp"
#include "transactions/engine_master.hpp"
#include "transactions/engine_single_node.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/flag_validation.hpp"
using namespace storage;
@ -69,22 +45,6 @@ class PrivateBase : public GraphDb {
storage_ = std::make_unique<Storage>(WorkerId());
}
distributed::PullRpcClients &pull_clients() override {
LOG(FATAL) << "Remote pull clients only available in master.";
}
distributed::ProduceRpcServer &produce_server() override {
LOG(FATAL) << "Remote produce server only available in worker.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan consumer only available in distributed worker.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan dispatcher only available in distributed master.";
}
distributed::IndexRpcClients &index_rpc_clients() override {
LOG(FATAL) << "Index RPC clients only available in distributed master.";
}
protected:
std::unique_ptr<Storage> storage_ =
std::make_unique<Storage>(config_.worker_id);
@ -121,7 +81,6 @@ struct TypemapPack {
class SingleNode : public PrivateBase {
public:
explicit SingleNode(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override { return GraphDb::Type::SINGLE_NODE; }
IMPL_GETTERS
tx::SingleNodeEngine tx_engine_{&wal_};
@ -131,27 +90,7 @@ class SingleNode : public PrivateBase {
TypemapPack<SingleNodeConcurrentIdMapper> typemap_pack_;
database::SingleNodeCounters counters_;
std::vector<int> GetWorkerIds() const override { return {0}; }
distributed::DataRpcServer &data_server() override {
LOG(FATAL) << "Remote data server not available in single-node.";
}
distributed::DataRpcClients &data_clients() override {
LOG(FATAL) << "Remote data clients not available in single-node.";
}
distributed::PlanDispatcher &plan_dispatcher() override {
LOG(FATAL) << "Plan Dispatcher not available in single-node.";
}
distributed::PlanConsumer &plan_consumer() override {
LOG(FATAL) << "Plan Consumer not available in single-node.";
}
distributed::UpdatesRpcServer &updates_server() override {
LOG(FATAL) << "Remote updates server not available in single-node.";
}
distributed::UpdatesRpcClients &updates_clients() override {
LOG(FATAL) << "Remote updates clients not available in single-node.";
}
distributed::DataManager &data_manager() override {
LOG(FATAL) << "Remote data manager not available in single-node.";
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
@ -161,144 +100,6 @@ class SingleNode : public PrivateBase {
}
};
#define IMPL_DISTRIBUTED_GETTERS \
std::vector<int> GetWorkerIds() const override { \
return coordination_.GetWorkerIds(); \
} \
distributed::DataRpcServer &data_server() override { return data_server_; } \
distributed::DataRpcClients &data_clients() override { \
return data_clients_; \
} \
distributed::UpdatesRpcServer &updates_server() override { \
return updates_server_; \
} \
distributed::UpdatesRpcClients &updates_clients() override { \
return updates_clients_; \
} \
distributed::DataManager &data_manager() override { return data_manager_; }
class Master : public PrivateBase {
public:
explicit Master(const Config &config) : PrivateBase(config) {}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_MASTER;
}
// Makes a local snapshot and forces the workers to do the same. The snapshot
// is written here only if the workers successfully created their own snapshots.
bool MakeSnapshot(GraphDbAccessor &accessor) override {
auto workers_snapshot =
durability_rpc_clients_.MakeSnapshot(accessor.transaction_id());
if (!workers_snapshot.get()) return false;
// This can be further optimized by creating the master snapshot at the same
// time as the workers' snapshots, but that would force us to delete the
// master snapshot if we created it successfully while the workers somehow
// failed, because we assume that every snapshot that exists on the master
// with some tx_id visibility also exists on the workers.
return PrivateBase::MakeSnapshot(accessor);
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanDispatcher &plan_dispatcher() override {
return plan_dispatcher_;
}
distributed::PullRpcClients &pull_clients() override { return pull_clients_; }
distributed::IndexRpcClients &index_rpc_clients() override {
return index_rpc_clients_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
storage_gc_ = std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
}
communication::rpc::Server server_{
config_.master_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
tx::MasterEngine tx_engine_{server_, rpc_worker_clients_, &wal_};
distributed::MasterCoordination coordination_{server_.endpoint()};
std::unique_ptr<StorageGcMaster> storage_gc_ =
std::make_unique<StorageGcMaster>(
*storage_, tx_engine_, config_.gc_cycle_sec, server_, coordination_);
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
TypemapPack<MasterConcurrentIdMapper> typemap_pack_{server_};
database::MasterCounters counters_{server_};
distributed::DurabilityRpcClients durability_rpc_clients_{
rpc_worker_clients_};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanDispatcher plan_dispatcher_{rpc_worker_clients_};
distributed::PullRpcClients pull_clients_{rpc_worker_clients_};
distributed::IndexRpcClients index_rpc_clients_{rpc_worker_clients_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::TransactionalCacheCleaner cache_cleaner_{
tx_engine_, updates_server_, data_manager_};
distributed::ClusterDiscoveryMaster cluster_discovery_{server_, coordination_,
rpc_worker_clients_};
};
class Worker : public PrivateBase {
public:
explicit Worker(const Config &config) : PrivateBase(config) {
cluster_discovery_.RegisterWorker(config.worker_id);
}
GraphDb::Type type() const override {
return GraphDb::Type::DISTRIBUTED_WORKER;
}
IMPL_GETTERS
IMPL_DISTRIBUTED_GETTERS
distributed::PlanConsumer &plan_consumer() override { return plan_consumer_; }
distributed::ProduceRpcServer &produce_server() override {
return produce_server_;
}
void ReinitializeStorage() override {
// Release gc scheduler to stop it from touching storage
storage_gc_ = nullptr;
PrivateBase::ReinitializeStorage();
storage_gc_ = std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
}
communication::rpc::Server server_{
config_.worker_endpoint, static_cast<size_t>(config_.rpc_num_workers)};
distributed::WorkerCoordination coordination_{server_,
config_.master_endpoint};
distributed::RpcWorkerClients rpc_worker_clients_{coordination_};
tx::WorkerEngine tx_engine_{rpc_worker_clients_.GetClientPool(0)};
std::unique_ptr<StorageGcWorker> storage_gc_ =
std::make_unique<StorageGcWorker>(
*storage_, tx_engine_, config_.gc_cycle_sec,
rpc_worker_clients_.GetClientPool(0), config_.worker_id);
TypemapPack<WorkerConcurrentIdMapper> typemap_pack_{
rpc_worker_clients_.GetClientPool(0)};
database::WorkerCounters counters_{rpc_worker_clients_.GetClientPool(0)};
distributed::DataRpcServer data_server_{*this, server_};
distributed::DataRpcClients data_clients_{rpc_worker_clients_};
distributed::PlanConsumer plan_consumer_{server_};
distributed::ProduceRpcServer produce_server_{*this, tx_engine_, server_,
plan_consumer_};
distributed::IndexRpcServer index_rpc_server_{*this, server_};
distributed::UpdatesRpcServer updates_server_{*this, server_};
distributed::UpdatesRpcClients updates_clients_{rpc_worker_clients_};
distributed::DataManager data_manager_{*this, data_clients_};
distributed::WorkerTransactionalCacheCleaner cache_cleaner_{
tx_engine_, server_, produce_server_, updates_server_, data_manager_};
distributed::DurabilityRpcServer durability_rpc_server_{*this, server_};
distributed::ClusterDiscoveryWorker cluster_discovery_{
server_, coordination_, rpc_worker_clients_.GetClientPool(0)};
};
#undef IMPL_GETTERS
PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
: impl_(std::move(impl)) {
if (impl_->config_.durability_enabled)
@ -306,41 +107,18 @@ PublicBase::PublicBase(std::unique_ptr<PrivateBase> impl)
// Durability recovery.
{
auto db_type = impl_->type();
// What we should recover.
std::experimental::optional<durability::RecoveryInfo>
required_recovery_info;
if (db_type == Type::DISTRIBUTED_WORKER) {
required_recovery_info = dynamic_cast<impl::Worker *>(impl_.get())
->cluster_discovery_.recovery_info();
}
// What we recover.
std::experimental::optional<durability::RecoveryInfo> recovery_info;
// Recover only if necessary.
if ((db_type != Type::DISTRIBUTED_WORKER &&
impl_->config_.db_recover_on_startup) ||
(db_type == Type::DISTRIBUTED_WORKER && required_recovery_info)) {
if (impl_->config_.db_recover_on_startup) {
recovery_info = durability::Recover(impl_->config_.durability_directory,
*impl_, required_recovery_info);
}
// Post-recovery setup and checking.
switch (db_type) {
case Type::DISTRIBUTED_MASTER:
dynamic_cast<impl::Master *>(impl_.get())
->coordination_.SetRecoveryInfo(recovery_info);
break;
case Type::DISTRIBUTED_WORKER:
if (required_recovery_info != recovery_info)
LOG(FATAL) << "Memgraph worker failed to recover the database state "
"recovered on the master";
break;
case Type::SINGLE_NODE:
break;
}
}
if (impl_->config_.durability_enabled) {
@ -374,14 +152,12 @@ PublicBase::~PublicBase() {
// If we are not a worker we can do a snapshot on exit if it's enabled. Doing
// this on the master forces workers to do the same through rpcs
if (impl_->config_.snapshot_on_exit &&
impl_->type() != Type::DISTRIBUTED_WORKER) {
if (impl_->config_.snapshot_on_exit) {
GraphDbAccessor dba(*this);
MakeSnapshot(dba);
}
}
GraphDb::Type PublicBase::type() const { return impl_->type(); }
Storage &PublicBase::storage() { return impl_->storage(); }
durability::WriteAheadLog &PublicBase::wal() { return impl_->wal(); }
tx::Engine &PublicBase::tx_engine() { return impl_->tx_engine(); }
@ -400,36 +176,6 @@ int PublicBase::WorkerId() const { return impl_->WorkerId(); }
std::vector<int> PublicBase::GetWorkerIds() const {
return impl_->GetWorkerIds();
}
distributed::DataRpcServer &PublicBase::data_server() {
return impl_->data_server();
}
distributed::DataRpcClients &PublicBase::data_clients() {
return impl_->data_clients();
}
distributed::PlanDispatcher &PublicBase::plan_dispatcher() {
return impl_->plan_dispatcher();
}
distributed::IndexRpcClients &PublicBase::index_rpc_clients() {
return impl_->index_rpc_clients();
}
distributed::PlanConsumer &PublicBase::plan_consumer() {
return impl_->plan_consumer();
}
distributed::PullRpcClients &PublicBase::pull_clients() {
return impl_->pull_clients();
}
distributed::ProduceRpcServer &PublicBase::produce_server() {
return impl_->produce_server();
}
distributed::UpdatesRpcServer &PublicBase::updates_server() {
return impl_->updates_server();
}
distributed::UpdatesRpcClients &PublicBase::updates_clients() {
return impl_->updates_clients();
}
distributed::DataManager &PublicBase::data_manager() {
return impl_->data_manager();
}
bool PublicBase::MakeSnapshot(GraphDbAccessor &accessor) {
return impl_->MakeSnapshot(accessor);
@ -457,32 +203,4 @@ MasterBase::~MasterBase() { snapshot_creator_ = nullptr; }
SingleNode::SingleNode(Config config)
: MasterBase(std::make_unique<impl::SingleNode>(config)) {}
Master::Master(Config config)
: MasterBase(std::make_unique<impl::Master>(config)) {}
io::network::Endpoint Master::endpoint() const {
return dynamic_cast<impl::Master *>(impl_.get())->server_.endpoint();
}
io::network::Endpoint Master::GetEndpoint(int worker_id) {
return dynamic_cast<impl::Master *>(impl_.get())
->coordination_.GetEndpoint(worker_id);
}
Worker::Worker(Config config)
: PublicBase(std::make_unique<impl::Worker>(config)) {}
io::network::Endpoint Worker::endpoint() const {
return dynamic_cast<impl::Worker *>(impl_.get())->server_.endpoint();
}
io::network::Endpoint Worker::GetEndpoint(int worker_id) {
return dynamic_cast<impl::Worker *>(impl_.get())
->coordination_.GetEndpoint(worker_id);
}
void Worker::WaitForShutdown() {
dynamic_cast<impl::Worker *>(impl_.get())->coordination_.WaitForShutdown();
}
} // namespace database

View File

@ -14,19 +14,6 @@
#include "transactions/engine.hpp"
#include "utils/scheduler.hpp"
namespace distributed {
class DataRpcServer;
class DataRpcClients;
class PlanDispatcher;
class PlanConsumer;
class PullRpcClients;
class ProduceRpcServer;
class UpdatesRpcServer;
class UpdatesRpcClients;
class DataManager;
class IndexRpcClients;
} // namespace distributed
namespace database {
/// Database configuration. Initialized from flags, but modifiable.
@ -76,12 +63,9 @@ struct Config {
*/
class GraphDb {
public:
enum class Type { SINGLE_NODE, DISTRIBUTED_MASTER, DISTRIBUTED_WORKER };
GraphDb() {}
virtual ~GraphDb() {}
virtual Type type() const = 0;
virtual Storage &storage() = 0;
virtual durability::WriteAheadLog &wal() = 0;
virtual tx::Engine &tx_engine() = 0;
@ -94,23 +78,6 @@ class GraphDb {
virtual int WorkerId() const = 0;
virtual std::vector<int> GetWorkerIds() const = 0;
// Supported only in distributed master and worker, not in single-node.
virtual distributed::DataRpcServer &data_server() = 0;
virtual distributed::DataRpcClients &data_clients() = 0;
virtual distributed::UpdatesRpcServer &updates_server() = 0;
virtual distributed::UpdatesRpcClients &updates_clients() = 0;
virtual distributed::DataManager &data_manager() = 0;
// Supported only in distributed master.
virtual distributed::PullRpcClients &pull_clients() = 0;
virtual distributed::PlanDispatcher &plan_dispatcher() = 0;
virtual distributed::IndexRpcClients &index_rpc_clients() = 0;
// Supported only in distributed worker.
// TODO remove once end2end testing is possible.
virtual distributed::ProduceRpcServer &produce_server() = 0;
virtual distributed::PlanConsumer &plan_consumer() = 0;
// Makes a snapshot from the visibility of the given accessor
virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0;
@ -136,7 +103,6 @@ class PrivateBase;
// initialization and cleanup.
class PublicBase : public GraphDb {
public:
Type type() const override;
Storage &storage() override;
durability::WriteAheadLog &wal() override;
tx::Engine &tx_engine() override;
@ -147,16 +113,6 @@ class PublicBase : public GraphDb {
void CollectGarbage() override;
int WorkerId() const override;
std::vector<int> GetWorkerIds() const override;
distributed::DataRpcServer &data_server() override;
distributed::DataRpcClients &data_clients() override;
distributed::PlanDispatcher &plan_dispatcher() override;
distributed::IndexRpcClients &index_rpc_clients() override;
distributed::PlanConsumer &plan_consumer() override;
distributed::PullRpcClients &pull_clients() override;
distributed::ProduceRpcServer &produce_server() override;
distributed::UpdatesRpcServer &updates_server() override;
distributed::UpdatesRpcClients &updates_clients() override;
distributed::DataManager &data_manager() override;
bool is_accepting_transactions() const { return is_accepting_transactions_; }
bool MakeSnapshot(GraphDbAccessor &accessor) override;
@ -188,25 +144,4 @@ class SingleNode : public MasterBase {
public:
explicit SingleNode(Config config = Config());
};
class Master : public MasterBase {
public:
explicit Master(Config config = Config());
/** Gets this master's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
};
class Worker : public impl::PublicBase {
public:
explicit Worker(Config config = Config());
/** Gets this worker's endpoint. */
io::network::Endpoint endpoint() const;
/** Gets the endpoint of the worker with the given id. */
// TODO make const once Coordination::GetEndpoint is const.
io::network::Endpoint GetEndpoint(int worker_id);
void WaitForShutdown();
};
} // namespace database
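Tying the removed pieces together, a hedged sketch of how a two-node deployment was wired up through these classes before this change. The `Config` field names are taken from the distributed flags removed in `config.cpp`; exact spellings and initialization details are assumptions.

```
// Illustrative only; field names assumed from config.cpp above.
database::Config master_config;
master_config.master_endpoint = {"127.0.0.1", 10000};
database::Master master(master_config);

database::Config worker_config;
worker_config.worker_id = 1;
worker_config.master_endpoint = master.endpoint();
worker_config.worker_endpoint = {"127.0.0.1", 0};  // 0 = pick a random port.
database::Worker worker(worker_config);

worker.WaitForShutdown();
```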

View File

@ -4,9 +4,6 @@
#include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "storage/address_types.hpp"
#include "storage/edge.hpp"
#include "storage/edge_accessor.hpp"
@ -77,26 +74,6 @@ VertexAccessor GraphDbAccessor::InsertVertex(
return VertexAccessor(vertex_vlist, *this);
}
VertexAccessor GraphDbAccessor::InsertVertexIntoRemote(
int worker_id, const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
CHECK(worker_id != db().WorkerId())
<< "Not allowed to call InsertVertexIntoRemote for local worker";
gid::Gid gid = db().updates_clients().CreateVertex(
worker_id, transaction_id(), labels, properties);
auto vertex = std::make_unique<Vertex>();
vertex->labels_ = labels;
for (auto &kv : properties) vertex->properties_.set(kv.first, kv.second);
db().data_manager()
.Elements<Vertex>(transaction_id())
.emplace(gid, nullptr, std::move(vertex));
return VertexAccessor({gid, worker_id}, *this);
}
std::experimental::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
gid::Gid gid, bool current_state) {
VertexAccessor record_accessor(db_.storage().LocalAddress<Vertex>(gid),
@ -129,8 +106,6 @@ EdgeAccessor GraphDbAccessor::FindEdge(gid::Gid gid, bool current_state) {
void GraphDbAccessor::BuildIndex(storage::Label label,
storage::Property property) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
DCHECK(db_.type() != GraphDb::Type::DISTRIBUTED_WORKER)
<< "BuildIndex invoked on worker";
db_.storage().index_build_tx_in_progress_.access().insert(transaction_.id_);
@ -174,16 +149,6 @@ void GraphDbAccessor::BuildIndex(storage::Label label,
// CreateIndex.
GraphDbAccessor dba(db_);
std::experimental::optional<std::vector<utils::Future<bool>>>
index_rpc_completions;
// Notify all workers to start building an index if we are the master since
// they don't have to wait anymore
if (db_.type() == GraphDb::Type::DISTRIBUTED_MASTER) {
index_rpc_completions.emplace(db_.index_rpc_clients().GetBuildIndexFutures(
label, property, transaction_id(), this->db_.WorkerId()));
}
// Add transaction to the build_tx_in_progress as this transaction doesn't
// change data and shouldn't block other parallel index creations
auto read_transaction_id = dba.transaction().id_;
@ -198,21 +163,6 @@ void GraphDbAccessor::BuildIndex(storage::Label label,
dba.PopulateIndex(key);
// Check if all workers successfully built their indexes; after this we can
// set the index as built.
if (index_rpc_completions) {
// Wait first, check later - so that every thread finishes and none
// terminates - this can probably be optimized in case we fail early so that
// we notify other workers to stop building indexes
for (auto &index_built : *index_rpc_completions) index_built.wait();
for (auto &index_built : *index_rpc_completions) {
if (!index_built.get()) {
db_.storage().label_property_index_.DeleteIndex(key);
throw IndexCreationOnWorkerException("Index exists on a worker");
}
}
}
dba.EnableIndex(key);
dba.Commit();
}
@ -246,7 +196,6 @@ void GraphDbAccessor::UpdateLabelIndices(storage::Label label,
const VertexAccessor &vertex_accessor,
const Vertex *const vertex) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
DCHECK(vertex_accessor.is_local()) << "Only local vertices belong in indexes";
auto *vlist_ptr = vertex_accessor.address().local();
db_.storage().labels_index_.Update(label, vlist_ptr, vertex);
db_.storage().label_property_index_.UpdateOnLabel(label, vlist_ptr, vertex);
@ -256,7 +205,6 @@ void GraphDbAccessor::UpdatePropertyIndex(
storage::Property property, const RecordAccessor<Vertex> &vertex_accessor,
const Vertex *const vertex) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
DCHECK(vertex_accessor.is_local()) << "Only local vertices belong in indexes";
db_.storage().label_property_index_.UpdateOnProperty(
property, vertex_accessor.address().local(), vertex);
}
@ -337,14 +285,6 @@ bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
bool check_empty) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (!vertex_accessor.is_local()) {
auto address = vertex_accessor.address();
db().updates_clients().RemoveVertex(address.worker_id(), transaction_id(),
address.gid(), check_empty);
// We can't know if we are going to be able to remove vertex until deferred
// updates on a remote worker are executed
return true;
}
vertex_accessor.SwitchNew();
// it's possible the vertex was removed already in this transaction
// due to it getting matched multiple times by some patterns
@ -387,66 +327,37 @@ EdgeAccessor GraphDbAccessor::InsertEdge(
storage::EdgeAddress edge_address;
Vertex *from_updated;
if (from.is_local()) {
auto gid = db_.storage().edge_generator_.Next(requested_gid);
edge_address = new mvcc::VersionList<Edge>(
transaction_, gid, from.address(), to.address(), edge_type);
// We need to insert edge_address into edges_ before calling update, since
// update can throw and edge_vlist will not be garbage collected if it is
// not in the edges_ skiplist.
bool success =
db_.storage().edges_.access().insert(gid, edge_address.local()).second;
CHECK(success) << "Attempting to insert an edge with an existing GID: "
<< gid;
auto gid = db_.storage().edge_generator_.Next(requested_gid);
edge_address = new mvcc::VersionList<Edge>(transaction_, gid, from.address(),
to.address(), edge_type);
// We need to insert edge_address into edges_ before calling update, since
// update can throw and edge_vlist will not be garbage collected if it is
// not in the edges_ skiplist.
bool success =
db_.storage().edges_.access().insert(gid, edge_address.local()).second;
CHECK(success) << "Attempting to insert an edge with an existing GID: "
<< gid;
from.SwitchNew();
from_updated = &from.update();
from.SwitchNew();
from_updated = &from.update();
// TODO when preparing WAL for distributed, most likely never use
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
// in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, gid, from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
// TODO when preparing WAL for distributed, most likely never use
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
// in/out modification).
wal().Emplace(database::StateDelta::CreateEdge(
transaction_.id_, gid, from.gid(), to.gid(), edge_type,
EdgeTypeName(edge_type)));
} else {
edge_address = db().updates_clients().CreateEdge(transaction_id(), from, to,
edge_type);
from_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(from.gid());
// Create an Edge and insert it into the Cache so we see it locally.
db().data_manager()
.Elements<Edge>(transaction_id())
.emplace(
edge_address.gid(), nullptr,
std::make_unique<Edge>(from.address(), to.address(), edge_type));
}
from_updated->out_.emplace(
db_.storage().LocalizedAddressIfPossible(to.address()), edge_address,
edge_type);
Vertex *to_updated;
if (to.is_local()) {
// ensure that the "to" accessor has the latest version (Switch new)
// WARNING: must do that after the above "from.update()" for cases when
// we are creating a cycle and "from" and "to" are the same vlist
to.SwitchNew();
to_updated = &to.update();
} else {
// The RPC call for the `to` side is already handled if `from` is not local.
if (from.is_local() ||
from.address().worker_id() != to.address().worker_id()) {
db().updates_clients().AddInEdge(
transaction_id(), from,
db().storage().GlobalizedAddress(edge_address), to, edge_type);
}
to_updated = db().data_manager()
.Elements<Vertex>(transaction_id())
.FindNew(to.gid());
}
// ensure that the "to" accessor has the latest version (Switch new)
// WARNING: must do that after the above "from.update()" for cases when
// we are creating a cycle and "from" and "to" are the same vlist
to.SwitchNew();
to_updated = &to.update();
to_updated->in_.emplace(
db_.storage().LocalizedAddressIfPossible(from.address()), edge_address,
edge_type);
@ -479,35 +390,16 @@ int64_t GraphDbAccessor::EdgesCount() const {
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
bool remove_in_edge) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
if (edge.is_local()) {
// it's possible the edge was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
edge.SwitchNew();
if (edge.current().is_expired_by(transaction_)) return;
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
// it's possible the edge was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
edge.SwitchNew();
if (edge.current().is_expired_by(transaction_)) return;
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
edge.address().local()->remove(edge.current_, transaction_);
wal().Emplace(
database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
} else {
auto edge_addr = edge.GlobalAddress();
auto from_addr = db().storage().GlobalizedAddress(edge.from_addr());
CHECK(edge_addr.worker_id() == from_addr.worker_id())
<< "Edge and it's 'from' vertex not on the same worker";
auto to_addr = db().storage().GlobalizedAddress(edge.to_addr());
db().updates_clients().RemoveEdge(transaction_id(), edge_addr.worker_id(),
edge_addr.gid(), from_addr.gid(),
to_addr);
// Another RPC is necessary only if the first did not handle vertices on
// both sides.
if (edge_addr.worker_id() != to_addr.worker_id()) {
db().updates_clients().RemoveInEdge(transaction_id(), to_addr.worker_id(),
to_addr.gid(), edge_addr);
}
}
edge.address().local()->remove(edge.current_, transaction_);
wal().Emplace(database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
}
storage::Label GraphDbAccessor::Label(const std::string &label_name) {

View File

@ -9,7 +9,6 @@
#include "glog/logging.h"
#include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "query/typed_value.hpp"
#include "storage/address_types.hpp"
#include "storage/edge_accessor.hpp"
@ -78,13 +77,6 @@ class GraphDbAccessor {
VertexAccessor InsertVertex(std::experimental::optional<gid::Gid>
requested_gid = std::experimental::nullopt);
/** Creates a new Vertex on the given worker. It is NOT allowed to call this
* function with this worker's id. */
VertexAccessor InsertVertexIntoRemote(
int worker_id, const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/**
* Removes the vertex of the given accessor. If the vertex has any outgoing or
* incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to

View File

@ -7,7 +7,6 @@
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "storage/property_value.hpp"
#include "utils/serialization.hpp"
namespace database {
/** Describes single change to the database state. Used for durability (WAL) and
@ -132,52 +131,5 @@ struct StateDelta {
storage::Label label;
std::string label_name;
bool check_empty;
private:
friend class boost::serialization::access;
BOOST_SERIALIZATION_SPLIT_MEMBER();
template <class TArchive>
void save(TArchive &ar, const unsigned int) const {
ar &type;
ar &transaction_id;
ar &vertex_id;
ar &edge_id;
ar &edge_address;
ar &vertex_from_id;
ar &vertex_from_address;
ar &vertex_to_id;
ar &vertex_to_address;
ar &edge_type;
ar &edge_type_name;
ar &property;
ar &property_name;
utils::SaveTypedValue(ar, value);
ar &label;
ar &label_name;
ar &check_empty;
}
template <class TArchive>
void load(TArchive &ar, const unsigned int) {
ar &type;
ar &transaction_id;
ar &vertex_id;
ar &edge_id;
ar &edge_address;
ar &vertex_from_id;
ar &vertex_from_address;
ar &vertex_to_id;
ar &vertex_to_address;
ar &edge_type;
ar &edge_type_name;
ar &property;
ar &property_name;
query::TypedValue tv;
utils::LoadTypedValue(ar, tv);
value = tv;
ar &label;
ar &label_name;
ar &check_empty;
}
};
} // namespace database

View File

@ -6,7 +6,6 @@
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/storage.hpp"
#include "mvcc/version_list.hpp"
#include "stats/metrics.hpp"
#include "storage/deferred_deleter.hpp"
#include "storage/edge.hpp"
#include "storage/garbage_collector.hpp"

View File

@ -1,67 +0,0 @@
#pragma once
#include <mutex>
#include "database/storage_gc.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
namespace database {
class StorageGcMaster : public StorageGc {
public:
using StorageGc::StorageGc;
StorageGcMaster(Storage &storage, tx::Engine &tx_engine, int pause_sec,
communication::rpc::Server &rpc_server,
distributed::MasterCoordination &coordination)
: StorageGc(storage, tx_engine, pause_sec),
rpc_server_(rpc_server),
coordination_(coordination) {
rpc_server_.Register<distributed::RanLocalGcRpc>(
[this](const distributed::GcClearedStatusReq &req) {
std::unique_lock<std::mutex> lock(worker_safe_transaction_mutex_);
worker_safe_transaction_[req.worker_id] = req.local_oldest_active;
return std::make_unique<distributed::GcClearedStatusRes>();
});
}
~StorageGcMaster() {
// We have to stop the scheduler before destroying this class, because
// otherwise a task might try to use methods of this class, which would cause
// a pure virtual method call since they are not implemented in the base class.
scheduler_.Stop();
rpc_server_.UnRegister<distributed::RanLocalGcRpc>();
}
void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
// Workers send information about when it is safe to delete every
// transaction older than oldest_active from their perspective, i.e. there
// won't be another transaction in the future with an id greater than or
// equal to oldest_active that might query the commit log about the state of
// the transactions we are deleting.
auto safe_transaction = GetClogSafeTransaction(oldest_active);
if (safe_transaction) {
tx::TransactionId min_safe = *safe_transaction;
{
std::unique_lock<std::mutex> lock(worker_safe_transaction_mutex_);
for (auto worker_id : coordination_.GetWorkerIds()) {
// Skip itself
if (worker_id == 0) continue;
min_safe = std::min(min_safe, worker_safe_transaction_[worker_id]);
}
}
// All workers reported back at least once
if (min_safe > 0) {
tx_engine_.GarbageCollectCommitLog(min_safe);
LOG(INFO) << "Clearing master commit log with tx: " << min_safe;
}
}
}
communication::rpc::Server &rpc_server_;
distributed::MasterCoordination &coordination_;
// Mapping of worker ids and oldest active transaction which is safe for
// deletion from worker perspective
std::unordered_map<int, tx::TransactionId> worker_safe_transaction_;
std::mutex worker_safe_transaction_mutex_;
};
} // namespace database

View File

@ -1,46 +0,0 @@
#pragma once
#include "communication/rpc/client_pool.hpp"
#include "database/storage_gc.hpp"
#include "distributed/storage_gc_rpc_messages.hpp"
#include "transactions/engine_worker.hpp"
#include "transactions/transaction.hpp"
namespace database {
class StorageGcWorker : public StorageGc {
public:
StorageGcWorker(Storage &storage, tx::Engine &tx_engine, int pause_sec,
communication::rpc::ClientPool &master_client_pool,
int worker_id)
: StorageGc(storage, tx_engine, pause_sec),
master_client_pool_(master_client_pool),
worker_id_(worker_id) {}
~StorageGcWorker() {
// We have to stop the scheduler before destroying this class, because
// otherwise a task might try to use methods of this class, which would cause
// a pure virtual method call since they are not implemented in the base class.
scheduler_.Stop();
}
void CollectCommitLogGarbage(tx::TransactionId oldest_active) final {
// We first need to delete the transactions that we can, to be sure that
// their locks are released as well. Otherwise some new transaction might
// try to acquire a lock which hasn't been released (if the transaction
// cache cleaner was not scheduled at this time) and then look into a
// commit log which no longer contains that transaction id.
dynamic_cast<tx::WorkerEngine &>(tx_engine_)
.ClearTransactionalCache(oldest_active);
auto safe_to_delete = GetClogSafeTransaction(oldest_active);
if (safe_to_delete) {
master_client_pool_.Call<distributed::RanLocalGcRpc>(*safe_to_delete,
worker_id_);
tx_engine_.GarbageCollectCommitLog(*safe_to_delete);
}
}
communication::rpc::ClientPool &master_client_pool_;
int worker_id_;
};
} // namespace database

View File

@ -1,99 +0,0 @@
#include "glog/logging.h"
#include "database/storage.hpp"
#include "distributed/cache.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
template <typename TRecord>
TRecord *Cache<TRecord>::FindNew(gid::Gid gid) {
std::lock_guard<std::mutex> guard{lock_};
auto found = cache_.find(gid);
DCHECK(found != cache_.end())
<< "FindNew for uninitialized remote Vertex/Edge";
auto &pair = found->second;
if (!pair.second) {
pair.second = std::unique_ptr<TRecord>(pair.first->CloneData());
}
return pair.second.get();
}
template <typename TRecord>
void Cache<TRecord>::FindSetOldNew(tx::TransactionId tx_id, int worker_id,
gid::Gid gid, TRecord *&old_record,
TRecord *&new_record) {
{
std::lock_guard<std::mutex> guard(lock_);
auto found = cache_.find(gid);
if (found != cache_.end()) {
old_record = found->second.first.get();
new_record = found->second.second.get();
return;
}
}
auto remote = data_clients_.RemoteElement<TRecord>(worker_id, tx_id, gid);
LocalizeAddresses(*remote);
// This logic is a bit strange because we need to make sure that someone
// else didn't get a response and update the cache before we did (we need a
// lock for that), but we also need to check whether we can now return that
// result - otherwise we could get inconsistent results for remote
// FindSetOldNew.
std::lock_guard<std::mutex> guard(lock_);
auto it_pair = cache_.emplace(
gid, std::make_pair<rec_uptr, rec_uptr>(std::move(remote), nullptr));
old_record = it_pair.first->second.first.get();
new_record = it_pair.first->second.second.get();
}
template <typename TRecord>
void Cache<TRecord>::emplace(gid::Gid gid, rec_uptr old_record,
rec_uptr new_record) {
if (old_record) LocalizeAddresses(*old_record);
if (new_record) LocalizeAddresses(*new_record);
std::lock_guard<std::mutex> guard{lock_};
// We can't replace existing data because some accessors might be using
// it.
// TODO - consider if it's necessary and OK to copy just the data content.
auto found = cache_.find(gid);
if (found != cache_.end())
return;
else
cache_[gid] = std::make_pair(std::move(old_record), std::move(new_record));
}
template <typename TRecord>
void Cache<TRecord>::ClearCache() {
std::lock_guard<std::mutex> guard{lock_};
cache_.clear();
}
template <>
void Cache<Vertex>::LocalizeAddresses(Vertex &vertex) {
auto localize_edges = [this](auto &edges) {
for (auto &element : edges) {
element.vertex = storage_.LocalizedAddressIfPossible(element.vertex);
element.edge = storage_.LocalizedAddressIfPossible(element.edge);
}
};
localize_edges(vertex.in_.storage());
localize_edges(vertex.out_.storage());
}
template <>
void Cache<Edge>::LocalizeAddresses(Edge &edge) {
edge.from_ = storage_.LocalizedAddressIfPossible(edge.from_);
edge.to_ = storage_.LocalizedAddressIfPossible(edge.to_);
}
template class Cache<Vertex>;
template class Cache<Edge>;
} // namespace distributed

View File

@ -1,62 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "distributed/data_rpc_clients.hpp"
#include "storage/gid.hpp"
namespace database {
class Storage;
}
namespace distributed {
/**
* Used for caching Vertices and Edges that are stored on another worker in a
* distributed system. Maps global IDs to (old, new) Vertex/Edge pointer
* pairs. It is possible that either "old" or "new" is a nullptr, but at
* least one must be non-null. The Cache is the owner of the TRecord
* objects it points to.
*
* @tparam TRecord - Edge or Vertex
*/
template <typename TRecord>
class Cache {
using rec_uptr = std::unique_ptr<TRecord>;
public:
Cache(database::Storage &storage, distributed::DataRpcClients &data_clients)
: storage_(storage), data_clients_(data_clients) {}
/// Returns the new data for the given ID. Creates it (as copy of old) if
/// necessary.
TRecord *FindNew(gid::Gid gid);
/// For the Vertex/Edge with the given global ID, looks for the data visible
/// from the given transaction's ID and command ID, and caches it. Sets the
/// given pointers to point to the fetched data. Analogue to
/// mvcc::VersionList::find_set_old_new.
void FindSetOldNew(tx::TransactionId tx_id, int worker_id, gid::Gid gid,
TRecord *&old_record, TRecord *&new_record);
/// Sets the given records as (old, new) data for the given gid.
void emplace(gid::Gid gid, rec_uptr old_record, rec_uptr new_record);
/// Removes all the data from the cache.
void ClearCache();
private:
database::Storage &storage_;
std::mutex lock_;
distributed::DataRpcClients &data_clients_;
// TODO it'd be better if we had VertexData and EdgeData in here, as opposed
// to Vertex and Edge.
std::unordered_map<gid::Gid, std::pair<rec_uptr, rec_uptr>> cache_;
// Localizes all the addresses in the record.
void LocalizeAddresses(TRecord &record);
};
} // namespace distributed
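
For reference, a minimal standalone sketch of the locking pattern FindSetOldNew relies on: look up the cache under the lock, do the remote fetch without holding it, then re-acquire the lock and let emplace keep whichever entry won the race. The types below are simplified stand-ins built on the standard library, not the Memgraph classes.

```
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Record {
  std::string data;
};

class RemoteCacheSketch {
 public:
  // Returns the cached record for `gid`, fetching it "remotely" on a miss.
  const Record *FindOrFetch(int gid) {
    {
      std::lock_guard<std::mutex> guard(lock_);
      auto found = cache_.find(gid);
      if (found != cache_.end()) return found->second.get();
    }
    // Stand-in for the RPC call; done without holding the lock.
    auto remote =
        std::make_unique<Record>(Record{"fetched:" + std::to_string(gid)});
    std::lock_guard<std::mutex> guard(lock_);
    // If another thread inserted a record meanwhile, emplace keeps the
    // existing entry and the freshly fetched one is discarded.
    auto it = cache_.emplace(gid, std::move(remote)).first;
    return it->second.get();
  }

 private:
  std::mutex lock_;
  std::map<int, std::unique_ptr<Record>> cache_;
};
```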

View File

@ -1,33 +0,0 @@
#include "distributed/cluster_discovery_master.hpp"
#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
ClusterDiscoveryMaster::ClusterDiscoveryMaster(
Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients)
: server_(server),
coordination_(coordination),
rpc_worker_clients_(rpc_worker_clients) {
server_.Register<RegisterWorkerRpc>([this](const RegisterWorkerReq &req) {
bool registration_successful =
this->coordination_.RegisterWorker(req.desired_worker_id, req.endpoint);
if (registration_successful) {
rpc_worker_clients_.ExecuteOnWorkers<void>(
0, [req](communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<ClusterDiscoveryRpc>(
req.desired_worker_id, req.endpoint);
CHECK(result) << "ClusterDiscoveryRpc failed";
});
}
return std::make_unique<RegisterWorkerRes>(
registration_successful, this->coordination_.RecoveryInfo(),
this->coordination_.GetWorkers());
});
}
} // namespace distributed

View File

@ -1,27 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/rpc_worker_clients.hpp"
namespace distributed {
using Server = communication::rpc::Server;
/** Handle cluster discovery on master.
*
* Cluster discovery on the master handles worker registration, broadcasts new
* worker information to already registered workers, and sends already
* registered worker information to the new worker.
*/
class ClusterDiscoveryMaster final {
public:
ClusterDiscoveryMaster(Server &server, MasterCoordination &coordination,
RpcWorkerClients &rpc_worker_clients);
private:
Server &server_;
MasterCoordination &coordination_;
RpcWorkerClients &rpc_worker_clients_;
};
} // namespace distributed

View File

@ -1,30 +0,0 @@
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_rpc_messages.hpp"
namespace distributed {
using Server = communication::rpc::Server;
ClusterDiscoveryWorker::ClusterDiscoveryWorker(
Server &server, WorkerCoordination &coordination,
communication::rpc::ClientPool &client_pool)
: server_(server), coordination_(coordination), client_pool_(client_pool) {
server_.Register<ClusterDiscoveryRpc>([this](const ClusterDiscoveryReq &req) {
this->coordination_.RegisterWorker(req.worker_id, req.endpoint);
return std::make_unique<ClusterDiscoveryRes>();
});
}
void ClusterDiscoveryWorker::RegisterWorker(int worker_id) {
auto result =
client_pool_.Call<RegisterWorkerRpc>(worker_id, server_.endpoint());
CHECK(result) << "RegisterWorkerRpc failed";
CHECK(result->registration_successful) << "Unable to assign requested ID ("
<< worker_id << ") to worker!";
for (auto &kv : result->workers) {
coordination_.RegisterWorker(kv.first, kv.second);
}
recovery_info_ = result->recovery_info;
}
} // namespace distributed

View File

@ -1,43 +0,0 @@
#pragma once
#include <experimental/optional>
#include "communication/rpc/client_pool.hpp"
#include "communication/rpc/server.hpp"
#include "distributed/coordination_worker.hpp"
#include "durability/recovery.hpp"
namespace distributed {
using Server = communication::rpc::Server;
using ClientPool = communication::rpc::ClientPool;
/** Handle cluster discovery on worker.
*
* Cluster discovery on a worker handles registration by sending an RPC
* request to the master and processing the received response, which contains
* information about the other workers.
*/
class ClusterDiscoveryWorker final {
public:
ClusterDiscoveryWorker(Server &server, WorkerCoordination &coordination,
ClientPool &client_pool);
/**
* Registers a worker with the master.
*
* @param worker_id - Desired ID. If the master can't assign the desired
* worker ID, the worker will exit.
*/
void RegisterWorker(int worker_id);
/** Returns the recovery info. Valid only after registration. */
auto recovery_info() const { return recovery_info_; }
private:
Server &server_;
WorkerCoordination &coordination_;
communication::rpc::ClientPool &client_pool_;
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed

View File

@ -1,34 +0,0 @@
#include "glog/logging.h"
#include "distributed/coordination.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
Coordination::Coordination(const Endpoint &master_endpoint) {
// The master is always worker 0.
workers_.emplace(0, master_endpoint);
}
Endpoint Coordination::GetEndpoint(int worker_id) {
auto found = workers_.find(worker_id);
CHECK(found != workers_.end()) << "No endpoint registered for worker id: "
<< worker_id;
return found->second;
}
std::vector<int> Coordination::GetWorkerIds() const {
std::vector<int> worker_ids;
for (auto worker : workers_) worker_ids.push_back(worker.first);
return worker_ids;
}
void Coordination::AddWorker(int worker_id, Endpoint endpoint) {
workers_.emplace(worker_id, endpoint);
}
std::unordered_map<int, Endpoint> Coordination::GetWorkers() {
return workers_;
}
} // namespace distributed

View File

@ -1,36 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "io/network/endpoint.hpp"
namespace distributed {
/** Coordination base class. This class is not thread safe. */
class Coordination {
public:
explicit Coordination(const io::network::Endpoint &master_endpoint);
/** Gets the endpoint for the given worker ID from the master. */
io::network::Endpoint GetEndpoint(int worker_id);
/** Returns all worker ids, including the master's id (0). */
std::vector<int> GetWorkerIds() const;
/** Gets the mapping of worker id to worker endpoint including master (worker
* id = 0).
*/
std::unordered_map<int, io::network::Endpoint> GetWorkers();
protected:
~Coordination() {}
/** Adds a worker to coordination. */
void AddWorker(int worker_id, io::network::Endpoint endpoint);
private:
std::unordered_map<int, io::network::Endpoint> workers_;
};
} // namespace distributed

View File

@ -1,83 +0,0 @@
#include <chrono>
#include <thread>
#include "glog/logging.h"
#include "communication/rpc/client.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
#include "utils/network.hpp"
namespace distributed {
MasterCoordination::MasterCoordination(const Endpoint &master_endpoint)
: Coordination(master_endpoint) {}
bool MasterCoordination::RegisterWorker(int desired_worker_id,
Endpoint endpoint) {
// Workers can't register before the recovery phase on the master is done, to
// ensure the whole cluster is in a consistent state.
while (true) {
{
std::lock_guard<std::mutex> guard(lock_);
if (recovery_done_) break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
std::lock_guard<std::mutex> guard(lock_);
auto workers = GetWorkers();
// Check if the desired worker id already exists.
if (workers.find(desired_worker_id) != workers.end()) {
LOG(WARNING) << "Unable to assign requested ID (" << desired_worker_id
<< ") to worker at: " << endpoint;
// If the desired worker ID is already assigned, return false and don't add
// that worker to master coordination.
return false;
}
AddWorker(desired_worker_id, endpoint);
return true;
}
Endpoint MasterCoordination::GetEndpoint(int worker_id) {
std::lock_guard<std::mutex> guard(lock_);
return Coordination::GetEndpoint(worker_id);
}
MasterCoordination::~MasterCoordination() {
using namespace std::chrono_literals;
std::lock_guard<std::mutex> guard(lock_);
auto workers = GetWorkers();
for (const auto &kv : workers) {
// Skip master (self).
if (kv.first == 0) continue;
communication::rpc::Client client(kv.second);
auto result = client.Call<StopWorkerRpc>();
CHECK(result) << "StopWorkerRpc failed for worker: " << kv.first;
}
// Make sure all workers have died.
for (const auto &kv : workers) {
// Skip master (self).
if (kv.first == 0) continue;
while (utils::CanEstablishConnection(kv.second))
std::this_thread::sleep_for(0.5s);
}
}
void MasterCoordination::SetRecoveryInfo(
std::experimental::optional<durability::RecoveryInfo> info) {
std::lock_guard<std::mutex> guard(lock_);
recovery_done_ = true;
recovery_info_ = info;
}
std::experimental::optional<durability::RecoveryInfo>
MasterCoordination::RecoveryInfo() const {
std::lock_guard<std::mutex> guard(lock_);
CHECK(recovery_done_) << "RecoveryInfo requested before it's available";
return recovery_info_;
}
} // namespace distributed
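
The registration flow above (block until recovery is done, then accept the desired ID only if it is still free) can be summarized in a small standalone sketch; the atomic flag and plain standard-library containers below are simplifications, not the actual MasterCoordination members.

```
#include <atomic>
#include <chrono>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>

class MasterSketch {
 public:
  bool RegisterWorker(int desired_id, const std::string &endpoint) {
    // Poll until the recovery phase is finished, mirroring RegisterWorker above.
    while (!recovery_done_.load())
      std::this_thread::sleep_for(std::chrono::milliseconds(200));
    std::lock_guard<std::mutex> guard(lock_);
    if (workers_.count(desired_id)) return false;  // Desired ID already taken.
    workers_.emplace(desired_id, endpoint);
    return true;
  }

  void FinishRecovery() { recovery_done_.store(true); }

 private:
  std::atomic<bool> recovery_done_{false};
  std::mutex lock_;
  // The master is always worker 0.
  std::unordered_map<int, std::string> workers_{{0, "master:10000"}};
};
```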

View File

@ -1,50 +0,0 @@
#pragma once
#include <experimental/optional>
#include <mutex>
#include <unordered_map>
#include "distributed/coordination.hpp"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
namespace distributed {
using Endpoint = io::network::Endpoint;
/** Handles worker registration, retrieval of other workers' endpoints and
 * coordinated shutdown in a distributed memgraph. Master side. */
class MasterCoordination final : public Coordination {
public:
explicit MasterCoordination(const Endpoint &master_endpoint);
/** Shuts down all the workers and this master server. */
~MasterCoordination();
/** Registers a new worker with this master coordination.
*
* @param desired_worker_id - The ID the worker would like to have.
* @return True if the desired ID for the worker is available, or false
* if the desired ID is already taken.
*/
bool RegisterWorker(int desired_worker_id, Endpoint endpoint);
Endpoint GetEndpoint(int worker_id);
/// Sets the recovery info. nullopt indicates nothing was recovered.
void SetRecoveryInfo(
std::experimental::optional<durability::RecoveryInfo> info);
std::experimental::optional<durability::RecoveryInfo> RecoveryInfo() const;
private:
// Most master functions aren't thread-safe.
mutable std::mutex lock_;
/// Durability recovery info.
/// Indicates if the recovery phase is done.
bool recovery_done_{false};
/// If nullopt nothing was recovered.
std::experimental::optional<durability::RecoveryInfo> recovery_info_;
};
} // namespace distributed

View File

@ -1,95 +0,0 @@
#pragma once
#include <experimental/optional>
#include <unordered_map>
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "boost/serialization/unordered_map.hpp"
#include "communication/rpc/messages.hpp"
#include "durability/recovery.hpp"
#include "io/network/endpoint.hpp"
namespace distributed {
using communication::rpc::Message;
using Endpoint = io::network::Endpoint;
struct RegisterWorkerReq : public Message {
// Set desired_worker_id to -1 to get an automatically assigned ID.
RegisterWorkerReq(int desired_worker_id, const Endpoint &endpoint)
: desired_worker_id(desired_worker_id), endpoint(endpoint) {}
int desired_worker_id;
Endpoint endpoint;
private:
friend class boost::serialization::access;
RegisterWorkerReq() {}
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<Message>(*this);
ar &desired_worker_id;
ar &endpoint;
}
};
struct RegisterWorkerRes : public Message {
RegisterWorkerRes(
bool registration_successful,
std::experimental::optional<durability::RecoveryInfo> recovery_info,
std::unordered_map<int, Endpoint> workers)
: registration_successful(registration_successful),
recovery_info(recovery_info),
workers(std::move(workers)) {}
bool registration_successful;
std::experimental::optional<durability::RecoveryInfo> recovery_info;
std::unordered_map<int, Endpoint> workers;
private:
friend class boost::serialization::access;
RegisterWorkerRes() {}
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<Message>(*this);
ar &registration_successful;
ar &recovery_info;
ar &workers;
}
};
struct ClusterDiscoveryReq : public Message {
ClusterDiscoveryReq(int worker_id, Endpoint endpoint)
: worker_id(worker_id), endpoint(endpoint) {}
int worker_id;
Endpoint endpoint;
private:
friend class boost::serialization::access;
ClusterDiscoveryReq() {}
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<Message>(*this);
ar &worker_id;
ar &endpoint;
}
};
RPC_NO_MEMBER_MESSAGE(ClusterDiscoveryRes);
RPC_NO_MEMBER_MESSAGE(StopWorkerReq);
RPC_NO_MEMBER_MESSAGE(StopWorkerRes);
using RegisterWorkerRpc =
communication::rpc::RequestResponse<RegisterWorkerReq, RegisterWorkerRes>;
using StopWorkerRpc =
communication::rpc::RequestResponse<StopWorkerReq, StopWorkerRes>;
using ClusterDiscoveryRpc =
communication::rpc::RequestResponse<ClusterDiscoveryReq,
ClusterDiscoveryRes>;
} // namespace distributed
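
The request/response structs above all follow the same Boost.Serialization recipe: each field is streamed with `ar &` from a `serialize` member (or split `save`/`load` pair). A minimal standalone sketch of that recipe, using a toy message with public members and a text archive; it assumes Boost.Serialization is available, as it is in this build, and the names are illustrative only.

```
#include <iostream>
#include <sstream>
#include <string>

#include "boost/archive/text_iarchive.hpp"
#include "boost/archive/text_oarchive.hpp"
#include "boost/serialization/string.hpp"

struct ToyMessage {
  ToyMessage() {}
  ToyMessage(int worker_id, std::string endpoint)
      : worker_id(worker_id), endpoint(std::move(endpoint)) {}

  int worker_id = 0;
  std::string endpoint;

  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar &worker_id;
    ar &endpoint;
  }
};

int main() {
  std::stringstream stream;
  {
    const ToyMessage original(1, "worker1:10001");
    boost::archive::text_oarchive oar(stream);
    oar << original;  // Invokes ToyMessage::serialize in save mode.
  }
  ToyMessage restored;
  boost::archive::text_iarchive iar(stream);
  iar >> restored;  // Invokes ToyMessage::serialize in load mode.
  std::cout << restored.worker_id << " " << restored.endpoint << "\n";
}
```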

View File

@ -1,47 +0,0 @@
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include "glog/logging.h"
#include "distributed/coordination_rpc_messages.hpp"
#include "distributed/coordination_worker.hpp"
namespace distributed {
using namespace std::literals::chrono_literals;
WorkerCoordination::WorkerCoordination(communication::rpc::Server &server,
const Endpoint &master_endpoint)
: Coordination(master_endpoint), server_(server) {}
void WorkerCoordination::RegisterWorker(int worker_id, Endpoint endpoint) {
std::lock_guard<std::mutex> guard(lock_);
AddWorker(worker_id, endpoint);
}
void WorkerCoordination::WaitForShutdown() {
using namespace std::chrono_literals;
std::mutex mutex;
std::condition_variable cv;
bool shutdown = false;
server_.Register<StopWorkerRpc>([&](const StopWorkerReq &) {
std::unique_lock<std::mutex> lk(mutex);
shutdown = true;
lk.unlock();
cv.notify_one();
return std::make_unique<StopWorkerRes>();
});
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&shutdown] { return shutdown; });
}
Endpoint WorkerCoordination::GetEndpoint(int worker_id) {
std::lock_guard<std::mutex> guard(lock_);
return Coordination::GetEndpoint(worker_id);
}
} // namespace distributed

View File

@ -1,33 +0,0 @@
#pragma once
#include <mutex>
#include <unordered_map>
#include "communication/rpc/server.hpp"
#include "distributed/coordination.hpp"
namespace distributed {
/** Handles worker registration, retrieval of other workers' endpoints and
 * coordinated shutdown in a distributed memgraph. Worker side. */
class WorkerCoordination final : public Coordination {
using Endpoint = io::network::Endpoint;
public:
WorkerCoordination(communication::rpc::Server &server,
const Endpoint &master_endpoint);
/** Registers the worker with the given endpoint. */
void RegisterWorker(int worker_id, Endpoint endpoint);
/** Starts listening for a remote shutdown command (issued by the master).
* Blocks the calling thread until that has finished. */
void WaitForShutdown();
Endpoint GetEndpoint(int worker_id);
private:
communication::rpc::Server &server_;
mutable std::mutex lock_;
};
} // namespace distributed
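
WaitForShutdown above is a standard condition-variable handshake: the RPC handler flips a flag under the mutex and notifies, while the caller blocks until the flag is set. A minimal standalone sketch of that pattern, with a plain thread standing in for the StopWorkerRpc handler:

```
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

int main() {
  std::mutex mutex;
  std::condition_variable cv;
  bool shutdown = false;

  // Stand-in for the StopWorkerRpc handler registered on the server.
  std::thread stop_handler([&] {
    std::unique_lock<std::mutex> lk(mutex);
    shutdown = true;
    lk.unlock();
    cv.notify_one();
  });

  // The calling thread blocks here until the handler signals shutdown.
  std::unique_lock<std::mutex> lk(mutex);
  cv.wait(lk, [&shutdown] { return shutdown; });
  std::cout << "shutdown received\n";
  stop_handler.join();
}
```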

View File

@ -1,54 +0,0 @@
#include "database/storage.hpp"
#include "distributed/data_manager.hpp"
namespace distributed {
template <typename TRecord>
Cache<TRecord> &DataManager::GetCache(CacheT<TRecord> &collection,
tx::TransactionId tx_id) {
auto access = collection.access();
auto found = access.find(tx_id);
if (found != access.end()) return found->second;
return access
.emplace(
tx_id, std::make_tuple(tx_id),
std::make_tuple(std::ref(db_.storage()), std::ref(data_clients_)))
.first->second;
}
template <>
Cache<Vertex> &DataManager::Elements<Vertex>(tx::TransactionId tx_id) {
return GetCache(vertices_caches_, tx_id);
}
template <>
Cache<Edge> &DataManager::Elements<Edge>(tx::TransactionId tx_id) {
return GetCache(edges_caches_, tx_id);
}
DataManager::DataManager(database::GraphDb &db,
distributed::DataRpcClients &data_clients)
: db_(db), data_clients_(data_clients) {}
void DataManager::ClearCacheForSingleTransaction(tx::TransactionId tx_id) {
Elements<Vertex>(tx_id).ClearCache();
Elements<Edge>(tx_id).ClearCache();
}
void DataManager::ClearTransactionalCache(tx::TransactionId oldest_active) {
auto vertex_access = vertices_caches_.access();
for (auto &kv : vertex_access) {
if (kv.first < oldest_active) {
vertex_access.remove(kv.first);
}
}
auto edge_access = edges_caches_.access();
for (auto &kv : edge_access) {
if (kv.first < oldest_active) {
edge_access.remove(kv.first);
}
}
}
} // namespace distributed
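
ClearTransactionalCache above simply sweeps every per-transaction cache whose transaction id is older than the oldest active transaction. A simplified standalone sketch of the same sweep, with a std::map of strings standing in for the concurrent per-transaction caches:

```
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

using TransactionId = std::uint64_t;

// Drops every per-transaction cache belonging to a transaction older than
// `oldest_active`.
void ClearTransactionalCache(std::map<TransactionId, std::string> &caches,
                             TransactionId oldest_active) {
  for (auto it = caches.begin(); it != caches.end();) {
    if (it->first < oldest_active)
      it = caches.erase(it);
    else
      ++it;
  }
}

int main() {
  std::map<TransactionId, std::string> caches{{1, "a"}, {5, "b"}, {9, "c"}};
  ClearTransactionalCache(caches, 6);
  for (const auto &kv : caches) std::cout << kv.first << "\n";  // Prints 9.
}
```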

View File

@ -1,45 +0,0 @@
#pragma once
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "distributed/cache.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "transactions/type.hpp"
class Vertex;
class Edge;
namespace distributed {
/// Handles remote data caches for edges and vertices, per transaction.
class DataManager {
template <typename TRecord>
using CacheT = ConcurrentMap<tx::TransactionId, Cache<TRecord>>;
// Helper, gets or inserts a data cache for the given transaction.
template <typename TRecord>
Cache<TRecord> &GetCache(CacheT<TRecord> &collection,
tx::TransactionId tx_id);
public:
DataManager(database::GraphDb &db, distributed::DataRpcClients &data_clients);
/// Gets or creates the remote vertex/edge cache for the given transaction.
template <typename TRecord>
Cache<TRecord> &Elements(tx::TransactionId tx_id);
/// Removes all the caches for a single transaction.
void ClearCacheForSingleTransaction(tx::TransactionId tx_id);
/// Clears the cache of local transactions that have expired. The signature of
/// this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
database::GraphDb &db_;
DataRpcClients &data_clients_;
CacheT<Vertex> vertices_caches_;
CacheT<Edge> edges_caches_;
};
} // namespace distributed

View File

@ -1,27 +0,0 @@
#include "distributed/data_rpc_clients.hpp"
#include "distributed/data_rpc_messages.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
template <>
std::unique_ptr<Edge> DataRpcClients::RemoteElement(int worker_id,
tx::TransactionId tx_id,
gid::Gid gid) {
auto response =
clients_.GetClientPool(worker_id).Call<EdgeRpc>(TxGidPair{tx_id, gid});
CHECK(response) << "EdgeRpc failed";
return std::move(response->name_output_);
}
template <>
std::unique_ptr<Vertex> DataRpcClients::RemoteElement(
int worker_id, tx::TransactionId tx_id, gid::Gid gid) {
auto response =
clients_.GetClientPool(worker_id).Call<VertexRpc>(TxGidPair{tx_id, gid});
CHECK(response) << "VertexRpc failed";
return std::move(response->name_output_);
}
} // namespace distributed

View File

@ -1,28 +0,0 @@
#pragma once
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides access to other worker's data.
class DataRpcClients {
public:
DataRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Returns a remote worker's record (vertex/edge) data for the given params.
/// That worker must own the vertex/edge for the given id, and the record
/// must be visible in the given transaction.
template <typename TRecord>
std::unique_ptr<TRecord> RemoteElement(int worker_id,
tx::TransactionId tx_id,
gid::Gid gid);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,68 +0,0 @@
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "storage/edge.hpp"
#include "storage/gid.hpp"
#include "storage/vertex.hpp"
#include "transactions/type.hpp"
namespace distributed {
struct TxGidPair {
tx::TransactionId tx_id;
gid::Gid gid;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &tx_id;
ar &gid;
}
};
#define MAKE_RESPONSE(type, name) \
class type##Res : public communication::rpc::Message { \
public: \
type##Res() {} \
type##Res(const type *name, int worker_id) \
: name_input_(name), worker_id_(worker_id) {} \
\
template <class TArchive> \
void save(TArchive &ar, unsigned int) const { \
ar << boost::serialization::base_object< \
const communication::rpc::Message>(*this); \
Save##type(ar, *name_input_, worker_id_); \
} \
\
template <class TArchive> \
void load(TArchive &ar, unsigned int) { \
ar >> boost::serialization::base_object<communication::rpc::Message>( \
*this); \
auto v = Load##type(ar); \
v.swap(name_output_); \
} \
BOOST_SERIALIZATION_SPLIT_MEMBER() \
\
const type *name_input_; \
int worker_id_; \
std::unique_ptr<type> name_output_; \
};
MAKE_RESPONSE(Vertex, vertex)
MAKE_RESPONSE(Edge, edge)
#undef MAKE_RESPONSE
RPC_SINGLE_MEMBER_MESSAGE(VertexReq, TxGidPair);
RPC_SINGLE_MEMBER_MESSAGE(EdgeReq, TxGidPair);
using VertexRpc = communication::rpc::RequestResponse<VertexReq, VertexRes>;
using EdgeRpc = communication::rpc::RequestResponse<EdgeReq, EdgeRes>;
} // namespace distributed

View File

@ -1,29 +0,0 @@
#include <memory>
#include "data_rpc_server.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/data_rpc_messages.hpp"
namespace distributed {
DataRpcServer::DataRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<VertexRpc>(
[this](const VertexReq &req) {
database::GraphDbAccessor dba(db_, req.member.tx_id);
auto vertex = dba.FindVertex(req.member.gid, false);
CHECK(vertex.GetOld())
<< "Old record must exist when sending vertex by RPC";
return std::make_unique<VertexRes>(vertex.GetOld(), db_.WorkerId());
});
rpc_server_.Register<EdgeRpc>([this](const EdgeReq &req) {
database::GraphDbAccessor dba(db_, req.member.tx_id);
auto edge = dba.FindEdge(req.member.gid, false);
CHECK(edge.GetOld()) << "Old record must exist when sending edge by RPC";
return std::make_unique<EdgeRes>(edge.GetOld(), db_.WorkerId());
});
}
} // namespace distributed

View File

@ -1,17 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
namespace distributed {
/// Serves this worker's data to others.
class DataRpcServer {
public:
DataRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,25 +0,0 @@
#include "distributed/durability_rpc_clients.hpp"
#include "distributed/durability_rpc_messages.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"
namespace distributed {
utils::Future<bool> DurabilityRpcClients::MakeSnapshot(tx::TransactionId tx) {
return utils::make_future(std::async(std::launch::async, [this, tx] {
auto futures = clients_.ExecuteOnWorkers<bool>(
0, [tx](communication::rpc::ClientPool &client_pool) {
auto res = client_pool.Call<MakeSnapshotRpc>(tx);
if (res == nullptr) return false;
return res->member;
});
bool created = true;
for (auto &future : futures) {
created &= future.get();
}
return created;
}));
}
} // namespace distributed
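
MakeSnapshot above fans a request out to every worker and reports success only if all of them succeed. A standalone sketch of that fan-out/AND-reduce pattern, with std::async standing in for the RPC client pool and a hard-coded success result in place of MakeSnapshotRpc:

```
#include <future>
#include <iostream>
#include <vector>

bool MakeSnapshotOnAll(int worker_count) {
  std::vector<std::future<bool>> futures;
  for (int worker = 1; worker <= worker_count; ++worker) {
    futures.push_back(std::async(std::launch::async, [worker] {
      // Stand-in for MakeSnapshotRpc; pretend every worker succeeds.
      return worker > 0;
    }));
  }
  // The snapshot is considered created only if every worker succeeded.
  bool created = true;
  for (auto &future : futures) created &= future.get();
  return created;
}

int main() { std::cout << MakeSnapshotOnAll(3) << "\n"; }  // Prints 1.
```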

View File

@ -1,28 +0,0 @@
#pragma once
#include <future>
#include <mutex>
#include <utility>
#include "distributed/rpc_worker_clients.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Provides the ability to trigger snapshotting on other workers.
class DurabilityRpcClients {
public:
DurabilityRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
// Sends a snapshot request to all workers and returns a future which becomes
// true if every worker successfully completed its snapshot creation, false
// otherwise.
// @param tx - transaction from which to take the db snapshot
utils::Future<bool> MakeSnapshot(tx::TransactionId tx);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,17 +0,0 @@
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "transactions/transaction.hpp"
namespace distributed {
RPC_SINGLE_MEMBER_MESSAGE(MakeSnapshotReq, tx::TransactionId);
RPC_SINGLE_MEMBER_MESSAGE(MakeSnapshotRes, bool);
using MakeSnapshotRpc =
communication::rpc::RequestResponse<MakeSnapshotReq, MakeSnapshotRes>;
} // namespace distributed

View File

@ -1,18 +0,0 @@
#include "distributed/durability_rpc_server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/durability_rpc_messages.hpp"
namespace distributed {
DurabilityRpcServer::DurabilityRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<MakeSnapshotRpc>([this](const MakeSnapshotReq &req) {
database::GraphDbAccessor dba(this->db_, req.member);
return std::make_unique<MakeSnapshotRes>(this->db_.MakeSnapshot(dba));
});
}
} // namespace distributed

View File

@ -1,21 +0,0 @@
#pragma once
#include "communication/rpc/server.hpp"
namespace database {
class GraphDb;
};
namespace distributed {
class DurabilityRpcServer {
public:
DurabilityRpcServer(database::GraphDb &db,
communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,32 +0,0 @@
#pragma once
#include <memory>
#include <string>
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
namespace distributed {
struct IndexLabelPropertyTx {
storage::Label label;
storage::Property property;
tx::TransactionId tx_id;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &label;
ar &property;
ar &tx_id;
}
};
RPC_SINGLE_MEMBER_MESSAGE(BuildIndexReq, IndexLabelPropertyTx);
RPC_NO_MEMBER_MESSAGE(BuildIndexRes);
using BuildIndexRpc =
communication::rpc::RequestResponse<BuildIndexReq, BuildIndexRes>;
} // namespace distributed

View File

@ -1,33 +0,0 @@
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/index_rpc_server.hpp"
namespace distributed {
IndexRpcServer::IndexRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db), rpc_server_(server) {
rpc_server_.Register<BuildIndexRpc>([this](const BuildIndexReq &req) {
database::LabelPropertyIndex::Key key{req.member.label,
req.member.property};
database::GraphDbAccessor dba(db_, req.member.tx_id);
if (db_.storage().label_property_index_.CreateIndex(key) == false) {
// If we are a distributed worker we just have to wait until the index
// (which should already be in the process of being created) is created, so
// that our return guarantees that the index has been built. This assumes
// that no worker thread creating an index will fail.
while (!dba.LabelPropertyIndexExists(key.label_, key.property_)) {
// TODO reconsider this constant, currently rule-of-thumb chosen
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
} else {
dba.PopulateIndex(key);
dba.EnableIndex(key);
}
return std::make_unique<BuildIndexRes>();
});
}
} // namespace distributed
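
The handler above distinguishes two cases: if this worker lost the CreateIndex race it merely polls until the index being built elsewhere becomes visible; otherwise it populates the index itself. A simplified standalone sketch of the wait loop, using an atomic flag in place of LabelPropertyIndexExists:

```
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

int main() {
  std::atomic<bool> index_exists{false};

  // Stand-in for the thread that is actually building the index.
  std::thread builder([&] {
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    index_exists.store(true);
  });

  // Mirrors the polling loop in the BuildIndexRpc handler above.
  while (!index_exists.load())
    std::this_thread::sleep_for(std::chrono::microseconds(100));

  std::cout << "index built\n";
  builder.join();
}
```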

View File

@ -1,22 +0,0 @@
#pragma once
namespace communication::rpc {
class Server;
}
namespace database {
class GraphDb;
}
namespace distributed {
class IndexRpcServer {
public:
IndexRpcServer(database::GraphDb &db, communication::rpc::Server &server);
private:
database::GraphDb &db_;
communication::rpc::Server &rpc_server_;
};
} // namespace distributed

View File

@ -1,39 +0,0 @@
#include "distributed/plan_consumer.hpp"
namespace distributed {
PlanConsumer::PlanConsumer(communication::rpc::Server &server)
: server_(server) {
server_.Register<DistributedPlanRpc>([this](const DispatchPlanReq &req) {
plan_cache_.access().insert(
req.plan_id_,
std::make_unique<PlanPack>(
req.plan_, req.symbol_table_,
std::move(const_cast<DispatchPlanReq &>(req).storage_)));
return std::make_unique<DispatchPlanRes>();
});
server_.Register<RemovePlanRpc>([this](const RemovePlanReq &req) {
plan_cache_.access().remove(req.member);
return std::make_unique<RemovePlanRes>();
});
}
PlanConsumer::PlanPack &PlanConsumer::PlanForId(int64_t plan_id) const {
auto accessor = plan_cache_.access();
auto found = accessor.find(plan_id);
CHECK(found != accessor.end())
<< "Missing plan and symbol table for plan id: " << plan_id;
return *found->second;
}
std::vector<int64_t> PlanConsumer::CachedPlanIds() const {
std::vector<int64_t> plan_ids;
auto access = plan_cache_.access();
plan_ids.reserve(access.size());
for (auto &kv : access) plan_ids.emplace_back(kv.first);
return plan_ids;
}
} // namespace distributed

View File

@ -1,44 +0,0 @@
#pragma once
#include <vector>
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan consumption from master. Creates and holds a local cache of
* plans. Worker side. */
class PlanConsumer {
public:
struct PlanPack {
PlanPack(std::shared_ptr<query::plan::LogicalOperator> plan,
SymbolTable symbol_table, AstTreeStorage storage)
: plan(plan),
symbol_table(std::move(symbol_table)),
storage(std::move(storage)) {}
std::shared_ptr<query::plan::LogicalOperator> plan;
SymbolTable symbol_table;
const AstTreeStorage storage;
};
explicit PlanConsumer(communication::rpc::Server &server);
/** Return cached plan and symbol table for a given plan id. */
PlanPack &PlanForId(int64_t plan_id) const;
/** Return the ids of all the cached plans. For testing. */
std::vector<int64_t> CachedPlanIds() const;
private:
communication::rpc::Server &server_;
// TODO remove unique_ptr. This is just to get it working; emplacing into a
// ConcurrentMap is tricky.
mutable ConcurrentMap<int64_t, std::unique_ptr<PlanPack>> plan_cache_;
};
} // namespace distributed

View File

@ -1,35 +0,0 @@
#include <distributed/plan_dispatcher.hpp>
namespace distributed {
PlanDispatcher::PlanDispatcher(RpcWorkerClients &clients) : clients_(clients) {}
void PlanDispatcher::DispatchPlan(
int64_t plan_id, std::shared_ptr<query::plan::LogicalOperator> plan,
const SymbolTable &symbol_table) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [plan_id, plan,
symbol_table](communication::rpc::ClientPool &client_pool) {
auto result =
client_pool.Call<DistributedPlanRpc>(plan_id, plan, symbol_table);
CHECK(result) << "DistributedPlanRpc failed";
});
for (auto &future : futures) {
future.wait();
}
}
void PlanDispatcher::RemovePlan(int64_t plan_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [plan_id](communication::rpc::ClientPool &client_pool) {
auto result = client_pool.Call<RemovePlanRpc>(plan_id);
CHECK(result) << "Failed to remove plan from worker";
});
for (auto &future : futures) {
future.wait();
}
}
} // namespace distributed

View File

@ -1,30 +0,0 @@
#pragma once
#include "distributed/coordination.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
/** Handles plan dispatching to all workers. Uses MasterCoordination to
* accomplish that. Master side.
*/
class PlanDispatcher {
public:
explicit PlanDispatcher(RpcWorkerClients &clients);
/** Dispatch a plan to all workers and wait for their acknowledgement. */
void DispatchPlan(int64_t plan_id,
std::shared_ptr<query::plan::LogicalOperator> plan,
const SymbolTable &symbol_table);
/** Remove a plan from all workers and wait for their acknowledgement. */
void RemovePlan(int64_t plan_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed

View File

@ -1,63 +0,0 @@
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"
namespace distributed {
using communication::rpc::Message;
using SymbolTable = query::SymbolTable;
using AstTreeStorage = query::AstTreeStorage;
struct DispatchPlanReq : public Message {
DispatchPlanReq() {}
DispatchPlanReq(int64_t plan_id,
std::shared_ptr<query::plan::LogicalOperator> plan,
SymbolTable symbol_table)
: plan_id_(plan_id), plan_(plan), symbol_table_(symbol_table) {}
int64_t plan_id_;
std::shared_ptr<query::plan::LogicalOperator> plan_;
SymbolTable symbol_table_;
AstTreeStorage storage_;
private:
friend class boost::serialization::access;
BOOST_SERIALIZATION_SPLIT_MEMBER();
template <class TArchive>
void save(TArchive &ar, const unsigned int) const {
ar &boost::serialization::base_object<Message>(*this);
ar &plan_id_;
ar &plan_;
ar &symbol_table_;
}
template <class TArchive>
void load(TArchive &ar, const unsigned int) {
ar &boost::serialization::base_object<Message>(*this);
ar &plan_id_;
ar &plan_;
ar &symbol_table_;
storage_ = std::move(
ar.template get_helper<AstTreeStorage>(AstTreeStorage::kHelperId));
}
};
RPC_NO_MEMBER_MESSAGE(DispatchPlanRes);
using DistributedPlanRpc =
communication::rpc::RequestResponse<DispatchPlanReq, DispatchPlanRes>;
RPC_SINGLE_MEMBER_MESSAGE(RemovePlanReq, int64_t);
RPC_NO_MEMBER_MESSAGE(RemovePlanRes);
using RemovePlanRpc =
communication::rpc::RequestResponse<RemovePlanReq, RemovePlanRes>;
} // namespace distributed

View File

@ -1,169 +0,0 @@
#include "distributed/produce_rpc_server.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "transactions/engine_worker.hpp"
namespace distributed {
ProduceRpcServer::OngoingProduce::OngoingProduce(
database::GraphDb &db, tx::TransactionId tx_id,
std::shared_ptr<query::plan::LogicalOperator> op,
query::SymbolTable symbol_table, Parameters parameters,
std::vector<query::Symbol> pull_symbols)
: dba_{db, tx_id},
context_(dba_),
pull_symbols_(std::move(pull_symbols)),
frame_(symbol_table.max_position()),
cursor_(op->MakeCursor(dba_)) {
context_.symbol_table_ = std::move(symbol_table);
context_.parameters_ = std::move(parameters);
}
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::Pull() {
if (!accumulation_.empty()) {
auto results = std::move(accumulation_.back());
accumulation_.pop_back();
for (auto &element : results) {
try {
query::ReconstructTypedValue(element);
} catch (query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
return std::make_pair(std::move(results), cursor_state_);
}
}
return std::make_pair(std::move(results), PullState::CURSOR_IN_PROGRESS);
}
return PullOneFromCursor();
}
PullState ProduceRpcServer::OngoingProduce::Accumulate() {
while (true) {
auto result = PullOneFromCursor();
if (result.second != PullState::CURSOR_IN_PROGRESS)
return result.second;
else
accumulation_.emplace_back(std::move(result.first));
}
}
std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::PullOneFromCursor() {
std::vector<query::TypedValue> results;
// Check if we already exhausted this cursor (or it entered an error
// state). This happens when we accumulate before normal pull.
if (cursor_state_ != PullState::CURSOR_IN_PROGRESS) {
return std::make_pair(results, cursor_state_);
}
try {
if (cursor_->Pull(frame_, context_)) {
results.reserve(pull_symbols_.size());
for (const auto &symbol : pull_symbols_) {
results.emplace_back(std::move(frame_[symbol]));
}
} else {
cursor_state_ = PullState::CURSOR_EXHAUSTED;
}
} catch (const mvcc::SerializationError &) {
cursor_state_ = PullState::SERIALIZATION_ERROR;
} catch (const LockTimeoutException &) {
cursor_state_ = PullState::LOCK_TIMEOUT_ERROR;
} catch (const RecordDeletedError &) {
cursor_state_ = PullState::UPDATE_DELETED_ERROR;
} catch (const query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
} catch (const query::RemoveAttachedVertexException &) {
cursor_state_ = PullState::UNABLE_TO_DELETE_VERTEX_ERROR;
} catch (const query::QueryRuntimeException &) {
cursor_state_ = PullState::QUERY_ERROR;
} catch (const query::HintedAbortError &) {
cursor_state_ = PullState::HINTED_ABORT_ERROR;
}
return std::make_pair(std::move(results), cursor_state_);
}
ProduceRpcServer::ProduceRpcServer(
database::GraphDb &db, tx::Engine &tx_engine,
communication::rpc::Server &server,
const distributed::PlanConsumer &plan_consumer)
: db_(db),
produce_rpc_server_(server),
plan_consumer_(plan_consumer),
tx_engine_(tx_engine) {
produce_rpc_server_.Register<PullRpc>([this](const PullReq &req) {
return std::make_unique<PullRes>(Pull(req));
});
produce_rpc_server_.Register<TransactionCommandAdvancedRpc>(
[this](const TransactionCommandAdvancedReq &req) {
tx_engine_.UpdateCommand(req.member);
db_.data_manager().ClearCacheForSingleTransaction(req.member);
return std::make_unique<TransactionCommandAdvancedRes>();
});
}
void ProduceRpcServer::FinishAndClearOngoingProducePlans(
tx::TransactionId tx_id) {
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
for (auto it = ongoing_produces_.begin(); it != ongoing_produces_.end();) {
if (it->first.first == tx_id) {
it = ongoing_produces_.erase(it);
} else {
++it;
}
}
}
ProduceRpcServer::OngoingProduce &ProduceRpcServer::GetOngoingProduce(
const PullReq &req) {
auto key_pair = std::make_pair(req.tx_id, req.plan_id);
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
auto found = ongoing_produces_.find(key_pair);
if (found != ongoing_produces_.end()) {
return found->second;
}
if (db_.type() == database::GraphDb::Type::DISTRIBUTED_WORKER) {
// On the worker, cache the snapshot to save one RPC.
dynamic_cast<tx::WorkerEngine &>(tx_engine_)
.RunningTransaction(req.tx_id, req.tx_snapshot);
}
auto &plan_pack = plan_consumer_.PlanForId(req.plan_id);
return ongoing_produces_
.emplace(std::piecewise_construct, std::forward_as_tuple(key_pair),
std::forward_as_tuple(db_, req.tx_id, plan_pack.plan,
plan_pack.symbol_table, req.params,
req.symbols))
.first->second;
}
PullResData ProduceRpcServer::Pull(const PullReq &req) {
auto &ongoing_produce = GetOngoingProduce(req);
PullResData result{db_.WorkerId(), req.send_old, req.send_new};
result.state_and_frames.pull_state = PullState::CURSOR_IN_PROGRESS;
if (req.accumulate) {
result.state_and_frames.pull_state = ongoing_produce.Accumulate();
// If an error occurred, we need to return that error.
if (result.state_and_frames.pull_state != PullState::CURSOR_EXHAUSTED) {
return result;
}
}
for (int i = 0; i < req.batch_size; ++i) {
auto pull_result = ongoing_produce.Pull();
result.state_and_frames.pull_state = pull_result.second;
if (pull_result.second != PullState::CURSOR_IN_PROGRESS) break;
result.state_and_frames.frames.emplace_back(std::move(pull_result.first));
}
return result;
}
} // namespace distributed
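
The Pull handler above optionally accumulates first, then fills at most batch_size frames, stopping as soon as the cursor leaves the CURSOR_IN_PROGRESS state. A simplified standalone sketch of that batching loop, with a counter-backed toy cursor standing in for the real query cursor and plain ints standing in for frames:

```
#include <iostream>
#include <utility>
#include <vector>

enum class PullState { CURSOR_EXHAUSTED, CURSOR_IN_PROGRESS };

// Toy cursor that produces the numbers [0, limit).
struct CursorSketch {
  int next = 0;
  int limit = 7;
  std::pair<int, PullState> Pull() {
    if (next >= limit) return {0, PullState::CURSOR_EXHAUSTED};
    return {next++, PullState::CURSOR_IN_PROGRESS};
  }
};

// Mirrors the batching loop in ProduceRpcServer::Pull above: pull up to
// `batch_size` results, stopping early if the cursor leaves the
// in-progress state.
std::vector<int> PullBatch(CursorSketch &cursor, int batch_size,
                           PullState &state) {
  std::vector<int> frames;
  for (int i = 0; i < batch_size; ++i) {
    auto result = cursor.Pull();
    state = result.second;
    if (state != PullState::CURSOR_IN_PROGRESS) break;
    frames.push_back(result.first);
  }
  return frames;
}

int main() {
  CursorSketch cursor;
  PullState state = PullState::CURSOR_IN_PROGRESS;
  while (state == PullState::CURSOR_IN_PROGRESS) {
    auto batch = PullBatch(cursor, 3, state);
    std::cout << "batch of " << batch.size() << "\n";  // 3, 3, then 1.
  }
}
```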

View File

@ -1,89 +0,0 @@
#pragma once
#include <cstdint>
#include <map>
#include <mutex>
#include <utility>
#include <vector>
#include "communication/rpc/server.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/plan_consumer.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/frame.hpp"
#include "query/parameters.hpp"
#include "query/plan/operator.hpp"
#include "query/typed_value.hpp"
#include "transactions/engine.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// Handles the execution of a plan on the worker, requested by the remote
/// master. Assumes that (tx_id, plan_id) uniquely identifies an execution, and
/// that there will never be parallel requests for the same execution thus
/// identified.
class ProduceRpcServer {
/// Encapsulates a Cursor execution in progress. Can be used for pulling a
/// single result from the execution, or pulling all and accumulating the
/// results. Accumulations are used for synchronizing updates in distributed
/// MG (see query::plan::Synchronize).
class OngoingProduce {
public:
OngoingProduce(database::GraphDb &db, tx::TransactionId tx_id,
std::shared_ptr<query::plan::LogicalOperator> op,
query::SymbolTable symbol_table, Parameters parameters,
std::vector<query::Symbol> pull_symbols);
/// Returns a vector of typed values (one for each `pull_symbol`), and an
/// indication of the pull result. The result data is valid only if the
/// returned state is CURSOR_IN_PROGRESS.
std::pair<std::vector<query::TypedValue>, PullState> Pull();
/// Accumulates all the frames pulled from the cursor and returns
/// CURSOR_EXHAUSTED. If an error occurs, an appropriate value is returned.
PullState Accumulate();
private:
database::GraphDbAccessor dba_;
query::Context context_;
std::vector<query::Symbol> pull_symbols_;
query::Frame frame_;
PullState cursor_state_{PullState::CURSOR_IN_PROGRESS};
std::vector<std::vector<query::TypedValue>> accumulation_;
std::unique_ptr<query::plan::Cursor> cursor_;
/// Pulls and returns a single result from the cursor.
std::pair<std::vector<query::TypedValue>, PullState> PullOneFromCursor();
};
public:
ProduceRpcServer(database::GraphDb &db, tx::Engine &tx_engine,
communication::rpc::Server &server,
const distributed::PlanConsumer &plan_consumer);
/// Finish and clear ongoing produces for all plans that are tied to a
/// transaction with tx_id.
void FinishAndClearOngoingProducePlans(tx::TransactionId tx_id);
private:
std::mutex ongoing_produces_lock_;
/// Mapping of (tx id, plan id) to OngoingProduce.
std::map<std::pair<tx::TransactionId, int64_t>, OngoingProduce>
ongoing_produces_;
database::GraphDb &db_;
communication::rpc::Server &produce_rpc_server_;
const distributed::PlanConsumer &plan_consumer_;
tx::Engine &tx_engine_;
/// Gets an ongoing produce for the given pull request. Creates a new one if
/// none currently exists.
OngoingProduce &GetOngoingProduce(const PullReq &req);
/// Performs a single remote pull for the given request.
PullResData Pull(const PullReq &req);
};
} // namespace distributed

View File

@ -1,376 +0,0 @@
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include "boost/serialization/utility.hpp"
#include "boost/serialization/vector.hpp"
#include "communication/rpc/messages.hpp"
#include "distributed/serialization.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "storage/address_types.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
namespace distributed {
/// The default number of results returned via RPC from remote execution to the
/// master that requested it.
constexpr int kDefaultBatchSize = 20;
/// Returned along with a batch of results in the remote-pull RPC. Indicates the
/// state of execution on the worker.
enum class PullState {
CURSOR_EXHAUSTED,
CURSOR_IN_PROGRESS,
SERIALIZATION_ERROR,
LOCK_TIMEOUT_ERROR,
UPDATE_DELETED_ERROR,
RECONSTRUCTION_ERROR,
UNABLE_TO_DELETE_VERTEX_ERROR,
HINTED_ABORT_ERROR,
QUERY_ERROR
};
struct PullReq : public communication::rpc::Message {
PullReq() {}
PullReq(tx::TransactionId tx_id, tx::Snapshot tx_snapshot, int64_t plan_id,
const Parameters &params, std::vector<query::Symbol> symbols,
bool accumulate, int batch_size, bool send_old, bool send_new)
: tx_id(tx_id),
tx_snapshot(tx_snapshot),
plan_id(plan_id),
params(params),
symbols(symbols),
accumulate(accumulate),
batch_size(batch_size),
send_old(send_old),
send_new(send_new) {}
tx::TransactionId tx_id;
tx::Snapshot tx_snapshot;
int64_t plan_id;
Parameters params;
std::vector<query::Symbol> symbols;
bool accumulate;
int batch_size;
// Indicates which of (old, new) records of a graph element should be sent.
bool send_old;
bool send_new;
private:
friend class boost::serialization::access;
template <class TArchive>
void save(TArchive &ar, unsigned int) const {
ar << boost::serialization::base_object<communication::rpc::Message>(*this);
ar << tx_id;
ar << tx_snapshot;
ar << plan_id;
ar << params.size();
for (auto &kv : params) {
ar << kv.first;
// Params never contain a vertex/edge, so save plan TypedValue.
utils::SaveTypedValue(ar, kv.second);
}
ar << symbols;
ar << accumulate;
ar << batch_size;
ar << send_old;
ar << send_new;
}
template <class TArchive>
void load(TArchive &ar, unsigned int) {
ar >> boost::serialization::base_object<communication::rpc::Message>(*this);
ar >> tx_id;
ar >> tx_snapshot;
ar >> plan_id;
size_t params_size;
ar >> params_size;
for (size_t i = 0; i < params_size; ++i) {
int token_pos;
ar >> token_pos;
query::TypedValue param;
// Params never contain a vertex/edge, so load plan TypedValue.
utils::LoadTypedValue(ar, param);
params.Add(token_pos, param);
}
ar >> symbols;
ar >> accumulate;
ar >> batch_size;
ar >> send_old;
ar >> send_new;
}
BOOST_SERIALIZATION_SPLIT_MEMBER()
};
/// The data returned to the end consumer (the Pull operator). Contains
/// only the relevant parts of the response, ready for use.
struct PullData {
PullState pull_state;
std::vector<std::vector<query::TypedValue>> frames;
};
/// The data of the remote pull response. Post-processing is required after
/// deserialization to initialize Vertex/Edge typed values in the frames
/// (possibly encapsulated in lists/maps) to their proper values. This requires
/// a GraphDbAccessor and therefore can't be done as part of deserialization.
///
/// TODO - make it possible to inject a &GraphDbAccessor from the Pull layer
/// all the way into RPC data deserialization to remove the requirement for
/// post-processing. The current approach of holding references to parts of the
/// frame (potentially embedded in lists/maps) is too error-prone.
struct PullResData {
private:
// Temp cache for deserialized vertices and edges. These objects are created
// during deserialization and used immediately afterwards during
// post-processing. Ownership of the vertex/edge data gets transferred to the
// Cache, and the `element_in_frame` reference is used to set the
// appropriate accessor to the appropriate value. Not used on the side that
// generates the response.
template <typename TRecord>
struct GraphElementData {
using AddressT = storage::Address<mvcc::VersionList<TRecord>>;
using PtrT = std::unique_ptr<TRecord>;
GraphElementData(AddressT address, PtrT old_record, PtrT new_record,
query::TypedValue *element_in_frame)
: global_address(address),
old_record(std::move(old_record)),
new_record(std::move(new_record)),
element_in_frame(element_in_frame) {}
storage::Address<mvcc::VersionList<TRecord>> global_address;
std::unique_ptr<TRecord> old_record;
std::unique_ptr<TRecord> new_record;
// The position in frame is optional. This same structure is used for
// deserializing path elements, in which case the vertex/edge in question is
// not directly part of the frame.
query::TypedValue *element_in_frame;
};
// Same like `GraphElementData`, but for paths.
struct PathData {
PathData(query::TypedValue &path_in_frame) : path_in_frame(path_in_frame) {}
std::vector<GraphElementData<Vertex>> vertices;
std::vector<GraphElementData<Edge>> edges;
query::TypedValue &path_in_frame;
};
public:
PullResData() {} // Default constructor required for serialization.
PullResData(int worker_id, bool send_old, bool send_new)
: worker_id(worker_id), send_old(send_old), send_new(send_new) {}
PullResData(const PullResData &) = delete;
PullResData &operator=(const PullResData &) = delete;
PullResData(PullResData &&) = default;
PullResData &operator=(PullResData &&) = default;
PullData state_and_frames;
// Id of the worker on which the response is created, used for serializing
// vertices (converting local to global addresses).
int worker_id;
// Indicates which of (old, new) records of a graph element should be sent.
bool send_old;
bool send_new;
// Temporary caches used between deserialization and post-processing
// (transferring the ownership of this data to a Cache).
std::vector<GraphElementData<Vertex>> vertices;
std::vector<GraphElementData<Edge>> edges;
std::vector<PathData> paths;
/// Saves a typed value that is a vertex/edge/path.
template <class TArchive>
void SaveGraphElement(TArchive &ar, const query::TypedValue &value) const {
// Helper template function for storing a vertex or an edge.
auto save_element = [&ar, this](auto element_accessor) {
ar << element_accessor.GlobalAddress().raw();
// If both old and new are null, we need to reconstruct.
if (!(element_accessor.GetOld() || element_accessor.GetNew())) {
bool result = element_accessor.Reconstruct();
CHECK(result) << "Attempting to serialize an element not visible to "
"current transaction.";
}
auto *old_rec = element_accessor.GetOld();
if (send_old && old_rec) {
ar << true;
distributed::SaveElement(ar, *old_rec, worker_id);
} else {
ar << false;
}
if (send_new) {
// Must call SwitchNew as that will trigger a potentially necessary
// Reconstruct.
element_accessor.SwitchNew();
auto *new_rec = element_accessor.GetNew();
if (new_rec) {
ar << true;
distributed::SaveElement(ar, *new_rec, worker_id);
} else {
ar << false;
}
} else {
ar << false;
}
};
switch (value.type()) {
case query::TypedValue::Type::Vertex:
save_element(value.ValueVertex());
break;
case query::TypedValue::Type::Edge:
save_element(value.ValueEdge());
break;
case query::TypedValue::Type::Path: {
auto &path = value.ValuePath();
ar << path.size();
save_element(path.vertices()[0]);
for (size_t i = 0; i < path.size(); ++i) {
save_element(path.edges()[i]);
save_element(path.vertices()[i + 1]);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << value.type();
}
}
/// Loads a typed value that is a vertex/edge/path. Part of the
/// deserialization process, populates the temporary data caches which are
/// processed later.
template <class TArchive>
void LoadGraphElement(TArchive &ar, query::TypedValue::Type type,
query::TypedValue &value) {
auto load_edge = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadEdge(ar) : nullptr;
};
auto load_vertex = [](auto &ar) {
bool exists;
ar >> exists;
return exists ? LoadVertex(ar) : nullptr;
};
switch (type) {
case query::TypedValue::Type::Vertex: {
storage::VertexAddress::StorageT address;
ar >> address;
vertices.emplace_back(storage::VertexAddress(address), load_vertex(ar),
load_vertex(ar), &value);
break;
}
case query::TypedValue::Type::Edge: {
storage::VertexAddress::StorageT address;
ar >> address;
edges.emplace_back(storage::EdgeAddress(address), load_edge(ar),
load_edge(ar), &value);
break;
}
case query::TypedValue::Type::Path: {
size_t path_size;
ar >> path_size;
paths.emplace_back(value);
auto &path_data = paths.back();
storage::VertexAddress::StorageT vertex_address;
storage::EdgeAddress::StorageT edge_address;
ar >> vertex_address;
path_data.vertices.emplace_back(storage::VertexAddress(vertex_address),
load_vertex(ar), load_vertex(ar),
nullptr);
for (size_t i = 0; i < path_size; ++i) {
ar >> edge_address;
path_data.edges.emplace_back(storage::EdgeAddress(edge_address),
load_edge(ar), load_edge(ar), nullptr);
ar >> vertex_address;
path_data.vertices.emplace_back(
storage::VertexAddress(vertex_address), load_vertex(ar),
load_vertex(ar), nullptr);
}
break;
}
default:
LOG(FATAL) << "Unsupported graph element type: " << type;
}
}
};
class PullRes : public communication::rpc::Message {
public:
PullRes() {}
PullRes(PullResData data) : data(std::move(data)) {}
PullResData data;
private:
friend class boost::serialization::access;
template <class TArchive>
void save(TArchive &ar, unsigned int) const {
ar << boost::serialization::base_object<communication::rpc::Message>(*this);
ar << data.state_and_frames.pull_state;
ar << data.state_and_frames.frames.size();
// We need to indicate how many values are in each frame.
// Assume all the frames have an equal number of elements.
ar << (data.state_and_frames.frames.size() == 0
? 0
: data.state_and_frames.frames[0].size());
for (const auto &frame : data.state_and_frames.frames)
for (const auto &value : frame) {
utils::SaveTypedValue<TArchive>(
ar, value, [this](TArchive &ar, const query::TypedValue &value) {
data.SaveGraphElement(ar, value);
});
}
}
template <class TArchive>
void load(TArchive &ar, unsigned int) {
ar >> boost::serialization::base_object<communication::rpc::Message>(*this);
ar >> data.state_and_frames.pull_state;
size_t frame_count;
ar >> frame_count;
data.state_and_frames.frames.reserve(frame_count);
size_t frame_size;
ar >> frame_size;
for (size_t i = 0; i < frame_count; ++i) {
data.state_and_frames.frames.emplace_back();
auto &current_frame = data.state_and_frames.frames.back();
current_frame.reserve(frame_size);
for (size_t j = 0; j < frame_size; ++j) {
current_frame.emplace_back();
utils::LoadTypedValue<TArchive>(
ar, current_frame.back(),
[this](TArchive &ar, query::TypedValue::TypedValue::Type type,
query::TypedValue &value) {
data.LoadGraphElement(ar, type, value);
});
}
}
}
BOOST_SERIALIZATION_SPLIT_MEMBER()
};
using PullRpc = communication::rpc::RequestResponse<PullReq, PullRes>;
// TODO make a separate RPC for the continuation of an existing pull, as an
// optimization not to have to send the full PullReqData pack every
// time.
RPC_SINGLE_MEMBER_MESSAGE(TransactionCommandAdvancedReq, tx::TransactionId);
RPC_NO_MEMBER_MESSAGE(TransactionCommandAdvancedRes);
using TransactionCommandAdvancedRpc =
communication::rpc::RequestResponse<TransactionCommandAdvancedReq,
TransactionCommandAdvancedRes>;
} // namespace distributed

View File

@ -1,72 +0,0 @@
#include <functional>
#include "distributed/data_manager.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
namespace distributed {
utils::Future<PullData> PullRpcClients::Pull(
database::GraphDbAccessor &dba, int worker_id, int64_t plan_id,
const Parameters &params, const std::vector<query::Symbol> &symbols,
bool accumulate, int batch_size) {
return clients_.ExecuteOnWorker<PullData>(
worker_id, [&dba, plan_id, params, symbols, accumulate,
batch_size](ClientPool &client_pool) {
auto result = client_pool.Call<PullRpc>(
dba.transaction_id(), dba.transaction().snapshot(), plan_id, params,
symbols, accumulate, batch_size, true, true);
auto handle_vertex = [&dba](auto &v) {
dba.db()
.data_manager()
.Elements<Vertex>(dba.transaction_id())
.emplace(v.global_address.gid(), std::move(v.old_record),
std::move(v.new_record));
if (v.element_in_frame) {
VertexAccessor va(v.global_address, dba);
*v.element_in_frame = va;
}
};
auto handle_edge = [&dba](auto &e) {
dba.db()
.data_manager()
.Elements<Edge>(dba.transaction_id())
.emplace(e.global_address.gid(), std::move(e.old_record),
std::move(e.new_record));
if (e.element_in_frame) {
EdgeAccessor ea(e.global_address, dba);
*e.element_in_frame = ea;
}
};
for (auto &v : result->data.vertices) handle_vertex(v);
for (auto &e : result->data.edges) handle_edge(e);
for (auto &p : result->data.paths) {
handle_vertex(p.vertices[0]);
p.path_in_frame =
query::Path(VertexAccessor(p.vertices[0].global_address, dba));
query::Path &path_in_frame = p.path_in_frame.ValuePath();
for (size_t i = 0; i < p.edges.size(); ++i) {
handle_edge(p.edges[i]);
path_in_frame.Expand(EdgeAccessor(p.edges[i].global_address, dba));
handle_vertex(p.vertices[i + 1]);
path_in_frame.Expand(
VertexAccessor(p.vertices[i + 1].global_address, dba));
}
}
return std::move(result->data.state_and_frames);
});
}
std::vector<utils::Future<void>>
PullRpcClients::NotifyAllTransactionCommandAdvanced(
tx::TransactionId tx_id) {
return clients_.ExecuteOnWorkers<void>(0, [tx_id](auto &client) {
auto res = client.template Call<TransactionCommandAdvancedRpc>(tx_id);
CHECK(res) << "TransactionCommandAdvanceRpc failed";
});
}
} // namespace distributed

View File

@ -1,47 +0,0 @@
#pragma once
#include <vector>
#include "database/graph_db_accessor.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Provides means of requesting the execution of a plan on some remote worker
/// and getting the results of that execution. The results are returned in
/// batches and are therefore accompanied by an enum indicating the state of
/// remote execution.
class PullRpcClients {
using ClientPool = communication::rpc::ClientPool;
public:
PullRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
/// Calls a remote pull asynchronously. IMPORTANT: take care not to call this
/// function for the same (tx_id, worker_id, plan_id) before the previous call
/// has ended.
///
/// @todo: it might be cleaner to split Pull into {InitRemoteCursor,
/// Pull, RemoteAccumulate}, but that's a lot of refactoring and more
/// RPC calls.
utils::Future<PullData> Pull(database::GraphDbAccessor &dba, int worker_id,
int64_t plan_id, const Parameters &params,
const std::vector<query::Symbol> &symbols,
bool accumulate,
int batch_size = kDefaultBatchSize);
auto GetWorkerIds() { return clients_.GetWorkerIds(); }
std::vector<utils::Future<void>> NotifyAllTransactionCommandAdvanced(
tx::TransactionId tx_id);
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
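For orientation, a minimal sketch of driving this API follows. It is an illustration, not code from this change: `pull_clients`, `dba`, `plan_id`, `params` and `symbols` are assumed to exist in the calling context, and `utils::Future` is assumed to expose a `std::future`-style `get()`.

```cpp
// Illustration only: pull one batch of results for `plan_id` from worker 1.
auto future = pull_clients.Pull(dba, /* worker_id */ 1, plan_id, params,
                                symbols, /* accumulate */ false);
// Block until the batch arrives; the returned PullData carries the remote
// execution state and the pulled frames.
distributed::PullData result = future.get();
```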

View File

@ -1,133 +0,0 @@
#pragma once
#include <functional>
#include <type_traits>
#include <unordered_map>
#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination.hpp"
#include "distributed/index_rpc_messages.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "storage/types.hpp"
#include "transactions/transaction.hpp"
#include "threading/thread_pool.hpp"
#include "utils/future.hpp"
namespace distributed {
/** A cache of RPC clients (of the given name/kind) per MG distributed worker.
* Thread safe. */
class RpcWorkerClients {
public:
RpcWorkerClients(Coordination &coordination)
: coordination_(coordination),
thread_pool_(std::thread::hardware_concurrency()) {}
RpcWorkerClients(const RpcWorkerClients &) = delete;
RpcWorkerClients(RpcWorkerClients &&) = delete;
RpcWorkerClients &operator=(const RpcWorkerClients &) = delete;
RpcWorkerClients &operator=(RpcWorkerClients &&) = delete;
auto &GetClientPool(int worker_id) {
std::lock_guard<std::mutex> guard{lock_};
auto found = client_pools_.find(worker_id);
if (found != client_pools_.end()) return found->second;
return client_pools_
.emplace(std::piecewise_construct, std::forward_as_tuple(worker_id),
std::forward_as_tuple(coordination_.GetEndpoint(worker_id)))
.first->second;
}
auto GetWorkerIds() { return coordination_.GetWorkerIds(); }
/** Asynchronously executes the given function on the RPC client for the
 * given worker id. Returns a `utils::Future` of the given `execute`
 * function's return type. */
template <typename TResult>
auto ExecuteOnWorker(
int worker_id,
std::function<TResult(communication::rpc::ClientPool &)> execute) {
auto &client_pool = GetClientPool(worker_id);
return thread_pool_.Run(execute, std::ref(client_pool));
}
/** Asynchronously executes the `execute` function on all worker RPC clients
 * except the one whose id is `skip_worker_id`. Returns a vector of futures
 * containing the results of the `execute` function. */
template <typename TResult>
auto ExecuteOnWorkers(
int skip_worker_id,
std::function<TResult(communication::rpc::ClientPool &)> execute) {
std::vector<utils::Future<TResult>> futures;
for (auto &worker_id : coordination_.GetWorkerIds()) {
if (worker_id == skip_worker_id) continue;
futures.emplace_back(std::move(ExecuteOnWorker(worker_id, execute)));
}
return futures;
}
private:
// TODO make Coordination const; its member GetEndpoint must be const too.
Coordination &coordination_;
std::unordered_map<int, communication::rpc::ClientPool> client_pools_;
std::mutex lock_;
threading::ThreadPool thread_pool_;
};
/** Wrapper class around a RPC call to build indices.
*/
class IndexRpcClients {
public:
IndexRpcClients(RpcWorkerClients &clients) : clients_(clients) {}
auto GetBuildIndexFutures(const storage::Label &label,
const storage::Property &property,
tx::TransactionId transaction_id,
int worker_id) {
return clients_.ExecuteOnWorkers<bool>(
worker_id, [label, property, transaction_id](
communication::rpc::ClientPool &client_pool) {
return client_pool.Call<BuildIndexRpc>(
distributed::IndexLabelPropertyTx{
label, property, transaction_id}) != nullptr;
});
}
private:
RpcWorkerClients &clients_;
};
/** Join ongoing produces on all workers.
*
* Sends a RPC request to all workers when a transaction is ending, notifying
* them to end all ongoing produces tied to that transaction.
*/
class OngoingProduceJoinerRpcClients {
public:
OngoingProduceJoinerRpcClients(RpcWorkerClients &clients)
: clients_(clients) {}
void JoinOngoingProduces(tx::TransactionId tx_id) {
auto futures = clients_.ExecuteOnWorkers<void>(
0, [tx_id](communication::rpc::ClientPool &client_pool) {
auto result =
client_pool.Call<distributed::WaitOnTransactionEndRpc>(tx_id);
CHECK(result)
<< "[WaitOnTransactionEndRpc] failed to notify that transaction "
<< tx_id << " ended";
});
// We need to wait for all workers to destroy pending futures to avoid using
// already destroyed (released) transaction objects.
for (auto &future : futures) {
future.wait();
}
}
private:
RpcWorkerClients &clients_;
};
} // namespace distributed
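The futures these helpers return are collected and awaited by the caller. A hedged sketch of consuming the index-build futures follows; the surrounding objects `index_clients`, `label`, `property` and `tx_id` are assumptions, as is a `std::future`-style `get()` on `utils::Future`.

```cpp
// Illustration only: start the index build on all other workers and verify
// that every remote call was answered.
auto futures = index_clients.GetBuildIndexFutures(label, property, tx_id,
                                                  /* worker_id */ 0);
for (auto &future : futures) {
  CHECK(future.get()) << "Remote index build RPC failed";
}
```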

View File

@ -1,183 +0,0 @@
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "storage/address_types.hpp"
#include "storage/edge.hpp"
#include "storage/types.hpp"
#include "storage/vertex.hpp"
#include "utils/serialization.hpp"
namespace distributed {
namespace impl {
// Saves the given address into the given archive. Converts a local address to a
// global one, using the given worker_id.
template <typename TArchive, typename TAddress>
void SaveAddress(TArchive &ar, TAddress address, int worker_id) {
if (address.is_local()) {
ar << address.local()->gid_;
ar << worker_id;
} else {
ar << address.gid();
ar << address.worker_id();
}
};
// Saves the given properties into the given archive.
template <typename TArchive>
void SaveProperties(TArchive &ar, const PropertyValueStore &props) {
ar << props.size();
for (auto &kv : props) {
ar << kv.first.storage();
utils::SaveTypedValue(ar, kv.second);
}
}
} // namespace impl
/**
* Saves the given vertex into the given Boost archive.
*
* @param ar - Archive into which to serialize.
* @param vertex - Getting serialized.
* @param worker_id - ID of the worker this is happening on. Necessary for local
* to global address conversion.
* @tparam TArchive - type of archive.
*/
template <typename TArchive>
void SaveVertex(TArchive &ar, const Vertex &vertex, int worker_id) {
auto save_edges = [&ar, worker_id](auto &edges) {
ar << edges.size();
for (auto &edge_struct : edges) {
impl::SaveAddress(ar, edge_struct.vertex, worker_id);
impl::SaveAddress(ar, edge_struct.edge, worker_id);
ar << edge_struct.edge_type.storage();
}
};
save_edges(vertex.out_);
save_edges(vertex.in_);
ar << vertex.labels_.size();
for (auto &label : vertex.labels_) {
ar << label.storage();
}
impl::SaveProperties(ar, vertex.properties_);
}
/**
* Saves the given edge into the given Boost archive.
*
* @param - Archive into which to serialize.
* @param edge - Getting serialized.
* @param worker_id - ID of the worker this is happening on. Necessary for local
* to global address conversion.
* @tparam TArchive - type of archive.
*/
template <typename TArchive>
void SaveEdge(TArchive &ar, const Edge &edge, int worker_id) {
impl::SaveAddress(ar, edge.from_, worker_id);
impl::SaveAddress(ar, edge.to_, worker_id);
ar << edge.edge_type_.storage();
impl::SaveProperties(ar, edge.properties_);
}
/// Alias for `SaveEdge` allowing for param type resolution.
template <typename TArchive>
void SaveElement(TArchive &ar, const Edge &record, int worker_id) {
return SaveEdge(ar, record, worker_id);
}
/// Alias for `SaveVertex` allowing for param type resolution.
template <typename TArchive>
void SaveElement(TArchive &ar, const Vertex &record, int worker_id) {
return SaveVertex(ar, record, worker_id);
}
namespace impl {
template <typename TArchive>
storage::VertexAddress LoadVertexAddress(TArchive &ar) {
gid::Gid vertex_id;
ar >> vertex_id;
int worker_id;
ar >> worker_id;
return {vertex_id, worker_id};
}
template <typename TArchive>
void LoadProperties(TArchive &ar, PropertyValueStore &store) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Property::StorageT prop;
ar >> prop;
query::TypedValue value;
utils::LoadTypedValue(ar, value);
store.set(storage::Property(prop), static_cast<PropertyValue>(value));
}
}
} // namespace impl
/**
* Loads a Vertex from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Vertex> LoadVertex(TArchive &ar) {
auto vertex = std::make_unique<Vertex>();
auto decode_edges = [&ar](Edges &edges) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
auto vertex_address = impl::LoadVertexAddress(ar);
storage::EdgeType::StorageT edge_type;
gid::Gid edge_id;
ar >> edge_id;
int edge_worker_id;
ar >> edge_worker_id;
ar >> edge_type;
edges.emplace(vertex_address, {edge_id, edge_worker_id},
storage::EdgeType(edge_type));
}
};
decode_edges(vertex->out_);
decode_edges(vertex->in_);
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Label::StorageT label;
ar >> label;
vertex->labels_.emplace_back(label);
}
impl::LoadProperties(ar, vertex->properties_);
return vertex;
}
/**
* Loads an Edge from the given archive and returns it.
*
* @param ar - The archive to load from.
* @tparam TArchive - archive type.
*/
template <typename TArchive>
std::unique_ptr<Edge> LoadEdge(TArchive &ar) {
auto from = impl::LoadVertexAddress(ar);
auto to = impl::LoadVertexAddress(ar);
storage::EdgeType::StorageT edge_type;
ar >> edge_type;
auto edge = std::make_unique<Edge>(from, to, storage::EdgeType{edge_type});
impl::LoadProperties(ar, edge->properties_);
return edge;
}
} // namespace distributed
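Taken together, `SaveVertex` and `LoadVertex` form a round trip over a Boost archive. The sketch below shows that round trip under stated assumptions: binary archives (as used elsewhere in this change) and an include path for the header above, which is not visible in this diff.

```cpp
#include <memory>
#include <sstream>

#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"

#include "storage/vertex.hpp"
// ... plus the header above declaring SaveVertex/LoadVertex (path assumed).

// Illustration only: serialize a vertex as seen by worker 1, then rebuild it.
std::unique_ptr<Vertex> RoundTrip(const Vertex &vertex) {
  std::stringstream stream;
  {
    boost::archive::binary_oarchive out(stream);
    distributed::SaveVertex(out, vertex, /* worker_id */ 1);
  }
  boost::archive::binary_iarchive in(stream);
  return distributed::LoadVertex(in);
}
```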

View File

@ -1,39 +0,0 @@
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "communication/rpc/messages.hpp"
#include "io/network/endpoint.hpp"
#include "transactions/transaction.hpp"
namespace distributed {
using communication::rpc::Message;
using Endpoint = io::network::Endpoint;
struct GcClearedStatusReq : public Message {
GcClearedStatusReq() {}
GcClearedStatusReq(tx::TransactionId local_oldest_active, int worker_id)
: local_oldest_active(local_oldest_active), worker_id(worker_id) {}
tx::TransactionId local_oldest_active;
int worker_id;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<Message>(*this);
ar &local_oldest_active;
ar &worker_id;
}
};
RPC_NO_MEMBER_MESSAGE(GcClearedStatusRes);
using RanLocalGcRpc =
communication::rpc::RequestResponse<GcClearedStatusReq, GcClearedStatusRes>;
} // namespace distributed

View File

@ -1,87 +0,0 @@
#pragma once
#include <functional>
#include <vector>
#include "communication/rpc/server.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/transactional_cache_cleaner_rpc_messages.hpp"
#include "transactions/engine.hpp"
#include "transactions/engine_worker.hpp"
#include "utils/scheduler.hpp"
namespace distributed {
/// Periodically calls `ClearTransactionalCache(oldest_transaction)` on all
/// registered objects.
class TransactionalCacheCleaner {
/// The wait time between two releases of local transaction objects that have
/// expired on the master.
static constexpr std::chrono::seconds kCacheReleasePeriod{1};
public:
template <typename... T>
TransactionalCacheCleaner(tx::Engine &tx_engine, T &... caches)
: tx_engine_(tx_engine) {
Register(caches...);
cache_clearing_scheduler_.Run(
"DistrTxCacheGc", kCacheReleasePeriod,
[this]() { this->Clear(tx_engine_.GlobalGcSnapshot().back()); });
}
protected:
/// Registers the given object for transactional cleaning. The object will
/// periodically get its `ClearTransactionalCache(tx::TransactionId)` method
/// called with the oldest active transaction id. Note that the ONLY guarantee
/// for the call param is that there are no transactions alive that have an id
/// lower than it.
template <typename TCache>
void Register(TCache &cache) {
functions_.emplace_back([&cache](tx::TransactionId oldest_active) {
cache.ClearTransactionalCache(oldest_active);
});
}
private:
template <typename TCache, typename... T>
void Register(TCache &cache, T &... caches) {
Register(cache);
Register(caches...);
}
void Clear(tx::TransactionId oldest_active) {
for (auto &f : functions_) f(oldest_active);
}
tx::Engine &tx_engine_;
std::vector<std::function<void(tx::TransactionId &oldest_active)>> functions_;
utils::Scheduler cache_clearing_scheduler_;
};
/// Registers an RPC server that listens for `WaitOnTransactionEnd` requests
/// that require all ongoing produces to finish. It also periodically calls
/// `ClearTransactionalCache` on all registered objects.
class WorkerTransactionalCacheCleaner : public TransactionalCacheCleaner {
public:
template <class... T>
WorkerTransactionalCacheCleaner(tx::WorkerEngine &tx_engine,
communication::rpc::Server &server,
ProduceRpcServer &produce_server,
T &... caches)
: TransactionalCacheCleaner(tx_engine, caches...),
rpc_server_(server),
produce_server_(produce_server) {
Register(tx_engine);
rpc_server_.Register<WaitOnTransactionEndRpc>(
[this](const WaitOnTransactionEndReq &req) {
produce_server_.FinishAndClearOngoingProducePlans(req.member);
return std::make_unique<WaitOnTransactionEndRes>();
});
}
private:
communication::rpc::Server &rpc_server_;
ProduceRpcServer &produce_server_;
};
} // namespace distributed
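Any object exposing `ClearTransactionalCache(tx::TransactionId)` can be registered with the cleaner. The following is a hypothetical cache wired this way; the cache type and the surrounding function are illustrative, not taken from this change.

```cpp
// Illustration only.
struct MyTxCache {
  // Called periodically with the oldest active transaction id; anything that
  // belongs to an older transaction can safely be dropped.
  void ClearTransactionalCache(tx::TransactionId oldest_active) {
    // ... evict entries whose transaction id is below oldest_active ...
  }
};

// In real code the cache and the cleaner live for the lifetime of the node,
// not of a single function.
void WireCacheCleaner(tx::Engine &tx_engine) {
  MyTxCache cache;
  distributed::TransactionalCacheCleaner cleaner(tx_engine, cache);
  // `cleaner` now periodically invokes cache.ClearTransactionalCache(...).
}
```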

View File

@ -1,13 +0,0 @@
#pragma once
#include "communication/rpc/messages.hpp"
#include "transactions/type.hpp"
namespace distributed {
RPC_SINGLE_MEMBER_MESSAGE(WaitOnTransactionEndReq, tx::TransactionId);
RPC_NO_MEMBER_MESSAGE(WaitOnTransactionEndRes);
using WaitOnTransactionEndRpc =
communication::rpc::RequestResponse<WaitOnTransactionEndReq,
WaitOnTransactionEndRes>;
} // namespace distributed

View File

@ -1,116 +0,0 @@
#include <unordered_map>
#include <vector>
#include "distributed/updates_rpc_clients.hpp"
#include "query/exceptions.hpp"
namespace distributed {
namespace {
void RaiseIfRemoteError(UpdateResult result) {
switch (result) {
case UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
throw query::RemoveAttachedVertexException();
case UpdateResult::SERIALIZATION_ERROR:
throw mvcc::SerializationError();
case UpdateResult::LOCK_TIMEOUT_ERROR:
throw LockTimeoutException(
"Remote LockTimeoutError during edge creation");
case UpdateResult::UPDATE_DELETED_ERROR:
throw RecordDeletedError();
case UpdateResult::DONE:
break;
}
}
}
UpdateResult UpdatesRpcClients::Update(int worker_id,
const database::StateDelta &delta) {
auto res = worker_clients_.GetClientPool(worker_id).Call<UpdateRpc>(delta);
CHECK(res) << "UpdateRpc failed on worker: " << worker_id;
return res->member;
}
gid::Gid UpdatesRpcClients::CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
auto res = worker_clients_.GetClientPool(worker_id).Call<CreateVertexRpc>(
CreateVertexReqData{tx_id, labels, properties});
CHECK(res) << "CreateVertexRpc failed on worker: " << worker_id;
CHECK(res->member.result == UpdateResult::DONE)
<< "Remote Vertex creation result not UpdateResult::DONE";
return res->member.gid;
}
storage::EdgeAddress UpdatesRpcClients::CreateEdge(
tx::TransactionId tx_id, VertexAccessor &from, VertexAccessor &to,
storage::EdgeType edge_type) {
CHECK(from.address().is_remote()) << "In CreateEdge `from` must be remote";
int from_worker = from.address().worker_id();
auto res = worker_clients_.GetClientPool(from_worker)
.Call<CreateEdgeRpc>(CreateEdgeReqData{
from.gid(), to.GlobalAddress(), edge_type, tx_id});
CHECK(res) << "CreateEdge RPC failed on worker: " << from_worker;
RaiseIfRemoteError(res->member.result);
return {res->member.gid, from_worker};
}
void UpdatesRpcClients::AddInEdge(tx::TransactionId tx_id,
VertexAccessor &from,
storage::EdgeAddress edge_address,
VertexAccessor &to,
storage::EdgeType edge_type) {
CHECK(to.address().is_remote() && edge_address.is_remote() &&
(from.GlobalAddress().worker_id() != to.address().worker_id()))
<< "AddInEdge should only be called when `to` is remote and "
"`from` is not on the same worker as `to`.";
auto worker_id = to.GlobalAddress().worker_id();
auto res = worker_clients_.GetClientPool(worker_id).Call<AddInEdgeRpc>(
AddInEdgeReqData{from.GlobalAddress(), edge_address, to.gid(), edge_type,
tx_id});
CHECK(res) << "AddInEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveVertex(int worker_id, tx::TransactionId tx_id,
gid::Gid gid, bool check_empty) {
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveVertexRpc>(
RemoveVertexReqData{gid, tx_id, check_empty});
CHECK(res) << "RemoveVertex RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid edge_gid, gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr) {
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveEdgeRpc>(
RemoveEdgeData{tx_id, edge_gid, vertex_from_id, vertex_to_addr});
CHECK(res) << "RemoveEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
void UpdatesRpcClients::RemoveInEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid vertex_id,
storage::EdgeAddress edge_address) {
CHECK(edge_address.is_remote()) << "RemoveInEdge edge_address is local.";
auto res = worker_clients_.GetClientPool(worker_id).Call<RemoveInEdgeRpc>(
RemoveInEdgeData{tx_id, vertex_id, edge_address});
CHECK(res) << "RemoveInEdge RPC failed on worker: " << worker_id;
RaiseIfRemoteError(res->member);
}
std::vector<utils::Future<UpdateResult>> UpdatesRpcClients::UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id) {
return worker_clients_.ExecuteOnWorkers<UpdateResult>(
skip_worker_id, [tx_id](auto &client) {
auto res = client.template Call<UpdateApplyRpc>(tx_id);
CHECK(res) << "UpdateApplyRpc failed";
return res->member;
});
}
} // namespace distributed
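The split between `CreateEdge` and `AddInEdge` mirrors data placement: the RPC sent to the owner of `from` fixes up the `to` side only when both endpoints live on the same worker. Below is a hedged sketch of the caller-side decision, covering only the case where both endpoints are remote; the variable names are assumptions.

```cpp
// Illustration only: create an edge whose `from` endpoint is remote. When `to`
// lives on a different worker than `from`, its incoming-edge list must be
// patched with a second RPC.
auto edge_address = updates_clients.CreateEdge(tx_id, from, to, edge_type);
if (from.GlobalAddress().worker_id() != to.GlobalAddress().worker_id()) {
  updates_clients.AddInEdge(tx_id, from, edge_address, to, edge_type);
}
```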

View File

@ -1,76 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "database/state_delta.hpp"
#include "distributed/rpc_worker_clients.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"
namespace distributed {
/// Exposes the functionality to send updates to other workers (those that own
/// the graph element we are updating). Also makes it possible to tell a worker
/// to apply its accumulated deferred updates, or to discard them.
class UpdatesRpcClients {
public:
explicit UpdatesRpcClients(RpcWorkerClients &clients)
: worker_clients_(clients) {}
/// Sends an update delta to the given worker.
UpdateResult Update(int worker_id, const database::StateDelta &delta);
/// Creates a vertex on the given worker and returns its id.
gid::Gid CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates an edge on the given worker and returns its address. If the `to`
/// vertex is on the same worker as `from`, then all remote CRUD will be
/// handled by a call to this function. Otherwise a separate call to
/// `AddInEdge` might be necessary. Throws all the exceptions that can
/// occur remotely as a result of updating a vertex.
storage::EdgeAddress CreateEdge(tx::TransactionId tx_id,
VertexAccessor &from, VertexAccessor &to,
storage::EdgeType edge_type);
/// Adds the edge with the given address to the `to` vertex as an incoming
/// edge. Only used when `to` is remote and not on the same worker as `from`.
void AddInEdge(tx::TransactionId tx_id, VertexAccessor &from,
storage::EdgeAddress edge_address, VertexAccessor &to,
storage::EdgeType edge_type);
/// Removes a vertex from the other worker.
void RemoveVertex(int worker_id, tx::TransactionId tx_id, gid::Gid gid,
bool check_empty);
/// Removes an edge on another worker. This also handles the `from` vertex's
/// outgoing edge, as that vertex is on the same worker as the edge. If the
/// `to` vertex is on the same worker, then that side is handled too by the
/// single RPC call, otherwise a separate call has to be made to
/// RemoveInEdge.
void RemoveEdge(tx::TransactionId tx_id, int worker_id, gid::Gid edge_gid,
gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr);
void RemoveInEdge(tx::TransactionId tx_id, int worker_id,
gid::Gid vertex_id, storage::EdgeAddress edge_address);
/// Calls for all the workers (except the given one) to apply their updates
/// and returns the future results.
std::vector<utils::Future<UpdateResult>> UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id);
private:
RpcWorkerClients &worker_clients_;
};
} // namespace distributed
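At commit time the deferred updates accumulated on the workers are applied in one sweep. A minimal sketch of that step follows, assuming `utils::Future` exposes a `std::future`-style `get()` and that the caller translates non-`DONE` results into its own error handling.

```cpp
// Illustration only: ask every other worker to apply the deltas deferred for
// this transaction and fail if any of them reports an error.
auto futures = updates_clients.UpdateApplyAll(/* skip_worker_id */ 0, tx_id);
for (auto &future : futures) {
  if (future.get() != distributed::UpdateResult::DONE)
    throw query::QueryRuntimeException("Remote update application failed");
}
```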

View File

@ -1,203 +0,0 @@
#pragma once
#include <unordered_map>
#include "boost/serialization/vector.hpp"
#include "communication/rpc/messages.hpp"
#include "database/state_delta.hpp"
#include "storage/address_types.hpp"
#include "storage/gid.hpp"
#include "transactions/type.hpp"
#include "utils/serialization.hpp"
namespace distributed {
/// The result of sending or applying a deferred update to a worker.
enum class UpdateResult {
DONE,
SERIALIZATION_ERROR,
LOCK_TIMEOUT_ERROR,
UPDATE_DELETED_ERROR,
UNABLE_TO_DELETE_VERTEX_ERROR
};
RPC_SINGLE_MEMBER_MESSAGE(UpdateReq, database::StateDelta);
RPC_SINGLE_MEMBER_MESSAGE(UpdateRes, UpdateResult);
using UpdateRpc = communication::rpc::RequestResponse<UpdateReq, UpdateRes>;
RPC_SINGLE_MEMBER_MESSAGE(UpdateApplyReq, tx::TransactionId);
RPC_SINGLE_MEMBER_MESSAGE(UpdateApplyRes, UpdateResult);
using UpdateApplyRpc =
communication::rpc::RequestResponse<UpdateApplyReq, UpdateApplyRes>;
struct CreateResult {
UpdateResult result;
// Only valid if creation was successful.
gid::Gid gid;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &result;
ar &gid;
}
};
struct CreateVertexReqData {
tx::TransactionId tx_id;
std::vector<storage::Label> labels;
std::unordered_map<storage::Property, query::TypedValue> properties;
private:
friend class boost::serialization::access;
template <class TArchive>
void save(TArchive &ar, unsigned int) const {
ar << tx_id;
ar << labels;
ar << properties.size();
for (auto &kv : properties) {
ar << kv.first;
utils::SaveTypedValue(ar, kv.second);
}
}
template <class TArchive>
void load(TArchive &ar, unsigned int) {
ar >> tx_id;
ar >> labels;
size_t props_size;
ar >> props_size;
for (size_t i = 0; i < props_size; ++i) {
storage::Property p;
ar >> p;
query::TypedValue tv;
utils::LoadTypedValue(ar, tv);
properties.emplace(p, std::move(tv));
}
}
BOOST_SERIALIZATION_SPLIT_MEMBER()
};
RPC_SINGLE_MEMBER_MESSAGE(CreateVertexReq, CreateVertexReqData);
RPC_SINGLE_MEMBER_MESSAGE(CreateVertexRes, CreateResult);
using CreateVertexRpc =
communication::rpc::RequestResponse<CreateVertexReq, CreateVertexRes>;
struct CreateEdgeReqData {
gid::Gid from;
storage::VertexAddress to;
storage::EdgeType edge_type;
tx::TransactionId tx_id;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &from;
ar &to;
ar &edge_type;
ar &tx_id;
}
};
RPC_SINGLE_MEMBER_MESSAGE(CreateEdgeReq, CreateEdgeReqData);
RPC_SINGLE_MEMBER_MESSAGE(CreateEdgeRes, CreateResult);
using CreateEdgeRpc =
communication::rpc::RequestResponse<CreateEdgeReq, CreateEdgeRes>;
struct AddInEdgeReqData {
storage::VertexAddress from;
storage::EdgeAddress edge_address;
gid::Gid to;
storage::EdgeType edge_type;
tx::TransactionId tx_id;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &from;
ar &edge_address;
ar &to;
ar &edge_type;
ar &tx_id;
}
};
RPC_SINGLE_MEMBER_MESSAGE(AddInEdgeReq, AddInEdgeReqData);
RPC_SINGLE_MEMBER_MESSAGE(AddInEdgeRes, UpdateResult);
using AddInEdgeRpc =
communication::rpc::RequestResponse<AddInEdgeReq, AddInEdgeRes>;
struct RemoveVertexReqData {
gid::Gid gid;
tx::TransactionId tx_id;
bool check_empty;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &gid;
ar &tx_id;
ar &check_empty;
}
};
RPC_SINGLE_MEMBER_MESSAGE(RemoveVertexReq, RemoveVertexReqData);
RPC_SINGLE_MEMBER_MESSAGE(RemoveVertexRes, UpdateResult);
using RemoveVertexRpc =
communication::rpc::RequestResponse<RemoveVertexReq, RemoveVertexRes>;
struct RemoveEdgeData {
tx::TransactionId tx_id;
gid::Gid edge_id;
gid::Gid vertex_from_id;
storage::VertexAddress vertex_to_address;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &tx_id;
ar &edge_id;
ar &vertex_from_id;
ar &vertex_to_address;
}
};
RPC_SINGLE_MEMBER_MESSAGE(RemoveEdgeReq, RemoveEdgeData);
RPC_SINGLE_MEMBER_MESSAGE(RemoveEdgeRes, UpdateResult);
using RemoveEdgeRpc =
communication::rpc::RequestResponse<RemoveEdgeReq, RemoveEdgeRes>;
struct RemoveInEdgeData {
tx::TransactionId tx_id;
gid::Gid vertex;
storage::EdgeAddress edge_address;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &tx_id;
ar &vertex;
ar &edge_address;
}
};
RPC_SINGLE_MEMBER_MESSAGE(RemoveInEdgeReq, RemoveInEdgeData);
RPC_SINGLE_MEMBER_MESSAGE(RemoveInEdgeRes, UpdateResult);
using RemoveInEdgeRpc =
communication::rpc::RequestResponse<RemoveInEdgeReq, RemoveInEdgeRes>;
} // namespace distributed
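The `RPC_SINGLE_MEMBER_MESSAGE` and `RPC_NO_MEMBER_MESSAGE` macros are not expanded anywhere in this diff. Judging from how the generated types are used (`req.member`, `res->member`) and from the hand-written `GcClearedStatusReq` earlier in this change, a single-member message written out by hand would look roughly like the sketch below; this is an assumption about the macro's shape, not its actual expansion.

```cpp
// Illustration only: a hand-written stand-in for
// RPC_SINGLE_MEMBER_MESSAGE(RemoveVertexRes, UpdateResult).
struct RemoveVertexResExpanded : public communication::rpc::Message {
  RemoveVertexResExpanded() {}
  explicit RemoveVertexResExpanded(UpdateResult member) : member(member) {}

  UpdateResult member;

 private:
  friend class boost::serialization::access;
  template <class TArchive>
  void serialize(TArchive &ar, unsigned int) {
    ar &boost::serialization::base_object<communication::rpc::Message>(*this);
    ar &member;
  }
};
```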

View File

@ -1,349 +0,0 @@
#include <utility>
#include "glog/logging.h"
#include "distributed/updates_rpc_server.hpp"
#include "threading/sync/lock_timeout_exception.hpp"
namespace distributed {
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Emplace(
const database::StateDelta &delta) {
auto gid = std::is_same<TRecordAccessor, VertexAccessor>::value
? delta.vertex_id
: delta.edge_id;
std::lock_guard<SpinLock> guard{lock_};
auto found = deltas_.find(gid);
if (found == deltas_.end()) {
found =
deltas_
.emplace(gid, std::make_pair(FindAccessor(gid),
std::vector<database::StateDelta>{}))
.first;
}
found->second.second.emplace_back(delta);
// TODO call `RecordAccessor::update` to force serialization errors to
// fail-fast (as opposed to when all the deltas get applied).
//
// This is problematic because `VersionList::update` needs to become
// thread-safe within the same transaction. Note that the concurrency is
// possible both between the owner worker interpretation thread and an RPC
// thread (current thread), as well as multiple RPC threads if this
// object's lock is released (perhaps desirable).
//
// A potential solution *might* be that `LockStore::Lock` returns a `bool`
// indicating if the caller was the one obtaining the lock (not the same
// as lock already being held by the same transaction).
//
// Another thing that needs to be done (if we do this) is ensuring that
// `LockStore::Take` is thread-safe when called in parallel in the same
// transaction. Currently it's thread-safe only when called in parallel
// from different transactions (only one manages to take the RecordLock).
//
// Deferring the implementation of this as it's tricky, and essentially an
// optimization.
//
// try {
// found->second.first.update();
// } catch (const mvcc::SerializationError &) {
// return UpdateResult::SERIALIZATION_ERROR;
// } catch (const RecordDeletedError &) {
// return UpdateResult::UPDATE_DELETED_ERROR;
// } catch (const LockTimeoutException &) {
// return UpdateResult::LOCK_TIMEOUT_ERROR;
// }
return UpdateResult::DONE;
}
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateVertex(
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties) {
auto result = db_accessor_.InsertVertex();
for (auto &label : labels) result.add_label(label);
for (auto &kv : properties) result.PropsSet(kv.first, kv.second);
std::lock_guard<SpinLock> guard{lock_};
deltas_.emplace(result.gid(),
std::make_pair(result, std::vector<database::StateDelta>{}));
return result.gid();
}
template <typename TRecordAccessor>
gid::Gid UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateEdge(
gid::Gid from, storage::VertexAddress to, storage::EdgeType edge_type) {
auto &db = db_accessor_.db();
auto edge = db_accessor_.InsertOnlyEdge(
{from, db.WorkerId()}, db.storage().LocalizedAddressIfPossible(to),
edge_type);
std::lock_guard<SpinLock> guard{lock_};
deltas_.emplace(edge.gid(),
std::make_pair(edge, std::vector<database::StateDelta>{}));
return edge.gid();
}
template <typename TRecordAccessor>
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Apply() {
std::lock_guard<SpinLock> guard{lock_};
for (auto &kv : deltas_) {
auto &record_accessor = kv.second.first;
// We need to reconstruct the record as in the meantime some local
// update might have updated it.
record_accessor.Reconstruct();
for (database::StateDelta &delta : kv.second.second) {
try {
auto &dba = db_accessor_;
switch (delta.type) {
case database::StateDelta::Type::TRANSACTION_BEGIN:
case database::StateDelta::Type::TRANSACTION_COMMIT:
case database::StateDelta::Type::TRANSACTION_ABORT:
case database::StateDelta::Type::CREATE_VERTEX:
case database::StateDelta::Type::CREATE_EDGE:
case database::StateDelta::Type::BUILD_INDEX:
LOG(FATAL) << "Can only apply record update deltas for remote "
"graph element";
case database::StateDelta::Type::REMOVE_VERTEX:
if (!db_accessor().RemoveVertex(
reinterpret_cast<VertexAccessor &>(record_accessor),
delta.check_empty)) {
return UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR;
}
break;
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
case database::StateDelta::Type::SET_PROPERTY_EDGE:
record_accessor.PropsSet(delta.property, delta.value);
break;
case database::StateDelta::Type::ADD_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.add_label(delta.label);
break;
case database::StateDelta::Type::REMOVE_LABEL:
reinterpret_cast<VertexAccessor &>(record_accessor)
.remove_label(delta.label);
break;
case database::StateDelta::Type::ADD_OUT_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.out_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_to_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::ADD_IN_EDGE:
reinterpret_cast<Vertex &>(record_accessor.update())
.in_.emplace(dba.db().storage().LocalizedAddressIfPossible(
delta.vertex_from_address),
dba.db().storage().LocalizedAddressIfPossible(
delta.edge_address),
delta.edge_type);
dba.wal().Emplace(delta);
break;
case database::StateDelta::Type::REMOVE_EDGE:
// We only remove the edge as a result of this StateDelta,
// because the removal of the edge from the vertex's in/out edge
// sets is performed by the REMOVE_[IN/OUT]_EDGE deltas.
db_accessor_.RemoveEdge(
reinterpret_cast<EdgeAccessor &>(record_accessor), false,
false);
break;
case database::StateDelta::Type::REMOVE_OUT_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveOutEdge(delta.edge_address);
break;
case database::StateDelta::Type::REMOVE_IN_EDGE:
reinterpret_cast<VertexAccessor &>(record_accessor)
.RemoveInEdge(delta.edge_address);
break;
}
} catch (const mvcc::SerializationError &) {
return UpdateResult::SERIALIZATION_ERROR;
} catch (const RecordDeletedError &) {
return UpdateResult::UPDATE_DELETED_ERROR;
} catch (const LockTimeoutException &) {
return UpdateResult::LOCK_TIMEOUT_ERROR;
}
}
}
return UpdateResult::DONE;
}
UpdatesRpcServer::UpdatesRpcServer(database::GraphDb &db,
communication::rpc::Server &server)
: db_(db) {
server.Register<UpdateRpc>([this](const UpdateReq &req) {
using DeltaType = database::StateDelta::Type;
auto &delta = req.member;
switch (delta.type) {
case DeltaType::SET_PROPERTY_VERTEX:
case DeltaType::ADD_LABEL:
case DeltaType::REMOVE_LABEL:
case database::StateDelta::Type::REMOVE_OUT_EDGE:
case database::StateDelta::Type::REMOVE_IN_EDGE:
return std::make_unique<UpdateRes>(
GetUpdates(vertex_updates_, delta.transaction_id).Emplace(delta));
case DeltaType::SET_PROPERTY_EDGE:
return std::make_unique<UpdateRes>(
GetUpdates(edge_updates_, delta.transaction_id).Emplace(delta));
default:
LOG(FATAL) << "Can't perform a remote update with delta type: "
<< static_cast<int>(req.member.type);
}
});
server.Register<UpdateApplyRpc>([this](const UpdateApplyReq &req) {
return std::make_unique<UpdateApplyRes>(Apply(req.member));
});
server.Register<CreateVertexRpc>([this](const CreateVertexReq &req) {
gid::Gid gid = GetUpdates(vertex_updates_, req.member.tx_id)
.CreateVertex(req.member.labels, req.member.properties);
return std::make_unique<CreateVertexRes>(
CreateResult{UpdateResult::DONE, gid});
});
server.Register<CreateEdgeRpc>([this](const CreateEdgeReq &req) {
auto data = req.member;
auto creation_result = CreateEdge(data);
// If `from` and `to` are both on this worker, we handle it in this
// RPC call. Do it only if CreateEdge succeeded.
if (creation_result.result == UpdateResult::DONE &&
data.to.worker_id() == db_.WorkerId()) {
auto to_delta = database::StateDelta::AddInEdge(
data.tx_id, data.to.gid(), {data.from, db_.WorkerId()},
{creation_result.gid, db_.WorkerId()}, data.edge_type);
creation_result.result =
GetUpdates(vertex_updates_, data.tx_id).Emplace(to_delta);
}
return std::make_unique<CreateEdgeRes>(creation_result);
});
server.Register<AddInEdgeRpc>([this](const AddInEdgeReq &req) {
auto to_delta = database::StateDelta::AddInEdge(
req.member.tx_id, req.member.to, req.member.from,
req.member.edge_address, req.member.edge_type);
auto result =
GetUpdates(vertex_updates_, req.member.tx_id).Emplace(to_delta);
return std::make_unique<AddInEdgeRes>(result);
});
server.Register<RemoveVertexRpc>([this](const RemoveVertexReq &req) {
auto to_delta = database::StateDelta::RemoveVertex(
req.member.tx_id, req.member.gid, req.member.check_empty);
auto result =
GetUpdates(vertex_updates_, req.member.tx_id).Emplace(to_delta);
return std::make_unique<RemoveVertexRes>(result);
});
server.Register<RemoveEdgeRpc>([this](const RemoveEdgeReq &req) {
return std::make_unique<RemoveEdgeRes>(RemoveEdge(req.member));
});
server.Register<RemoveInEdgeRpc>([this](const RemoveInEdgeReq &req) {
auto data = req.member;
return std::make_unique<RemoveInEdgeRes>(
GetUpdates(vertex_updates_, data.tx_id)
.Emplace(database::StateDelta::RemoveInEdge(data.tx_id, data.vertex,
data.edge_address)));
});
}
UpdateResult UpdatesRpcServer::Apply(tx::TransactionId tx_id) {
auto apply = [tx_id](auto &collection) {
auto access = collection.access();
auto found = access.find(tx_id);
if (found == access.end()) {
return UpdateResult::DONE;
}
auto result = found->second.Apply();
access.remove(tx_id);
return result;
};
auto vertex_result = apply(vertex_updates_);
auto edge_result = apply(edge_updates_);
if (vertex_result != UpdateResult::DONE) return vertex_result;
if (edge_result != UpdateResult::DONE) return edge_result;
return UpdateResult::DONE;
}
void UpdatesRpcServer::ClearTransactionalCache(
tx::TransactionId oldest_active) {
auto vertex_access = vertex_updates_.access();
for (auto &kv : vertex_access) {
if (kv.first < oldest_active) {
vertex_access.remove(kv.first);
}
}
auto edge_access = edge_updates_.access();
for (auto &kv : edge_access) {
if (kv.first < oldest_active) {
edge_access.remove(kv.first);
}
}
}
// Gets/creates the TransactionUpdates for the given transaction.
template <typename TAccessor>
UpdatesRpcServer::TransactionUpdates<TAccessor> &UpdatesRpcServer::GetUpdates(
MapT<TAccessor> &updates, tx::TransactionId tx_id) {
return updates.access()
.emplace(tx_id, std::make_tuple(tx_id),
std::make_tuple(std::ref(db_), tx_id))
.first->second;
}
CreateResult UpdatesRpcServer::CreateEdge(const CreateEdgeReqData &req) {
auto gid = GetUpdates(edge_updates_, req.tx_id)
.CreateEdge(req.from, req.to, req.edge_type);
auto from_delta = database::StateDelta::AddOutEdge(
req.tx_id, req.from, req.to, {gid, db_.WorkerId()}, req.edge_type);
auto result = GetUpdates(vertex_updates_, req.tx_id).Emplace(from_delta);
return {result, gid};
}
UpdateResult UpdatesRpcServer::RemoveEdge(const RemoveEdgeData &data) {
// Edge removal.
auto deletion_delta =
database::StateDelta::RemoveEdge(data.tx_id, data.edge_id);
auto result = GetUpdates(edge_updates_, data.tx_id).Emplace(deletion_delta);
// Out-edge removal; this is always local.
if (result == UpdateResult::DONE) {
auto remove_out_delta = database::StateDelta::RemoveOutEdge(
data.tx_id, data.vertex_from_id, {data.edge_id, db_.WorkerId()});
result = GetUpdates(vertex_updates_, data.tx_id).Emplace(remove_out_delta);
}
// In-edge removal; this might not be local.
if (result == UpdateResult::DONE &&
data.vertex_to_address.worker_id() == db_.WorkerId()) {
auto remove_in_delta = database::StateDelta::RemoveInEdge(
data.tx_id, data.vertex_to_address.gid(),
{data.edge_id, db_.WorkerId()});
result = GetUpdates(vertex_updates_, data.tx_id).Emplace(remove_in_delta);
}
return result;
}
template <>
VertexAccessor UpdatesRpcServer::TransactionUpdates<
VertexAccessor>::FindAccessor(gid::Gid gid) {
return db_accessor_.FindVertex(gid, false);
}
template <>
EdgeAccessor UpdatesRpcServer::TransactionUpdates<EdgeAccessor>::FindAccessor(
gid::Gid gid) {
return db_accessor_.FindEdge(gid, false);
}
} // namespace distributed

View File

@ -1,104 +0,0 @@
#pragma once
#include <unordered_map>
#include <vector>
#include "glog/logging.h"
#include "communication/rpc/server.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "database/state_delta.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "query/typed_value.hpp"
#include "storage/edge_accessor.hpp"
#include "storage/gid.hpp"
#include "storage/types.hpp"
#include "storage/vertex_accessor.hpp"
#include "threading/sync/spinlock.hpp"
#include "transactions/type.hpp"
namespace distributed {
/// An RPC server that accepts and holds deferred updates (deltas) until it's
/// told to apply or discard them. The updates are organized and applied per
/// transaction in this single updates server.
///
/// Attempts to get serialization and update-after-delete errors to happen as
/// soon as possible during query execution (fail fast).
class UpdatesRpcServer {
// Remote updates for one transaction.
template <typename TRecordAccessor>
class TransactionUpdates {
public:
TransactionUpdates(database::GraphDb &db, tx::TransactionId tx_id)
: db_accessor_(db, tx_id) {}
/// Adds a delta and returns the result. Does not modify the state (data) of
/// the graph element the update is for, but calls the `update` method to
/// fail-fast on serialization and update-after-delete errors.
UpdateResult Emplace(const database::StateDelta &delta);
/// Creates a new vertex and returns its gid.
gid::Gid CreateVertex(
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, query::TypedValue>
&properties);
/// Creates a new edge and returns its gid. Does not update vertices at the
/// end of the edge.
gid::Gid CreateEdge(gid::Gid from, storage::VertexAddress to,
storage::EdgeType edge_type);
/// Applies all the deltas on the record.
UpdateResult Apply();
auto &db_accessor() { return db_accessor_; }
private:
database::GraphDbAccessor db_accessor_;
std::unordered_map<
gid::Gid, std::pair<TRecordAccessor, std::vector<database::StateDelta>>>
deltas_;
// Multiple workers might be sending remote updates concurrently.
SpinLock lock_;
// Helper method specialized for [Vertex|Edge]Accessor.
TRecordAccessor FindAccessor(gid::Gid gid);
};
public:
UpdatesRpcServer(database::GraphDb &db, communication::rpc::Server &server);
/// Applies all existing updates for the given transaction ID. If there are
/// no updates for that transaction, nothing happens. Clears the updates cache
/// after applying them, regardless of the result.
UpdateResult Apply(tx::TransactionId tx_id);
/// Clears the cache of local transactions that are completed. The signature
/// of this method is dictated by `distributed::TransactionalCacheCleaner`.
void ClearTransactionalCache(tx::TransactionId oldest_active);
private:
database::GraphDb &db_;
template <typename TAccessor>
using MapT =
ConcurrentMap<tx::TransactionId, TransactionUpdates<TAccessor>>;
MapT<VertexAccessor> vertex_updates_;
MapT<EdgeAccessor> edge_updates_;
// Gets/creates the TransactionUpdates for the given transaction.
template <typename TAccessor>
TransactionUpdates<TAccessor> &GetUpdates(MapT<TAccessor> &updates,
tx::TransactionId tx_id);
// Performs edge creation for the given request.
CreateResult CreateEdge(const CreateEdgeReqData &req);
// Performs edge removal for the given request.
UpdateResult RemoveEdge(const RemoveEdgeData &data);
};
} // namespace distributed

View File

@ -27,15 +27,6 @@ struct RecoveryInfo {
max_wal_tx_id == other.max_wal_tx_id;
}
bool operator!=(const RecoveryInfo &other) const { return !(*this == other); }
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &snapshot_tx_id;
ar &max_wal_tx_id;
}
};
/** Reads snapshot metadata from the end of the file without messing up the

View File

@ -5,8 +5,6 @@
#include <iostream>
#include <string>
#include "boost/serialization/access.hpp"
#include "utils/exceptions.hpp"
namespace io::network {
@ -29,15 +27,6 @@ class Endpoint {
friend std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint);
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &address_;
ar &port_;
ar &family_;
}
std::string address_;
uint16_t port_{0};
unsigned char family_{0};

View File

@ -12,9 +12,9 @@
#include <glog/logging.h>
#include "communication/bolt/v1/session.hpp"
#include "communication/server.hpp"
#include "config.hpp"
#include "database/graph_db.hpp"
#include "stats/stats.hpp"
#include "utils/flag_validation.hpp"
#include "utils/signals.hpp"
#include "utils/sysinfo/memory.hpp"
@ -118,6 +118,7 @@ void InitSignalHandlers(const std::function<void()> &shutdown_fun) {
int WithInit(int argc, char **argv,
const std::function<std::string()> &get_stats_prefix,
const std::function<void()> &memgraph_main) {
google::SetUsageMessage("Memgraph database server");
gflags::SetVersionString(version_string);
// Load config before parsing arguments, so that flags from the command line
@ -132,9 +133,6 @@ int WithInit(int argc, char **argv,
// Unhandled exception handler init.
std::set_terminate(&utils::TerminateHandler);
stats::InitStatsLogging(get_stats_prefix());
utils::OnScopeExit stop_stats([] { stats::StopStatsLogging(); });
// Start memory warning logger.
utils::Scheduler mem_log_scheduler;
if (FLAGS_memory_warning_threshold > 0) {
@ -150,7 +148,6 @@ int WithInit(int argc, char **argv,
}
void SingleNodeMain() {
google::SetUsageMessage("Memgraph single-node database server");
database::SingleNode db;
SessionData session_data{db};
ServerT server({FLAGS_interface, static_cast<uint16_t>(FLAGS_port)},
@ -170,71 +167,6 @@ void SingleNodeMain() {
// End common stuff for enterprise and community editions
#ifdef MG_COMMUNITY
int main(int argc, char **argv) {
return WithInit(argc, argv, []() { return "memgraph"; }, SingleNodeMain);
}
#else // enterprise edition
// Distributed flags.
DEFINE_HIDDEN_bool(
master, false,
"If this Memgraph server is the master in a distributed deployment.");
DEFINE_HIDDEN_bool(
worker, false,
"If this Memgraph server is a worker in a distributed deployment.");
DECLARE_int32(worker_id);
void MasterMain() {
google::SetUsageMessage("Memgraph distributed master");
database::Master db;
SessionData session_data{db};
ServerT server({FLAGS_interface, static_cast<uint16_t>(FLAGS_port)},
session_data, FLAGS_session_inactivity_timeout, "Bolt",
FLAGS_num_workers);
// Handler for regular termination signals
auto shutdown = [&server] {
// Server needs to be shutdown first and then the database. This prevents a
// race condition when a transaction is accepted during server shutdown.
server.Shutdown();
};
InitSignalHandlers(shutdown);
server.AwaitShutdown();
}
void WorkerMain() {
google::SetUsageMessage("Memgraph distributed worker");
database::Worker db;
db.WaitForShutdown();
}
int main(int argc, char **argv) {
auto get_stats_prefix = [&]() -> std::string {
if (FLAGS_master) {
return "master";
} else if (FLAGS_worker) {
return fmt::format("worker-{}", FLAGS_worker_id);
}
return "memgraph";
};
auto memgraph_main = [&]() {
CHECK(!(FLAGS_master && FLAGS_worker))
<< "Can't run Memgraph as worker and master at the same time";
if (FLAGS_master)
MasterMain();
else if (FLAGS_worker)
WorkerMain();
else
SingleNodeMain();
};
return WithInit(argc, argv, get_stats_prefix, memgraph_main);
}
#endif // enterprise edition

View File

@ -3,7 +3,6 @@
#include <cstdint>
#include <string>
#include "boost/serialization/serialization.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/typed_value.hpp"
@ -54,12 +53,6 @@ class TypedValueVectorCompare {
private:
std::vector<Ordering> ordering_;
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, const unsigned int) {
ar &ordering_;
}
// Custom comparison for TypedValue objects.
//
// Behaves generally like Neo's ORDER BY comparison operator:

View File

@ -1,10 +1,5 @@
#include "query/frontend/ast/ast.hpp"
// Include archives before registering most derived types.
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/export.hpp"
namespace query {
// Id for boost's archive get_helper needs to be unique among all ids. If it
@ -43,59 +38,3 @@ ReturnBody CloneReturnBody(AstTreeStorage &storage, const ReturnBody &body) {
}
} // namespace query
BOOST_CLASS_EXPORT_IMPLEMENT(query::Query);
BOOST_CLASS_EXPORT_IMPLEMENT(query::SingleQuery);
BOOST_CLASS_EXPORT_IMPLEMENT(query::CypherUnion);
BOOST_CLASS_EXPORT_IMPLEMENT(query::NamedExpression);
BOOST_CLASS_EXPORT_IMPLEMENT(query::OrOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::XorOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::AndOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::NotOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::AdditionOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::SubtractionOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::MultiplicationOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::DivisionOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::ModOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::NotEqualOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::EqualOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::LessOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::GreaterOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::LessEqualOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::GreaterEqualOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::InListOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::ListMapIndexingOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::ListSlicingOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::IfOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::UnaryPlusOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::UnaryMinusOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::IsNullOperator);
BOOST_CLASS_EXPORT_IMPLEMENT(query::ListLiteral);
BOOST_CLASS_EXPORT_IMPLEMENT(query::MapLiteral);
BOOST_CLASS_EXPORT_IMPLEMENT(query::PropertyLookup);
BOOST_CLASS_EXPORT_IMPLEMENT(query::LabelsTest);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Aggregation);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Function);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Reduce);
BOOST_CLASS_EXPORT_IMPLEMENT(query::All);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Single);
BOOST_CLASS_EXPORT_IMPLEMENT(query::ParameterLookup);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Create);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Match);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Return);
BOOST_CLASS_EXPORT_IMPLEMENT(query::With);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Pattern);
BOOST_CLASS_EXPORT_IMPLEMENT(query::NodeAtom);
BOOST_CLASS_EXPORT_IMPLEMENT(query::EdgeAtom);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Delete);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Where);
BOOST_CLASS_EXPORT_IMPLEMENT(query::SetProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::SetProperties);
BOOST_CLASS_EXPORT_IMPLEMENT(query::SetLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::RemoveProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::RemoveLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Merge);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Unwind);
BOOST_CLASS_EXPORT_IMPLEMENT(query::Identifier);
BOOST_CLASS_EXPORT_IMPLEMENT(query::PrimitiveLiteral);
BOOST_CLASS_EXPORT_IMPLEMENT(query::CreateIndex);

File diff suppressed because it is too large

View File

@ -2,9 +2,6 @@
#include <string>
#include "boost/serialization/serialization.hpp"
#include "boost/serialization/string.hpp"
namespace query {
class Symbol {
@ -46,17 +43,6 @@ class Symbol {
bool user_declared_ = true;
Type type_ = Type::Any;
int token_position_ = -1;
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, const unsigned int) {
ar & name_;
ar & position_;
ar & user_declared_;
ar & type_;
ar & token_position_;
}
};
} // namespace query

View File

@ -3,9 +3,6 @@
#include <map>
#include <string>
#include "boost/serialization/map.hpp"
#include "boost/serialization/serialization.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol.hpp"
@ -33,14 +30,6 @@ class SymbolTable final {
private:
int position_{0};
std::map<int, Symbol> table_;
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, const unsigned int) {
ar &position_;
ar &table_;
}
};
} // namespace query

View File

@ -3,7 +3,6 @@
#include <glog/logging.h>
#include <limits>
#include "distributed/plan_dispatcher.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp"
#include "query/frontend/opencypher/parser.hpp"
@ -20,35 +19,13 @@ DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60,
namespace query {
Interpreter::CachedPlan::CachedPlan(
plan::DistributedPlan distributed_plan, double cost,
distributed::PlanDispatcher *plan_dispatcher)
: distributed_plan_(std::move(distributed_plan)),
cost_(cost),
plan_dispatcher_(plan_dispatcher) {
if (plan_dispatcher_) {
for (const auto &plan_pair : distributed_plan_.worker_plans) {
const auto &plan_id = plan_pair.first;
const auto &worker_plan = plan_pair.second;
plan_dispatcher_->DispatchPlan(plan_id, worker_plan,
distributed_plan_.symbol_table);
}
}
}
Interpreter::CachedPlan::CachedPlan(plan::DistributedPlan distributed_plan,
double cost)
: distributed_plan_(std::move(distributed_plan)), cost_(cost) {}
Interpreter::CachedPlan::~CachedPlan() {
if (plan_dispatcher_) {
for (const auto &plan_pair : distributed_plan_.worker_plans) {
const auto &plan_id = plan_pair.first;
plan_dispatcher_->RemovePlan(plan_id);
}
}
}
Interpreter::CachedPlan::~CachedPlan() {}
Interpreter::Interpreter(database::GraphDb &db)
: plan_dispatcher_(db.type() == database::GraphDb::Type::DISTRIBUTED_MASTER
? &db.plan_dispatcher()
: nullptr) {}
Interpreter::Interpreter(database::GraphDb &db) {}
Interpreter::Results Interpreter::operator()(
const std::string &query, database::GraphDbAccessor &db_accessor,
@ -134,26 +111,13 @@ std::shared_ptr<Interpreter::CachedPlan> Interpreter::QueryToPlan(
std::tie(tmp_logical_plan, query_plan_cost_estimation) =
MakeLogicalPlan(ast_storage, ctx);
DCHECK(ctx.db_accessor_.db().type() !=
database::GraphDb::Type::DISTRIBUTED_WORKER);
if (ctx.db_accessor_.db().type() ==
database::GraphDb::Type::DISTRIBUTED_MASTER) {
auto distributed_plan = MakeDistributedPlan(
*tmp_logical_plan, ctx.symbol_table_, next_plan_id_);
VLOG(10) << "[Interpreter] Created plan for distributed execution "
<< next_plan_id_ - 1;
return std::make_shared<CachedPlan>(std::move(distributed_plan),
query_plan_cost_estimation,
plan_dispatcher_);
} else {
return std::make_shared<CachedPlan>(
plan::DistributedPlan{0,
std::move(tmp_logical_plan),
{},
std::move(ast_storage),
ctx.symbol_table_},
query_plan_cost_estimation, plan_dispatcher_);
}
return std::make_shared<CachedPlan>(
plan::DistributedPlan{0,
std::move(tmp_logical_plan),
{},
std::move(ast_storage),
ctx.symbol_table_},
query_plan_cost_estimation);
}
AstTreeStorage Interpreter::QueryToAst(const StrippedQuery &stripped,

View File

@ -16,10 +16,6 @@
DECLARE_int32(query_plan_cache_ttl);
namespace distributed {
class PlanDispatcher;
}
namespace query {
class Interpreter {
@ -29,8 +25,7 @@ class Interpreter {
class CachedPlan {
public:
/// Creates a cached plan and sends it to all the workers.
CachedPlan(plan::DistributedPlan distributed_plan, double cost,
distributed::PlanDispatcher *plan_dispatcher);
CachedPlan(plan::DistributedPlan distributed_plan, double cost);
/// Removes the cached plan from all the workers.
~CachedPlan();
@ -49,9 +44,6 @@ class Interpreter {
plan::DistributedPlan distributed_plan_;
double cost_;
utils::Timer cache_timer_;
// Optional, only available in a distributed master.
distributed::PlanDispatcher *plan_dispatcher_{nullptr};
};
using PlanCacheT = ConcurrentMap<HashType, std::shared_ptr<CachedPlan>>;
@ -175,9 +167,6 @@ class Interpreter {
// so this lock probably won't impact performance much...
SpinLock antlr_lock_;
// Optional, not null only in a distributed master.
distributed::PlanDispatcher *plan_dispatcher_{nullptr};
// stripped query -> CachedPlan
std::shared_ptr<CachedPlan> QueryToPlan(const StrippedQuery &stripped,
Context &ctx);

View File

@ -2,12 +2,6 @@
#include <memory>
// TODO: Remove these includes for hacked cloning of logical operators via boost
// serialization when proper cloning is added.
#include <sstream>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
#include "utils/exceptions.hpp"
@ -16,22 +10,6 @@ namespace query::plan {
namespace {
std::pair<std::unique_ptr<LogicalOperator>, AstTreeStorage> Clone(
const LogicalOperator &original_plan) {
// TODO: Add a proper Clone method to LogicalOperator
std::stringstream stream;
{
boost::archive::binary_oarchive out_archive(stream);
out_archive << &original_plan;
}
boost::archive::binary_iarchive in_archive(stream);
LogicalOperator *plan_copy = nullptr;
in_archive >> plan_copy;
return {std::unique_ptr<LogicalOperator>(plan_copy),
std::move(in_archive.template get_helper<AstTreeStorage>(
AstTreeStorage::kHelperId))};
}
int64_t AddWorkerPlan(DistributedPlan &distributed_plan,
std::atomic<int64_t> &next_plan_id,
const std::shared_ptr<LogicalOperator> &worker_plan) {
@ -749,43 +727,4 @@ class DistributedPlanner : public HierarchicalLogicalOperatorVisitor {
} // namespace
DistributedPlan MakeDistributedPlan(const LogicalOperator &original_plan,
const SymbolTable &symbol_table,
std::atomic<int64_t> &next_plan_id) {
DistributedPlan distributed_plan;
// If we generate multiple worker plans, we will need to increment the
// next_plan_id for each one.
distributed_plan.master_plan_id = next_plan_id++;
distributed_plan.symbol_table = symbol_table;
std::tie(distributed_plan.master_plan, distributed_plan.ast_storage) =
Clone(original_plan);
DistributedPlanner planner(distributed_plan, next_plan_id);
distributed_plan.master_plan->Accept(planner);
if (planner.ShouldSplit()) {
// We haven't split the plan, this means that it should be the same on
// master and worker. We only need to prepend PullRemote to master plan.
std::shared_ptr<LogicalOperator> worker_plan(
std::move(distributed_plan.master_plan));
auto pull_id = AddWorkerPlan(distributed_plan, next_plan_id, worker_plan);
// If the plan performs writes, we need to finish with Synchronize.
if (planner.NeedsSynchronize()) {
auto pull_remote = std::make_shared<PullRemote>(
nullptr, pull_id,
worker_plan->OutputSymbols(distributed_plan.symbol_table));
distributed_plan.master_plan =
std::make_unique<Synchronize>(worker_plan, pull_remote, false);
} else {
distributed_plan.master_plan = std::make_unique<PullRemote>(
worker_plan, pull_id,
worker_plan->OutputSymbols(distributed_plan.symbol_table));
}
} else if (planner.NeedsSynchronize()) {
// If the plan performs writes on master, we still need to Synchronize, even
// though we don't split the plan.
distributed_plan.master_plan = std::make_unique<Synchronize>(
std::move(distributed_plan.master_plan), nullptr, false);
}
return distributed_plan;
}
} // namespace query::plan

View File

@ -11,15 +11,9 @@
#include <unordered_set>
#include <utility>
#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/export.hpp"
#include "glog/logging.h"
#include "database/graph_db_accessor.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "query/context.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/ast/ast.hpp"
@ -119,15 +113,6 @@ CreateNode::CreateNode(const std::shared_ptr<LogicalOperator> &input,
namespace {
// Returns a random worker id. Worker ID is obtained from the Db.
int RandomWorkerId(database::GraphDb &db) {
thread_local std::mt19937 gen_{std::random_device{}()};
thread_local std::uniform_int_distribution<int> rand_;
auto worker_ids = db.GetWorkerIds();
return worker_ids[rand_(gen_) % worker_ids.size()];
}
// Creates a vertex on this GraphDb. Returns a reference to vertex placed on the
// frame.
VertexAccessor &CreateLocalVertex(NodeAtom *node_atom, Frame &frame,
@ -146,34 +131,6 @@ VertexAccessor &CreateLocalVertex(NodeAtom *node_atom, Frame &frame,
return frame[context.symbol_table_.at(*node_atom->identifier_)].ValueVertex();
}
// Creates a vertex on the GraphDb with the given worker_id. Can be this worker.
VertexAccessor &CreateVertexOnWorker(int worker_id, NodeAtom *node_atom,
Frame &frame, Context &context) {
auto &dba = context.db_accessor_;
if (worker_id == dba.db().WorkerId())
return CreateLocalVertex(node_atom, frame, context);
std::unordered_map<storage::Property, query::TypedValue> properties;
// Evaluator should use the latest accessors, as modified in this query, when
// setting properties on new nodes.
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, dba, GraphView::NEW);
for (auto &kv : node_atom->properties_) {
auto value = kv.second->Accept(evaluator);
if (!value.IsPropertyValue()) {
throw QueryRuntimeException("'{}' cannot be used as a property value.",
value.type());
}
properties.emplace(kv.first.second, std::move(value));
}
auto new_node =
dba.InsertVertexIntoRemote(worker_id, node_atom->labels_, properties);
frame[context.symbol_table_.at(*node_atom->identifier_)] = new_node;
return frame[context.symbol_table_.at(*node_atom->identifier_)].ValueVertex();
}
} // namespace
ACCEPT_WITH_INPUT(CreateNode)
@ -192,16 +149,11 @@ std::vector<Symbol> CreateNode::ModifiedSymbols(
CreateNode::CreateNodeCursor::CreateNodeCursor(const CreateNode &self,
database::GraphDbAccessor &db)
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
bool CreateNode::CreateNodeCursor::Pull(Frame &frame, Context &context) {
if (input_cursor_->Pull(frame, context)) {
if (self_.on_random_worker_) {
CreateVertexOnWorker(RandomWorkerId(db_.db()), self_.node_atom_, frame,
context);
} else {
CreateLocalVertex(self_.node_atom_, frame, context);
}
CreateLocalVertex(self_.node_atom_, frame, context);
return true;
}
return false;
@ -286,7 +238,7 @@ VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(
ExpectType(dest_node_symbol, dest_node_value, TypedValue::Type::Vertex);
return dest_node_value.Value<VertexAccessor>();
} else {
return CreateVertexOnWorker(worker_id, self_.node_atom_, frame, context);
return CreateLocalVertex(self_.node_atom_, frame, context);
}
}
@ -414,32 +366,32 @@ std::unique_ptr<Cursor> ScanAllByLabelPropertyRange::MakeCursor(
-> std::experimental::optional<decltype(
db.Vertices(label_, property_, std::experimental::nullopt,
std::experimental::nullopt, false))> {
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, db, graph_view_);
auto convert = [&evaluator](const auto &bound)
-> std::experimental::optional<utils::Bound<PropertyValue>> {
if (!bound) return std::experimental::nullopt;
auto value = bound->value()->Accept(evaluator);
try {
return std::experimental::make_optional(
utils::Bound<PropertyValue>(value, bound->type()));
} catch (const TypedValueException &) {
throw QueryRuntimeException(
"'{}' cannot be used as a property value.", value.type());
}
};
auto maybe_lower = convert(lower_bound());
auto maybe_upper = convert(upper_bound());
// If any bound is null, then the comparison would result in nulls. This
// is treated as not satisfying the filter, so return no vertices.
if (maybe_lower && maybe_lower->value().IsNull())
return std::experimental::nullopt;
if (maybe_upper && maybe_upper->value().IsNull())
return std::experimental::nullopt;
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, db, graph_view_);
auto convert = [&evaluator](const auto &bound)
-> std::experimental::optional<utils::Bound<PropertyValue>> {
if (!bound) return std::experimental::nullopt;
auto value = bound->value()->Accept(evaluator);
try {
return std::experimental::make_optional(
db.Vertices(label_, property_, maybe_lower, maybe_upper,
graph_view_ == GraphView::NEW));
};
utils::Bound<PropertyValue>(value, bound->type()));
} catch (const TypedValueException &) {
throw QueryRuntimeException("'{}' cannot be used as a property value.",
value.type());
}
};
auto maybe_lower = convert(lower_bound());
auto maybe_upper = convert(upper_bound());
// If any bound is null, then the comparison would result in nulls. This
// is treated as not satisfying the filter, so return no vertices.
if (maybe_lower && maybe_lower->value().IsNull())
return std::experimental::nullopt;
if (maybe_upper && maybe_upper->value().IsNull())
return std::experimental::nullopt;
return std::experimental::make_optional(
db.Vertices(label_, property_, maybe_lower, maybe_upper,
graph_view_ == GraphView::NEW));
};
return std::make_unique<ScanAllCursor<decltype(vertices)>>(
output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}
@ -462,18 +414,18 @@ std::unique_ptr<Cursor> ScanAllByLabelPropertyValue::MakeCursor(
auto vertices = [this, &db](Frame &frame, Context &context)
-> std::experimental::optional<decltype(
db.Vertices(label_, property_, TypedValue::Null, false))> {
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, db, graph_view_);
auto value = expression_->Accept(evaluator);
if (value.IsNull()) return std::experimental::nullopt;
try {
return std::experimental::make_optional(db.Vertices(
label_, property_, value, graph_view_ == GraphView::NEW));
} catch (const TypedValueException &) {
throw QueryRuntimeException(
"'{}' cannot be used as a property value.", value.type());
}
};
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, db, graph_view_);
auto value = expression_->Accept(evaluator);
if (value.IsNull()) return std::experimental::nullopt;
try {
return std::experimental::make_optional(
db.Vertices(label_, property_, value, graph_view_ == GraphView::NEW));
} catch (const TypedValueException &) {
throw QueryRuntimeException("'{}' cannot be used as a property value.",
value.type());
}
};
return std::make_unique<ScanAllCursor<decltype(vertices)>>(
output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}
@ -1224,7 +1176,8 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor {
// For the given (edge, vertex, weight, depth) tuple checks if they
// satisfy the "where" condition. if so, places them in the priority queue.
auto expand_pair = [this, &evaluator, &frame, &create_state](
EdgeAccessor edge, VertexAccessor vertex, double weight, int depth) {
EdgeAccessor edge, VertexAccessor vertex,
double weight, int depth) {
SwitchAccessor(edge, self_.graph_view_);
SwitchAccessor(vertex, self_.graph_view_);
@ -2960,44 +2913,6 @@ void Union::UnionCursor::Reset() {
right_cursor_->Reset();
}
bool PullRemote::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
if (visitor.PreVisit(*this)) {
if (input_) input_->Accept(visitor);
}
return visitor.PostVisit(*this);
}
std::vector<Symbol> PullRemote::OutputSymbols(const SymbolTable &table) const {
return input_ ? input_->OutputSymbols(table) : std::vector<Symbol>{};
}
std::vector<Symbol> PullRemote::ModifiedSymbols(
const SymbolTable &table) const {
auto symbols = symbols_;
if (input_) {
auto input_symbols = input_->ModifiedSymbols(table);
symbols.insert(symbols.end(), input_symbols.begin(), input_symbols.end());
}
return symbols;
}
std::vector<Symbol> Synchronize::ModifiedSymbols(
const SymbolTable &table) const {
auto symbols = input_->ModifiedSymbols(table);
if (pull_remote_) {
auto pull_symbols = pull_remote_->ModifiedSymbols(table);
symbols.insert(symbols.end(), pull_symbols.begin(), pull_symbols.end());
}
return symbols;
}
bool Synchronize::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
if (visitor.PreVisit(*this)) {
input_->Accept(visitor) && pull_remote_->Accept(visitor);
}
return visitor.PostVisit(*this);
}
std::vector<Symbol> Cartesian::ModifiedSymbols(const SymbolTable &table) const {
auto symbols = left_op_->ModifiedSymbols(table);
auto right = right_op_->ModifiedSymbols(table);
@ -3014,407 +2929,6 @@ bool Cartesian::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
WITHOUT_SINGLE_INPUT(Cartesian);
PullRemoteOrderBy::PullRemoteOrderBy(
const std::shared_ptr<LogicalOperator> &input, int64_t plan_id,
const std::vector<std::pair<Ordering, Expression *>> &order_by,
const std::vector<Symbol> &symbols)
: input_(input), plan_id_(plan_id), symbols_(symbols) {
CHECK(input_ != nullptr)
<< "PullRemoteOrderBy should always be constructed with input!";
std::vector<Ordering> ordering;
ordering.reserve(order_by.size());
order_by_.reserve(order_by.size());
for (const auto &ordering_expression_pair : order_by) {
ordering.emplace_back(ordering_expression_pair.first);
order_by_.emplace_back(ordering_expression_pair.second);
}
compare_ = TypedValueVectorCompare(ordering);
}
ACCEPT_WITH_INPUT(PullRemoteOrderBy);
std::vector<Symbol> PullRemoteOrderBy::OutputSymbols(
const SymbolTable &table) const {
return input_->OutputSymbols(table);
}
std::vector<Symbol> PullRemoteOrderBy::ModifiedSymbols(
const SymbolTable &table) const {
return input_->ModifiedSymbols(table);
}
namespace {
/** Helper class that wraps remote pulling for cursors that handle results from
* distributed workers.
*/
class RemotePuller {
public:
RemotePuller(database::GraphDbAccessor &db,
const std::vector<Symbol> &symbols, int64_t plan_id)
: db_(db), symbols_(symbols), plan_id_(plan_id) {
worker_ids_ = db_.db().pull_clients().GetWorkerIds();
// Remove master from the worker ids list.
worker_ids_.erase(std::find(worker_ids_.begin(), worker_ids_.end(), 0));
}
void Initialize(Context &context) {
if (!remote_pulls_initialized_) {
VLOG(10) << "[RemotePuller] [" << context.db_accessor_.transaction_id()
<< "] [" << plan_id_ << "] initialized";
for (auto &worker_id : worker_ids_) {
UpdatePullForWorker(worker_id, context);
}
remote_pulls_initialized_ = true;
}
}
void Update(Context &context) {
// If we don't have results for a worker, check if its remote pull
// finished and save results locally.
auto move_frames = [this, &context](int worker_id, auto remote_results) {
VLOG(10) << "[RemotePuller] [" << context.db_accessor_.transaction_id()
<< "] [" << plan_id_ << "] received results from " << worker_id;
remote_results_[worker_id] = std::move(remote_results.frames);
// Since we return and remove results from the back of the vector,
// reverse the results so the first to return is on the end of the
// vector.
std::reverse(remote_results_[worker_id].begin(),
remote_results_[worker_id].end());
};
for (auto &worker_id : worker_ids_) {
if (!remote_results_[worker_id].empty()) continue;
auto found_it = remote_pulls_.find(worker_id);
if (found_it == remote_pulls_.end()) continue;
auto &remote_pull = found_it->second;
if (!remote_pull.IsReady()) continue;
auto remote_results = remote_pull.get();
switch (remote_results.pull_state) {
case distributed::PullState::CURSOR_EXHAUSTED:
VLOG(10) << "[RemotePuller] ["
<< context.db_accessor_.transaction_id() << "] [" << plan_id_
<< "] cursor exhausted from " << worker_id;
move_frames(worker_id, remote_results);
remote_pulls_.erase(found_it);
break;
case distributed::PullState::CURSOR_IN_PROGRESS:
VLOG(10) << "[RemotePuller] ["
<< context.db_accessor_.transaction_id() << "] [" << plan_id_
<< "] cursor in progress from " << worker_id;
move_frames(worker_id, remote_results);
UpdatePullForWorker(worker_id, context);
break;
case distributed::PullState::SERIALIZATION_ERROR:
throw mvcc::SerializationError(
"Serialization error occured during PullRemote !");
case distributed::PullState::LOCK_TIMEOUT_ERROR:
throw LockTimeoutException(
"LockTimeout error occured during PullRemote !");
case distributed::PullState::UPDATE_DELETED_ERROR:
throw QueryRuntimeException(
"RecordDeleted error ocured during PullRemote !");
case distributed::PullState::RECONSTRUCTION_ERROR:
throw query::ReconstructionException();
case distributed::PullState::UNABLE_TO_DELETE_VERTEX_ERROR:
throw RemoveAttachedVertexException();
case distributed::PullState::HINTED_ABORT_ERROR:
throw HintedAbortError();
case distributed::PullState::QUERY_ERROR:
throw QueryRuntimeException(
"Query runtime error occurred duing PullRemote !");
}
}
}
auto Workers() { return worker_ids_; }
int GetWorkerId(int worker_id_index) { return worker_ids_[worker_id_index]; }
size_t WorkerCount() { return worker_ids_.size(); }
void ClearWorkers() { worker_ids_.clear(); }
bool HasPendingPulls() { return !remote_pulls_.empty(); }
bool HasPendingPullFromWorker(int worker_id) {
return remote_pulls_.find(worker_id) != remote_pulls_.end();
}
bool HasResultsFromWorker(int worker_id) {
return !remote_results_[worker_id].empty();
}
std::vector<query::TypedValue> PopResultFromWorker(int worker_id) {
auto result = remote_results_[worker_id].back();
remote_results_[worker_id].pop_back();
// Remove the worker if we exhausted all locally stored results and there
// are no more pending remote pulls for that worker.
if (remote_results_[worker_id].empty() &&
remote_pulls_.find(worker_id) == remote_pulls_.end()) {
worker_ids_.erase(
std::find(worker_ids_.begin(), worker_ids_.end(), worker_id));
}
return result;
}
private:
database::GraphDbAccessor &db_;
std::vector<Symbol> symbols_;
int64_t plan_id_;
std::unordered_map<int, utils::Future<distributed::PullData>> remote_pulls_;
std::unordered_map<int, std::vector<std::vector<query::TypedValue>>>
remote_results_;
std::vector<int> worker_ids_;
bool remote_pulls_initialized_ = false;
void UpdatePullForWorker(int worker_id, Context &context) {
remote_pulls_[worker_id] = db_.db().pull_clients().Pull(
db_, worker_id, plan_id_, context.parameters_, symbols_, false);
}
};
class PullRemoteCursor : public Cursor {
public:
PullRemoteCursor(const PullRemote &self, database::GraphDbAccessor &db)
: self_(self),
input_cursor_(self.input() ? self.input()->MakeCursor(db) : nullptr),
remote_puller_(RemotePuller(db, self.symbols(), self.plan_id())) {}
bool Pull(Frame &frame, Context &context) override {
if (context.db_accessor_.should_abort()) throw HintedAbortError();
remote_puller_.Initialize(context);
bool have_remote_results = false;
while (!have_remote_results && remote_puller_.WorkerCount() > 0) {
if (context.db_accessor_.should_abort()) throw HintedAbortError();
remote_puller_.Update(context);
// Get locally stored results from workers in a round-robin fashion.
int num_workers = remote_puller_.WorkerCount();
for (int i = 0; i < num_workers; ++i) {
int worker_id_index =
(last_pulled_worker_id_index_ + i + 1) % num_workers;
int worker_id = remote_puller_.GetWorkerId(worker_id_index);
if (remote_puller_.HasResultsFromWorker(worker_id)) {
last_pulled_worker_id_index_ = worker_id_index;
have_remote_results = true;
break;
}
}
if (!have_remote_results) {
if (!remote_puller_.HasPendingPulls()) {
remote_puller_.ClearWorkers();
break;
}
// If there are no remote results available, try to pull and return
// local results.
if (input_cursor_ && input_cursor_->Pull(frame, context)) {
VLOG(10) << "[PullRemoteCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] producing local results ";
return true;
}
VLOG(10) << "[PullRemoteCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] no results available, sleeping ";
// If there aren't any local/remote results available, sleep.
std::this_thread::sleep_for(
std::chrono::microseconds(FLAGS_remote_pull_sleep_micros));
}
}
// No more remote results, make sure local results get exhausted.
if (!have_remote_results) {
if (input_cursor_ && input_cursor_->Pull(frame, context)) {
VLOG(10) << "[PullRemoteCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] producing local results ";
return true;
}
return false;
}
{
int worker_id = remote_puller_.GetWorkerId(last_pulled_worker_id_index_);
VLOG(10) << "[PullRemoteCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] producing results from worker "
<< worker_id;
auto result = remote_puller_.PopResultFromWorker(worker_id);
for (size_t i = 0; i < self_.symbols().size(); ++i) {
frame[self_.symbols()[i]] = std::move(result[i]);
}
}
return true;
}
void Reset() override {
throw QueryRuntimeException("Unsupported: Reset during PullRemote!");
}
private:
const PullRemote &self_;
const std::unique_ptr<Cursor> input_cursor_;
RemotePuller remote_puller_;
int last_pulled_worker_id_index_ = 0;
};
class SynchronizeCursor : public Cursor {
public:
SynchronizeCursor(const Synchronize &self, database::GraphDbAccessor &db)
: self_(self),
input_cursor_(self.input()->MakeCursor(db)),
pull_remote_cursor_(
self.pull_remote() ? self.pull_remote()->MakeCursor(db) : nullptr) {
}
bool Pull(Frame &frame, Context &context) override {
if (!initial_pull_done_) {
InitialPull(frame, context);
initial_pull_done_ = true;
}
// Yield local stuff while available.
if (!local_frames_.empty()) {
VLOG(10) << "[SynchronizeCursor] ["
<< context.db_accessor_.transaction_id()
<< "] producing local results";
auto &result = local_frames_.back();
for (size_t i = 0; i < frame.elems().size(); ++i) {
if (self_.advance_command()) {
query::ReconstructTypedValue(result[i]);
}
frame.elems()[i] = std::move(result[i]);
}
local_frames_.resize(local_frames_.size() - 1);
return true;
}
// We're out of local stuff, yield from pull_remote if available.
if (pull_remote_cursor_ && pull_remote_cursor_->Pull(frame, context)) {
VLOG(10) << "[SynchronizeCursor] ["
<< context.db_accessor_.transaction_id()
<< "] producing remote results";
return true;
}
return false;
}
void Reset() override {
throw QueryRuntimeException("Unsupported: Reset during Synchronize!");
}
private:
const Synchronize &self_;
const std::unique_ptr<Cursor> input_cursor_;
const std::unique_ptr<Cursor> pull_remote_cursor_;
bool initial_pull_done_{false};
std::vector<std::vector<TypedValue>> local_frames_;
void InitialPull(Frame &frame, Context &context) {
VLOG(10) << "[SynchronizeCursor] [" << context.db_accessor_.transaction_id()
<< "] initial pull";
auto &db = context.db_accessor_.db();
// Tell all workers to accumulate, only if there is a remote pull.
std::vector<utils::Future<distributed::PullData>> worker_accumulations;
if (pull_remote_cursor_) {
for (auto worker_id : db.pull_clients().GetWorkerIds()) {
if (worker_id == db.WorkerId()) continue;
worker_accumulations.emplace_back(db.pull_clients().Pull(
context.db_accessor_, worker_id, self_.pull_remote()->plan_id(),
context.parameters_, self_.pull_remote()->symbols(), true, 0));
}
}
// Accumulate local results
while (input_cursor_->Pull(frame, context)) {
local_frames_.emplace_back();
auto &local_frame = local_frames_.back();
local_frame.reserve(frame.elems().size());
for (auto &elem : frame.elems()) {
local_frame.emplace_back(std::move(elem));
}
}
// Wait for all workers to finish accumulation (first sync point).
for (auto &accu : worker_accumulations) {
switch (accu.get().pull_state) {
case distributed::PullState::CURSOR_EXHAUSTED:
continue;
case distributed::PullState::CURSOR_IN_PROGRESS:
throw QueryRuntimeException(
"Expected exhausted cursor after remote pull accumulate");
case distributed::PullState::SERIALIZATION_ERROR:
throw mvcc::SerializationError(
"Failed to perform remote accumulate due to SerializationError");
case distributed::PullState::UPDATE_DELETED_ERROR:
throw QueryRuntimeException(
"Failed to perform remote accumulate due to RecordDeletedError");
case distributed::PullState::LOCK_TIMEOUT_ERROR:
throw LockTimeoutException(
"Failed to perform remote accumulate due to "
"LockTimeoutException");
case distributed::PullState::RECONSTRUCTION_ERROR:
throw QueryRuntimeException(
"Failed to perform remote accumulate due to ReconstructionError");
case distributed::PullState::UNABLE_TO_DELETE_VERTEX_ERROR:
throw RemoveAttachedVertexException();
case distributed::PullState::HINTED_ABORT_ERROR:
throw HintedAbortError();
case distributed::PullState::QUERY_ERROR:
throw QueryRuntimeException(
"Failed to perform remote accumulate due to Query runtime error");
}
}
if (self_.advance_command()) {
context.db_accessor_.AdvanceCommand();
}
// Make all the workers apply their deltas.
auto tx_id = context.db_accessor_.transaction_id();
auto apply_futures =
db.updates_clients().UpdateApplyAll(db.WorkerId(), tx_id);
db.updates_server().Apply(tx_id);
for (auto &future : apply_futures) {
switch (future.get()) {
case distributed::UpdateResult::SERIALIZATION_ERROR:
throw mvcc::SerializationError(
"Failed to apply deferred updates due to SerializationError");
case distributed::UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
throw RemoveAttachedVertexException();
case distributed::UpdateResult::UPDATE_DELETED_ERROR:
throw QueryRuntimeException(
"Failed to apply deferred updates due to RecordDeletedError");
case distributed::UpdateResult::LOCK_TIMEOUT_ERROR:
throw LockTimeoutException(
"Failed to apply deferred update due to LockTimeoutException");
case distributed::UpdateResult::DONE:
break;
}
}
// If the command advanced, let the workers know.
if (self_.advance_command()) {
auto futures =
db.pull_clients().NotifyAllTransactionCommandAdvanced(tx_id);
for (auto &future : futures) future.wait();
}
}
};
class CartesianCursor : public Cursor {
public:
CartesianCursor(const Cartesian &self, database::GraphDbAccessor &db)
@ -3494,198 +3008,9 @@ class CartesianCursor : public Cursor {
bool cartesian_pull_initialized_{false};
};
class PullRemoteOrderByCursor : public Cursor {
public:
PullRemoteOrderByCursor(const PullRemoteOrderBy &self,
database::GraphDbAccessor &db)
: self_(self),
input_(self.input()->MakeCursor(db)),
remote_puller_(RemotePuller(db, self.symbols(), self.plan_id())) {}
bool Pull(Frame &frame, Context &context) {
if (context.db_accessor_.should_abort()) throw HintedAbortError();
ExpressionEvaluator evaluator(frame, context.parameters_,
context.symbol_table_, context.db_accessor_);
auto evaluate_result = [this, &evaluator]() {
std::vector<TypedValue> order_by;
order_by.reserve(self_.order_by().size());
for (auto expression_ptr : self_.order_by()) {
order_by.emplace_back(expression_ptr->Accept(evaluator));
}
return order_by;
};
auto restore_frame = [&frame,
this](const std::vector<TypedValue> &restore_from) {
for (size_t i = 0; i < restore_from.size(); ++i) {
frame[self_.symbols()[i]] = restore_from[i];
}
};
if (!merge_initialized_) {
VLOG(10) << "[PullRemoteOrderBy] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] initialize";
remote_puller_.Initialize(context);
missing_results_from_ = remote_puller_.Workers();
missing_master_result_ = true;
merge_initialized_ = true;
}
if (missing_master_result_) {
if (input_->Pull(frame, context)) {
std::vector<TypedValue> output;
output.reserve(self_.symbols().size());
for (const Symbol &symbol : self_.symbols()) {
output.emplace_back(frame[symbol]);
}
merge_.push_back(MergeResultItem{std::experimental::nullopt, output,
evaluate_result()});
}
missing_master_result_ = false;
}
while (!missing_results_from_.empty()) {
if (context.db_accessor_.should_abort()) throw HintedAbortError();
remote_puller_.Update(context);
bool has_all_result = true;
for (auto &worker_id : missing_results_from_) {
if (!remote_puller_.HasResultsFromWorker(worker_id) &&
remote_puller_.HasPendingPullFromWorker(worker_id)) {
has_all_result = false;
break;
}
}
if (!has_all_result) {
VLOG(10) << "[PullRemoteOrderByCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] missing results, sleep";
// If we don't have results from all workers, sleep before continuing.
std::this_thread::sleep_for(
std::chrono::microseconds(FLAGS_remote_pull_sleep_micros));
continue;
}
for (auto &worker_id : missing_results_from_) {
// It is possible that the worker's remote pull finished but it didn't
// return any results. In that case, just skip it.
if (!remote_puller_.HasResultsFromWorker(worker_id)) continue;
auto remote_result = remote_puller_.PopResultFromWorker(worker_id);
restore_frame(remote_result);
merge_.push_back(
MergeResultItem{worker_id, remote_result, evaluate_result()});
}
missing_results_from_.clear();
}
if (merge_.empty()) return false;
auto result_it = std::min_element(
merge_.begin(), merge_.end(), [this](const auto &lhs, const auto &rhs) {
return self_.compare()(lhs.order_by, rhs.order_by);
});
restore_frame(result_it->remote_result);
if (result_it->worker_id) {
VLOG(10) << "[PullRemoteOrderByCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] producing results from worker "
<< result_it->worker_id.value();
missing_results_from_.push_back(result_it->worker_id.value());
} else {
VLOG(10) << "[PullRemoteOrderByCursor] ["
<< context.db_accessor_.transaction_id() << "] ["
<< self_.plan_id() << "] producing local results";
missing_master_result_ = true;
}
merge_.erase(result_it);
return true;
}
void Reset() {
throw QueryRuntimeException("Unsupported: Reset during PullRemoteOrderBy!");
}
private:
struct MergeResultItem {
std::experimental::optional<int> worker_id;
std::vector<TypedValue> remote_result;
std::vector<TypedValue> order_by;
};
const PullRemoteOrderBy &self_;
std::unique_ptr<Cursor> input_;
RemotePuller remote_puller_;
std::vector<MergeResultItem> merge_;
std::vector<int> missing_results_from_;
bool missing_master_result_ = false;
bool merge_initialized_ = false;
};
} // namespace
std::unique_ptr<Cursor> PullRemote::MakeCursor(
database::GraphDbAccessor &db) const {
return std::make_unique<PullRemoteCursor>(*this, db);
}
std::unique_ptr<Cursor> Synchronize::MakeCursor(
database::GraphDbAccessor &db) const {
return std::make_unique<SynchronizeCursor>(*this, db);
}
std::unique_ptr<Cursor> Cartesian::MakeCursor(
database::GraphDbAccessor &db) const {
return std::make_unique<CartesianCursor>(*this, db);
}
std::unique_ptr<Cursor> PullRemoteOrderBy::MakeCursor(
database::GraphDbAccessor &db) const {
return std::make_unique<PullRemoteOrderByCursor>(*this, db);
}
} // namespace query::plan
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Once);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateNode);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateExpand);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAll);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabel);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabelPropertyRange);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabelPropertyValue);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Expand);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ExpandVariable);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Filter);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Produce);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ConstructNamedPath);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Delete);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetProperties);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::RemoveProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::RemoveLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ExpandUniquenessFilter<EdgeAccessor>);
BOOST_CLASS_EXPORT_IMPLEMENT(
query::plan::ExpandUniquenessFilter<VertexAccessor>);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Accumulate);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Aggregate);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Skip);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Limit);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::OrderBy);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Merge);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Optional);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Unwind);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Distinct);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateIndex);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Union);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::PullRemote);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Synchronize);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Cartesian);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::PullRemoteOrderBy);

File diff suppressed because it is too large

View File

@ -1,105 +0,0 @@
#include "stats/metrics.hpp"
#include <tuple>
#include "fmt/format.h"
#include "glog/logging.h"
namespace stats {
std::mutex &MetricsMutex() {
static std::mutex mutex;
return mutex;
}
std::map<std::string, std::unique_ptr<Metric>> &AccessMetrics() {
static std::map<std::string, std::unique_ptr<Metric>> metrics;
MetricsMutex().lock();
return metrics;
}
void ReleaseMetrics() { MetricsMutex().unlock(); }
Metric::Metric(int64_t start_value) : value_(start_value) {}
Counter::Counter(int64_t start_value) : Metric(start_value) {}
void Counter::Bump(int64_t delta) { value_ += delta; }
std::experimental::optional<int64_t> Counter::Flush() { return value_; }
int64_t Counter::Value() { return value_; }
Gauge::Gauge(int64_t start_value) : Metric(start_value) {}
void Gauge::Set(int64_t value) { value_ = value; }
std::experimental::optional<int64_t> Gauge::Flush() { return value_; }
IntervalMin::IntervalMin(int64_t start_value) : Metric(start_value) {}
void IntervalMin::Add(int64_t value) {
int64_t curr = value_;
while (curr > value && !value_.compare_exchange_weak(curr, value))
;
}
std::experimental::optional<int64_t> IntervalMin::Flush() {
int64_t curr = value_;
value_.compare_exchange_weak(curr, std::numeric_limits<int64_t>::max());
return curr == std::numeric_limits<int64_t>::max()
? std::experimental::nullopt
: std::experimental::make_optional(curr);
}
IntervalMax::IntervalMax(int64_t start_value) : Metric(start_value) {}
void IntervalMax::Add(int64_t value) {
int64_t curr = value_;
while (curr < value && !value_.compare_exchange_weak(curr, value))
;
}
std::experimental::optional<int64_t> IntervalMax::Flush() {
int64_t curr = value_;
value_.compare_exchange_weak(curr, std::numeric_limits<int64_t>::min());
return curr == std::numeric_limits<int64_t>::min()
? std::experimental::nullopt
: std::experimental::make_optional(curr);
}
template <class T>
T &GetMetric(const std::string &name, int64_t start_value) {
auto &metrics = AccessMetrics();
auto it = metrics.find(name);
if (it == metrics.end()) {
auto got = metrics.emplace(name, std::make_unique<T>(start_value));
CHECK(got.second) << "Failed to create counter " << name;
it = got.first;
}
ReleaseMetrics();
auto *ptr = dynamic_cast<T *>(it->second.get());
if (!ptr) {
LOG(FATAL) << fmt::format("GetMetric({}) called with invalid metric type",
name);
}
return *ptr;
}
Counter &GetCounter(const std::string &name, int64_t start_value) {
return GetMetric<Counter>(name, start_value);
}
Gauge &GetGauge(const std::string &name, int64_t start_value) {
return GetMetric<Gauge>(name, start_value);
}
IntervalMin &GetIntervalMin(const std::string &name) {
return GetMetric<IntervalMin>(name, std::numeric_limits<int64_t>::max());
}
IntervalMax &GetIntervalMax(const std::string &name) {
return GetMetric<IntervalMax>(name, std::numeric_limits<int64_t>::min());
}
} // namespace stats
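The GetMetric template above implements a find-or-create lookup in a mutex-guarded registry, followed by a dynamic_cast type check. A generic sketch of the same find-or-create pattern follows, using an illustrative Thing type; the names below are not taken from the Memgraph sources.

#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Illustrative stand-in for a concrete Metric subclass.
struct Thing {
  explicit Thing(int64_t v) : value(v) {}
  int64_t value;
};

Thing &GetThing(const std::string &name, int64_t start_value = 0) {
  static std::mutex mutex;
  static std::map<std::string, std::unique_ptr<Thing>> registry;
  std::lock_guard<std::mutex> guard(mutex);
  auto it = registry.find(name);
  if (it == registry.end()) {
    // First request for this name: create the entry with the start value.
    it = registry.emplace(name, std::make_unique<Thing>(start_value)).first;
  }
  return *it->second;
}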

View File

@ -1,202 +0,0 @@
/**
* @file
*
* This file contains some metrics types that can be aggregated on client side
* and periodically flushed to StatsD.
*/
#pragma once
#include <atomic>
#include <experimental/optional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include "fmt/format.h"
namespace stats {
// TODO(mtomic): it would probably be nice to have Value method for every metric
// type, however, there is no use case for this yet
/**
* Abstract base class for all metrics.
*/
class Metric {
public:
/**
* Constructs a metric to be exported to StatsD.
*
* @param name metric will be exported to StatsD with this path
* @param value initial value
*/
virtual ~Metric() {}
/**
* Metric refresh thread will periodically call this function. It should
* return the metric value aggregated since the last flush call or nullopt
* if there were no updates.
*/
virtual std::experimental::optional<int64_t> Flush() = 0;
explicit Metric(int64_t start_value = 0);
protected:
std::atomic<int64_t> value_;
};
/**
* A simple counter.
*/
class Counter : public Metric {
public:
explicit Counter(int64_t start_value = 0);
/**
* Change counter value by delta.
*
* @param delta value change
*/
void Bump(int64_t delta = 1);
/** Returns the current value of the counter. **/
std::experimental::optional<int64_t> Flush() override;
/** Returns the current value of the counter. **/
int64_t Value();
friend Counter &GetCounter(const std::string &name);
};
/**
* To be used instead of Counter constructor. If counter with this name doesn't
* exist, it will be initialized with start_value.
*
* @param name counter name
* @param start_value start value
*/
Counter &GetCounter(const std::string &name, int64_t start_value = 0);
/**
* A simple gauge. Gauge value is explicitly set, instead of being added to or
* subtracted from.
*/
class Gauge : public Metric {
public:
explicit Gauge(int64_t start_value = 0);
/**
* Set gauge value.
*
* @param value value to be set
*/
void Set(int64_t value);
/** Returns the current gauge value. **/
std::experimental::optional<int64_t> Flush() override;
};
/**
* To be used instead of Gauge constructor. If gauge with this name doesn't
* exist, it will be initialized with start_value.
*
* @param name gauge name
* @param start_value start value
*/
Gauge &GetGauge(const std::string &name, int64_t start_value = 0);
/**
* Aggregates minimum between two flush periods.
*/
class IntervalMin : public Metric {
public:
explicit IntervalMin(int64_t start_value);
/**
* Add another value into the minimum computation.
*
* @param value value to be added
*/
void Add(int64_t value);
/**
* Returns the minimum value encountered since the last flush period,
* or nullopt if no values were added.
*/
std::experimental::optional<int64_t> Flush() override;
};
/**
* To be used instead of IntervalMin constructor.
*
* @param name interval min name
*/
IntervalMin &GetIntervalMin(const std::string &name);
/**
* Aggregates maximum between two flush periods.
*/
class IntervalMax : public Metric {
public:
explicit IntervalMax(int64_t start_value);
/**
* Add another value into the maximum computation.
*/
void Add(int64_t value);
/**
* Returns the maximum value encountered since the last flush period,
* or nullopt if no values were added.
*/
std::experimental::optional<int64_t> Flush() override;
};
/**
* To be used instead of IntervalMax constructor.
*
* @param name interval max name
*/
IntervalMax &GetIntervalMax(const std::string &name);
/**
* A stopwatch utility. It exports 4 metrics: total time measured since the
* beginning of the program, total number of time intervals measured,
* minimum and maximum time interval measured since the last metric flush.
* Metrics exported by the stopwatch will be named
* [name].{total_time|count|min|max}.
*
* @param name timed event name
* @param f Callable, an action to be performed.
*/
template <class Function>
int64_t Stopwatch(const std::string &name, Function f) {
auto &total_time = GetCounter(fmt::format("{}.total_time", name));
auto &count = GetCounter(fmt::format("{}.count", name));
auto &min = GetIntervalMin(fmt::format("{}.min", name));
auto &max = GetIntervalMax(fmt::format("{}.max", name));
auto start = std::chrono::system_clock::now();
f();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now() - start)
.count();
total_time.Bump(duration);
count.Bump();
min.Add(duration);
max.Add(duration);
return duration;
}
/**
* Access internal metric list. You probably don't want to use this,
* but if you do, make sure to call ReleaseMetrics when you're done.
*/
std::map<std::string, std::unique_ptr<Metric>> &AccessMetrics();
/**
* Releases internal lock on metric list.
*/
void ReleaseMetrics();
} // namespace stats
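A hypothetical usage sketch of the metric helpers declared above. The ProcessBatch function and the metric names are illustrative, and it assumes the stats headers from this repository are on the include path.

#include "stats/metrics.hpp"

void ProcessBatch() {
  // Counters and gauges are created on first lookup and reused afterwards.
  auto &processed = stats::GetCounter("batches.processed");
  auto &queue_length = stats::GetGauge("batches.queue_length");

  // Stopwatch exports batches.work.{total_time,count,min,max}.
  stats::Stopwatch("batches.work", [] {
    // ... the timed work would go here ...
  });

  processed.Bump();      // increment by 1
  queue_length.Set(42);  // gauges are set, not bumped
}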

View File

@ -1,113 +0,0 @@
#include "stats/stats.hpp"
#include "glog/logging.h"
#include "communication/rpc/client.hpp"
#include "data_structures/concurrent/push_queue.hpp"
#include "utils/thread.hpp"
#include "stats/stats_rpc_messages.hpp"
DEFINE_HIDDEN_string(statsd_address, "", "Stats server IP address");
DEFINE_HIDDEN_int32(statsd_port, 2500, "Stats server port");
DEFINE_HIDDEN_int32(statsd_flush_interval, 500,
"Stats flush interval (in milliseconds)");
namespace stats {
std::string statsd_prefix = "";
std::thread stats_dispatch_thread;
std::thread counter_refresh_thread;
std::atomic<bool> stats_running{false};
ConcurrentPushQueue<StatsReq> stats_queue;
void RefreshMetrics() {
LOG(INFO) << "Metrics flush thread started";
utils::ThreadSetName("Stats refresh");
while (stats_running) {
auto &metrics = AccessMetrics();
for (auto &kv : metrics) {
auto value = kv.second->Flush();
if (value) {
LogStat(kv.first, *value);
}
}
ReleaseMetrics();
// TODO(mtomic): hardcoded sleep time
std::this_thread::sleep_for(std::chrono::seconds(1));
}
LOG(INFO) << "Metrics flush thread stopped";
}
void StatsDispatchMain(const io::network::Endpoint &endpoint) {
// TODO(mtomic): we probably want to batch based on request size and MTU
const int MAX_BATCH_SIZE = 100;
LOG(INFO) << "Stats dispatcher thread started";
utils::ThreadSetName("Stats dispatcher");
communication::rpc::Client client(endpoint);
BatchStatsReq batch_request;
batch_request.requests.reserve(MAX_BATCH_SIZE);
while (stats_running) {
auto last = stats_queue.begin();
size_t sent = 0, total = 0;
auto flush_batch = [&] {
if (client.Call<BatchStatsRpc>(batch_request)) {
sent += batch_request.requests.size();
}
total += batch_request.requests.size();
batch_request.requests.clear();
};
for (auto it = last; it != stats_queue.end(); it++) {
batch_request.requests.emplace_back(std::move(*it));
if (batch_request.requests.size() == MAX_BATCH_SIZE) {
flush_batch();
}
}
if (!batch_request.requests.empty()) {
flush_batch();
}
VLOG(30) << fmt::format("Sent {} out of {} events from queue.", sent,
total);
last.delete_tail();
std::this_thread::sleep_for(
std::chrono::milliseconds(FLAGS_statsd_flush_interval));
}
}
void LogStat(const std::string &metric_path, double value,
const std::vector<std::pair<std::string, std::string>> &tags) {
if (stats_running) {
stats_queue.push(statsd_prefix + metric_path, tags, value);
}
}
void InitStatsLogging(std::string prefix) {
if (!prefix.empty()) {
statsd_prefix = prefix + ".";
}
if (FLAGS_statsd_address != "") {
stats_running = true;
stats_dispatch_thread = std::thread(
StatsDispatchMain, io::network::Endpoint{FLAGS_statsd_address,
(uint16_t)FLAGS_statsd_port});
counter_refresh_thread = std::thread(RefreshMetrics);
}
}
void StopStatsLogging() {
if (stats_running) {
stats_running = false;
stats_dispatch_thread.join();
counter_refresh_thread.join();
}
}
} // namespace stats

View File

@ -1,33 +0,0 @@
/// @file
#pragma once
#include <thread>
#include <vector>
#include "gflags/gflags.h"
#include "stats/metrics.hpp"
namespace stats {
/**
* Start sending metrics to StatsD server.
*
* @param prefix prefix to prepend to exported keys
*/
void InitStatsLogging(std::string prefix = "");
/**
* Stop sending metrics to StatsD server. This should be called before exiting
* program.
*/
void StopStatsLogging();
/**
* Send a value to StatsD with current timestamp.
*/
void LogStat(const std::string &metric_path, double value,
const std::vector<std::pair<std::string, std::string>> &tags = {});
} // namespace stats
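A hypothetical end-to-end sketch of the logging entry points declared above. The prefix, metric path, and tag are made up; per the implementation, dispatching only starts when --statsd_address is set, otherwise LogStat is effectively a no-op.

#include "gflags/gflags.h"
#include "stats/stats.hpp"

int main(int argc, char **argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  stats::InitStatsLogging("memgraph.demo");  // prefix prepended to exported keys
  stats::LogStat("startup.events", 1.0, {{"host", "node-1"}});
  stats::StopStatsLogging();  // join dispatcher threads before exiting
  return 0;
}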

View File

@ -1,62 +0,0 @@
#pragma once
#include "boost/serialization/access.hpp"
#include "boost/serialization/base_object.hpp"
#include "boost/serialization/string.hpp"
#include "boost/serialization/utility.hpp"
#include "boost/serialization/vector.hpp"
#include "communication/rpc/messages.hpp"
#include "utils/timestamp.hpp"
namespace stats {
struct StatsReq : public communication::rpc::Message {
StatsReq() {}
StatsReq(std::string metric_path,
std::vector<std::pair<std::string, std::string>> tags, double value)
: metric_path(metric_path),
tags(tags),
value(value),
timestamp(utils::Timestamp::Now().SecSinceTheEpoch()) {}
std::string metric_path;
std::vector<std::pair<std::string, std::string>> tags;
double value;
uint64_t timestamp;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<communication::rpc::Message>(*this);
ar &metric_path &tags &value &timestamp;
}
};
RPC_NO_MEMBER_MESSAGE(StatsRes);
struct BatchStatsReq : public communication::rpc::Message {
BatchStatsReq() {}
explicit BatchStatsReq(std::vector<StatsReq> requests) : requests(requests) {}
std::vector<StatsReq> requests;
private:
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &boost::serialization::base_object<communication::rpc::Message>(*this);
ar &requests;
}
};
RPC_NO_MEMBER_MESSAGE(BatchStatsRes);
using StatsRpc = communication::rpc::RequestResponse<StatsReq, StatsRes>;
using BatchStatsRpc =
communication::rpc::RequestResponse<BatchStatsReq, BatchStatsRes>;
} // namespace stats

View File

@ -2,7 +2,6 @@
#include <cstdint>
#include "boost/serialization/access.hpp"
#include "glog/logging.h"
#include "storage/gid.hpp"
@ -91,11 +90,5 @@ class Address {
private:
StorageT storage_{0};
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, unsigned int) {
ar &storage_;
}
};
} // namespace storage

Some files were not shown because too many files have changed in this diff