memgraph/tests/unit/transaction_engine_distributed.cpp
Matej Ferencevic 53c405c699 Throw exceptions on RPC failure and Distributed error handling
Summary:
This diff changes the RPC layer to directly return `TResponse` to the user when
issuing a `Call<...>` RPC call. The call throws an exception on failure
(instead of the previous return `nullopt`).

All servers (network, RPC and distributed) are set to have explicit `Shutdown`
methods so that a controlled shutdown can always be performed. The object
destructors now have `CHECK`s to enforce that the `AwaitShutdown` methods were
called.

The distributed memgraph is changed that none of the binaries (master/workers)
crash when there is a communication failure. Instead, the whole cluster starts
a graceful shutdown when a persistent communication error is detected.
Transient errors are allowed during execution. The transaction that errored out
will be aborted on the whole cluster. The cluster state is managed using a new
Heartbeat RPC call.

Reviewers: buda, teon.banek, msantl

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1604
2018-09-27 16:27:40 +02:00

159 lines
4.4 KiB
C++

#include <algorithm>
#include <mutex>
#include <unordered_set>
#include <vector>
#include "gtest/gtest.h"
#include "communication/rpc/server.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/coordination_master.hpp"
#include "io/network/endpoint.hpp"
#include "transactions/distributed/engine_master.hpp"
#include "transactions/distributed/engine_worker.hpp"
using namespace tx;
using namespace communication::rpc;
using namespace distributed;
class WorkerEngineTest : public testing::Test {
protected:
const std::string local{"127.0.0.1"};
void TearDown() override {
// First we shutdown the master.
master_coordination_.Shutdown();
EXPECT_TRUE(master_coordination_.AwaitShutdown());
// Shutdown the RPC servers.
master_server_.Shutdown();
master_server_.AwaitShutdown();
worker_server_.Shutdown();
worker_server_.AwaitShutdown();
}
Server master_server_{{local, 0}};
Server worker_server_{{local, 0}};
MasterCoordination master_coordination_{master_server_.endpoint()};
EngineMaster master_{&master_server_, &master_coordination_};
ClientPool master_client_pool{master_server_.endpoint()};
EngineWorker worker_{&worker_server_, &master_client_pool};
};
TEST_F(WorkerEngineTest, BeginOnWorker) {
worker_.Begin();
auto second = worker_.Begin();
EXPECT_EQ(master_.RunningTransaction(second->id_)->snapshot().size(), 1);
}
TEST_F(WorkerEngineTest, AdvanceOnWorker) {
auto tx = worker_.Begin();
auto cid = tx->cid();
EXPECT_EQ(worker_.Advance(tx->id_), cid + 1);
}
TEST_F(WorkerEngineTest, CommitOnWorker) {
auto tx = worker_.Begin();
auto tx_id = tx->id_;
worker_.Commit(*tx);
EXPECT_TRUE(master_.Info(tx_id).is_committed());
}
TEST_F(WorkerEngineTest, AbortOnWorker) {
auto tx = worker_.Begin();
auto tx_id = tx->id_;
worker_.Abort(*tx);
EXPECT_TRUE(master_.Info(tx_id).is_aborted());
}
TEST_F(WorkerEngineTest, RunningTransaction) {
master_.Begin();
master_.Begin();
worker_.RunningTransaction(1);
worker_.RunningTransaction(2);
int count = 0;
worker_.LocalForEachActiveTransaction([&count](Transaction &t) {
++count;
if (t.id_ == 1) {
EXPECT_EQ(t.snapshot(), tx::Snapshot(std::vector<tx::TransactionId>{}));
} else {
EXPECT_EQ(t.snapshot(), tx::Snapshot({1}));
}
});
EXPECT_EQ(count, 2);
}
TEST_F(WorkerEngineTest, Info) {
auto *tx_1 = master_.Begin();
auto *tx_2 = master_.Begin();
// We can't check active transactions in the worker (see comments there for
// info).
master_.Commit(*tx_1);
EXPECT_TRUE(master_.Info(1).is_committed());
EXPECT_TRUE(worker_.Info(1).is_committed());
master_.Abort(*tx_2);
EXPECT_TRUE(master_.Info(2).is_aborted());
EXPECT_TRUE(worker_.Info(2).is_aborted());
}
TEST_F(WorkerEngineTest, GlobalGcSnapshot) {
auto *tx_1 = master_.Begin();
master_.Begin();
master_.Commit(*tx_1);
EXPECT_EQ(master_.GlobalGcSnapshot(), tx::Snapshot({1, 2}));
EXPECT_EQ(worker_.GlobalGcSnapshot(), master_.GlobalGcSnapshot());
}
TEST_F(WorkerEngineTest, GlobalActiveTransactions) {
auto *tx_1 = master_.Begin();
master_.Begin();
auto *tx_3 = master_.Begin();
master_.Begin();
master_.Commit(*tx_1);
master_.Abort(*tx_3);
EXPECT_EQ(worker_.GlobalActiveTransactions(), tx::Snapshot({2, 4}));
}
TEST_F(WorkerEngineTest, LocalLast) {
master_.Begin();
EXPECT_EQ(worker_.LocalLast(), 0);
worker_.RunningTransaction(1);
EXPECT_EQ(worker_.LocalLast(), 1);
master_.Begin();
EXPECT_EQ(worker_.LocalLast(), 1);
master_.Begin();
EXPECT_EQ(worker_.LocalLast(), 1);
master_.Begin();
worker_.RunningTransaction(4);
EXPECT_EQ(worker_.LocalLast(), 4);
}
TEST_F(WorkerEngineTest, LocalForEachActiveTransaction) {
master_.Begin();
worker_.RunningTransaction(1);
master_.Begin();
master_.Begin();
master_.Begin();
worker_.RunningTransaction(4);
std::unordered_set<tx::TransactionId> local;
worker_.LocalForEachActiveTransaction(
[&local](Transaction &t) { local.insert(t.id_); });
EXPECT_EQ(local, std::unordered_set<tx::TransactionId>({1, 4}));
}
TEST_F(WorkerEngineTest, EnsureTxIdGreater) {
ASSERT_LE(master_.Begin()->id_, 40);
worker_.EnsureNextIdGreater(42);
EXPECT_EQ(master_.Begin()->id_, 43);
EXPECT_EQ(worker_.Begin()->id_, 44);
}
TEST_F(WorkerEngineTest, GlobalNext) {
auto tx = master_.Begin();
EXPECT_NE(worker_.LocalLast(), worker_.GlobalLast());
EXPECT_EQ(master_.LocalLast(), worker_.GlobalLast());
EXPECT_EQ(worker_.GlobalLast(), tx->id_);
}