memgraph/tests/manual/distributed_common.hpp
Matej Ferencevic 53c405c699 Throw exceptions on RPC failure and Distributed error handling
Summary:
This diff changes the RPC layer to directly return `TResponse` to the user when
issuing a `Call<...>` RPC call. The call throws an exception on failure
(instead of the previous return `nullopt`).

All servers (network, RPC and distributed) are set to have explicit `Shutdown`
methods so that a controlled shutdown can always be performed. The object
destructors now have `CHECK`s to enforce that the `AwaitShutdown` methods were
called.

Distributed memgraph is changed so that none of the binaries (master/workers)
crashes when there is a communication failure. Instead, the whole cluster starts
a graceful shutdown when a persistent communication error is detected.
Transient errors are allowed during execution. The transaction that errored out
will be aborted on the whole cluster. The cluster state is managed using a new
Heartbeat RPC call.

Reviewers: buda, teon.banek, msantl

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1604
2018-09-27 16:27:40 +02:00

130 lines
4.1 KiB
C++

#pragma once
#include <chrono>
#include <experimental/filesystem>
#include <vector>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "communication/result_stream_faker.hpp"
#include "database/distributed_graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "glue/communication.hpp"
#include "query/distributed_interpreter.hpp"
#include "query/typed_value.hpp"
#include "utils/file.hpp"
DECLARE_string(durability_directory);
namespace fs = std::experimental::filesystem;
/// Owns a `database::Worker` together with a background thread that waits for
/// that worker to finish shutting down.
class WorkerInThread {
 public:
  /// Constructs the worker from `config` and starts a thread that blocks in
  /// `worker_.AwaitShutdown()`, expecting the call to report success.
  explicit WorkerInThread(database::Config config) : worker_(config) {
    // Capture only `this`: the thread body never reads `config`, so copying
    // it into the closure was wasted work (and an unused-capture warning).
    thread_ = std::thread([this] { EXPECT_TRUE(worker_.AwaitShutdown()); });
  }

  /// Joins the waiting thread, so destruction blocks until `AwaitShutdown()`
  /// has returned.
  // NOTE(review): nothing in this class requests the shutdown; presumably the
  // master propagates it to the workers -- confirm against the caller.
  ~WorkerInThread() {
    if (thread_.joinable()) thread_.join();
  }

  // Declared before `thread_` so the worker is fully constructed before the
  // thread that uses it is started.
  database::Worker worker_;
  std::thread thread_;
};
/// Test fixture that runs a distributed cluster inside one process: a single
/// `database::Master`, a `query::DistributedInterpreter` bound to it, and
/// `worker_count` workers, each wrapped in a `WorkerInThread`.
class Cluster {
  // Pause applied after creating the master and after creating each worker.
  // NOTE(review): presumably gives the servers time to start -- confirm.
  const std::chrono::microseconds kInitTime{200};
  const std::string kLocal = "127.0.0.1";

 public:
  /// Creates the master and then `worker_count` workers (ids 1..worker_count).
  ///
  /// @param worker_count number of workers to start.
  /// @param test_name    name of the per-test subdirectory created under the
  ///                     system temporary directory for durability data.
  Cluster(int worker_count, const std::string &test_name) {
    tmp_dir_ = fs::temp_directory_path() / "MG_test_unit_distributed_common_" /
               test_name;
    EXPECT_TRUE(utils::EnsureDir(tmp_dir_));

    database::Config master_config;
    // Port 0 is requested here; workers later read the actual endpoint via
    // `master_->endpoint()`.
    master_config.master_endpoint = {kLocal, 0};
    master_config.durability_directory = GetDurabilityDirectory(0);
    // Flag needs to be updated due to props on disk storage.
    FLAGS_durability_directory = GetDurabilityDirectory(0);

    master_ = std::make_unique<database::Master>(master_config);
    interpreter_ =
        std::make_unique<query::DistributedInterpreter>(master_.get());
    std::this_thread::sleep_for(kInitTime);

    // Builds the configuration for the worker with the given id.
    auto worker_config = [this](int worker_id) {
      database::Config config;
      config.worker_id = worker_id;
      config.master_endpoint = master_->endpoint();
      config.durability_directory = GetDurabilityDirectory(worker_id);
      config.worker_endpoint = {kLocal, 0};
      return config;
    };

    for (int i = 0; i < worker_count; ++i) {
      // Flag needs to be updated due to props on disk storage.
      FLAGS_durability_directory = GetDurabilityDirectory(i + 1);
      workers_.emplace_back(
          std::make_unique<WorkerInThread>(worker_config(i + 1)));
      std::this_thread::sleep_for(kInitTime);
    }
  }

  /// Shuts the cluster down: drops the interpreter, shuts down the master and
  /// waits for it, then destroys the workers (which joins their threads).
  ///
  /// Safe to call more than once; calls after the first are no-ops.
  void Stop() {
    if (!master_) return;
    interpreter_ = nullptr;
    master_->Shutdown();
    EXPECT_TRUE(master_->AwaitShutdown());
    workers_.clear();
    // Release the master so the destructor's `if (master_)` guard sees that
    // the cluster is already stopped. Previously `master_` stayed set and an
    // explicit `Stop()` was followed by a second shutdown in `~Cluster()`.
    master_ = nullptr;
  }

  /// Stops the cluster unless `Stop()` has already been called.
  ~Cluster() {
    if (master_) Stop();
  }

  /// Runs `query` with `params` through the interpreter on a fresh master
  /// accessor, pulls all results, commits, and returns the collected results.
  ///
  /// Must not be called after `Stop()`, which releases the interpreter.
  auto Execute(const std::string &query,
               std::map<std::string, PropertyValue> params = {}) {
    auto dba = master_->Access();
    ResultStreamFaker<query::TypedValue> result;
    (*interpreter_)(query, *dba, params, false).PullAll(result);
    dba->Commit();
    return result.GetResults();
  }

  /// Returns the durability directory for a cluster member: `master` for id
  /// 0, `worker<id>` otherwise, both under this cluster's temporary directory.
  fs::path GetDurabilityDirectory(int worker_id) {
    if (worker_id == 0) return tmp_dir_ / "master";
    return tmp_dir_ / fmt::format("worker{}", worker_id);
  }

 private:
  std::unique_ptr<database::Master> master_;
  std::vector<std::unique_ptr<WorkerInThread>> workers_;
  std::unique_ptr<query::DistributedInterpreter> interpreter_;
  // Default value only; the constructor always overwrites it.
  fs::path tmp_dir_{fs::temp_directory_path() /
                    "MG_test_manual_distributed_common"};
};
/// Verifies that `results` matches `expected_rows` row by row and value by
/// value, comparing values with `query::TypedValue::BoolEqual`.
///
/// Every mismatch (row count, row width, or an individual value) fails a
/// `CHECK` whose message starts with `msg`.
///
/// Declared `inline` because it is defined in a header: without it, including
/// this file from more than one translation unit produces duplicate
/// definitions of the function.
///
/// @param results       rows actually produced.
/// @param expected_rows rows the caller expects.
/// @param msg           prefix for the failure message.
inline void CheckResults(
    const std::vector<std::vector<query::TypedValue>> &results,
    const std::vector<std::vector<query::TypedValue>> &expected_rows,
    const std::string &msg) {
  query::TypedValue::BoolEqual equality;
  CHECK(results.size() == expected_rows.size())
      << msg << " (expected " << expected_rows.size() << " rows, got "
      << results.size() << ")";
  for (size_t row_id = 0; row_id < results.size(); ++row_id) {
    const auto &result = results[row_id];
    const auto &expected = expected_rows[row_id];
    CHECK(result.size() == expected.size())
        << msg << " (expected " << expected.size() << " elements in row "
        << row_id << ", got " << result.size() << ")";
    for (size_t col_id = 0; col_id < result.size(); ++col_id) {
      CHECK(equality(result[col_id], expected[col_id]))
          << msg << " (expected value '" << expected[col_id] << "' got '"
          << result[col_id] << "' in row " << row_id << " col " << col_id
          << ")";
    }
  }
}