2018-03-12 18:26:40 +08:00
|
|
|
#include <atomic>
|
2017-12-19 19:40:30 +08:00
|
|
|
#include <experimental/optional>
|
|
|
|
#include <memory>
|
|
|
|
#include <thread>
|
2017-12-22 21:36:25 +08:00
|
|
|
#include <unordered_set>
|
2017-12-19 19:40:30 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2018-01-22 23:59:40 +08:00
|
|
|
#include "gmock/gmock.h"
|
2017-12-19 19:40:30 +08:00
|
|
|
#include "gtest/gtest.h"
|
|
|
|
|
2018-04-03 22:19:17 +08:00
|
|
|
#include "communication/rpc/client_pool.hpp"
|
2018-01-24 19:16:14 +08:00
|
|
|
#include "communication/rpc/server.hpp"
|
2018-04-03 22:19:17 +08:00
|
|
|
#include "distributed/cluster_discovery_master.hpp"
|
|
|
|
#include "distributed/cluster_discovery_worker.hpp"
|
2017-12-19 19:40:30 +08:00
|
|
|
#include "distributed/coordination_master.hpp"
|
|
|
|
#include "distributed/coordination_worker.hpp"
|
2018-04-03 22:19:17 +08:00
|
|
|
#include "distributed/rpc_worker_clients.hpp"
|
2018-01-15 21:03:07 +08:00
|
|
|
#include "io/network/endpoint.hpp"
|
2018-09-03 19:00:59 +08:00
|
|
|
#include "utils/file.hpp"
|
2017-12-19 19:40:30 +08:00
|
|
|
|
2018-04-03 22:19:17 +08:00
|
|
|
using communication::rpc::ClientPool;
|
2018-04-11 22:05:49 +08:00
|
|
|
using communication::rpc::Server;
|
2017-12-19 19:40:30 +08:00
|
|
|
using namespace distributed;
|
2018-01-24 19:16:14 +08:00
|
|
|
using namespace std::literals::chrono_literals;
|
2017-12-19 19:40:30 +08:00
|
|
|
|
|
|
|
/// Number of workers started by the `Coordination` test.
const int kWorkerCount{5};
|
|
|
|
/// Address on which the master and worker RPC servers in this file are
/// created.
const std::string kLocal{"127.0.0.1"};
|
|
|
|
|
2018-03-12 18:26:40 +08:00
|
|
|
class WorkerCoordinationInThread {
|
2018-04-11 22:05:49 +08:00
|
|
|
struct Worker {
|
|
|
|
Worker(Endpoint master_endpoint) : master_endpoint(master_endpoint) {}
|
|
|
|
Endpoint master_endpoint;
|
|
|
|
Server server{{kLocal, 0}};
|
|
|
|
WorkerCoordination coord{server, master_endpoint};
|
|
|
|
ClientPool client_pool{master_endpoint};
|
|
|
|
ClusterDiscoveryWorker discovery{server, coord, client_pool};
|
|
|
|
std::atomic<int> worker_id_{0};
|
|
|
|
};
|
|
|
|
|
2017-12-19 19:40:30 +08:00
|
|
|
public:
|
2018-03-12 18:26:40 +08:00
|
|
|
WorkerCoordinationInThread(io::network::Endpoint master_endpoint,
|
2018-09-03 19:00:59 +08:00
|
|
|
fs::path durability_directory,
|
2018-04-11 22:05:49 +08:00
|
|
|
int desired_id = -1) {
|
2018-03-12 18:26:40 +08:00
|
|
|
std::atomic<bool> init_done{false};
|
2018-09-03 19:00:59 +08:00
|
|
|
worker_thread_ = std::thread(
|
|
|
|
[this, master_endpoint, durability_directory, desired_id, &init_done] {
|
2018-04-11 22:05:49 +08:00
|
|
|
worker.emplace(master_endpoint);
|
2018-09-03 19:00:59 +08:00
|
|
|
worker->discovery.RegisterWorker(desired_id, durability_directory);
|
2018-04-11 22:05:49 +08:00
|
|
|
worker->worker_id_ = desired_id;
|
2018-03-12 18:26:40 +08:00
|
|
|
init_done = true;
|
2018-04-11 22:05:49 +08:00
|
|
|
worker->coord.WaitForShutdown();
|
|
|
|
worker = std::experimental::nullopt;
|
2018-03-12 18:26:40 +08:00
|
|
|
});
|
|
|
|
|
|
|
|
while (!init_done) std::this_thread::sleep_for(10ms);
|
2017-12-19 19:40:30 +08:00
|
|
|
}
|
|
|
|
|
2018-04-11 22:05:49 +08:00
|
|
|
int worker_id() const { return worker->worker_id_; }
|
|
|
|
auto endpoint() const { return worker->server.endpoint(); }
|
|
|
|
auto worker_endpoint(int worker_id) {
|
|
|
|
return worker->coord.GetEndpoint(worker_id);
|
|
|
|
}
|
|
|
|
auto worker_ids() { return worker->coord.GetWorkerIds(); }
|
2017-12-19 19:40:30 +08:00
|
|
|
void join() { worker_thread_.join(); }
|
2018-07-17 17:03:03 +08:00
|
|
|
void NotifyWorkerRecovered() {
|
|
|
|
std::experimental::optional<durability::RecoveryInfo> no_recovery_info;
|
|
|
|
worker->discovery.NotifyWorkerRecovered(no_recovery_info);
|
|
|
|
}
|
2017-12-19 19:40:30 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
std::thread worker_thread_;
|
2018-04-11 22:05:49 +08:00
|
|
|
std::experimental::optional<Worker> worker;
|
2017-12-19 19:40:30 +08:00
|
|
|
};
|
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
class Distributed : public ::testing::Test {
|
|
|
|
public:
|
|
|
|
void SetUp() override { ASSERT_TRUE(utils::EnsureDir(tmp_dir_)); }
|
|
|
|
|
|
|
|
void TearDown() override {
|
|
|
|
if (fs::exists(tmp_dir_)) fs::remove_all(tmp_dir_);
|
|
|
|
}
|
|
|
|
|
|
|
|
const fs::path tmp_dir(const fs::path &path) const { return tmp_dir_ / path; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
fs::path tmp_dir_{fs::temp_directory_path() /
|
|
|
|
"MG_test_unit_distributed_coordination"};
|
|
|
|
};
|
|
|
|
|
|
|
|
// Starts kWorkerCount workers against one master and checks that every worker
// has a distinct id and that each worker resolves every worker's endpoint.
TEST_F(Distributed, Coordination) {
  Server master_server({kLocal, 0});
  std::vector<std::unique_ptr<WorkerCoordinationInThread>> workers;

  {
    // Master-side objects live only inside this scope.
    MasterCoordination master_coord(master_server.endpoint());
    master_coord.SetRecoveredSnapshot(std::experimental::nullopt);
    RpcWorkerClients rpc_worker_clients(master_coord);
    ClusterDiscoveryMaster master_discovery_(
        master_server, master_coord, rpc_worker_clients, tmp_dir("master"));

    for (int id = 1; id <= kWorkerCount; ++id) {
      workers.emplace_back(std::make_unique<WorkerCoordinationInThread>(
          master_server.endpoint(), tmp_dir(fmt::format("worker{}", id)), id));
    }

    // Every worker must report a different id.
    std::unordered_set<int> distinct_ids;
    for (const auto &worker : workers) {
      distinct_ids.insert(worker->worker_id());
    }
    ASSERT_EQ(distinct_ids.size(), kWorkerCount);

    // Each worker must know the endpoint of every worker.
    for (auto &observer : workers) {
      for (auto &target : workers) {
        EXPECT_EQ(observer->worker_endpoint(target->worker_id()),
                  target->endpoint());
      }
    }
  }

  // Coordinated shutdown.
  for (auto &worker : workers) {
    worker->join();
  }
}
|
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
// A worker gets the id it asks for, and registering a second worker with the
// same id is expected to kill the process.
TEST_F(Distributed, DesiredAndUniqueId) {
  Server master_server({kLocal, 0});
  std::vector<std::unique_ptr<WorkerCoordinationInThread>> workers;

  {
    MasterCoordination master_coord(master_server.endpoint());
    master_coord.SetRecoveredSnapshot(std::experimental::nullopt);
    RpcWorkerClients rpc_worker_clients(master_coord);
    ClusterDiscoveryMaster master_discovery_(
        master_server, master_coord, rpc_worker_clients, tmp_dir("master"));

    // The first registration with id 42 succeeds.
    workers.push_back(std::make_unique<WorkerCoordinationInThread>(
        master_server.endpoint(), tmp_dir("worker42"), 42));
    EXPECT_EQ(workers.front()->worker_id(), 42);

    // The second registration reuses id 42 and must die.
    EXPECT_DEATH(
        workers.push_back(std::make_unique<WorkerCoordinationInThread>(
            master_server.endpoint(), tmp_dir("worker42"), 42)),
        "");
  }

  for (auto &worker : workers) {
    worker->join();
  }
}
|
2018-01-22 23:59:40 +08:00
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
// The master's list of worker ids must contain id 0 plus the id of every
// registered worker.
TEST_F(Distributed, CoordinationWorkersId) {
  Server master_server({kLocal, 0});
  std::vector<std::unique_ptr<WorkerCoordinationInThread>> workers;

  {
    MasterCoordination master_coord(master_server.endpoint());
    master_coord.SetRecoveredSnapshot(std::experimental::nullopt);
    RpcWorkerClients rpc_worker_clients(master_coord);
    ClusterDiscoveryMaster master_discovery_(
        master_server, master_coord, rpc_worker_clients, tmp_dir("master"));

    workers.push_back(std::make_unique<WorkerCoordinationInThread>(
        master_server.endpoint(), tmp_dir("worker42"), 42));
    workers.push_back(std::make_unique<WorkerCoordinationInThread>(
        master_server.endpoint(), tmp_dir("worker43"), 43));

    // NOTE(review): id 0 is presumably the master itself — confirm against
    // MasterCoordination.
    std::vector<int> expected_ids{0};
    for (auto &worker : workers) {
      expected_ids.push_back(worker->worker_id());
    }

    EXPECT_THAT(master_coord.GetWorkerIds(),
                testing::UnorderedElementsAreArray(expected_ids));
  }

  for (auto &worker : workers) {
    worker->join();
  }
}
|
2018-04-03 22:19:17 +08:00
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
// Registers ten workers and checks that the master and every worker end up
// with the same set of ids: 0 plus the ids 1..10 of the registered workers.
TEST_F(Distributed, ClusterDiscovery) {
  Server master_server({kLocal, 0});
  std::vector<std::unique_ptr<WorkerCoordinationInThread>> workers;
  {
    MasterCoordination master_coord(master_server.endpoint());
    master_coord.SetRecoveredSnapshot(std::experimental::nullopt);
    RpcWorkerClients rpc_worker_clients(master_coord);
    ClusterDiscoveryMaster master_discovery_(
        master_server, master_coord, rpc_worker_clients, tmp_dir("master"));

    const int worker_count = 10;

    // NOTE(review): id 0 is presumably the master itself — confirm against
    // MasterCoordination.
    std::vector<int> ids;
    ids.push_back(0);
    for (int i = 1; i <= worker_count; ++i) {
      // The format string needs the "{}" placeholder: without it fmt ignores
      // `i` and every worker is given the same durability directory "worker".
      workers.emplace_back(std::make_unique<WorkerCoordinationInThread>(
          master_server.endpoint(), tmp_dir(fmt::format("worker{}", i)), i));
      ids.push_back(i);
    }

    // The master knows about every registered worker ...
    EXPECT_THAT(master_coord.GetWorkerIds(),
                testing::UnorderedElementsAreArray(ids));
    // ... and so does each individual worker.
    for (auto &worker : workers) {
      EXPECT_THAT(worker->worker_ids(),
                  testing::UnorderedElementsAreArray(ids));
    }
  }

  for (auto &worker : workers) worker->join();
}
|
2018-04-11 22:05:49 +08:00
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
// After each worker reports that it has recovered, the master's count of
// recovered workers must equal the number of workers started so far.
TEST_F(Distributed, KeepsTrackOfRecovered) {
  Server master_server({kLocal, 0});
  std::vector<std::unique_ptr<WorkerCoordinationInThread>> workers;

  {
    MasterCoordination master_coord(master_server.endpoint());
    master_coord.SetRecoveredSnapshot(std::experimental::nullopt);
    RpcWorkerClients rpc_worker_clients(master_coord);
    ClusterDiscoveryMaster master_discovery_(
        master_server, master_coord, rpc_worker_clients, tmp_dir("master"));

    int worker_count = 10;
    int i = 1;
    while (i <= worker_count) {
      workers.push_back(std::make_unique<WorkerCoordinationInThread>(
          master_server.endpoint(), tmp_dir(fmt::format("worker{}", i)), i));
      auto &newest_worker = workers.back();
      newest_worker->NotifyWorkerRecovered();
      EXPECT_THAT(master_coord.CountRecoveredWorkers(), i);
      ++i;
    }
  }

  for (auto &worker : workers) {
    worker->join();
  }
}
|
|
|
|
|
2018-04-11 22:05:49 +08:00
|
|
|
int main(int argc, char **argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
::testing::FLAGS_gtest_death_test_style = "threadsafe";
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|