2019-04-23 17:00:49 +08:00
|
|
|
#include <filesystem>
|
2018-02-05 23:28:22 +08:00
|
|
|
#include <memory>
|
2018-02-23 00:24:28 +08:00
|
|
|
#include <thread>
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-06-12 17:29:22 +08:00
|
|
|
#include <gflags/gflags.h>
|
2018-02-05 16:48:45 +08:00
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
2019-02-14 15:36:40 +08:00
|
|
|
#include "database/distributed/graph_db.hpp"
|
2018-10-05 18:37:23 +08:00
|
|
|
#include "database/distributed/graph_db_accessor.hpp"
|
2018-05-15 23:38:47 +08:00
|
|
|
#include "distributed/data_manager.hpp"
|
2018-03-23 22:21:46 +08:00
|
|
|
#include "distributed/updates_rpc_server.hpp"
|
2018-10-04 21:23:07 +08:00
|
|
|
#include "storage/distributed/address_types.hpp"
|
2018-09-05 02:30:58 +08:00
|
|
|
#include "transactions/distributed/engine_master.hpp"
|
2018-09-03 19:00:59 +08:00
|
|
|
#include "utils/file.hpp"
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-06-12 17:29:22 +08:00
|
|
|
DECLARE_string(durability_directory);
|
|
|
|
|
2019-04-23 17:00:49 +08:00
|
|
|
namespace fs = std::filesystem;
|
2018-04-06 15:59:54 +08:00
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
class WorkerInThread {
|
|
|
|
public:
|
|
|
|
explicit WorkerInThread(database::Config config) : worker_(config) {
|
2018-10-16 16:58:41 +08:00
|
|
|
thread_ = std::thread([this, config] {
|
|
|
|
worker_.Start();
|
|
|
|
EXPECT_TRUE(worker_.AwaitShutdown());
|
|
|
|
});
|
2018-05-15 23:38:47 +08:00
|
|
|
}
|
2018-02-05 23:28:22 +08:00
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
~WorkerInThread() {
|
|
|
|
if (thread_.joinable()) thread_.join();
|
|
|
|
}
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
database::Worker *db() { return &worker_; }
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
database::Worker worker_;
|
|
|
|
std::thread thread_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class DistributedGraphDbTest : public ::testing::Test {
|
2018-09-03 19:00:59 +08:00
|
|
|
public:
|
2018-05-15 23:38:47 +08:00
|
|
|
const std::string kLocal = "127.0.0.1";
|
|
|
|
const int kWorkerCount = 2;
|
2018-02-05 16:48:45 +08:00
|
|
|
|
|
|
|
protected:
|
2018-03-08 23:57:15 +08:00
|
|
|
virtual int QueryExecutionTimeSec(int) { return 180; }
|
|
|
|
|
2018-04-06 15:59:54 +08:00
|
|
|
void Initialize(
|
|
|
|
std::function<database::Config(database::Config config)> modify_config) {
|
2018-06-15 03:15:01 +08:00
|
|
|
using namespace std::literals::chrono_literals;
|
2018-02-05 16:48:45 +08:00
|
|
|
const auto kInitTime = 200ms;
|
|
|
|
|
|
|
|
database::Config master_config;
|
|
|
|
master_config.master_endpoint = {kLocal, 0};
|
2018-03-08 23:57:15 +08:00
|
|
|
master_config.query_execution_time_sec = QueryExecutionTimeSec(0);
|
2018-09-03 19:00:59 +08:00
|
|
|
master_config.durability_directory = GetDurabilityDirectory(0);
|
2019-03-12 22:26:43 +08:00
|
|
|
master_config.vertex_cache_size = 1;
|
|
|
|
master_config.edge_cache_size = 1;
|
2018-09-03 19:00:59 +08:00
|
|
|
// Flag needs to be updated due to props on disk storage.
|
|
|
|
FLAGS_durability_directory = GetDurabilityDirectory(0);
|
2018-05-09 19:26:22 +08:00
|
|
|
// This is semantically wrong since this is not a cluster of size 1 but of
|
|
|
|
// size kWorkerCount+1, but it's hard to wait here for workers to recover
|
2018-08-28 22:28:35 +08:00
|
|
|
// and simultaneously assign the port to which the workers must connect.
|
|
|
|
// TODO (buda): Fix sometime in the future - not mission critical.
|
2018-05-09 19:26:22 +08:00
|
|
|
master_config.recovering_cluster_size = 1;
|
2018-04-06 15:59:54 +08:00
|
|
|
master_ = std::make_unique<database::Master>(modify_config(master_config));
|
2018-10-16 16:58:41 +08:00
|
|
|
master_->Start();
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-05-09 19:26:22 +08:00
|
|
|
std::this_thread::sleep_for(kInitTime);
|
2018-02-05 16:48:45 +08:00
|
|
|
auto worker_config = [this](int worker_id) {
|
|
|
|
database::Config config;
|
|
|
|
config.worker_id = worker_id;
|
2019-03-12 22:26:43 +08:00
|
|
|
config.vertex_cache_size = 1;
|
|
|
|
config.edge_cache_size = 1;
|
2018-02-05 16:48:45 +08:00
|
|
|
config.master_endpoint = master_->endpoint();
|
2018-09-03 19:00:59 +08:00
|
|
|
config.durability_directory = GetDurabilityDirectory(worker_id);
|
2018-02-05 16:48:45 +08:00
|
|
|
config.worker_endpoint = {kLocal, 0};
|
2018-03-08 23:57:15 +08:00
|
|
|
config.query_execution_time_sec = QueryExecutionTimeSec(worker_id);
|
2018-02-05 16:48:45 +08:00
|
|
|
return config;
|
|
|
|
};
|
|
|
|
|
2018-02-05 23:28:22 +08:00
|
|
|
for (int i = 0; i < kWorkerCount; ++i) {
|
2018-09-03 19:00:59 +08:00
|
|
|
// Flag needs to be updated due to props on disk storage.
|
|
|
|
FLAGS_durability_directory = GetDurabilityDirectory(i + 1);
|
2018-04-06 15:59:54 +08:00
|
|
|
workers_.emplace_back(std::make_unique<WorkerInThread>(
|
|
|
|
modify_config(worker_config(i + 1))));
|
2018-02-05 23:28:22 +08:00
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
2018-11-06 22:48:18 +08:00
|
|
|
|
|
|
|
// Wait for the whole cluster to be up and running.
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
while (master_->GetWorkerIds().size() < kWorkerCount + 1) {
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
|
|
|
for (int i = 0; i < kWorkerCount; ++i) {
|
|
|
|
while (workers_[i]->worker_.GetWorkerIds().size() < kWorkerCount + 1) {
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
2018-02-05 16:48:45 +08:00
|
|
|
}
|
|
|
|
|
2018-04-06 15:59:54 +08:00
|
|
|
void SetUp() override {
|
|
|
|
Initialize([](database::Config config) { return config; });
|
|
|
|
}
|
|
|
|
|
|
|
|
void ShutDown() {
|
2018-09-27 21:07:46 +08:00
|
|
|
// Shutdown the master. It will send a shutdown signal to the workers.
|
|
|
|
master_->Shutdown();
|
|
|
|
EXPECT_TRUE(master_->AwaitShutdown());
|
|
|
|
// Wait for all workers to finish shutting down.
|
2018-04-06 15:59:54 +08:00
|
|
|
workers_.clear();
|
2018-02-05 16:48:45 +08:00
|
|
|
}
|
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
fs::path GetDurabilityDirectory(int worker_id) {
|
|
|
|
if (worker_id == 0) return tmp_dir_ / "master";
|
|
|
|
return tmp_dir_ / fmt::format("worker{}", worker_id);
|
|
|
|
}
|
|
|
|
|
2018-04-06 15:59:54 +08:00
|
|
|
void CleanDurability() {
|
|
|
|
if (fs::exists(tmp_dir_)) fs::remove_all(tmp_dir_);
|
|
|
|
}
|
|
|
|
|
|
|
|
void TearDown() override {
|
|
|
|
ShutDown();
|
|
|
|
CleanDurability();
|
|
|
|
}
|
|
|
|
|
2018-02-05 16:48:45 +08:00
|
|
|
database::Master &master() { return *master_; }
|
2018-02-05 23:28:22 +08:00
|
|
|
|
|
|
|
database::Worker &worker(int worker_id) {
|
|
|
|
return workers_[worker_id - 1]->worker_;
|
|
|
|
}
|
2018-02-05 16:48:45 +08:00
|
|
|
|
2018-02-14 16:44:48 +08:00
|
|
|
/// Inserts a vertex and returns it's global address. Does it in a new
|
|
|
|
/// transaction.
|
2018-02-20 21:48:36 +08:00
|
|
|
storage::VertexAddress InsertVertex(database::GraphDb &db) {
|
2018-07-26 15:08:21 +08:00
|
|
|
auto dba = db.Access();
|
|
|
|
auto r_val = dba->InsertVertex().GlobalAddress();
|
|
|
|
dba->Commit();
|
2018-02-14 16:44:48 +08:00
|
|
|
return r_val;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Inserts an edge (on the 'from' side) and returns it's global address.
|
2018-02-28 21:30:21 +08:00
|
|
|
auto InsertEdge(storage::VertexAddress from_addr,
|
|
|
|
storage::VertexAddress to_addr,
|
2018-02-23 00:07:35 +08:00
|
|
|
const std::string &edge_type_name) {
|
2018-02-28 21:30:21 +08:00
|
|
|
CHECK(from_addr.is_remote() && to_addr.is_remote())
|
2018-02-23 00:07:35 +08:00
|
|
|
<< "Distributed test InsertEdge only takes global addresses";
|
2018-07-26 15:08:21 +08:00
|
|
|
auto dba = master().Access();
|
|
|
|
VertexAccessor from{from_addr, *dba};
|
|
|
|
VertexAccessor to{to_addr, *dba};
|
2018-08-28 22:28:35 +08:00
|
|
|
auto r_val = dba->InsertEdge(from, to, dba->EdgeType(edge_type_name))
|
|
|
|
.GlobalAddress();
|
2018-07-26 15:08:21 +08:00
|
|
|
master().updates_server().Apply(dba->transaction_id());
|
|
|
|
worker(1).updates_server().Apply(dba->transaction_id());
|
|
|
|
worker(2).updates_server().Apply(dba->transaction_id());
|
|
|
|
dba->Commit();
|
2018-02-28 21:30:21 +08:00
|
|
|
return r_val;
|
2018-02-14 16:44:48 +08:00
|
|
|
}
|
|
|
|
|
2018-02-23 00:20:17 +08:00
|
|
|
auto VertexCount(database::GraphDb &db) {
|
2018-07-26 15:08:21 +08:00
|
|
|
auto dba = db.Access();
|
|
|
|
auto vertices = dba->Vertices(false);
|
2018-02-23 00:20:17 +08:00
|
|
|
return std::distance(vertices.begin(), vertices.end());
|
|
|
|
};
|
|
|
|
|
2018-02-28 21:30:21 +08:00
|
|
|
auto EdgeCount(database::GraphDb &db) {
|
2018-07-26 15:08:21 +08:00
|
|
|
auto dba = db.Access();
|
|
|
|
auto edges = dba->Edges(false);
|
2018-02-28 21:30:21 +08:00
|
|
|
return std::distance(edges.begin(), edges.end());
|
|
|
|
};
|
|
|
|
|
2018-07-05 16:55:00 +08:00
|
|
|
fs::path tmp_dir_{fs::temp_directory_path() / "MG_test_unit_durability"};
|
|
|
|
|
|
|
|
public:
|
|
|
|
// Each test has to specify its own durability suffix to avoid conflicts
|
|
|
|
DistributedGraphDbTest() = delete;
|
|
|
|
|
2018-09-07 21:59:10 +08:00
|
|
|
explicit DistributedGraphDbTest(const std::string &dir_suffix)
|
2018-07-05 16:55:00 +08:00
|
|
|
: dir_suffix_(dir_suffix) {
|
|
|
|
tmp_dir_ =
|
|
|
|
fs::temp_directory_path() / ("MG_test_unit_durability_" + dir_suffix_);
|
|
|
|
}
|
2018-04-16 16:43:16 +08:00
|
|
|
|
2018-02-05 16:48:45 +08:00
|
|
|
private:
|
2018-07-05 16:55:00 +08:00
|
|
|
std::string dir_suffix_{""};
|
2018-02-05 23:28:22 +08:00
|
|
|
std::unique_ptr<database::Master> master_;
|
|
|
|
std::vector<std::unique_ptr<WorkerInThread>> workers_;
|
2018-02-05 16:48:45 +08:00
|
|
|
};
|
2018-05-15 23:38:47 +08:00
|
|
|
|
|
|
|
class Cluster {
|
|
|
|
public:
|
2018-09-03 19:00:59 +08:00
|
|
|
Cluster(int num_workers, const std::string &test_name) {
|
2018-06-15 03:15:01 +08:00
|
|
|
using namespace std::literals::chrono_literals;
|
2018-09-03 19:00:59 +08:00
|
|
|
tmp_dir_ = fs::temp_directory_path() / "MG_test_unit_distributed_common_" /
|
|
|
|
test_name;
|
|
|
|
EXPECT_TRUE(utils::EnsureDir(tmp_dir_));
|
|
|
|
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
database::Config master_config;
|
|
|
|
master_config.master_endpoint = {kLocal, 0};
|
2018-09-03 19:00:59 +08:00
|
|
|
master_config.durability_directory = GetDurabilityDirectory(0);
|
|
|
|
// Flag needs to be updated due to props on disk storage.
|
|
|
|
FLAGS_durability_directory = GetDurabilityDirectory(0);
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
|
2018-10-16 16:58:41 +08:00
|
|
|
master_ = std::make_unique<database::Master>(master_config);
|
|
|
|
master_->Start();
|
|
|
|
auto master_endpoint = master_->endpoint();
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
|
|
|
|
const auto kInitTime = 200ms;
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
|
|
|
|
auto worker_config = [this, master_endpoint](int worker_id) {
|
|
|
|
database::Config config;
|
|
|
|
config.worker_id = worker_id;
|
|
|
|
config.master_endpoint = master_endpoint;
|
|
|
|
config.worker_endpoint = {kLocal, 0};
|
2018-09-03 19:00:59 +08:00
|
|
|
config.durability_directory = GetDurabilityDirectory(worker_id);
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
return config;
|
|
|
|
};
|
|
|
|
|
|
|
|
for (int i = 0; i < num_workers; ++i) {
|
2018-09-03 19:00:59 +08:00
|
|
|
// Flag needs to be updated due to props on disk storage.
|
|
|
|
FLAGS_durability_directory = GetDurabilityDirectory(i + 1);
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
workers_.emplace_back(
|
|
|
|
std::make_unique<WorkerInThread>(worker_config(i + 1)));
|
2018-11-06 22:48:18 +08:00
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for the whole cluster to be up and running.
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
while (master_->GetWorkerIds().size() < num_workers + 1) {
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
|
|
|
for (int i = 0; i < num_workers; ++i) {
|
|
|
|
while (workers_[i]->worker_.GetWorkerIds().size() < num_workers + 1) {
|
|
|
|
std::this_thread::sleep_for(kInitTime);
|
|
|
|
}
|
2018-05-15 23:38:47 +08:00
|
|
|
}
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
std::this_thread::sleep_for(kInitTime);
|
2018-05-15 23:38:47 +08:00
|
|
|
}
|
|
|
|
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementation classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, others are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
Cluster(const Cluster &) = delete;
|
|
|
|
Cluster(Cluster &&) = delete;
|
|
|
|
Cluster &operator=(const Cluster &) = delete;
|
|
|
|
Cluster &operator=(Cluster &&) = delete;
|
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
/// Shuts the master down, destroys the workers and removes the cluster's
/// temporary directory.
~Cluster() {
  master_->Shutdown();
  EXPECT_TRUE(master_->AwaitShutdown());
  // Destroying each WorkerInThread joins its worker thread.
  workers_.clear();
  // Use the non-throwing overload: an exception escaping a destructor would
  // terminate the whole test binary. Removing a path that does not exist is
  // not an error, so no separate existence check is needed.
  std::error_code cleanup_error;
  fs::remove_all(tmp_dir_, cleanup_error);
  EXPECT_FALSE(static_cast<bool>(cleanup_error))
      << "Failed to remove the cluster's temporary directory: "
      << cleanup_error.message();
}
|
|
|
|
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementation classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, others are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
/// Non-owning pointer to the cluster's master; ownership stays with `master_`.
auto *master() {
  return master_.get();
}
|
2018-05-15 23:38:47 +08:00
|
|
|
auto workers() {
|
|
|
|
return iter::imap([](auto &worker) { return worker->db(); }, workers_);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Clears the data-manager cache of transaction `tx_id` on the master and
/// then on every worker.
void ClearCache(tx::TransactionId tx_id) {
  master()->data_manager().ClearCacheForSingleTransaction(tx_id);
  for (database::Worker *worker_db : workers()) {
    worker_db->data_manager().ClearCacheForSingleTransaction(tx_id);
  }
}
|
|
|
|
|
|
|
|
/// Applies the pending updates of transaction `tx_id` on the master and on
/// every worker, then clears that transaction's caches cluster-wide.
void ApplyUpdates(tx::TransactionId tx_id) {
  master()->updates_server().Apply(tx_id);
  for (database::Worker *worker_db : workers()) {
    worker_db->updates_server().Apply(tx_id);
  }
  ClearCache(tx_id);
}
|
|
|
|
|
|
|
|
// Runs the advance-command sequence for transaction tx_id.
// Calls, in order: ApplyUpdates(tx_id); Advance(tx_id) on the transaction
// engine of master(); UpdateCommand(tx_id) on the transaction engine of every
// entry in workers(); ClearCache(tx_id).
// NOTE(review): the ordering looks deliberate (updates applied before the
// advance, caches cleared last) -- confirm before reordering any of the calls.
void AdvanceCommand(tx::TransactionId tx_id) {
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
ApplyUpdates(tx_id);
|
|
|
|
master()->tx_engine().Advance(tx_id);
|
|
|
|
for (auto worker : workers()) worker->tx_engine().UpdateCommand(tx_id);
|
|
|
|
ClearCache(tx_id);
|
2018-05-15 23:38:47 +08:00
|
|
|
}
|
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
// Builds the per-instance directory path under tmp_dir_: the subdirectory is
// "master" when worker_id is 0 and "worker<worker_id>" for any other id.
fs::path GetDurabilityDirectory(int worker_id) {
  const bool is_master = (worker_id == 0);
  const std::string dir_name =
      is_master ? std::string("master") : fmt::format("worker{}", worker_id);
  return tmp_dir_ / dir_name;
}
|
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
private:
|
|
|
|
const std::string kLocal = "127.0.0.1";
|
|
|
|
|
2018-09-03 19:00:59 +08:00
|
|
|
fs::path tmp_dir_{fs::temp_directory_path() /
|
|
|
|
"MG_test_unit_distributed_common"};
|
|
|
|
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementation classes are just plain data stores,
without any methods. The interface classes now have a flatter
hierarchy:
```
        GraphDb (pure interface)
             |
        +----+---------- DistributedGraphDb (pure interface)
        |                     |
   Single Node          +-----+------+
                        |            |
                      Master       Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, others are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
std::unique_ptr<database::Master> master_;
|
2018-05-15 23:38:47 +08:00
|
|
|
std::vector<std::unique_ptr<WorkerInThread>> workers_;
|
|
|
|
};
|