memgraph/tests/manual/distributed_common.hpp

#pragma once

#include <chrono>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include "communication/result_stream_faker.hpp"
#include "database/distributed_graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "glue/communication.hpp"
#include "query/distributed_interpreter.hpp"
#include "query/typed_value.hpp"

/// Owns a `database::Worker` together with a thread that sits in
/// `Worker::WaitForShutdown()` for as long as the worker is running.
class WorkerInThread {
 public:
  /// Constructs the worker from `config` and starts the thread that waits for
  /// its shutdown.
  explicit WorkerInThread(database::Config config) : worker_(config) {
    // Only `this` is captured: the thread touches nothing but `worker_`, so
    // copying `config` into the closure would be wasted work.
    thread_ = std::thread([this] { worker_.WaitForShutdown(); });
  }

  /// Joins the waiting thread, so destruction blocks until
  /// `WaitForShutdown()` has returned.
  ~WorkerInThread() {
    if (thread_.joinable()) thread_.join();
  }

  database::Worker worker_;
  // Declared after `worker_` so that the worker is still alive whenever the
  // thread is running (members are destroyed in reverse declaration order).
  std::thread thread_;
};

/// In-process distributed cluster for tests: one `database::Master`, a
/// `query::DistributedInterpreter` bound to it and `worker_count` workers,
/// each wrapped in a `WorkerInThread`. Everything is addressed via localhost.
class Cluster {
  // Pause taken after the master is created and after each worker is started.
  const std::chrono::microseconds kInitTime{200};
  // Address used for the master and for every worker endpoint. It is always
  // paired with port 0 (presumably so that a free port gets picked -- confirm
  // against the endpoint implementation).
  const std::string kLocal = "127.0.0.1";

 public:
  /// Starts the master, then `worker_count` workers with ids
  /// 1..`worker_count`, sleeping `kInitTime` after each start.
  Cluster(int worker_count) {
    database::Config masterconfig;
    masterconfig.master_endpoint = {kLocal, 0};
    master_ = std::make_unique<database::Master>(masterconfig);
    interpreter_ =
        std::make_unique<query::DistributedInterpreter>(master_.get());
    std::this_thread::sleep_for(kInitTime);

    // Builds the configuration of a single worker; the master endpoint is
    // read back from the running master instead of from `masterconfig`.
    auto worker_config = [this](int worker_id) {
      database::Config config;
      config.worker_id = worker_id;
      config.master_endpoint = master_->endpoint();
      config.worker_endpoint = {kLocal, 0};
      return config;
    };

    for (int i = 0; i < worker_count; ++i) {
      // Worker ids start at 1.
      workers_.emplace_back(
          std::make_unique<WorkerInThread>(worker_config(i + 1)));
      std::this_thread::sleep_for(kInitTime);
    }
  }

  /// Tears the cluster down: interpreter first, then master and workers.
  /// After this call `Execute` must not be used any more.
  void Stop() {
    interpreter_ = nullptr;
    // The master is destroyed on a helper thread while this thread destroys
    // the workers. Destroying a `WorkerInThread` blocks until its
    // `WaitForShutdown()` returns.
    // NOTE(review): presumably that only happens once the master is shutting
    // down, which is why the two must run concurrently -- confirm.
    auto t = std::thread([this]() { master_ = nullptr; });
    workers_.clear();
    if (t.joinable()) t.join();
  }

  /// Stops the cluster unless `Stop()` has already released the master.
  ~Cluster() {
    if (master_) Stop();
  }

  /// Runs `query` with `params` through the interpreter on a fresh master
  /// accessor, pulls all results, commits and returns the collected results.
  ///
  /// @throws std::logic_error if the cluster has already been stopped.
  auto Execute(const std::string &query,
               std::map<std::string, PropertyValue> params = {}) {
    // `Stop()` resets both pointers; fail loudly instead of dereferencing
    // null.
    if (!master_ || !interpreter_) {
      throw std::logic_error("Cluster::Execute called on a stopped cluster");
    }
    auto dba = master_->Access();
    ResultStreamFaker<query::TypedValue> result;
    // NOTE(review): the meaning of the trailing `false` flag is defined by
    // `DistributedInterpreter::operator()`, which is not visible here.
    (*interpreter_)(query, *dba, params, false).PullAll(result);
    dba->Commit();
    return result.GetResults();
  }

 private:
  std::unique_ptr<database::Master> master_;
  std::vector<std::unique_ptr<WorkerInThread>> workers_;
  std::unique_ptr<query::DistributedInterpreter> interpreter_;
};

/// Verifies that `results` matches `expected_rows` exactly: same number of
/// rows, same number of elements in every row and pairwise equal values as
/// judged by `query::TypedValue::BoolEqual`. Any mismatch fails a `CHECK`
/// whose message starts with `msg`.
///
/// Declared `inline` because this header defines the function; without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline void CheckResults(
    const std::vector<std::vector<query::TypedValue>> &results,
    const std::vector<std::vector<query::TypedValue>> &expected_rows,
    const std::string &msg) {
  query::TypedValue::BoolEqual equality;
  CHECK(results.size() == expected_rows.size())
      << msg << " (expected " << expected_rows.size() << " rows "
      << ", got " << results.size() << ")";
  for (size_t row_id = 0; row_id < results.size(); ++row_id) {
    const auto &result = results[row_id];
    const auto &expected = expected_rows[row_id];
    // Row widths are checked before the element loop, so indexing `expected`
    // with `col_id` below stays in bounds.
    CHECK(result.size() == expected.size())
        << msg << " (expected " << expected.size() << " elements in row "
        << row_id << ", got " << result.size() << ")";
    for (size_t col_id = 0; col_id < result.size(); ++col_id) {
      CHECK(equality(result[col_id], expected[col_id]))
          << msg << " (expected value '" << expected[col_id] << "' got '"
          << result[col_id] << "' in row " << row_id << " col " << col_id
          << ")";
    }
  }
}
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`#pragma once`

			`#include <chrono>`
			`#include <vector>`

			`#include "communication/result_stream_faker.hpp"`
Split GraphDb to distributed and single node files Summary: This change, hopefully, simplifies the implementation of different kinds of GraphDb. The pimpl idiom is now simplified by removing all of the crazy inheritance. Implementations classes are just plain data stores, without any methods. The interface classes now have a more flat hierarchy: ``` GraphDb (pure interface) \| +----+---------- DistributedGraphDb (pure interface) \| \| Single Node +-----+------+ \| \| Master Worker ``` DistributedGraphDb is used as an intermediate interface for all the things that should work only in distributed. Therefore, virtual calls for distributed stuff have been removed from GraphDb. Some are exposed via DistributedGraphDb, other's are only in concrete Master and Worker classes. The code which relied on those virtual calls has been refactored to either use DistributedGraphDb, take a pointer to what is actually needed or use dynamic_cast. Obviously, dynamic_cast is a temporary solution and should be replaced with another mechanism (e.g. virtual call, or some other function pointer style). The cost of the above change is some code duplication in constructors and destructors of classes. This duplication has a lot of little tweaks that make it hard to generalize, not to mention that virtual calls do not work in constructor and destructor. If we really care about generalizing this, we should think about abandoning RAII in favor of constructor + Init method. The next steps for splitting the dependencies that seem logical are: 1) Split GraphDbAccessor implementation, either via inheritance or passing in an implementation pointer. GraphDbAccessor should then only be created by a virtual call on GraphDb. 2) Split Interpreter implementation. Besides allowing single node interpreter to exist without depending on distributed, this will enable the planner and operators to be correctly separated. 
Reviewers: msantl, mferencevic, ipaljak Reviewed By: msantl Subscribers: dgleich, pullbot Differential Revision: https://phabricator.memgraph.io/D1493 2018-07-19 23:00:50 +08:00			`#include "database/distributed_graph_db.hpp"`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`#include "database/graph_db_accessor.hpp"`
Integrate auth checks into query execution Reviewers: mtomic, teon.banek Reviewed By: mtomic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1544 2018-08-22 16:59:46 +08:00			`#include "glue/communication.hpp"`
Extract distributed interpretation out of Interpreter Reviewers: mtomic, mferencevic, msantl, buda Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1560 2018-08-24 16:12:04 +08:00			`#include "query/distributed_interpreter.hpp"`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`#include "query/typed_value.hpp"`

			`class WorkerInThread {`
			`public:`
			`explicit WorkerInThread(database::Config config) : worker_(config) {`
			`thread_ = std::thread([this, config] { worker_.WaitForShutdown(); });`
			`}`

			`~WorkerInThread() {`
			`if (thread_.joinable()) thread_.join();`
			`}`

			`database::Worker worker_;`
			`std::thread thread_;`
			`};`

			`class Cluster {`
			`const std::chrono::microseconds kInitTime{200};`
			`const std::string kLocal = "127.0.0.1";`

			`public:`
			`Cluster(int worker_count) {`
			`database::Config masterconfig;`
			`masterconfig.master_endpoint = {kLocal, 0};`
			`master_ = std::make_unique<database::Master>(masterconfig);`
Extract distributed interpretation out of Interpreter Reviewers: mtomic, mferencevic, msantl, buda Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1560 2018-08-24 16:12:04 +08:00			`interpreter_ =`
			`std::make_unique<query::DistributedInterpreter>(master_.get());`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`std::this_thread::sleep_for(kInitTime);`

			`auto worker_config = [this](int worker_id) {`
			`database::Config config;`
			`config.worker_id = worker_id;`
			`config.master_endpoint = master_->endpoint();`
			`config.worker_endpoint = {kLocal, 0};`
			`return config;`
			`};`

			`for (int i = 0; i < worker_count; ++i) {`
			`workers_.emplace_back(`
			`std::make_unique<WorkerInThread>(worker_config(i + 1)));`
			`std::this_thread::sleep_for(kInitTime);`
			`}`
			`}`

			`void Stop() {`
Invalidate distributed plan caches Summary: - Remove caches on workers as a result of plan expiration or race during insertion. - Extract caching functionality into a class. - Minor refactor of Interpreter::operator() - New RPC and test for it. - Rename ConsumePlanRes to DispatchPlanRes for consistency, remove return value as it's always true and never used. - Interpreter is now constructed with a `GraphDb` reference. At the moment only for reaching the `distributed::PlanDispatcher`, but in the future we should probably use that primarily for planning. I added a function to `PlanConsumer` that is only used for testing. I prefer not doing this, but I felt this needed testing. I can remove it now if you like. Reviewers: teon.banek, msantl Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1292 2018-03-13 17:35:14 +08:00			`interpreter_ = nullptr;`
Fix distributed index creation Summary: During the creation of indexes there could be a case in which a vertex contains a label/property but is not a part of index after index building completes. This happens if vertices are being inserted while the index is being built. Reviewers: buda, msantl Reviewed By: msantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1484 2018-07-11 20:34:19 +08:00			`auto t = std::thread([this]() { master_ = nullptr; });`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`workers_.clear();`
Fix distributed index creation Summary: During the creation of indexes there could be a case in which a vertex contains a label/property but is not a part of index after index building completes. This happens if vertices are being inserted while the index is being built. Reviewers: buda, msantl Reviewed By: msantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1484 2018-07-11 20:34:19 +08:00			`if (t.joinable()) t.join();`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`}`

			`~Cluster() {`
			`if (master_) Stop();`
			`}`

			`auto Execute(const std::string &query,`
Clean-up TypedValue misuse Summary: In a bunch of places `TypedValue` was used where `PropertyValue` should be. A lot of times it was only because `TypedValue` serialization code could be reused for `PropertyValue`, only without providing callbacks for `VERTEX`, `EDGE` and `PATH`. So first I wrote separate serialization code for `PropertyValue` and put it into storage folder. Then I fixed all the places where `TypedValue` was incorrectly used instead of `PropertyValue`. I also disabled implicit `TypedValue` to `PropertyValue` conversion in hopes of preventing misuse in the future. After that, I wrote code for `VertexAccessor` and `EdgeAccessor` serialization and put it into `storage` folder because it was almost duplicated in distributed BFS and pull produce RPC messages. On the sender side, some subset of records (old or new or both) is serialized, and on the reciever side, records are deserialized and immediately put into transaction cache. Then I rewrote the `TypedValue` serialization functions (`SaveCapnpTypedValue` and `LoadCapnpTypedValue`) to not take callbacks for `VERTEX`, `EDGE` and `PATH`, but use accessor serialization functions instead. That means that any code that wants to use `TypedValue` serialization must hold a reference to `GraphDbAccessor` and `DataManager`, so that should make clients reconsider if they really want to use `TypedValue` instead of `PropertyValue`. Reviewers: teon.banek, msantl Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1598 2018-09-13 18:12:07 +08:00			`std::map<std::string, PropertyValue> params = {}) {`
Separate distributed implementation of GraphDbAccessor Summary: GraphDbAccessor is now constructed only through GraphDb. This allows the concrete GraphDb to instantiate a concrete GraphDbAccessor. This allows us to use virtual calls, so that the implementation may be kept separate. The major downside of doing things this way is heap allocation of GraphDbAccessor. In case it turns out to be a real performance issues, another solution with pointer to static implementation may be used. InsertVertexIntoRemote is now a non-member function, which reduces coupling. It made no sense for it to be member function because it used only the public parts of GraphDbAccessor. Reviewers: msantl, mtomic, mferencevic Reviewed By: msantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1504 2018-07-26 15:08:21 +08:00			`auto dba = master_->Access();`
Extract TypedValue/DecodedValue conversion to higher component Summary: This is the first step in cutting the crazy dependencies of communication module to the whole database. Includes have been reorganized and conversion between DecodedValue and other Memgraph types (TypedValue and PropertyValue) has been extracted to a higher level component called `communication/conversion`. Encoder, like Decoder, now relies only on DecodedValue. Hopefully the conversion operations will not significantly slow down streaming Bolt data. Additionally, Bolt ID is now wrapped in a class. Our storage model uses unsigned int64, while Bolt expects signed int64. The implicit conversions may lead to encode/decode errors, so the wrapper should enforce some type safety to prevent such errors. Reviewers: mferencevic, buda, msantl, mtomic Reviewed By: mferencevic, mtomic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1453 2018-07-02 21:34:33 +08:00			`ResultStreamFaker<query::TypedValue> result;`
Extract distributed interpretation out of Interpreter Reviewers: mtomic, mferencevic, msantl, buda Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1560 2018-08-24 16:12:04 +08:00			`(*interpreter_)(query, *dba, params, false).PullAll(result);`
Separate distributed implementation of GraphDbAccessor Summary: GraphDbAccessor is now constructed only through GraphDb. This allows the concrete GraphDb to instantiate a concrete GraphDbAccessor. This allows us to use virtual calls, so that the implementation may be kept separate. The major downside of doing things this way is heap allocation of GraphDbAccessor. In case it turns out to be a real performance issues, another solution with pointer to static implementation may be used. InsertVertexIntoRemote is now a non-member function, which reduces coupling. It made no sense for it to be member function because it used only the public parts of GraphDbAccessor. Reviewers: msantl, mtomic, mferencevic Reviewed By: msantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1504 2018-07-26 15:08:21 +08:00			`dba->Commit();`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`return result.GetResults();`
			`};`

			`private:`
			`std::unique_ptr<database::Master> master_;`
			`std::vector<std::unique_ptr<WorkerInThread>> workers_;`
Extract distributed interpretation out of Interpreter Reviewers: mtomic, mferencevic, msantl, buda Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1560 2018-08-24 16:12:04 +08:00			`std::unique_ptr<query::DistributedInterpreter> interpreter_;`
Create a local distributed test based on the credit card fraud demo. Reviewers: msantl, mtomic Reviewed By: msantl Differential Revision: https://phabricator.memgraph.io/D1289 2018-03-07 20:59:52 +08:00			`};`

			`void CheckResults(`
			`const std::vector<std::vector<query::TypedValue>> &results,`
			`const std::vector<std::vector<query::TypedValue>> &expected_rows,`
			`const std::string &msg) {`
			`query::TypedValue::BoolEqual equality;`
			`CHECK(results.size() == expected_rows.size())`
			`<< msg << " (expected " << expected_rows.size() << " rows "`
			`<< ", got " << results.size() << ")";`
			`for (size_t row_id = 0; row_id < results.size(); ++row_id) {`
			`auto &result = results[row_id];`
			`auto &expected = expected_rows[row_id];`
			`CHECK(result.size() == expected.size())`
			`<< msg << " (expected " << expected.size() << " elements in row "`
			`<< row_id << ", got " << result.size() << ")";`
			`for (size_t col_id = 0; col_id < result.size(); ++col_id) {`
			`CHECK(equality(result[col_id], expected[col_id]))`
			`<< msg << " (expected value '" << expected[col_id] << "' got '"`
			`<< result[col_id] << "' in row " << row_id << " col " << col_id`
			`<< ")";`
			`}`
			`}`
			`}`