2018-03-13 17:35:14 +08:00
#include <chrono>
#include <experimental/optional>
2018-02-23 00:07:35 +08:00
#include "gmock/gmock.h"
2018-02-23 00:20:17 +08:00
#include "gtest/gtest.h"
2018-02-08 20:57:03 +08:00
2018-07-02 21:34:33 +08:00
#include "communication/result_stream_faker.hpp"
2018-02-08 20:57:03 +08:00
#include "database/graph_db.hpp"
2018-03-13 17:35:14 +08:00
#include "distributed/plan_consumer.hpp"
2018-07-06 21:12:45 +08:00
#include "distributed/plan_dispatcher.hpp"
2018-04-30 15:33:09 +08:00
#include "distributed/pull_rpc_clients.hpp"
2018-02-08 20:57:03 +08:00
#include "distributed_common.hpp"
#include "query/interpreter.hpp"
#include "query_common.hpp"
#include "query_plan_common.hpp"
2018-03-08 23:57:15 +08:00
#include "utils/timer.hpp"
2018-02-08 20:57:03 +08:00
2018-03-15 22:00:43 +08:00
// We use this to ensure a cached plan is removed from the concurrent map and
// properly destructed.
2018-02-08 20:57:03 +08:00
using namespace distributed;
using namespace database;
2018-06-15 03:15:01 +08:00
using namespace std::literals::chrono_literals;
2018-02-08 20:57:03 +08:00
2018-02-23 00:20:17 +08:00
// Fixture derived from DistributedGraphDbTest (constructed with the name
// "interpretation"). It holds an optional query::Interpreter and provides
// helpers for running a query string and collecting its results.
class DistributedInterpretationTest : public DistributedGraphDbTest {
2018-07-05 16:55:00 +08:00
DistributedInterpretationTest() : DistributedGraphDbTest("interpretation") {}
2018-03-13 17:35:14 +08:00
void SetUp() override {
void TearDown() override {
// Assigning nullopt destroys the held interpreter, if any.
interpreter_ = std::experimental::nullopt;
2018-04-30 15:33:09 +08:00
// Invokes the interpreter on `query` using the caller-supplied accessor and
// an empty parameter map, then returns the results held by the stream faker.
// NOTE(review): no visible line feeds `results` into `stream` -- confirm that
// lines were not lost from this view.
auto RunWithDba(const std::string &query, GraphDbAccessor &dba) {
2018-02-23 00:20:17 +08:00
std::map<std::string, query::TypedValue> params = {};
2018-07-18 16:40:06 +08:00
ResultStreamFaker<query::TypedValue> stream;
auto results = interpreter_.value()(query, dba, params, false);
return stream.GetResults();
2018-02-23 00:20:17 +08:00
2018-03-13 17:35:14 +08:00
2018-04-30 15:33:09 +08:00
// Convenience wrapper: creates a fresh accessor on master() and forwards to
// RunWithDba.
auto Run(const std::string &query) {
GraphDbAccessor dba(master());
auto results = RunWithDba(query, dba);
return results;
2018-03-13 17:35:14 +08:00
// Empty (nullopt) after TearDown; RunWithDba calls .value() on it, so it must
// be populated before any query is run.
std::experimental::optional<query::Interpreter> interpreter_;
2018-02-23 00:20:17 +08:00
2018-02-08 20:57:03 +08:00
2018-03-23 22:21:46 +08:00
// Runs an OPTIONAL MATCH + UNWIND query and expects 21 rows, each with a
// single column holding the integer 1.
TEST_F(DistributedInterpretationTest, PullTest) {
2018-02-23 00:20:17 +08:00
auto results = Run("OPTIONAL MATCH(n) UNWIND(RANGE(0, 20)) AS X RETURN 1");
2018-07-09 17:04:13 +08:00
ASSERT_EQ(results.size(), 1 * 21);
2018-02-08 20:57:03 +08:00
2018-02-23 00:20:17 +08:00
// Every returned row must have exactly one column with value 1.
for (auto result : results) {
ASSERT_EQ(result.size(), 1U);
ASSERT_EQ(result[0].ValueInt(), 1);
2018-02-08 20:57:03 +08:00
2018-03-23 22:21:46 +08:00
// Runs a match-all-vertices query and expects it to return zero rows.
TEST_F(DistributedInterpretationTest, PullNoResultsTest) {
2018-02-23 00:20:17 +08:00
auto results = Run("MATCH (n) RETURN n");
ASSERT_EQ(results.size(), 0U);
2018-02-08 20:57:03 +08:00
2018-02-23 00:20:17 +08:00
// Runs a MATCH ... CREATE query that attaches a new vertex to every matched
// vertex, then checks the per-database vertex counts.
// NOTE(review): the setup that inserts the initial vertices is not visible in
// this view -- confirm against the full file.
TEST_F(DistributedInterpretationTest, CreateExpand) {
2018-02-08 20:57:03 +08:00
2018-02-23 00:20:17 +08:00
Run("MATCH (n) CREATE (n)-[:T]->(m) RETURN n");
2018-02-08 20:57:03 +08:00
2018-02-23 00:20:17 +08:00
// Expected vertex counts after the query: master 2, worker 1 4, worker 2 6.
EXPECT_EQ(VertexCount(master()), 2);
EXPECT_EQ(VertexCount(worker(1)), 4);
EXPECT_EQ(VertexCount(worker(2)), 6);
2018-02-08 20:57:03 +08:00
2018-02-23 00:07:35 +08:00
2018-02-23 00:20:17 +08:00
// Runs a two-hop undirected expansion and compares the returned
// (type(r1), type(r2)) pairs, ignoring order, with a locally enumerated
// expectation.
TEST_F(DistributedInterpretationTest, RemoteExpandTest2) {
2018-02-23 00:07:35 +08:00
// Make a fully connected graph with vertices scattered across master and
// worker storage.
// Vertex count is low, because test gets exponentially slower. The expected
// result size is ~ vertices^3, and then that is compared at the end in no
// particular order which causes O(result_size^2) comparisons.
int verts_per_storage = 3;
std::vector<storage::VertexAddress> vertices;
vertices.reserve(verts_per_storage * 3);
// NOTE(review): the body of add_vertices and its invocations are not visible
// in this view.
auto add_vertices = [this, &vertices, &verts_per_storage](auto &db) {
for (int i = 0; i < verts_per_storage; ++i)
// Edge type names encode the two endpoint indices as "<v1>-<v2>".
auto get_edge_type = [](int v1, int v2) {
return std::to_string(v1) + "-" + std::to_string(v2);
std::vector<std::string> edge_types;
edge_types.reserve(vertices.size() * vertices.size());
2018-02-23 00:20:17 +08:00
// Insert one edge for every ordered pair (i, j), including i == j.
for (size_t i = 0; i < vertices.size(); ++i) {
for (size_t j = 0; j < vertices.size(); ++j) {
2018-02-23 00:07:35 +08:00
auto edge_type = get_edge_type(i, j);
InsertEdge(vertices[i], vertices[j], edge_type);
2018-02-23 00:20:17 +08:00
auto results = Run("MATCH (n)-[r1]-(m)-[r2]-(l) RETURN type(r1), type(r2)");
2018-02-23 00:07:35 +08:00
// We expect the number of results to be:
size_t expected_result_size =
// pick (n)
vertices.size() *
// pick both directed edges to other (m) and a
// single edge to (m) which equals (n), hence -1
(2 * vertices.size() - 1) *
// Pick as before, but exclude the previously taken edge, hence another -1
(2 * vertices.size() - 1 - 1);
// Enumerate every (n, m, l) triple and all edge-type pairs connecting them,
// skipping pairs where both hops would use the same edge type.
std::vector<std::vector<std::string>> expected;
2018-02-23 00:20:17 +08:00
for (size_t n = 0; n < vertices.size(); ++n) {
for (size_t m = 0; m < vertices.size(); ++m) {
2018-02-23 00:07:35 +08:00
// Candidate first-hop types: n->m, plus m->n when n and m differ.
std::vector<std::string> r1s{get_edge_type(n, m)};
if (n != m) r1s.push_back(get_edge_type(m, n));
2018-02-23 00:20:17 +08:00
for (size_t l = 0; l < vertices.size(); ++l) {
2018-02-23 00:07:35 +08:00
// Candidate second-hop types: m->l, plus l->m when m and l differ.
std::vector<std::string> r2s{get_edge_type(m, l)};
if (m != l) r2s.push_back(get_edge_type(l, m));
for (const auto &r1 : r1s) {
for (const auto &r2 : r2s) {
if (r1 == r2) continue;
expected.push_back({r1, r2});
// Sanity-check the enumeration against the closed-form count above.
ASSERT_EQ(expected.size(), expected_result_size);
2018-02-23 00:20:17 +08:00
ASSERT_EQ(results.size(), expected_result_size);
2018-02-23 00:07:35 +08:00
// NOTE(review): the statements that fill `row` and `got` from `results` are
// not visible in this view.
std::vector<std::vector<std::string>> got;
2018-02-23 00:20:17 +08:00
for (const auto &res : results) {
2018-02-23 00:07:35 +08:00
std::vector<std::string> row;
for (const auto &col : res) {
// Order-insensitive comparison of the collected rows with the expectation.
ASSERT_THAT(got, testing::UnorderedElementsAreArray(expected));
2018-03-07 01:01:38 +08:00
// Inserts 30 vertices with "prop" values 0..29 (10 each through the master,
// worker 1 and worker 2 accessors) and expects the two-pattern MATCH to
// return all 30 * 30 (n.prop, m.prop) pairs in any order.
TEST_F(DistributedInterpretationTest, Cartesian) {
// Create some data on the master and both workers.
storage::Property prop;
GraphDbAccessor dba{master()};
// The worker accessors are constructed with the master accessor's
// transaction id.
auto tx_id = dba.transaction_id();
GraphDbAccessor dba1{worker(1), tx_id};
GraphDbAccessor dba2{worker(2), tx_id};
prop = dba.Property("prop");
// Inserts one vertex through the given accessor and sets `prop` to `value`.
auto add_data = [prop](GraphDbAccessor &dba, int value) {
dba.InsertVertex().PropsSet(prop, value);
for (int i = 0; i < 10; ++i) add_data(dba, i);
for (int i = 10; i < 20; ++i) add_data(dba1, i);
for (int i = 20; i < 30; ++i) add_data(dba2, i);
// Expected rows: every ordered pair (i, j) with i, j in [0, 30).
std::vector<std::vector<int64_t>> expected;
for (int64_t i = 0; i < 30; ++i)
for (int64_t j = 0; j < 30; ++j) expected.push_back({i, j});
auto results = Run("MATCH (n), (m) RETURN n.prop, m.prop;");
size_t expected_result_size = 30 * 30;
ASSERT_EQ(expected.size(), expected_result_size);
ASSERT_EQ(results.size(), expected_result_size);
// NOTE(review): the statements that fill `row` and `got` from `results` are
// not visible in this view.
std::vector<std::vector<int64_t>> got;
for (const auto &res : results) {
std::vector<int64_t> row;
for (const auto &col : res) {
// Order-insensitive comparison of the collected rows with the expectation.
ASSERT_THAT(got, testing::UnorderedElementsAreArray(expected));
2018-03-08 23:57:15 +08:00
// Fixture that overrides QueryExecutionTimeSec: worker id 2 gets 3, every
// other worker id gets 1.
class TestQueryWaitsOnFutures : public DistributedInterpretationTest {
int QueryExecutionTimeSec(int worker_id) override {
return worker_id == 2 ? 3 : 1;
// Runs a long pattern-match query and checks the elapsed wall time measured
// with utils::Timer: first after building a graph on worker 1, then again for
// worker 2. Any exception thrown by Run is caught.
// NOTE(review): `timer` is declared twice below, so the scoping braces and the
// make_fully_connected calls appear to be missing from this view.
TEST_F(TestQueryWaitsOnFutures, Test) {
const int kVertexCount = 10;
2018-03-12 22:06:00 +08:00
// Inserts an edge of type "et" between every ordered pair of vertices in
// `vertices`. The line that fills `vertices` is not visible here.
auto make_fully_connected = [](database::GraphDb &db) {
2018-03-08 23:57:15 +08:00
database::GraphDbAccessor dba(db);
std::vector<VertexAccessor> vertices;
for (int i = 0; i < kVertexCount; ++i)
auto et = dba.EdgeType("et");
for (auto &from : vertices)
for (auto &to : vertices) dba.InsertEdge(from, to, et);
// Worker 1 must hold kVertexCount vertices and kVertexCount^2 edges.
ASSERT_EQ(VertexCount(worker(1)), kVertexCount);
ASSERT_EQ(EdgeCount(worker(1)), kVertexCount * kVertexCount);
utils::Timer timer;
try {
Run("MATCH ()--()--()--()--()--()--() RETURN count(1)");
} catch (...) {
// First measurement: elapsed time must be strictly between 1 and 2.
double seconds = timer.Elapsed().count();
EXPECT_GT(seconds, 1);
EXPECT_LT(seconds, 2);
// Worker 2 must hold kVertexCount vertices and kVertexCount^2 edges.
ASSERT_EQ(VertexCount(worker(2)), kVertexCount);
ASSERT_EQ(EdgeCount(worker(2)), kVertexCount * kVertexCount);
utils::Timer timer;
try {
Run("MATCH ()--()--()--()--()--()--() RETURN count(1)");
} catch (...) {
// Second measurement: elapsed time must exceed 3.
double seconds = timer.Elapsed().count();
EXPECT_GT(seconds, 3);
2018-03-13 17:35:14 +08:00
// Sets FLAGS_query_plan_cache_ttl to 1, runs the same query twice and expects
// worker 1 to report exactly one cached plan id after each run, with the two
// id sets differing.
// NOTE(review): the sleep calls that the comments below refer to are not
// visible in this view.
TEST_F(DistributedInterpretationTest, PlanExpiration) {
FLAGS_query_plan_cache_ttl = 1;
Run("MATCH (n) RETURN n");
auto ids1 = worker(1).plan_consumer().CachedPlanIds();
ASSERT_EQ(ids1.size(), 1);
2018-03-15 22:00:43 +08:00
// Sleep so the cached plan becomes invalid.
2018-03-13 17:35:14 +08:00
Run("MATCH (n) RETURN n");
2018-03-15 22:00:43 +08:00
// Sleep so the invalidated plan (removed from cache which is a concurrent
// map) gets destructed and thus remote caches cleared.
2018-03-13 17:35:14 +08:00
auto ids2 = worker(1).plan_consumer().CachedPlanIds();
ASSERT_EQ(ids2.size(), 1);
EXPECT_NE(ids1, ids2);
2018-03-15 22:00:43 +08:00
// Sets FLAGS_query_plan_cache_ttl to 1 and joins a set of threads; the
// count_vertices lambda keeps running a count query until 3s have elapsed.
// The test contains no assertions in the visible lines.
TEST_F(DistributedInterpretationTest, ConcurrentPlanExpiration) {
FLAGS_query_plan_cache_ttl = 1;
auto count_vertices = [this]() {
utils::Timer timer;
while (timer.Elapsed() < 3s) {
Run("MATCH () RETURN count(1)");
// NOTE(review): the loop bound suggests one thread per hardware thread, but
// the statement that adds threads to `counters` is not visible in this view.
std::vector<std::thread> counters;
for (size_t i = 0; i < std::thread::hardware_concurrency(); ++i)
for (auto &t : counters) t.join();
2018-04-30 15:33:09 +08:00
// Runs the same count query twice through one accessor and expects both runs
// to report 3 * worker_count vertices.
// NOTE(review): the body of the setup loop is not visible in this view.
TEST_F(DistributedInterpretationTest, OngoingProduceKeyTest) {
int worker_count = 10;
for (int i = 0; i < worker_count; ++i) {
GraphDbAccessor dba(master());
auto count1 = RunWithDba("MATCH (n) RETURN count(n)", dba);
auto count2 = RunWithDba("MATCH (n) RETURN count(n)", dba);
ASSERT_EQ(count1[0][0].ValueInt(), 3 * worker_count);
ASSERT_EQ(count2[0][0].ValueInt(), 3 * worker_count);
// Creates ten :A vertices (id 1..10) through a single master accessor; the
// remainder of the test operates on the same accessor.
TEST_F(DistributedInterpretationTest, AdvanceCommandOnWorkers) {
GraphDbAccessor dba(master());
RunWithDba("UNWIND RANGE(1, 10) as x CREATE (:A {id: x})", dba);
// Advance commands on workers also.
Split GraphDb to distributed and single node files
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementation classes are just plain data stores,
without any methods. The interface classes now have a flatter hierarchy:
GraphDb (pure interface)
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, others are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
auto futures = master().pull_clients().NotifyAllTransactionCommandAdvanced(
2018-04-30 15:33:09 +08:00
for (auto &future : futures) future.wait();
auto count = RunWithDba("MATCH (n) RETURN count(n)", dba);
ASSERT_EQ(count[0][0].ValueInt(), 10);
2018-03-15 22:00:43 +08:00
// Test entry point: initializes Google Test, parses gflags command-line
// flags, sets FLAGS_skiplist_gc_interval to 1 and runs all registered tests.
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
// Assigned after flag parsing, so this value replaces any value given on the
// command line.
FLAGS_skiplist_gc_interval = 1;
return RUN_ALL_TESTS();