Do preprocess in client

Reviewers: buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D793
Mislav Bradac 2017-09-13 21:20:03 +02:00
parent 59b9b7af21
commit d640ca3f1a
9 changed files with 150 additions and 116 deletions
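
A note on the overall shape of the change, for readers skimming the diffs below: query-group preprocessing moves from the Python harness into the harness client. The client splits its input into groups on blank (or ";") lines, executes each group, and prints one JSON summary per group on its own output line; the Python QueryClient then collects those lines into data["groups"]. A minimal Python sketch of that reading side, assuming only that "output" is the path of the client's result file as in the first hunk below:

    import json

    def read_group_summaries(output):
        # One JSON object per line, one line per executed query group.
        data = {"groups": []}
        with open(output) as f:
            for line in f:
                data["groups"].append(json.loads(line))
        return data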

View File

@@ -67,8 +67,10 @@ class QueryClient:
str(queries), return_code, stderr)
raise Exception("BoltClient execution failed")
data = {"groups" : []}
with open(output) as f:
data = json.loads(f.read())
for line in f:
data["groups"].append(json.loads(line))
data[CPU_TIME] = cpu_time_end - cpu_time_start
data[MAX_MEMORY] = usage["max_memory"]

View File

@@ -16,8 +16,9 @@ using communication::bolt::DecodedValue;
class BoltClient {
public:
BoltClient(std::string &address, std::string &port, std::string &username,
std::string &password, std::string database = "") {
BoltClient(const std::string &address, const std::string &port,
const std::string &username, const std::string &password,
const std::string & = "") {
SocketT socket;
EndpointT endpoint;

View File

@@ -70,6 +70,7 @@ communication::bolt::QueryData ExecuteNTimesTillSuccess(
std::chrono::milliseconds(rand_dist_(pseudo_rand_gen_)));
}
}
LOG(WARNING) << query << " failed " << times << " times";
throw last_exception;
}
}

View File

@@ -12,6 +12,7 @@
#include <glog/logging.h>
#include <json/json.hpp>
#include "bolt_client.hpp"
#include "common.hpp"
#include "communication/bolt/client.hpp"
#include "communication/bolt/v1/decoder/decoded_value.hpp"
@@ -23,9 +24,6 @@
#include "utils/assert.hpp"
#include "utils/timer.hpp"
using SocketT = io::network::Socket;
using EndpointT = io::network::NetworkEndpoint;
using Client = communication::bolt::Client<SocketT>;
using communication::bolt::DecodedValue;
using communication::bolt::DecodedVertex;
using communication::bolt::DecodedEdge;
@@ -39,7 +37,6 @@ DEFINE_string(password, "", "Password for the database");
DEFINE_int32(duration, 30, "Number of seconds to execute benchmark");
const int MAX_RETRIES = 30;
const int NUM_BUCKETS = 100;
struct VertexAndEdges {
DecodedVertex vertex;
@@ -47,17 +44,23 @@ struct VertexAndEdges {
std::vector<DecodedVertex> vertices;
};
std::pair<VertexAndEdges, int> DetachDeleteVertex(Client &client,
std::pair<VertexAndEdges, int> DetachDeleteVertex(BoltClient &client,
const std::string &label,
int64_t id) {
auto vertex_record =
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id}) RETURN n",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES)
.records;
CHECK(vertex_record.size() == 1U) << "id : " << id << " "
<< vertex_record.size();
auto records =
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id})-[e]-(m) RETURN n, e, m",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES)
.records;
if (records.size() == 0U) return {{}, 1};
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id})-[]-(m) DETACH DELETE n",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES);
@@ -74,10 +77,11 @@ std::pair<VertexAndEdges, int> DetachDeleteVertex(Client &client,
vertices.push_back(record[2].ValueVertex());
}
return {{records[0][0].ValueVertex(), edges, vertices}, 2};
return {{vertex_record[0][0].ValueVertex(), edges, vertices}, 3};
}
int ReturnVertexAndEdges(Client &client, const VertexAndEdges &vertex_and_edges,
int ReturnVertexAndEdges(BoltClient &client,
const VertexAndEdges &vertex_and_edges,
const std::string &independent_label) {
int num_queries = 0;
{
@@ -134,13 +138,35 @@ int ReturnVertexAndEdges(Client &client, const VertexAndEdges &vertex_and_edges,
if (x.type() == DecodedValue::Type::Double) {
LOG_EVERY_N(INFO, 5000) << "exec " << x.ValueDouble() << " planning "
<< y.ValueDouble();
CHECK(ret.records.size() == 1U) << "Graph in invalid state";
}
CHECK(ret.records.size() == 1U)
<< "Graph in invalid state "
<< vertex_and_edges.vertex.properties.at("id");
++num_queries;
}
return num_queries;
}
int64_t NumNodes(BoltClient &client, const std::string &label) {
auto result = ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + ") RETURN COUNT(n) as cnt", {},
MAX_RETRIES);
return result.records[0][0].ValueInt();
}
std::vector<int64_t> Neighbours(BoltClient &client, const std::string &label,
int64_t id) {
auto result = ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id: " + std::to_string(id) +
"})-[e]-(m) RETURN m.id",
{}, MAX_RETRIES);
std::vector<int64_t> ret;
for (const auto &record : result.records) {
ret.push_back(record[0].ValueInt());
}
return ret;
}
int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
@@ -149,40 +175,60 @@ int main(int argc, char **argv) {
std::cin >> config;
const auto &queries = config["queries"];
const double read_probability = config["read_probability"];
const int64_t num_independent_nodes = config["num_independent_nodes"];
const std::string independent_label = config["independent_label"];
const int64_t num_nodes = config["num_nodes"];
std::vector<int64_t> independent_nodes_ids;
BoltClient client(FLAGS_address, FLAGS_port, FLAGS_username, FLAGS_password);
const int64_t num_nodes = NumNodes(client, independent_label);
{
std::vector<int64_t> ids;
std::unordered_set<int64_t> independent;
for (int64_t i = 1; i <= num_nodes; ++i) {
ids.push_back(i);
independent.insert(i);
}
{
std::mt19937 mt;
std::shuffle(ids.begin(), ids.end(), mt);
}
for (auto i : ids) {
if (independent.find(i) == independent.end()) continue;
independent.erase(i);
std::vector<int64_t> neighbour_ids =
Neighbours(client, independent_label, i);
independent_nodes_ids.push_back(i);
for (auto j : neighbour_ids) {
independent.erase(j);
}
}
}
utils::Timer timer;
std::vector<std::thread> threads;
std::atomic<int64_t> executed_queries{0};
std::atomic<bool> keep_running{true};
LOG(INFO) << "nodes " << num_nodes << " independent "
<< independent_nodes_ids.size();
int64_t next_to_assign = 0;
for (int i = 0; i < FLAGS_num_workers; ++i) {
int64_t size = independent_nodes_ids.size();
int64_t next_next_to_assign = next_to_assign + size / FLAGS_num_workers +
(i < size % FLAGS_num_workers);
std::vector<int64_t> to_remove(
independent_nodes_ids.begin() + next_to_assign,
independent_nodes_ids.begin() + next_next_to_assign);
LOG(INFO) << next_to_assign << " " << next_next_to_assign;
next_to_assign = next_next_to_assign;
threads.emplace_back(
[&](int thread_id) {
// Initialise client.
SocketT socket;
EndpointT endpoint;
try {
endpoint = EndpointT(FLAGS_address, FLAGS_port);
} catch (const io::network::NetworkEndpointException &e) {
LOG(FATAL) << "Invalid address or port: " << FLAGS_address << ":"
<< FLAGS_port;
}
if (!socket.Connect(endpoint)) {
LOG(FATAL) << "Could not connect to: " << FLAGS_address << ":"
<< FLAGS_port;
}
Client client(std::move(socket), FLAGS_username, FLAGS_password);
[&](int thread_id, std::vector<int64_t> to_remove) {
BoltClient client(FLAGS_address, FLAGS_port, FLAGS_username,
FLAGS_password);
std::mt19937 random_gen(thread_id);
int64_t to_remove =
num_independent_nodes / FLAGS_num_workers * thread_id + 1;
int64_t last_to_remove =
to_remove + num_independent_nodes / FLAGS_num_workers;
bool remove = true;
int64_t num_shifts = 0;
std::vector<VertexAndEdges> removed;
while (keep_running) {
@@ -203,40 +249,34 @@ int main(int argc, char **argv) {
MAX_RETRIES);
++executed_queries;
} else {
if (!remove) {
if (real_dist(random_gen) <
static_cast<double>(removed.size()) /
(removed.size() + to_remove.size())) {
CHECK(removed.size());
std::uniform_int_distribution<> int_dist(0, removed.size() - 1);
std::swap(removed.back(), removed[int_dist(random_gen)]);
executed_queries += ReturnVertexAndEdges(client, removed.back(),
independent_label);
to_remove.push_back(
removed.back().vertex.properties["id"].ValueInt());
removed.pop_back();
if (removed.empty()) {
remove = true;
}
} else {
auto ret =
DetachDeleteVertex(client, independent_label, to_remove);
++to_remove;
CHECK(to_remove.size());
std::uniform_int_distribution<> int_dist(0,
to_remove.size() - 1);
std::swap(to_remove.back(), to_remove[int_dist(random_gen)]);
auto ret = DetachDeleteVertex(client, independent_label,
to_remove.back());
removed.push_back(ret.first);
to_remove.pop_back();
executed_queries += ret.second;
if (ret.second > 1) {
removed.push_back(std::move(ret.first));
}
if (to_remove == last_to_remove) {
for (auto &x : removed) {
x.vertex.properties["id"].ValueInt() += num_nodes;
}
remove = false;
++num_shifts;
to_remove =
num_independent_nodes / FLAGS_num_workers * thread_id +
1 + num_shifts * num_nodes;
last_to_remove =
to_remove + num_independent_nodes / FLAGS_num_workers;
}
}
}
}
client.Close();
},
i);
i, std::move(to_remove));
}
// Open stream for writing stats.
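
For orientation, the id-selection loop added above is a greedy independent-set pass over the shuffled node ids: each picked id removes its neighbours from further consideration, so no two picked vertices share an edge and worker threads can detach-delete them without interfering. A rough Python sketch of the same idea, where neighbours() stands in for the Cypher lookup the C++ Neighbours() helper performs:

    import random

    def pick_independent_ids(num_nodes, neighbours, seed=0):
        # Shuffle ids 1..num_nodes, then greedily keep an id and drop its
        # neighbours so that no two kept ids are adjacent.
        ids = list(range(1, num_nodes + 1))
        random.Random(seed).shuffle(ids)
        candidates = set(ids)
        independent = []
        for i in ids:
            if i not in candidates:
                continue
            candidates.discard(i)
            independent.append(i)
            for j in neighbours(i):
                candidates.discard(j)
        return independent

The selected ids are then split across workers; the next_next_to_assign arithmetic above gives each of the first size % FLAGS_num_workers threads one extra id, so partition sizes differ by at most one.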

View File

@@ -28,8 +28,9 @@ class ClientQueryException : public ClientException {
class Client {
public:
Client(std::string &host, std::string &port, std::string &username,
std::string &password, std::string database = "") {
Client(const std::string &host, const std::string &port,
const std::string &username, const std::string &password,
const std::string &database = "") {
// https://www.postgresql.org/docs/9.4/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
std::string pass = "";
if (password != "") {

View File

@@ -8,6 +8,7 @@
#include "communication/bolt/v1/decoder/decoded_value.hpp"
#include "threading/sync/spinlock.hpp"
#include "utils/algorithm.hpp"
#include "utils/string.hpp"
#include "utils/timer.hpp"
#include "bolt_client.hpp"
@@ -45,25 +46,20 @@ void PrintSummary(
os << "{\"wall_time\": " << duration << ", "
<< "\"metadatas\": ";
PrintJsonMetadata(os, metadata);
os << "}";
os << "}\n";
}
template <typename ClientT, typename ExceptionT>
void ExecuteQueries(std::istream &istream, int num_workers,
template <typename ClientT>
void ExecuteQueries(const std::vector<std::string> &queries, int num_workers,
std::ostream &ostream, std::string &address,
std::string &port, std::string &username,
std::string &password, std::string &database) {
std::string query;
std::vector<std::thread> threads;
SpinLock spinlock;
uint64_t last = 0;
std::vector<std::string> queries;
std::vector<std::map<std::string, DecodedValue>> metadata;
while (std::getline(istream, query)) {
queries.push_back(query);
}
metadata.resize(queries.size());
utils::Timer timer;
@@ -86,7 +82,7 @@ void ExecuteQueries(std::istream &istream, int num_workers,
try {
metadata[pos] =
ExecuteNTimesTillSuccess(client, str, {}, MAX_RETRIES).metadata;
} catch (const ExceptionT &e) {
} catch (const utils::BasicException &e) {
LOG(FATAL) << "Could not execute query '" << str << "' "
<< MAX_RETRIES << " times! Error message: " << e.what();
}
@@ -128,27 +124,38 @@ int main(int argc, char **argv) {
std::string port = FLAGS_port;
if (FLAGS_protocol == "bolt") {
if (port == "") port = "7687";
using BoltClientT = BoltClient;
using BoltExceptionT = communication::bolt::ClientQueryException;
ExecuteQueries<BoltClientT, BoltExceptionT>(
*istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
FLAGS_username, FLAGS_password, FLAGS_database);
} else if (FLAGS_protocol == "postgres") {
LOG(FATAL) << "Postgres not yet supported";
// TODO: Currently libpq is linked dynamically so it is a pain to move
// harness_client executable to other machines without libpq.
// CHECK(FLAGS_username != "") << "Username can't be empty for
// postgres!";
// CHECK(FLAGS_database != "") << "Database can't be empty for
// postgres!";
// if (port == "") port = "5432";
//
// using PostgresClientT = postgres::Client;
// using PostgresExceptionT = postgres::ClientQueryException;
// ExecuteQueries<PostgresClientT, PostgresExceptionT>(
// *istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
// FLAGS_username, FLAGS_password, FLAGS_database);
if (port == "") port = "5432";
}
while (!istream->eof()) {
std::vector<std::string> queries;
std::string query;
while (std::getline(*istream, query) && utils::Trim(query) != "" &&
utils::Trim(query) != ";") {
queries.push_back(query);
}
if (FLAGS_protocol == "bolt") {
ExecuteQueries<BoltClient>(queries, FLAGS_num_workers, *ostream,
FLAGS_address, port, FLAGS_username,
FLAGS_password, FLAGS_database);
} else if (FLAGS_protocol == "postgres") {
LOG(FATAL) << "Postgres not yet supported";
// TODO: Currently libpq is linked dynamically so it is a pain to move
// harness_client executable to other machines without libpq.
// CHECK(FLAGS_username != "") << "Username can't be empty for
// postgres!";
// CHECK(FLAGS_database != "") << "Database can't be empty for
// postgres!";
// if (port == "") port = "5432";
//
// using PostgresClientT = postgres::Client;
// using PostgresExceptionT = postgres::ClientQueryException;
// ExecuteQueries<PostgresClientT, PostgresExceptionT>(
// *istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
// FLAGS_username, FLAGS_password, FLAGS_database);
}
}
return 0;

View File

@@ -1,6 +1,4 @@
{
"num_independent_nodes" : 4111,
"num_nodes" : 10000,
"independent_label": "User",
"read_probability": 0.5,
"queries" : [

View File

@@ -24,33 +24,14 @@ class LongRunningSuite:
def run(self, scenario, group_name, scenario_name, runner):
runner.start()
# This suite allows empty lines in setup. Those lines separate query
# groups. It is guaranteed that groups will be executed sequentially,
# but queries in each group are possibly executed concurrently.
query_groups = [[]]
for query in scenario.get("setup")():
if query == "":
query_groups.append([])
else:
query_groups[-1].append(query)
if query_groups[-1] == []:
query_groups.pop()
log.info("Executing {} query groups in setup"
.format(len(query_groups)))
for i, queries in enumerate(query_groups):
start_time = time.time()
# TODO: number of threads configurable
runner.setup(queries, self.args.num_client_workers)
log.info("\t{}. group imported in done in {:.2f} seconds".format(
i + 1, time.time() - start_time))
log.info("Executing setup")
runner.setup(scenario.get("setup")(), self.args.num_client_workers)
config = next(scenario.get("config")())
duration = config["duration"]
log.info("Executing run for {} seconds with {} client workers".format(
duration, self.args.num_client_workers))
# TODO: number of threads configurable
results = runner.run(next(scenario.get("run")()), duration,
self.args.num_client_workers)
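
The blank-line grouping removed from this suite (the deleted comment above describes the guarantee: groups run sequentially, queries within a group possibly concurrently) now happens in the client's main loop shown earlier, which reads queries until a blank or ";" line. A small, purely illustrative Python sketch of that grouping rule:

    def split_into_groups(lines):
        # A group ends at a blank line or a lone ";"; empty groups are dropped.
        groups, current = [], []
        for line in lines:
            if line.strip() in ("", ";"):
                if current:
                    groups.append(current)
                    current = []
            else:
                current.append(line)
        if current:
            groups.append(current)
        return groups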

View File

@@ -91,14 +91,17 @@ class _QuerySuite:
execute("itersetup")
run_result = execute("run",
scenario_config.get("num_client_workers", 1))
add_measurement(run_result, iteration, WALL_TIME)
add_measurement(run_result, iteration, CPU_TIME)
add_measurement(run_result, iteration, MAX_MEMORY)
assert len(run_result["groups"]) == 1, \
"Multiple groups in run step not yet supported"
group = run_result["groups"][0]
add_measurement(group, iteration, WALL_TIME)
for measurement in ["parsing_time",
"plan_execution_time",
"planning_time"] :
for i in range(len(run_result.get("metadatas", []))):
add_measurement(run_result["metadatas"][i], iteration,
for i in range(len(group.get("metadatas", []))):
add_measurement(group["metadatas"][i], iteration,
measurement)
execute("iterteardown")