Measure read throughput in HA

Summary: This macro benchmark measures read throughput in HA. The test first creates a random graph with a given number of nodes and edges. After that, it concurrently performs the following query for 10 seconds: ``` MATCH (n {id:$random_id})-[e]->(m) RETURN e, m; ``` In other words, it randomly picks a node and returns all its neighbours. Locally measured results are as follows: | nodes | edges | queries per second | | 100 | 500 | 8900 | | 1000 | 5000 | 2700 | | 10000 | 50000 | 1200 | Running the same test on Memgraph single node yields very similar results (up to a few hundred queries). Reviewers: msantl Reviewed By: msantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1916
2019-03-12 14:06:14 +01:00 · 2019-03-12 14:06:14 +01:00 · d3e00635c6
commit d3e00635c6
parent 94afbd9f56
13 changed files with 287 additions and 16 deletions
--- a/apollo_build.yaml
+++ b/apollo_build.yaml
@ -31,7 +31,7 @@
    mkdir build_release
    cd build_release
    cmake -DCMAKE_BUILD_TYPE=release ..
-    TIMEOUT=1200 make -j$THREADS memgraph memgraph_distributed memgraph_ha tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot memgraph__feature_benchmark__kafka__benchmark memgraph__feature_benchmark__ha__benchmark
+    TIMEOUT=1200 make -j$THREADS memgraph memgraph_distributed memgraph_ha tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot memgraph__feature_benchmark__kafka__benchmark memgraph__feature_benchmark__ha__read__benchmark memgraph__feature_benchmark__ha__write__benchmark

    # Generate distributed card fraud dataset.
    cd ../tests/distributed/card_fraud
--- a/tests/feature_benchmark/apollo_runs.yaml
+++ b/tests/feature_benchmark/apollo_runs.yaml
@ -9,13 +9,24 @@
    - ../../../build_release/tests/feature_benchmark/kafka/benchmark # benchmark binary
  enable_network: true

- name: feature_benchmark__ha
-  cd: ha
+- name: feature_benchmark__ha__read
+  cd: ha/read
  commands: ./runner.sh
  infiles:
    - runner.sh # runner script
    - raft.json # raft configuration file
    - coordination.json # coordination configuration file
-    - ../../../build_release/tests/feature_benchmark/ha/benchmark # benchmark binary
-    - ../../../build_release/memgraph_ha # memgraph binary
+    - ../../../../build_release/tests/feature_benchmark/ha/read/benchmark # benchmark binary
+    - ../../../../build_release/memgraph_ha # memgraph binary
+  enable_network: true
+
+- name: feature_benchmark__ha__write
+  cd: ha/write
+  commands: ./runner.sh
+  infiles:
+    - runner.sh # runner script
+    - raft.json # raft configuration file
+    - coordination.json # coordination configuration file
+    - ../../../../build_release/tests/feature_benchmark/ha/write/benchmark # benchmark binary
+    - ../../../../build_release/memgraph_ha # memgraph binary
  enable_network: true
--- a/tests/feature_benchmark/ha/CMakeLists.txt
+++ b/tests/feature_benchmark/ha/CMakeLists.txt
@ -1,6 +1,3 @@
-set(target_name memgraph__feature_benchmark__ha)
-
-set(benchmark_target_name ${target_name}__benchmark)
-add_executable(${benchmark_target_name} benchmark.cpp)
-set_target_properties(${benchmark_target_name} PROPERTIES OUTPUT_NAME benchmark)
-target_link_libraries(${benchmark_target_name} mg-utils mg-communication)
+# test binaries
+add_subdirectory(read)
+add_subdirectory(write)
--- a/tests/feature_benchmark/ha/read/CMakeLists.txt
+++ b/tests/feature_benchmark/ha/read/CMakeLists.txt
@ -0,0 +1,6 @@
+# HA read benchmark client. The executable is named `benchmark` (via
+# OUTPUT_NAME) regardless of the longer, globally unique target name.
+set(target_name memgraph__feature_benchmark__ha__read)
+
+set(benchmark_target_name ${target_name}__benchmark)
+add_executable(${benchmark_target_name} benchmark.cpp)
+set_target_properties(${benchmark_target_name} PROPERTIES OUTPUT_NAME benchmark)
+# PRIVATE: nothing links against an executable, so no usage requirements need
+# to propagate; this also avoids the legacy keyword-less signature.
+target_link_libraries(${benchmark_target_name} PRIVATE mg-utils mg-communication)
--- a/tests/feature_benchmark/ha/read/benchmark.cpp
+++ b/tests/feature_benchmark/ha/read/benchmark.cpp
@ -0,0 +1,156 @@
+#include <atomic>
+#include <chrono>
+#include <experimental/optional>
+#include <fstream>
+#include <random>
+#include <thread>
+
+#include <fmt/format.h>
+#include <gflags/gflags.h>
+
+#include "communication/bolt/client.hpp"
+#include "io/network/endpoint.hpp"
+#include "io/network/utils.hpp"
+#include "utils/flag_validation.hpp"
+#include "utils/thread.hpp"
+#include "utils/timer.hpp"
+
+using namespace std::literals::chrono_literals;
+
+DEFINE_string(address, "127.0.0.1", "Server address");
+DEFINE_int32(port, 7687, "Server port");
+DEFINE_int32(cluster_size, 3, "Size of the raft cluster.");
+DEFINE_string(username, "", "Username for the database");
+DEFINE_string(password, "", "Password for the database");
+DEFINE_bool(use_ssl, false, "Set to true to connect with SSL to the server.");
+DEFINE_double(duration, 10.0,
+              "How long should the client perform reads (seconds)");
+DEFINE_string(output_file, "", "Output file where the results should be.");
+DEFINE_int32(nodes, 1000, "Number of nodes in DB");
+DEFINE_int32(edges, 5000, "Number of edges in DB");
+
+/// Scans the cluster for the current Raft leader.
+///
+/// Nodes are probed on consecutive ports starting at `FLAGS_port`
+/// (`FLAGS_cluster_size` of them). A node counts as the leader as soon as it
+/// executes the probe query without throwing. The whole cluster is scanned
+/// up to 10 times, with a one second pause after every unsuccessful scan.
+///
+/// @return endpoint of the leader, or `nullopt` when no node accepted the
+///         probe query in any scan.
+std::experimental::optional<io::network::Endpoint> GetLeaderEndpoint() {
+  constexpr int kMaxScans = 10;
+
+  for (int scan = 0; scan < kMaxScans; ++scan) {
+    for (int offset = 0; offset < FLAGS_cluster_size; ++offset) {
+      try {
+        communication::ClientContext probe_context(FLAGS_use_ssl);
+        communication::bolt::Client probe(&probe_context);
+
+        uint16_t candidate_port = FLAGS_port + offset;
+        io::network::Endpoint candidate{FLAGS_address, candidate_port};
+
+        probe.Connect(candidate, FLAGS_username, FLAGS_password);
+        probe.Execute("MATCH (n) RETURN n", {});
+        probe.Close();
+
+        // The probe query went through, so this node is the current leader.
+        return std::experimental::make_optional(candidate);
+      } catch (const communication::bolt::ClientQueryException &) {
+        // The query was rejected: not the leader, move on to the next node.
+      } catch (const communication::bolt::ClientFatalException &) {
+        // The node seems to be down, move on to the next node.
+      }
+    }
+    LOG(INFO) << "Couldn't find Raft cluster leader, retrying...";
+    std::this_thread::sleep_for(1s);
+  }
+
+  return std::experimental::nullopt;
+}
+
+/// Entry point of the HA read benchmark.
+///
+/// Locates the Raft leader, populates it with a random graph of
+/// `FLAGS_nodes` nodes and `FLAGS_edges` edges, and then lets one client per
+/// hardware thread run random neighbourhood lookups against the leader for
+/// `FLAGS_duration` seconds. The averaged per-thread duration, the number of
+/// executed reads and the resulting throughput are written to
+/// `FLAGS_output_file`.
+///
+/// @return 0 on success, 1 when `FLAGS_nodes` is not positive or no leader
+///         could be found.
+int main(int argc, char **argv) {
+  // Register the usage message before parsing the flags so that it is
+  // already available when `--help` is handled during parsing.
+  google::SetUsageMessage("Memgraph HA read benchmark client");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  google::InitGoogleLogging(argv[0]);
+
+  // uniform_int_distribution(0, FLAGS_nodes - 1) below requires a non-empty
+  // range, so reject an empty graph up front.
+  if (FLAGS_nodes <= 0) {
+    LOG(ERROR) << "Number of nodes must be positive!";
+    return 1;
+  }
+
+  std::atomic<int64_t> query_counter{0};
+
+  auto leader_endpoint = GetLeaderEndpoint();
+  if (!leader_endpoint) {
+    LOG(ERROR) << "Couldn't find Raft cluster leader!";
+    return 1;
+  }
+
+  // populate the db (random graph with given number of nodes and edges)
+  communication::ClientContext context(FLAGS_use_ssl);
+  communication::bolt::Client client(&context);
+  client.Connect(*leader_endpoint, FLAGS_username, FLAGS_password);
+  for (int i = 0; i < FLAGS_nodes; ++i) {
+    client.Execute("CREATE (:Node {id:" + std::to_string(i) + "})", {});
+  }
+
+  auto seed =
+      std::chrono::high_resolution_clock::now().time_since_epoch().count();
+  std::mt19937 rng(seed);
+  std::uniform_int_distribution<int> dist(0, FLAGS_nodes - 1);
+
+  for (int i = 0; i < FLAGS_edges; ++i) {
+    int a = dist(rng), b = dist(rng);
+    client.Execute("MATCH (n {id:" + std::to_string(a) + "})," +
+                   "      (m {id:" + std::to_string(b) + "})" +
+                   "CREATE (n)-[:Edge]->(m);", {});
+  }
+
+  // hardware_concurrency() may return 0 when the value is not computable;
+  // fall back to a single reader so the averages below stay well defined.
+  const unsigned int detected_threads = std::thread::hardware_concurrency();
+  const int num_threads =
+      detected_threads > 0 ? static_cast<int>(detected_threads) : 1;
+  std::vector<std::thread> threads;
+  std::vector<double> thread_duration;
+  threads.reserve(num_threads);
+  // Sized once up front: every thread keeps a reference to its own slot, so
+  // the vector must not reallocate afterwards.
+  thread_duration.resize(num_threads);
+
+  for (int i = 0; i < num_threads; ++i) {
+    threads.emplace_back([i, endpoint = *leader_endpoint, &query_counter,
+                          &local_duration = thread_duration[i]]() {
+      utils::ThreadSetName(fmt::format("BenchReader{}", i));
+      communication::ClientContext context(FLAGS_use_ssl);
+      communication::bolt::Client client(&context);
+      client.Connect(endpoint, FLAGS_username, FLAGS_password);
+
+      auto seed =
+          std::chrono::high_resolution_clock::now().time_since_epoch().count();
+      std::mt19937 rng(seed);
+      std::uniform_int_distribution<int> dist(0, FLAGS_nodes - 1);
+
+      utils::Timer t;
+      while (true) {
+        // Record the elapsed time on every iteration so that the slot holds
+        // a meaningful value even when the loop is left through an error.
+        local_duration = t.Elapsed().count();
+        if (local_duration >= FLAGS_duration) break;
+        int id = dist(rng);
+
+        try {
+          client.Execute("MATCH (n {id:" + std::to_string(id) +
+                         "})-[e]->(m) RETURN e, m;", {});
+          query_counter.fetch_add(1);
+        } catch (const communication::bolt::ClientQueryException &e) {
+          LOG(WARNING) << e.what();
+          break;
+        } catch (const communication::bolt::ClientFatalException &e) {
+          LOG(WARNING) << e.what();
+          break;
+        }
+      }
+
+      client.Close();
+    });
+  }
+
+  for (auto &t : threads) {
+    if (t.joinable()) t.join();
+  }
+
+  // Average the per-thread durations.
+  double duration = 0;
+  for (auto &d : thread_duration) duration += d;
+  duration /= num_threads;
+
+  const int64_t executed_reads = query_counter.load();
+  // Guard the division in case no measurable time has elapsed.
+  double read_per_second = duration > 0 ? executed_reads / duration : 0;
+
+  std::ofstream output(FLAGS_output_file);
+  if (!output.is_open()) {
+    LOG(WARNING) << "Couldn't open output file '" << FLAGS_output_file
+                 << "', the results will not be stored!";
+  }
+  output << "duration " << duration << std::endl;
+  output << "executed_reads " << executed_reads << std::endl;
+  output << "read_per_second " << read_per_second << std::endl;
+  output.close();
+
+  return 0;
+}
--- a/tests/feature_benchmark/ha/read/coordination.json
+++ b/tests/feature_benchmark/ha/read/coordination.json
--- a/tests/feature_benchmark/ha/read/raft.json
+++ b/tests/feature_benchmark/ha/read/raft.json
--- a/tests/feature_benchmark/ha/read/runner.sh
+++ b/tests/feature_benchmark/ha/read/runner.sh
@ -21,9 +21,9 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 cd "$DIR"

 # Find memgraph binaries.
-binary_dir="$DIR/../../../build"
+binary_dir="$DIR/../../../../build"
 if [ ! -d $binary_dir ]; then
-    binary_dir="$DIR/../../../build_release"
+    binary_dir="$DIR/../../../../build_release"
 fi

 # Results for apollo
@ -51,8 +51,8 @@ done
 sleep 10

 # Start the memgraph process and wait for it to start.
-echo_info "Starting HA benchmark"
-$binary_dir/tests/feature_benchmark/ha/benchmark \
+echo_info "Starting HA read benchmark"
+$binary_dir/tests/feature_benchmark/ha/read/benchmark \
    --duration=$DURATION \
    --output-file=$RESULTS &
 pid=$!
--- a/tests/feature_benchmark/ha/write/CMakeLists.txt
+++ b/tests/feature_benchmark/ha/write/CMakeLists.txt
@ -0,0 +1,6 @@
+# HA write benchmark client. The executable is named `benchmark` (via
+# OUTPUT_NAME) regardless of the longer, globally unique target name.
+set(target_name memgraph__feature_benchmark__ha__write)
+
+set(benchmark_target_name ${target_name}__benchmark)
+add_executable(${benchmark_target_name} benchmark.cpp)
+set_target_properties(${benchmark_target_name} PROPERTIES OUTPUT_NAME benchmark)
+# PRIVATE: nothing links against an executable, so no usage requirements need
+# to propagate; this also avoids the legacy keyword-less signature.
+target_link_libraries(${benchmark_target_name} PRIVATE mg-utils mg-communication)
--- a/tests/feature_benchmark/ha/write/benchmark.cpp
+++ b/tests/feature_benchmark/ha/write/benchmark.cpp
@ -60,7 +60,7 @@ std::experimental::optional<io::network::Endpoint> GetLeaderEndpoint() {

 int main(int argc, char **argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
-  google::SetUsageMessage("Memgraph HA benchmark client");
+  google::SetUsageMessage("Memgraph HA write benchmark client");
  google::InitGoogleLogging(argv[0]);

  std::atomic<int64_t> query_counter{0};
--- a/tests/feature_benchmark/ha/write/coordination.json
+++ b/tests/feature_benchmark/ha/write/coordination.json
@ -0,0 +1,5 @@
+[
+  [1, "127.0.0.1", 10000],
+  [2, "127.0.0.1", 10001],
+  [3, "127.0.0.1", 10002]
+]
--- a/tests/feature_benchmark/ha/write/raft.json
+++ b/tests/feature_benchmark/ha/write/raft.json
@ -0,0 +1,7 @@
+{
+  "election_timeout_min": 350,
+  "election_timeout_max": 700,
+  "heartbeat_interval": 100,
+  "replication_timeout": 10000,
+  "log_size_snapshot_threshold": -1
+}
--- a/tests/feature_benchmark/ha/write/runner.sh
+++ b/tests/feature_benchmark/ha/write/runner.sh
@ -0,0 +1,83 @@
+#!/bin/bash
+
+## Helper functions
+
+function wait_for_server {
+    port=$1
+    while ! nc -z -w 1 127.0.0.1 $port; do
+        sleep 0.1
+    done
+    sleep 1
+}
+
+function echo_info { printf "\033[1;36m~~ $1 ~~\033[0m\n"; }
+function echo_success { printf "\033[1;32m~~ $1 ~~\033[0m\n\n"; }
+function echo_failure { printf "\033[1;31m~~ $1 ~~\033[0m\n\n"; }
+
+## Environment setup
+
+# Get script location.
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$DIR"
+
+# Find memgraph binaries: prefer the `build` directory and fall back to
+# `build_release` when it does not exist.
+binary_dir="$DIR/../../../../build"
+if [ ! -d "$binary_dir" ]; then
+    binary_dir="$DIR/../../../../build_release"
+fi
+
+# Results for apollo
+RESULTS="$DIR/.apollo_measurements"
+
+# Benchmark parameters
+DURATION=10
+
+# Startup: launch the three HA servers one by one (ports 7687-7689), waiting
+# for each port to accept connections before starting the next server.
+declare -a HA_PIDS
+
+for server_id in 1 2 3
+do
+  "$binary_dir/memgraph_ha" --server_id $server_id \
+    --coordination_config_file="coordination.json" \
+    --raft_config_file="raft.json" \
+    --port $((7686 + $server_id)) \
+    --db-recover-on-startup=false \
+    --durability_directory="dur$server_id" &
+  HA_PIDS[$server_id]=$!
+  wait_for_server $((7686 + $server_id))
+done
+
+# Allow some time for leader election.
+sleep 10
+
+# Run the benchmark client in the background and wait for it to finish.
+echo_info "Starting HA write benchmark"
+"$binary_dir/tests/feature_benchmark/ha/write/benchmark" \
+    --duration=$DURATION \
+    --output-file="$RESULTS" &
+pid=$!
+
+# Plain `wait <pid>` always targets exactly this process and returns its exit
+# status; `wait -n` is meant for "whichever job finishes first" and could
+# return early if one of the servers terminated before the benchmark.
+wait "$pid"
+code=$?
+
+# Shutdown
+for server_id in 1 2 3
+do
+  kill -15 "${HA_PIDS[$server_id]}"
+done
+
+# Cleanup: wait for every server to exit before removing its durability
+# directory.
+for server_id in 1 2 3
+do
+  wait "${HA_PIDS[$server_id]}"
+  rm -r "dur$server_id"
+done
+
+if [ $code -eq 0 ]; then
+    echo_success "Benchmark finished successfully"
+else
+    echo_failure "Benchmark didn't finish successfully"
+    exit $code
+fi
+
+exit 0