From f16246604f8332be03f11c9f45ed7a4d7f985393 Mon Sep 17 00:00:00 2001
From: Dominik Gleich <dominik.gleich@memgraph.io>
Date: Thu, 16 Nov 2017 10:27:29 +0100
Subject: [PATCH] Add recovery speed measurement tool

Reviewers: mferencevic, buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D985
---
 tests/macro_benchmark/databases.py | 10 +++++
 tools/requirements.txt             |  2 +-
 tools/snapshot_recovery_speed.py   | 62 ++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100755 tools/snapshot_recovery_speed.py

diff --git a/tests/macro_benchmark/databases.py b/tests/macro_benchmark/databases.py
index 96ebdcf01..ade5fbfe9 100644
--- a/tests/macro_benchmark/databases.py
+++ b/tests/macro_benchmark/databases.py
@@ -32,6 +32,9 @@ class Memgraph:
                           default=get_absolute_path("memgraph", "build"))
         argp.add_argument("--port", default="7687",
                           help="Database and client port")
+        argp.add_argument("--snapshot-directory", default=None)
+        argp.add_argument("--snapshot-on-exit", action="store_true")
+        argp.add_argument("--snapshot-recover-on-startup", action="store_true")
         self.log.info("Initializing Runner with arguments %r", args)
         self.args, _ = argp.parse_known_args(args)
         self.config = config
@@ -45,6 +48,13 @@ class Memgraph:
         database_args = ["--port", self.args.port]
         if self.num_workers:
             database_args += ["--num_workers", str(self.num_workers)]
+        if self.args.snapshot_directory:
+            database_args += ["--snapshot-directory",
+                    self.args.snapshot_directory]
+        if self.args.snapshot_recover_on_startup:
+            database_args += ["--snapshot-recover-on-startup"]
+        if self.args.snapshot_on_exit:
+            database_args += ["--snapshot-on-exit"]
 
         # find executable path
         runner_bin = self.args.runner_bin
diff --git a/tools/requirements.txt b/tools/requirements.txt
index e86db6d17..d3c37c4f1 100644
--- a/tools/requirements.txt
+++ b/tools/requirements.txt
@@ -1,8 +1,8 @@
 cycler==0.10.0
 matplotlib==2.0.2
 numpy==1.13.1
-pkg-resources==0.0.0
 pyparsing==2.2.0
 python-dateutil==2.6.1
 pytz==2017.2
 six==1.11.0
+tabulate==0.8.1
diff --git a/tools/snapshot_recovery_speed.py b/tools/snapshot_recovery_speed.py
new file mode 100755
index 000000000..10549fcaf
--- /dev/null
+++ b/tools/snapshot_recovery_speed.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import time
+import sys  
+import os
+import tempfile
+from tabulate import tabulate
+from timeit import default_timer as timer
+
+# hackish way to reuse existing start code
+sys.path.append(os.path.dirname(os.path.realpath(__file__)) + 
+        "/../tests/macro_benchmark/")
+from databases import *
+from clients import *
+from common import get_absolute_path
+
+def main():
+    path = get_absolute_path("benchmarking.conf", "config")
+    tmp_dir = tempfile.TemporaryDirectory()
+
+    SNAPSHOT_DIR_ARG = ["--snapshot-directory", tmp_dir.name]
+    MAKE_SNAPSHOT_ARGS = ["--snapshot-on-exit"] + SNAPSHOT_DIR_ARG
+    RECOVER_SNAPSHOT_ARGS = ["--snapshot-recover-on-startup"] + SNAPSHOT_DIR_ARG
+    snapshot_memgraph = Memgraph(MAKE_SNAPSHOT_ARGS, path, 1)
+    recover_memgraph = Memgraph(RECOVER_SNAPSHOT_ARGS, path, 1)
+    client = QueryClient(None, 1)
+
+    results = []
+    for node_cnt in [10**6, 5*10**6, 10**7]:
+        for edge_per_node in [0, 1, 3]:
+            for prop_per_node in [0, 1]:
+                snapshot_memgraph.start()
+                properties = "{}".format(",".join(
+                    ["p{}: 0".format(x) for x in range(prop_per_node)])) 
+                client(["UNWIND RANGE(1, {}) AS _ CREATE ({{ {} }})".format(node_cnt, properties)], snapshot_memgraph)
+                client(["UNWIND RANGE(1, {}) AS _ MATCH (n) CREATE (n)-[:l]->(n)"
+                        .format(edge_per_node)], snapshot_memgraph)
+                snapshot_memgraph.stop()
+
+                # This waits for the snapshot to be recovered and then exits
+                start = timer()
+                recover_memgraph.start()
+                recover_memgraph.stop()
+                stop = timer()
+                diff = stop - start
+
+                snapshots = os.listdir(tmp_dir.name)
+                assert len(snapshots) == 1
+
+                snap_path = tmp_dir.name + "/" + snapshots[0]
+                snap_size = round(os.path.getsize(snap_path) / 1024. / 1024., 2)
+                os.remove(snap_path)
+
+                edge_cnt = edge_per_node * node_cnt
+                results.append((node_cnt, edge_cnt, prop_per_node, snap_size, diff))
+
+    print(tabulate(tabular_data=results, headers=["Nodes", "Edges", 
+        "Properties", "Snapshot size (MB)", "Elapsed time (s)"]))
+
+if __name__ == "__main__":
+    main()