From f16246604f8332be03f11c9f45ed7a4d7f985393 Mon Sep 17 00:00:00 2001 From: Dominik Gleich <dominik.gleich@memgraph.io> Date: Thu, 16 Nov 2017 10:27:29 +0100 Subject: [PATCH] Add recovery speed measurement tool Reviewers: mferencevic, buda Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D985 --- tests/macro_benchmark/databases.py | 10 +++++ tools/requirements.txt | 2 +- tools/snapshot_recovery_speed.py | 62 ++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100755 tools/snapshot_recovery_speed.py diff --git a/tests/macro_benchmark/databases.py b/tests/macro_benchmark/databases.py index 96ebdcf01..ade5fbfe9 100644 --- a/tests/macro_benchmark/databases.py +++ b/tests/macro_benchmark/databases.py @@ -32,6 +32,9 @@ class Memgraph: default=get_absolute_path("memgraph", "build")) argp.add_argument("--port", default="7687", help="Database and client port") + argp.add_argument("--snapshot-directory", default=None) + argp.add_argument("--snapshot-on-exit", action="store_true") + argp.add_argument("--snapshot-recover-on-startup", action="store_true") self.log.info("Initializing Runner with arguments %r", args) self.args, _ = argp.parse_known_args(args) self.config = config @@ -45,6 +48,13 @@ class Memgraph: database_args = ["--port", self.args.port] if self.num_workers: database_args += ["--num_workers", str(self.num_workers)] + if self.args.snapshot_directory: + database_args += ["--snapshot-directory", + self.args.snapshot_directory] + if self.args.snapshot_recover_on_startup: + database_args += ["--snapshot-recover-on-startup"] + if self.args.snapshot_on_exit: + database_args += ["--snapshot-on-exit"] # find executable path runner_bin = self.args.runner_bin diff --git a/tools/requirements.txt b/tools/requirements.txt index e86db6d17..d3c37c4f1 100644 --- a/tools/requirements.txt +++ b/tools/requirements.txt @@ -1,8 +1,8 @@ cycler==0.10.0 matplotlib==2.0.2 numpy==1.13.1 
-pkg-resources==0.0.0
 pyparsing==2.2.0
 python-dateutil==2.6.1
 pytz==2017.2
 six==1.11.0
+tabulate==0.8.1
diff --git a/tools/snapshot_recovery_speed.py b/tools/snapshot_recovery_speed.py
new file mode 100755
index 000000000..10549fcaf
--- /dev/null
+++ b/tools/snapshot_recovery_speed.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import itertools
+import time
+import sys
+import os
+import tempfile
+from tabulate import tabulate
+from timeit import default_timer as timer
+
+# hackish way to reuse existing start code
+sys.path.append(os.path.dirname(os.path.realpath(__file__)) +
+        "/../tests/macro_benchmark/")
+from databases import *
+from clients import *
+from common import get_absolute_path
+
+
+def _populate(client, database, node_cnt, edge_per_node, prop_per_node):
+    """Fill the running database with the graph for one measurement.
+
+    Creates node_cnt nodes, each with prop_per_node properties named p0,
+    p1, ... set to 0, then issues a query that for each of edge_per_node
+    passes creates a self-loop edge of type l on every matched node.
+    """
+    properties = ",".join(
+        "p{}: 0".format(x) for x in range(prop_per_node))
+    client(["UNWIND RANGE(1, {}) AS _ CREATE ({{ {} }})"
+            .format(node_cnt, properties)], database)
+    client(["UNWIND RANGE(1, {}) AS _ MATCH (n) CREATE (n)-[:l]->(n)"
+            .format(edge_per_node)], database)
+
+
+def _consume_snapshot(snapshot_dir):
+    """Return the size of the only snapshot in snapshot_dir and delete it.
+
+    The size is the byte count divided by 1024 twice, rounded to two
+    decimals. Raises RuntimeError unless the directory holds exactly one
+    entry, since the measured snapshot could not be identified otherwise.
+    """
+    snapshots = os.listdir(snapshot_dir)
+    if len(snapshots) != 1:
+        raise RuntimeError(
+            "Expected exactly one snapshot in {}, found {}".format(
+                snapshot_dir, len(snapshots)))
+    snap_path = os.path.join(snapshot_dir, snapshots[0])
+    snap_size = round(os.path.getsize(snap_path) / 1024. / 1024., 2)
+    os.remove(snap_path)
+    return snap_size
+
+
+def main():
+    """Measure snapshot recovery time over a grid of generated graphs.
+
+    For each combination of node count, edges per node and properties per
+    node, a database started with --snapshot-on-exit is populated and
+    stopped. A second database configured with --snapshot-recover-on-startup
+    and the same snapshot directory is then started and stopped; the time
+    taken by that start/stop pair is recorded with the snapshot size.
+    The collected rows are printed as a table.
+    """
+    path = get_absolute_path("benchmarking.conf", "config")
+    results = []
+
+    # The context manager removes the snapshot directory even if a run fails.
+    with tempfile.TemporaryDirectory() as snapshot_dir:
+        snapshot_dir_arg = ["--snapshot-directory", snapshot_dir]
+        snapshot_memgraph = Memgraph(
+            ["--snapshot-on-exit"] + snapshot_dir_arg, path, 1)
+        recover_memgraph = Memgraph(
+            ["--snapshot-recover-on-startup"] + snapshot_dir_arg, path, 1)
+        client = QueryClient(None, 1)
+
+        # product() yields the combinations in nested-loop order.
+        for node_cnt, edge_per_node, prop_per_node in itertools.product(
+                [10**6, 5*10**6, 10**7], [0, 1, 3], [0, 1]):
+            snapshot_memgraph.start()
+            try:
+                _populate(client, snapshot_memgraph, node_cnt,
+                          edge_per_node, prop_per_node)
+            finally:
+                # Stop the database even if a query fails so that no
+                # process is left running.
+                snapshot_memgraph.stop()
+
+            # This waits for the snapshot to be recovered and then exits
+            start = timer()
+            recover_memgraph.start()
+            recover_memgraph.stop()
+            elapsed = timer() - start
+
+            snap_size = _consume_snapshot(snapshot_dir)
+            edge_cnt = edge_per_node * node_cnt
+            results.append(
+                (node_cnt, edge_cnt, prop_per_node, snap_size, elapsed))
+
+    print(tabulate(tabular_data=results, headers=["Nodes", "Edges",
+        "Properties", "Snapshot size (MB)", "Elapsed time (s)"]))
+
+
+if __name__ == "__main__":
+    main()