From 0f562cd043b93e3b27367df428b2a8d8fc9469fc Mon Sep 17 00:00:00 2001 From: Matej Ferencevic Date: Sat, 29 Jul 2017 20:40:29 +0200 Subject: [PATCH] First version of apollo harness integration. Reviewers: mislav.bradac Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D602 --- tests/macro_benchmark/harness/bolt_client.py | 10 +++-- tests/macro_benchmark/harness/harness.py | 40 +++++++++---------- tests/macro_benchmark/harness/jail_faker.py | 29 +------------- tests/macro_benchmark/harness/run_bolt_client | 7 ++++ tools/apollo_generate | 10 +++++ 5 files changed, 44 insertions(+), 52 deletions(-) mode change 100644 => 100755 tests/macro_benchmark/harness/harness.py create mode 100755 tests/macro_benchmark/harness/run_bolt_client diff --git a/tests/macro_benchmark/harness/bolt_client.py b/tests/macro_benchmark/harness/bolt_client.py index 0a042a3e9..d29c51844 100644 --- a/tests/macro_benchmark/harness/bolt_client.py +++ b/tests/macro_benchmark/harness/bolt_client.py @@ -55,8 +55,9 @@ def _prepare_for_json(obj): return None -def _print_dict(d): - print(json.dumps(_prepare_for_json(d), indent=2)) +def _print_dict(fname, d): + with open(fname, "a") as f: + f.write(json.dumps(_prepare_for_json(d), indent=2)) def _run_query(args, query, self): @@ -71,6 +72,7 @@ _run_query.__defaults__ = (_run_query,) def main(): argp = connection_argument_parser() argp.add_argument("--num-workers", type=int, default=1) + argp.add_argument("--output", type=str) # Parse args and ensure that stdout is not polluted by argument parsing. try: @@ -78,7 +80,7 @@ def main(): with redirect_stderr(f): args = argp.parse_args() except: - _print_dict({RETURN_CODE: 1, ERROR_MSG: "Invalid cmd-line arguments"}) + _print_dict(args.output, {RETURN_CODE: 1, ERROR_MSG: "Invalid cmd-line arguments"}) sys.exit(1) queries = filter(lambda x: x.strip() != '', sys.stdin.read().split("\n")) @@ -91,7 +93,7 @@ def main(): end = time.time() delta_time = end - start - _print_dict({ + _print_dict(args.output, { RETURN_CODE: 0, WALL_TIME: (None if not queries else delta_time), "metadatas": metadatas diff --git a/tests/macro_benchmark/harness/harness.py b/tests/macro_benchmark/harness/harness.py old mode 100644 new mode 100755 index b53a0193c..1276b2bf0 --- a/tests/macro_benchmark/harness/harness.py +++ b/tests/macro_benchmark/harness/harness.py @@ -21,7 +21,8 @@ except: import jail_faker as jail APOLLO = False -from bolt_client import WALL_TIME +DIR_PATH = path.dirname(path.realpath(__file__)) +WALL_TIME = "wall_time" from perf import Perf log = logging.getLogger(__name__) @@ -131,7 +132,7 @@ class _QuerySuite: """ argp = ArgumentParser("QuerySuite.scenarios argument parser") argp.add_argument("--query-scenarios-root", default=path.join( - path.dirname(path.dirname(path.realpath(__file__))), "groups"), + DIR_PATH, "..", "groups"), dest="root") args, _ = argp.parse_known_args() log.info("Loading query scenarios from root: %s", args.root) @@ -209,8 +210,7 @@ class _QuerySuite: except: pass - if not APOLLO: - pid = runner.start() + pid = runner.start() execute("setup") # warmup phase @@ -316,7 +316,7 @@ class _BaseRunner: argp = ArgumentParser("RunnerArgumentParser") # TODO: These two options should be passed two database and client, not # only client as we are doing at the moment. - argp.add_argument("--RunnerUri", default="localhost:7687") + argp.add_argument("--RunnerUri", default="127.0.0.1:7687") argp.add_argument("--RunnerEncryptBolt", action="store_true") return argp @@ -326,9 +326,12 @@ class _BaseRunner: def execute(self, queries, num_client_workers): self.log.debug("execute('%s')", str(queries)) - client_args = [path.join(path.dirname(__file__), "bolt_client.py")] - client_args += ["--endpoint", self.args.RunnerUri] + client = path.join(DIR_PATH, "run_bolt_client") + client_args = ["--endpoint", self.args.RunnerUri] client_args += ["--num-workers", str(num_client_workers)] + output_fd, output = tempfile.mkstemp() + os.close(output_fd) + client_args += ["--output", output] if self.args.RunnerEncryptBolt: client_args.append("--ssl-enabled") queries_fd, queries_path = tempfile.mkstemp() @@ -343,7 +346,7 @@ class _BaseRunner: # TODO make the timeout configurable per query or something return_code = self.bolt_client.run_and_wait( - "python3", client_args, timeout=10000, stdin=queries_path) + client, client_args, timeout=600, stdin=queries_path) os.remove(queries_path) if return_code != 0: with open(self.bolt_client.get_stderr()) as f: @@ -352,12 +355,11 @@ class _BaseRunner: "Failed with return_code %d and stderr:\n%s", str(queries), return_code, stderr) raise Exception("BoltClient execution failed") - with open(self.bolt_client.get_stdout()) as f: + with open(output) as f: return json.loads(f.read()) def stop(self): self.log.info("stop") - self.bolt_client.send_signal(jail.SIGKILL) self.bolt_client.wait() self.database_bin.send_signal(jail.SIGTERM) self.database_bin.wait() @@ -375,12 +377,12 @@ class MemgraphRunner(_BaseRunner): self.log = logging.getLogger("MemgraphRunner") argp = self._get_argparser() argp.add_argument("--RunnerBin", - default=os.path.join(os.path.dirname(__file__), + default=os.path.join(DIR_PATH, "../../../build/memgraph")) argp.add_argument("--RunnerConfig", - default=os.path.join( - os.path.dirname(__file__), - "../../../config/benchmarking.conf")) + default=os.path.normpath(os.path.join( + DIR_PATH, + "../../../config/benchmarking.conf"))) # parse args self.log.info("Initializing Runner with arguments %r", args) self.args, _ = argp.parse_known_args(args) @@ -394,7 +396,7 @@ class MemgraphRunner(_BaseRunner): environment = os.environ.copy() environment["MEMGRAPH_CONFIG"] = self.args.RunnerConfig self.database_bin.run(self.args.RunnerBin, env=environment, - timeout=10000) + timeout=600) # TODO change to a check via SIGUSR time.sleep(1.0) return self.database_bin.get_pid() if not APOLLO else None @@ -407,12 +409,10 @@ class NeoRunner(_BaseRunner): argp = self._get_argparser() argp.add_argument( "--RunnerConfigDir", - default=path.join(path.dirname(path.realpath(__file__)), - "neo4j_config")) + default=path.join(DIR_PATH, "neo4j_config")) argp.add_argument( "--RunnerHomeDir", - default=path.join(path.dirname(path.realpath(__file__)), - "neo4j_home")) + default=path.join(DIR_PATH, "neo4j_home")) # parse args self.log.info("Initializing Runner with arguments %r", args) self.args, _ = argp.parse_known_args(args) @@ -430,7 +430,7 @@ class NeoRunner(_BaseRunner): if path.exists(neo4j_data_path): shutil.rmtree(neo4j_data_path) self.database_bin.run("/usr/share/neo4j/bin/neo4j", args=["console"], - env=environment, timeout=10000) + env=environment, timeout=600) # TODO change to a check via SIGUSR time.sleep(5.0) return self.database_bin.get_pid() if not APOLLO else None diff --git a/tests/macro_benchmark/harness/jail_faker.py b/tests/macro_benchmark/harness/jail_faker.py index 66ec3cfdd..0889e9dc4 100644 --- a/tests/macro_benchmark/harness/jail_faker.py +++ b/tests/macro_benchmark/harness/jail_faker.py @@ -14,7 +14,6 @@ from signal import * SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -TEMP_DIR = os.path.join(SCRIPT_DIR, ".temp") STORAGE_DIR = os.path.join(SCRIPT_DIR, ".storage") @@ -48,11 +47,6 @@ class Process: # create exe list exe = [binary] + args - # temporary stdout and stderr files - stdout = self._temporary_file("stdout") - stderr = self._temporary_file("stderr") - self._files = [stdout, stderr] - # set environment variables keys = ["PATH", "HOME", "USER", "LANG", "PWD"] for key in keys: @@ -64,9 +58,7 @@ class Process: # start process self._proc = subprocess.Popen(exe, env = env, cwd = cwd, - stdin = open(stdin, "r"), - stdout = open(stdout, "w"), - stderr = open(stderr, "w")) + stdin = open(stdin, "r")) def run_and_wait(self, *args, **kwargs): check = kwargs.pop("check", True) @@ -113,27 +105,12 @@ class Process: sys.stderr.write("WARNING: Trying to set memory for {} to " "{}\n".format(str(self), memory)) - # this currently isn't implemented in the real API - def get_stdout(self): - if self._proc == None or self._proc.returncode == None: - raise ProcessException - return self._files[0] - - # this currently isn't implemented in the real API - def get_stderr(self): - if self._proc == None or self._proc.returncode == None: - raise ProcessException - return self._files[1] - # WARNING: this won't be implemented in the real API def get_pid(self): if self._proc == None: raise ProcessException return self._proc.pid - def _temporary_file(self, name): - return os.path.join(TEMP_DIR, ".".join([name, str(uuid.uuid4()), "dat"])) - def _set_usage(self, val, name, only_value = False): self._usage[name] = val if only_value: return @@ -195,9 +172,6 @@ _thread.start() if not os.path.exists(STORAGE_DIR): os.mkdir(STORAGE_DIR) -if os.path.exists(TEMP_DIR): - shutil.rmtree(TEMP_DIR) -os.mkdir(TEMP_DIR) _storage_name = os.path.join(STORAGE_DIR, time.strftime("%Y%m%d%H%M%S") + ".json") _storage_file = open(_storage_name, "w") @@ -208,7 +182,6 @@ def cleanup(): if proc._proc == None: continue proc.send_signal(SIGKILL) proc.get_status() - shutil.rmtree(TEMP_DIR) _storage_file.close() # end of private methods ------------------------------------------------------ diff --git a/tests/macro_benchmark/harness/run_bolt_client b/tests/macro_benchmark/harness/run_bolt_client new file mode 100755 index 000000000..f92c3074f --- /dev/null +++ b/tests/macro_benchmark/harness/run_bolt_client @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $DIR +source ../ve3/bin/activate +python3 bolt_client.py $@ +exit $? diff --git a/tools/apollo_generate b/tools/apollo_generate index db8c71271..94c732db1 100755 --- a/tools/apollo_generate +++ b/tools/apollo_generate @@ -172,6 +172,16 @@ RUNS.append(generate_run("quality_assurance", commands = commands, "\.quality_assurance_status".format( BASE_DIR_NAME))) +# macro benchmark tests +macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark") +stress_common = os.path.join(BASE_DIR, "tests", "stress", "common.py") +infile = create_archive("macro_benchmark", [binary_path, binary_link_path, + macro_bench_path, stress_common, config_path], cwd = WORKSPACE_DIR) +supervisor = "./{}/tests/macro_benchmark/harness/harness.py".format(BASE_DIR_NAME) +args = "QuerySuite MemgraphRunner --groups aggregation" +RUNS.append(generate_run("macro_benchmark", supervisor = supervisor, + arguments = args, infile = infile)) + # store ARCHIVES and RUNS store_metadata(OUTPUT_DIR, "archives", ARCHIVES) store_metadata(OUTPUT_DIR, "runs", RUNS)