From 0914c5a941cfadbadae207a38aa4c97c1a554ae0 Mon Sep 17 00:00:00 2001 From: Matej Ferencevic <matej.ferencevic@memgraph.io> Date: Fri, 25 Aug 2017 11:08:45 +0200 Subject: [PATCH] Migrate harness to use our bolt client Reviewers: mislav.bradac Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D711 --- ...marking.conf => benchmarking_latency.conf} | 8 ++ tests/CMakeLists.txt | 3 + tests/macro_benchmark/CMakeLists.txt | 31 ++++++ tests/macro_benchmark/harness/bolt_client.py | 104 ------------------ tests/macro_benchmark/harness/harness.py | 42 ++++--- .../harness}/harness_client.cpp | 12 +- tests/macro_benchmark/harness/run_bolt_client | 7 -- tools/apollo/build_diff | 2 +- tools/apollo/generate | 5 +- 9 files changed, 80 insertions(+), 134 deletions(-) rename config/{benchmarking.conf => benchmarking_latency.conf} (83%) create mode 100644 tests/macro_benchmark/CMakeLists.txt delete mode 100644 tests/macro_benchmark/harness/bolt_client.py rename tests/{manual => macro_benchmark/harness}/harness_client.cpp (95%) delete mode 100755 tests/macro_benchmark/harness/run_bolt_client diff --git a/config/benchmarking.conf b/config/benchmarking_latency.conf similarity index 83% rename from config/benchmarking.conf rename to config/benchmarking_latency.conf index 7c3eab954..286490416 100644 --- a/config/benchmarking.conf +++ b/config/benchmarking_latency.conf @@ -16,10 +16,18 @@ # if set to -1 the GC will not run --gc-cycle-sec=-1 +# skiplist gc cycle interval +# if set to 0 the GC will not run +--skiplist_gc_interval=0 + # snapshot cycle interval # if set to -1 the snapshooter will not run --snapshot-cycle-sec=-1 +# maximum allowed query execution time +# if set to -1 the time limit will not be enforced +--query_execution_time_sec=-1 + # create snapshot disabled on db exit --snapshot-on-db-exit=false diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d6b8ba986..cef99463b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ 
-27,6 +27,9 @@ endif (UNIX) # benchmark test binaries add_subdirectory(${PROJECT_SOURCE_DIR}/benchmark) +# macro_benchmark test binaries +add_subdirectory(${PROJECT_SOURCE_DIR}/macro_benchmark) + # concurrent test binaries add_subdirectory(${PROJECT_SOURCE_DIR}/concurrent) diff --git a/tests/macro_benchmark/CMakeLists.txt b/tests/macro_benchmark/CMakeLists.txt new file mode 100644 index 000000000..adefa8f7c --- /dev/null +++ b/tests/macro_benchmark/CMakeLists.txt @@ -0,0 +1,31 @@ +find_package(Threads REQUIRED) + +# set current directory name as a test type +get_filename_component(test_type ${CMAKE_CURRENT_SOURCE_DIR} NAME) + +# get all cpp abs file names recursively starting from current directory +file(GLOB_RECURSE test_type_cpps *.cpp) +message(STATUS "Available ${test_type} cpp files are: ${test_type_cpps}") + +# for each cpp file build binary and register test +foreach(test_cpp ${test_type_cpps}) + + # get exec name (remove extension from the abs path) + get_filename_component(exec_name ${test_cpp} NAME_WE) + + # set target name in format {project_name}__{test_type}__{exec_name} + set(target_name ${project_name}__${test_type}__${exec_name}) + + # build exec file + add_executable(${target_name} ${test_cpp}) + set_property(TARGET ${target_name} PROPERTY CXX_STANDARD ${cxx_standard}) + + # OUTPUT_NAME sets the real name of a target when it is built and can be + # used to help create two targets of the same name even though CMake + # requires unique logical target names + set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) + + # link libraries + target_link_libraries(${target_name} memgraph_lib) + +endforeach() diff --git a/tests/macro_benchmark/harness/bolt_client.py b/tests/macro_benchmark/harness/bolt_client.py deleted file mode 100644 index d29c51844..000000000 --- a/tests/macro_benchmark/harness/bolt_client.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python3 - -""" -A python script that launches the memgraph client, -executes a 
query and prints out a JSON dict of measurements -to stdout. - -Takes a number of cmd-line arguments of the following structure: - Positional, mandatory: - - db_uri - - query - Named, optional: - - encrypt - -Required the database URI to be passed as the single -cmd line argument. - -The dict that is printed out contains: - - return_code of the client execution process - - error_msg (empty if not applicable) - - metedata dict - -Note that 'metadata' are only valid if the return_code is 0 -""" - -import sys -import os -import time -import json -import io -from contextlib import redirect_stderr -from multiprocessing import Pool -from functools import partial - -# tests/stress dir, that's the place of common.py. -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( - os.path.realpath(__file__)))), "stress")) -from common import connection_argument_parser, execute_till_success, \ - argument_driver - - -# string constants -RETURN_CODE = "return_code" -ERROR_MSG = "error_msg" -WALL_TIME = "wall_time" - - -def _prepare_for_json(obj): - if isinstance(obj, dict): - return {k: _prepare_for_json(v) for k, v in obj.items()} - if isinstance(obj, list): - return [_prepare_for_json(elem) for elem in obj] - if isinstance(obj, (str, int, float, type(None))): - return obj - return None - - -def _print_dict(fname, d): - with open(fname, "a") as f: - f.write(json.dumps(_prepare_for_json(d), indent=2)) - - -def _run_query(args, query, self): - if not hasattr(self, "driver"): - # TODO: this driver and session is never closed. - self.driver = argument_driver(args) - self.session = self.driver.session() - return execute_till_success(self.session, query)[2] -_run_query.__defaults__ = (_run_query,) - - -def main(): - argp = connection_argument_parser() - argp.add_argument("--num-workers", type=int, default=1) - argp.add_argument("--output", type=str) - - # Parse args and ensure that stdout is not polluted by argument parsing. 
- try: - f = io.StringIO() - with redirect_stderr(f): - args = argp.parse_args() - except: - _print_dict(args.output, {RETURN_CODE: 1, ERROR_MSG: "Invalid cmd-line arguments"}) - sys.exit(1) - - queries = filter(lambda x: x.strip() != '', sys.stdin.read().split("\n")) - - # Execute the queries. - metadatas = [] - with Pool(args.num_workers) as pool: - start = time.time() - metadatas = list(pool.map(partial(_run_query, args), queries)) - end = time.time() - delta_time = end - start - - _print_dict(args.output, { - RETURN_CODE: 0, - WALL_TIME: (None if not queries else delta_time), - "metadatas": metadatas - }) - - -if __name__ == '__main__': - main() diff --git a/tests/macro_benchmark/harness/harness.py b/tests/macro_benchmark/harness/harness.py index 40376a807..c244c4104 100755 --- a/tests/macro_benchmark/harness/harness.py +++ b/tests/macro_benchmark/harness/harness.py @@ -319,10 +319,9 @@ class _BaseRunner: def _get_argparser(self): argp = ArgumentParser("RunnerArgumentParser") - # TODO: These two options should be passed two database and client, not - # only client as we are doing at the moment. + # TODO: This option should be passed to the database and client, not + # only to the client as we are doing at the moment. 
argp.add_argument("--RunnerUri", default="127.0.0.1:7687") - argp.add_argument("--RunnerEncryptBolt", action="store_true") return argp def start(self): @@ -331,14 +330,17 @@ class _BaseRunner: def execute(self, queries, num_client_workers): self.log.debug("execute('%s')", str(queries)) - client = path.join(DIR_PATH, "run_bolt_client") - client_args = ["--endpoint", self.args.RunnerUri] - client_args += ["--num-workers", str(num_client_workers)] - output_fd, output = tempfile.mkstemp() - os.close(output_fd) - client_args += ["--output", output] - if self.args.RunnerEncryptBolt: - client_args.append("--ssl-enabled") + + client = os.path.normpath(os.path.join(DIR_PATH, + "../../../build/tests/macro_benchmark/harness_client")) + if not os.path.exists(client): + # Apollo builds both debug and release binaries on diff + # so we need to use the release client if the debug one + # doesn't exist + client = os.path.normpath(os.path.join(DIR_PATH, + "../../../build_release/tests/macro_benchmark/" + "harness_client")) + queries_fd, queries_path = tempfile.mkstemp() try: queries_file = os.fdopen(queries_fd, "w") @@ -349,6 +351,14 @@ class _BaseRunner: os.remove(queries_path) raise Exception("Writing queries to temporary file failed") + output_fd, output = tempfile.mkstemp() + os.close(output_fd) + + address, port = self.args.RunnerUri.split(":") + client_args = ["--address", address, "--port", port, + "--num-workers", str(num_client_workers), + "--output", output] + cpu_time_start = self.database_bin.get_usage()["cpu"] # TODO make the timeout configurable per query or something return_code = self.bolt_client.run_and_wait( @@ -362,10 +372,14 @@ class _BaseRunner: "Failed with return_code %d and stderr:\n%s", str(queries), return_code, stderr) raise Exception("BoltClient execution failed") + with open(output) as f: data = json.loads(f.read()) - data[CPU_TIME] = cpu_time_end - cpu_time_start - return data + data[CPU_TIME] = cpu_time_end - cpu_time_start + + os.remove(output) + + 
return data def stop(self): self.log.info("stop") @@ -391,7 +405,7 @@ class MemgraphRunner(_BaseRunner): argp.add_argument("--RunnerConfig", default=os.path.normpath(os.path.join( DIR_PATH, - "../../../config/benchmarking.conf"))) + "../../../config/benchmarking_latency.conf"))) # parse args self.log.info("Initializing Runner with arguments %r", args) self.args, _ = argp.parse_known_args(args) diff --git a/tests/manual/harness_client.cpp b/tests/macro_benchmark/harness/harness_client.cpp similarity index 95% rename from tests/manual/harness_client.cpp rename to tests/macro_benchmark/harness/harness_client.cpp index 1a704ba78..ff5600c71 100644 --- a/tests/manual/harness_client.cpp +++ b/tests/macro_benchmark/harness/harness_client.cpp @@ -18,12 +18,12 @@ using DecodedValueT = communication::bolt::DecodedValue; DEFINE_string(address, "127.0.0.1", "Server address"); DEFINE_string(port, "7687", "Server port"); -DEFINE_uint64(num_workers, 1, "Number of workers"); +DEFINE_int32(num_workers, 1, "Number of workers"); DEFINE_string(output, "", "Output file"); DEFINE_string(username, "", "Username for the database"); DEFINE_string(password, "", "Password for the database"); -const uint64_t MAX_RETRIES = 1000; +const int MAX_RETRIES = 1000; void PrintJsonDecodedValue(std::ostream &os, const DecodedValueT &value) { switch (value.type()) { @@ -91,7 +91,7 @@ int main(int argc, char **argv) { std::string query; std::vector<std::thread> threads; - SpinLock mutex; + SpinLock spinlock; uint64_t last = 0; std::vector<std::string> queries; std::vector<std::map<std::string, DecodedValueT>> metadata; @@ -119,21 +119,21 @@ int main(int argc, char **argv) { ClientT client(std::move(socket), FLAGS_username, FLAGS_password); - uint64_t pos, i; std::string str; while (true) { + uint64_t pos; { - std::lock_guard<SpinLock> lock(mutex); + std::lock_guard<SpinLock> lock(spinlock); if (last == queries.size()) { break; } pos = last++; str = queries[pos]; } + int i; for (i = 0; i < MAX_RETRIES; 
++i) { try { auto ret = client.Execute(str, {}); - std::lock_guard<SpinLock> lock(mutex); metadata[pos] = ret.metadata; break; } catch (const communication::bolt::ClientQueryException &e) { diff --git a/tests/macro_benchmark/harness/run_bolt_client b/tests/macro_benchmark/harness/run_bolt_client deleted file mode 100755 index f92c3074f..000000000 --- a/tests/macro_benchmark/harness/run_bolt_client +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $DIR -source ../ve3/bin/activate -python3 bolt_client.py $@ -exit $? diff --git a/tools/apollo/build_diff b/tools/apollo/build_diff index 26897d01d..a86e690f9 100644 --- a/tools/apollo/build_diff +++ b/tools/apollo/build_diff @@ -21,7 +21,7 @@ mkdir build_release cd build_release cmake -DCMAKE_BUILD_TYPE=release .. -TIMEOUT=1000 make -j$THREADS memgraph_link_target +TIMEOUT=1000 make -j$THREADS memgraph_link_target memgraph__macro_benchmark__harness_client cd ../../parent diff --git a/tools/apollo/generate b/tools/apollo/generate index 914fea54a..10fa60f52 100755 --- a/tools/apollo/generate +++ b/tools/apollo/generate @@ -192,9 +192,10 @@ binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph") # macro benchmark tests MACRO_BENCHMARK_ARGS = "QuerySuite MemgraphRunner --groups aggregation" macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark") -stress_common = os.path.join(BASE_DIR, "tests", "stress", "common.py") +harness_client_binary = os.path.join(BUILD_RELEASE_DIR, "tests", + "macro_benchmark", "harness_client") infile = create_archive("macro_benchmark", [binary_release_path, - macro_bench_path, stress_common, config_path], + macro_bench_path, config_path, harness_client_binary], cwd = WORKSPACE_DIR) supervisor = "./{}/tests/macro_benchmark/harness/harness.py".format(BASE_DIR_NAME) args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_release_path