From 859641cb0c3892d079925e6320c0acc938acd0d4 Mon Sep 17 00:00:00 2001
From: Matej Ferencevic <matej.ferencevic@memgraph.io>
Date: Thu, 28 Dec 2017 16:35:22 +0100
Subject: [PATCH] Changed macro benchmark summary format

Reviewers: buda, mtomic, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: florijan, pullbot

Differential Revision: https://phabricator.memgraph.io/D972
---
 .gitignore                           |   2 +-
 tests/macro_benchmark/harness        |  43 ++++-------
 tests/macro_benchmark/query_suite.py |  61 ++++++++-------
 tools/apollo/macro_benchmark_summary | 109 +++++++++++++--------------
 4 files changed, 98 insertions(+), 117 deletions(-)

diff --git a/.gitignore b/.gitignore
index 860d3da5b..108515286 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,4 @@ ve/
 ve3/
 perf.data*
 TAGS
-*.apollo_data
+*.apollo_measurements
diff --git a/tests/macro_benchmark/harness b/tests/macro_benchmark/harness
index 75e1e4aa2..523213396 100755
--- a/tests/macro_benchmark/harness
+++ b/tests/macro_benchmark/harness
@@ -242,46 +242,33 @@ def main():
         log.info("No scenarios to execute")
         return
 
+    results = []
+
     # Run scenarios.
     log.info("Executing %d scenarios", len(filtered_scenarios))
-    results = []
     for (group, scenario_name), scenario in sorted(filtered_scenarios.items()):
         log.info("Executing group.scenario '%s.%s' with elements %s",
                  group, scenario_name, list(scenario.keys()))
-        for iter_result in suite.run(scenario, group, scenario_name, runner):
-            iter_result["group"] = group
-            iter_result["scenario"] = scenario_name
-            results.append(iter_result)
-
-    # Save results.
-    run = dict()
-    run["suite"] = args.suite
-    run["runner"] = runner.__class__.__name__
-    run["runner_config"] = vars(runner.args)
-    run.update(args.additional_run_fields)
-
-    # Currently this output is not used anywhere, and has a tendancy to create huge files..
-    # TODO(dgleich): Revise this in the near future.
-    #for result in results:
-    #    jail.store_data(result)
+        results.append(suite.run(scenario, group, scenario_name, runner))
 
     # Print summary.
    print("\n\nMacro benchmark summary:")
     print("{}\n".format(suite.summary))
+
+    # Save data points.
     with open(get_absolute_path(".harness_summary"), "w") as f:
-        print(suite.summary, file=f)
+        json.dump({"results": results, "headers": suite.headers}, f)
 
     # Export data points.
-    with open(get_absolute_path(".apollo_data"), "w") as f:
-        apollo_data = ""
-        data = list(filter(lambda x: x.strip(), suite.summary.split("\n")))
-        headers = data[0].strip().split()
-        for row in data[1:]:
-            row = row.strip().split()
-            group, scenario = row[0:2]
-            for header, value in zip(headers[2:], row[2:]):
-                apollo_data += "{}.{}.{} {}\n".format(group, scenario, header, value)
-        f.write(apollo_data)
+    with open(get_absolute_path(".apollo_measurements"), "w") as f:
+        headers = list(suite.headers)
+        headers.remove("group_name")
+        headers.remove("scenario_name")
+        for row in results:
+            group, scenario = row.pop("group_name"), row.pop("scenario_name")
+            for header in headers:
+                f.write("{}.{}.{} {:.20f}\n".format(group, scenario,
+                                                    header, row[header]["median"]))
 
 
 if __name__ == "__main__":
diff --git a/tests/macro_benchmark/query_suite.py b/tests/macro_benchmark/query_suite.py
index 0fc208e55..4b9bd6026 100644
--- a/tests/macro_benchmark/query_suite.py
+++ b/tests/macro_benchmark/query_suite.py
@@ -7,7 +7,7 @@ import json
 from argparse import ArgumentParser
 from collections import defaultdict
 import tempfile
-from statistics import median
+from statistics import median, mean, stdev
 from common import get_absolute_path, WALL_TIME, CPU_TIME, MAX_MEMORY, APOLLO
 from databases import Memgraph, Neo
 from clients import QueryClient
@@ -28,10 +28,10 @@ class _QuerySuite:
     FORMAT = ["{:>24}", "{:>28}", "{:>16}", "{:>18}", "{:>22}",
               "{:>16}", "{:>16}", "{:>16}"]
     FULL_FORMAT = "".join(FORMAT) + "\n"
-    summary = FULL_FORMAT.format(
-                  "group_name", "scenario_name", "parsing_time",
-                  "planning_time", "plan_execution_time",
-                  WALL_TIME, CPU_TIME, MAX_MEMORY)
+    headers = ["group_name", "scenario_name", "parsing_time",
+               "planning_time", "plan_execution_time",
+               WALL_TIME, CPU_TIME, MAX_MEMORY]
+    summary = summary_raw = FULL_FORMAT.format(*headers)
 
     def __init__(self, args):
         argp = ArgumentParser("MemgraphRunnerArgumentParser")
@@ -55,21 +55,7 @@ class _QuerySuite:
                                       time.time() - start_time))
             return r_val
 
-        def add_measurement(dictionary, iteration, key):
-            if key in dictionary:
-                measurement = {"target": key,
-                               "value": float(dictionary[key]),
-                               "unit": "s",
-                               "type": "time",
-                               "iteration": iteration}
-                measurements.append(measurement)
-            try:
-                measurement_lists[key].append(float(dictionary[key]))
-            except:
-                pass
-
-        measurements = []
-        measurement_lists = defaultdict(list)
+        measurements = defaultdict(list)
 
         # Run the whole test three times because memgraph is sometimes
         # consistently slow and with this hack we get a good median
@@ -107,29 +93,42 @@ class _QuerySuite:
 
             run_result = execute("run")
             if self.args.perf:
-               self.perf_proc.terminate()
+                self.perf_proc.terminate()
                 self.perf_proc.wait()
 
-            add_measurement(run_result, iteration, CPU_TIME)
-            add_measurement(run_result, iteration, MAX_MEMORY)
+            measurements["cpu_time"].append(run_result["cpu_time"])
+            measurements["max_memory"].append(run_result["max_memory"])
+
             assert len(run_result["groups"]) == 1, \
                     "Multiple groups in run step not yet supported"
+
             group = run_result["groups"][0]
-            add_measurement(group, iteration, WALL_TIME)
-            for measurement in ["parsing_time",
-                                "plan_execution_time",
-                                "planning_time"] :
+            measurements["wall_time"].append(group["wall_time"])
+
+            for key in ["parsing_time", "plan_execution_time",
+                        "planning_time"]:
                 for i in range(len(group.get("metadatas", []))):
-                    add_measurement(group["metadatas"][i], iteration,
-                                    measurement)
+                    if not key in group["metadatas"][i]: continue
+                    measurements[key].append(group["metadatas"][i][key])
+
                 execute("iterteardown")
 
-        # TODO value outlier detection and warning across iterations
         execute("teardown")
         runner.stop()
 
         self.append_scenario_summary(group_name, scenario_name,
-                                     measurement_lists, num_iterations)
+                                     measurements, num_iterations)
+
+        # calculate mean, median and stdev of measurements
+        for key in measurements:
+            samples = measurements[key]
+            measurements[key] = {"mean": mean(samples),
+                                 "median": median(samples),
+                                 "stdev": stdev(samples),
+                                 "count": len(samples)}
+        measurements["group_name"] = group_name
+        measurements["scenario_name"] = scenario_name
+        return measurements
 
 
     def append_scenario_summary(self, group_name, scenario_name,
diff --git a/tools/apollo/macro_benchmark_summary b/tools/apollo/macro_benchmark_summary
index 040608d07..78bc5c549 100755
--- a/tools/apollo/macro_benchmark_summary
+++ b/tools/apollo/macro_benchmark_summary
@@ -1,59 +1,46 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 import argparse
+import json
 import os
 import sys
 
-def convert2float(val):
-    try:
-        return float(val)
-    except:
-        return val
-
-def parse_file(fname):
+def load_file(fname):
     with open(fname) as f:
-        data = f.readlines()
-    ret = []
-    for row in data:
-        row = row.strip()
-        if row == "": continue
-        ret.append(list(map(convert2float, row.split())))
-    return ret
+        data = f.read()
+    try:
+        return json.loads(data)
+    except json.decoder.JSONDecodeError:
+        return {"results": [], "headers": []}
 
 def strip_integers(row):
-    return list(filter(lambda x: type(x) == str, row))
+    return {k: v for k, v in row.items() if type(v) == str}
 
-def find_item(data, header, row):
-    headers = data[0]
-    row = strip_integers(row)
-    pos_x = -1
-    for i in range(len(data)):
-        s = strip_integers(data[i])
-        if s != row: continue
-        pos_x = i
-        break
-    if pos_x == -1: return None
-    pos_y = -1
-    for j in range(len(headers)):
-        if headers[j] != header: continue
-        pos_y = j
-        break
-    if pos_y == -1: return None
-    return data[pos_x][pos_y]
+def find_item(results_prev, header_cur, row_cur):
+    row_cur = strip_integers(row_cur)
+    row_prev = None
+    for result in results_prev:
+        s = strip_integers(result)
+        if s == row_cur:
+            row_prev = result
+            break
+    if row_prev is None: return None
+    if not header_cur in row_prev: return None
+    return row_prev[header_cur]
 
-def compare_values(data_cur, data_prev):
-    ret = []
-    headers = data_cur[0]
-    for i in range(len(data_cur)):
+def compare_values(headers_cur, results_cur, headers_prev, results_prev):
+    ret = [list(map(lambda x: " ".join(x.split("_")).capitalize(),
+                    headers_cur))]
+    for row_cur in results_cur:
         ret.append([])
-        row_cur = data_cur[i]
         performance_change = False
-        for j in range(len(row_cur)):
-            item_cur = row_cur[j]
+        for header in headers_cur:
+            item_cur = row_cur[header]
             if type(item_cur) == str:
                 item = " ".join(item_cur.split("_")).capitalize()
             else:
-                item_prev = find_item(data_prev, headers[j], row_cur)
-                if j != len(row_cur) - 1:
+                value_cur = item_cur["median"]
+                item_prev = find_item(results_prev, header, row_cur)
+                if header != "max_memory":
                     fmt = "{:.3f}ms"
                     scale = 1000.0
                     treshold = 0.050
@@ -61,27 +48,29 @@
                     fmt = "{:.2f}MiB"
                     scale = 1.0 / 1024.0
                     treshold = 0.025
+                # TODO: add statistics check
                 if item_prev != None:
-                    if item_prev != 0.0:
-                        diff = (item_cur - item_prev) / item_prev
+                    value_prev = item_prev["median"]
+                    if value_prev != 0.0:
+                        diff = (value_cur - value_prev) / value_prev
                     else:
                         diff = 0.0
-                    if diff < -treshold and item_cur > 0.0005:
+                    if diff < -treshold and value_cur > 0.0005:
                         performance_change = True
                        sign = " {icon arrow-down color=green}"
-                    elif diff > treshold and item_cur > 0.0005:
+                    elif diff > treshold and value_cur > 0.0005:
                         performance_change = True
                         sign = " {icon arrow-up color=red}"
                     else:
                         sign = ""
                     fmt += " //({:+.2%})//{}"
-                    item = fmt.format(item_cur * scale, diff, sign)
+                    item = fmt.format(value_cur * scale, diff, sign)
                 else:
                     fmt += " //(new)// {{icon plus color=blue}}"
-                    item = fmt.format(item_cur * scale)
+                    item = fmt.format(value_cur * scale)
                     performance_change = True
             ret[-1].append(item)
-        if performance_change == False and i > 0: ret.pop()
+        if not performance_change: ret.pop()
     return ret
 
 def generate_remarkup(data):
@@ -113,15 +102,21 @@
 
     args = parser.parse_args()
 
-    data_cur, data_prev = [], []
-    for i, current in enumerate(args.current):
-        off = 0 if i == 0 else 1
-        data_cur += parse_file(current)[off:]
-    for i, previous in enumerate(args.previous):
-        off = 0 if i == 0 else 1
-        data_prev += parse_file(previous)[off:]
+    headers_cur, headers_prev = None, None
+    results_cur, results_prev = [], []
+    for current in args.current:
+        data = load_file(current)
+        if headers_cur is None:
+            headers_cur = data["headers"]
+        results_cur += data["results"]
+    for previous in args.previous:
+        data = load_file(previous)
+        if headers_prev is None:
+            headers_prev = data["headers"]
+        results_prev += data["results"]
 
-    markup = generate_remarkup(compare_values(data_cur, data_prev))
+    markup = generate_remarkup(compare_values(headers_cur, results_cur,
+                                              headers_prev, results_prev))
 
     if args.output == "":
         sys.stdout.write(markup)
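
Note on the new output formats (not part of the patch itself): after this change the harness
writes ".harness_summary" as JSON of the form {"headers": [...], "results": [...]}, where each
result carries the string fields "group_name" and "scenario_name" plus, for every measurement
header, a {"mean", "median", "stdev", "count"} dict, while ".apollo_measurements" gets one
"group.scenario.header <median>" line per measurement. A minimal consumer sketch under those
assumptions (the file path below is illustrative):

    import json

    # Load the JSON summary written by tests/macro_benchmark/harness.
    with open(".harness_summary") as f:
        summary = json.load(f)

    # Numeric measurement columns; the two name columns stay plain strings.
    headers = [h for h in summary["headers"]
               if h not in ("group_name", "scenario_name")]

    for row in summary["results"]:
        group, scenario = row["group_name"], row["scenario_name"]
        for header in headers:
            # Same shape as the .apollo_measurements export: one line per median value.
            print("{}.{}.{} {:.20f}".format(group, scenario,
                                            header, row[header]["median"]))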