Changed macro benchmark summary format

Reviewers: buda, mtomic, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: florijan, pullbot

Differential Revision: https://phabricator.memgraph.io/D972
Matej Ferencevic 2017-12-28 16:35:22 +01:00
parent 503381549e
commit 859641cb0c
4 changed files with 98 additions and 117 deletions

.gitignore (vendored): 2 changes

@@ -28,4 +28,4 @@ ve/
ve3/
perf.data*
TAGS
*.apollo_data
*.apollo_measurements


@@ -242,46 +242,33 @@ def main():
log.info("No scenarios to execute")
return
results = []
# Run scenarios.
log.info("Executing %d scenarios", len(filtered_scenarios))
results = []
for (group, scenario_name), scenario in sorted(filtered_scenarios.items()):
log.info("Executing group.scenario '%s.%s' with elements %s",
group, scenario_name, list(scenario.keys()))
for iter_result in suite.run(scenario, group, scenario_name, runner):
iter_result["group"] = group
iter_result["scenario"] = scenario_name
results.append(iter_result)
# Save results.
run = dict()
run["suite"] = args.suite
run["runner"] = runner.__class__.__name__
run["runner_config"] = vars(runner.args)
run.update(args.additional_run_fields)
# Currently this output is not used anywhere, and has a tendency to create huge files.
# TODO(dgleich): Revise this in the near future.
#for result in results:
# jail.store_data(result)
results.append(suite.run(scenario, group, scenario_name, runner))
# Print summary.
print("\n\nMacro benchmark summary:")
print("{}\n".format(suite.summary))
# Save data points.
with open(get_absolute_path(".harness_summary"), "w") as f:
print(suite.summary, file=f)
json.dump({"results": results, "headers": suite.headers}, f)
# Export data points.
with open(get_absolute_path(".apollo_data"), "w") as f:
apollo_data = ""
data = list(filter(lambda x: x.strip(), suite.summary.split("\n")))
headers = data[0].strip().split()
for row in data[1:]:
row = row.strip().split()
group, scenario = row[0:2]
for header, value in zip(headers[2:], row[2:]):
apollo_data += "{}.{}.{} {}\n".format(group, scenario, header, value)
f.write(apollo_data)
with open(get_absolute_path(".apollo_measurements"), "w") as f:
headers = list(suite.headers)
headers.remove("group_name")
headers.remove("scenario_name")
for row in results:
group, scenario = row.pop("group_name"), row.pop("scenario_name")
for header in headers:
f.write("{}.{}.{} {:.20f}\n".format(group, scenario,
header, row[header]["median"]))
if __name__ == "__main__":
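
For context, the harness now stores per-scenario statistics as JSON in .harness_summary and exports flat "group.scenario.metric value" lines to .apollo_measurements. Below is a minimal sketch of reading those exported lines back; only the file name and line layout come from the diff above, the helper name and return shape are illustrative and not part of this commit.

# Illustrative helper, not part of this commit: parse the flat
# "group.scenario.metric value" lines written to .apollo_measurements.
from collections import defaultdict

def read_apollo_measurements(path=".apollo_measurements"):
    stats = defaultdict(dict)
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue
            key, value = line.rsplit(" ", 1)
            group, scenario, metric = key.split(".", 2)
            stats[(group, scenario)][metric] = float(value)
    return dict(stats)
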


@@ -7,7 +7,7 @@ import json
from argparse import ArgumentParser
from collections import defaultdict
import tempfile
from statistics import median
from statistics import median, mean, stdev
from common import get_absolute_path, WALL_TIME, CPU_TIME, MAX_MEMORY, APOLLO
from databases import Memgraph, Neo
from clients import QueryClient
@@ -28,10 +28,10 @@ class _QuerySuite:
FORMAT = ["{:>24}", "{:>28}", "{:>16}", "{:>18}", "{:>22}",
"{:>16}", "{:>16}", "{:>16}"]
FULL_FORMAT = "".join(FORMAT) + "\n"
summary = FULL_FORMAT.format(
"group_name", "scenario_name", "parsing_time",
"planning_time", "plan_execution_time",
WALL_TIME, CPU_TIME, MAX_MEMORY)
headers = ["group_name", "scenario_name", "parsing_time",
"planning_time", "plan_execution_time",
WALL_TIME, CPU_TIME, MAX_MEMORY]
summary = summary_raw = FULL_FORMAT.format(*headers)
def __init__(self, args):
argp = ArgumentParser("MemgraphRunnerArgumentParser")
@@ -55,21 +55,7 @@ class _QuerySuite:
time.time() - start_time))
return r_val
def add_measurement(dictionary, iteration, key):
if key in dictionary:
measurement = {"target": key,
"value": float(dictionary[key]),
"unit": "s",
"type": "time",
"iteration": iteration}
measurements.append(measurement)
try:
measurement_lists[key].append(float(dictionary[key]))
except:
pass
measurements = []
measurement_lists = defaultdict(list)
measurements = defaultdict(list)
# Run the whole test three times because memgraph is sometimes
# consistently slow and with this hack we get a good median
@@ -107,29 +93,42 @@ class _QuerySuite:
run_result = execute("run")
if self.args.perf:
self.perf_proc.terminate()
self.perf_proc.terminate()
self.perf_proc.wait()
add_measurement(run_result, iteration, CPU_TIME)
add_measurement(run_result, iteration, MAX_MEMORY)
measurements["cpu_time"].append(run_result["cpu_time"])
measurements["max_memory"].append(run_result["max_memory"])
assert len(run_result["groups"]) == 1, \
"Multiple groups in run step not yet supported"
group = run_result["groups"][0]
add_measurement(group, iteration, WALL_TIME)
for measurement in ["parsing_time",
"plan_execution_time",
"planning_time"] :
measurements["wall_time"].append(group["wall_time"])
for key in ["parsing_time", "plan_execution_time",
"planning_time"]:
for i in range(len(group.get("metadatas", []))):
add_measurement(group["metadatas"][i], iteration,
measurement)
if not key in group["metadatas"][i]: continue
measurements[key].append(group["metadatas"][i][key])
execute("iterteardown")
# TODO value outlier detection and warning across iterations
execute("teardown")
runner.stop()
self.append_scenario_summary(group_name, scenario_name,
measurement_lists, num_iterations)
measurements, num_iterations)
# calculate mean, median and stdev of measurements
for key in measurements:
samples = measurements[key]
measurements[key] = {"mean": mean(samples),
"median": median(samples),
"stdev": stdev(samples),
"count": len(samples)}
measurements["group_name"] = group_name
measurements["scenario_name"] = scenario_name
return measurements
def append_scenario_summary(self, group_name, scenario_name,
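
With this change, run() returns one dictionary per scenario in which every measured metric maps to its mean, median, standard deviation and sample count. The aggregation can be reproduced in isolation as sketched below; only the statistics used come from the diff, the helper name and sample numbers are made up.

# Illustrative only: the per-metric aggregation performed at the end of
# run(), shown standalone with invented wall-time samples.
from statistics import mean, median, stdev

def summarize(samples):
    return {"mean": mean(samples), "median": median(samples),
            "stdev": stdev(samples), "count": len(samples)}

print(summarize([0.012, 0.011, 0.014]))
# {'mean': 0.0123..., 'median': 0.012, 'stdev': 0.0015..., 'count': 3}
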


@@ -1,59 +1,46 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import argparse
import json
import os
import sys
def convert2float(val):
try:
return float(val)
except:
return val
def parse_file(fname):
def load_file(fname):
with open(fname) as f:
data = f.readlines()
ret = []
for row in data:
row = row.strip()
if row == "": continue
ret.append(list(map(convert2float, row.split())))
return ret
data = f.read()
try:
return json.loads(data)
except json.decoder.JSONDecodeError:
return {"results": [], "headers": []}
def strip_integers(row):
return list(filter(lambda x: type(x) == str, row))
return {k: v for k, v in row.items() if type(v) == str}
def find_item(data, header, row):
headers = data[0]
row = strip_integers(row)
pos_x = -1
for i in range(len(data)):
s = strip_integers(data[i])
if s != row: continue
pos_x = i
break
if pos_x == -1: return None
pos_y = -1
for j in range(len(headers)):
if headers[j] != header: continue
pos_y = j
break
if pos_y == -1: return None
return data[pos_x][pos_y]
def find_item(results_prev, header_cur, row_cur):
row_cur = strip_integers(row_cur)
row_prev = None
for result in results_prev:
s = strip_integers(result)
if s == row_cur:
row_prev = result
break
if row_prev is None: return None
if not header_cur in row_prev: return None
return row_prev[header_cur]
def compare_values(data_cur, data_prev):
ret = []
headers = data_cur[0]
for i in range(len(data_cur)):
def compare_values(headers_cur, results_cur, headers_prev, results_prev):
ret = [list(map(lambda x: " ".join(x.split("_")).capitalize(),
headers_cur))]
for row_cur in results_cur:
ret.append([])
row_cur = data_cur[i]
performance_change = False
for j in range(len(row_cur)):
item_cur = row_cur[j]
for header in headers_cur:
item_cur = row_cur[header]
if type(item_cur) == str:
item = " ".join(item_cur.split("_")).capitalize()
else:
item_prev = find_item(data_prev, headers[j], row_cur)
if j != len(row_cur) - 1:
value_cur = item_cur["median"]
item_prev = find_item(results_prev, header, row_cur)
if header != "max_memory":
fmt = "{:.3f}ms"
scale = 1000.0
treshold = 0.050
@@ -61,27 +48,29 @@ def compare_values(data_cur, data_prev):
fmt = "{:.2f}MiB"
scale = 1.0 / 1024.0
treshold = 0.025
# TODO: add statistics check
if item_prev != None:
if item_prev != 0.0:
diff = (item_cur - item_prev) / item_prev
value_prev = item_prev["median"]
if value_prev != 0.0:
diff = (value_cur - value_prev) / value_prev
else:
diff = 0.0
if diff < -treshold and item_cur > 0.0005:
if diff < -treshold and value_cur > 0.0005:
performance_change = True
sign = " {icon arrow-down color=green}"
elif diff > treshold and item_cur > 0.0005:
elif diff > treshold and value_cur > 0.0005:
performance_change = True
sign = " {icon arrow-up color=red}"
else:
sign = ""
fmt += " //({:+.2%})//{}"
item = fmt.format(item_cur * scale, diff, sign)
item = fmt.format(value_cur * scale, diff, sign)
else:
fmt += " //(new)// {{icon plus color=blue}}"
item = fmt.format(item_cur * scale)
item = fmt.format(value_cur * scale)
performance_change = True
ret[-1].append(item)
if performance_change == False and i > 0: ret.pop()
if not performance_change: ret.pop()
return ret
def generate_remarkup(data):
@@ -113,15 +102,21 @@ if __name__ == "__main__":
args = parser.parse_args()
data_cur, data_prev = [], []
for i, current in enumerate(args.current):
off = 0 if i == 0 else 1
data_cur += parse_file(current)[off:]
for i, previous in enumerate(args.previous):
off = 0 if i == 0 else 1
data_prev += parse_file(previous)[off:]
headers_cur, headers_prev = None, None
results_cur, results_prev = [], []
for current in args.current:
data = load_file(current)
if headers_cur is None:
headers_cur = data["headers"]
results_cur += data["results"]
for previous in args.previous:
data = load_file(previous)
if headers_prev is None:
headers_prev = data["headers"]
results_prev += data["results"]
markup = generate_remarkup(compare_values(data_cur, data_prev))
markup = generate_remarkup(compare_values(headers_cur, results_cur,
headers_prev, results_prev))
if args.output == "":
sys.stdout.write(markup)
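
To summarize the comparison rule applied to each median above: the current value is compared relatively against the previous run, with a 5% threshold for time metrics, a 2.5% threshold for max_memory, and a 0.0005 noise floor on the current value. The standalone sketch below restates that rule; the thresholds and noise floor come from the diff, while the helper name and labels are illustrative.

# Illustrative restatement of the threshold check in compare_values;
# the thresholds and the 0.0005 noise floor are taken from the diff above.
def classify(value_cur, value_prev, threshold):
    diff = 0.0 if value_prev == 0.0 else (value_cur - value_prev) / value_prev
    if diff < -threshold and value_cur > 0.0005:
        return diff, "improvement"   # rendered as {icon arrow-down color=green}
    if diff > threshold and value_cur > 0.0005:
        return diff, "regression"    # rendered as {icon arrow-up color=red}
    return diff, "no significant change"

print(classify(0.0105, 0.0120, 0.050))  # ~(-0.125, 'improvement'), a 12.5% drop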