Changed macro benchmark summary format
Reviewers: buda, mtomic, mislav.bradac
Reviewed By: mislav.bradac
Subscribers: florijan, pullbot
Differential Revision: https://phabricator.memgraph.io/D972
parent 503381549e
commit 859641cb0c
.gitignore (vendored)
@@ -28,4 +28,4 @@ ve/
ve3/
perf.data*
TAGS
*.apollo_data
*.apollo_measurements
@@ -242,46 +242,33 @@ def main():
        log.info("No scenarios to execute")
        return

    results = []

    # Run scenarios.
    log.info("Executing %d scenarios", len(filtered_scenarios))
    results = []
    for (group, scenario_name), scenario in sorted(filtered_scenarios.items()):
        log.info("Executing group.scenario '%s.%s' with elements %s",
                 group, scenario_name, list(scenario.keys()))
        for iter_result in suite.run(scenario, group, scenario_name, runner):
            iter_result["group"] = group
            iter_result["scenario"] = scenario_name
            results.append(iter_result)

    # Save results.
    run = dict()
    run["suite"] = args.suite
    run["runner"] = runner.__class__.__name__
    run["runner_config"] = vars(runner.args)
    run.update(args.additional_run_fields)

    # Currently this output is not used anywhere, and has a tendancy to create huge files..
    # TODO(dgleich): Revise this in the near future.
    #for result in results:
    #    jail.store_data(result)
        results.append(suite.run(scenario, group, scenario_name, runner))

    # Print summary.
    print("\n\nMacro benchmark summary:")
    print("{}\n".format(suite.summary))

    # Save data points.
    with open(get_absolute_path(".harness_summary"), "w") as f:
        print(suite.summary, file=f)
        json.dump({"results": results, "headers": suite.headers}, f)

    # Export data points.
    with open(get_absolute_path(".apollo_data"), "w") as f:
        apollo_data = ""
        data = list(filter(lambda x: x.strip(), suite.summary.split("\n")))
        headers = data[0].strip().split()
        for row in data[1:]:
            row = row.strip().split()
            group, scenario = row[0:2]
            for header, value in zip(headers[2:], row[2:]):
                apollo_data += "{}.{}.{} {}\n".format(group, scenario, header, value)
        f.write(apollo_data)
    with open(get_absolute_path(".apollo_measurements"), "w") as f:
        headers = list(suite.headers)
        headers.remove("group_name")
        headers.remove("scenario_name")
        for row in results:
            group, scenario = row.pop("group_name"), row.pop("scenario_name")
            for header in headers:
                f.write("{}.{}.{} {:.20f}\n".format(group, scenario,
                        header, row[header]["median"]))


if __name__ == "__main__":
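For reference, a minimal sketch (not part of the commit) of how the new .harness_summary JSON could be consumed downstream, assuming the layout written above: a "headers" list plus "results" rows whose metric entries carry a "median" value.

import json

# Minimal sketch: read the JSON summary written above and re-derive the flat
# "group.scenario.metric value" lines, mirroring the .apollo_measurements export.
with open(".harness_summary") as f:
    summary = json.load(f)

metrics = [h for h in summary["headers"]
           if h not in ("group_name", "scenario_name")]
for row in summary["results"]:
    group, scenario = row["group_name"], row["scenario_name"]
    for metric in metrics:
        print("{}.{}.{} {:.20f}".format(
            group, scenario, metric, row[metric]["median"]))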
@@ -7,7 +7,7 @@ import json
from argparse import ArgumentParser
from collections import defaultdict
import tempfile
from statistics import median
from statistics import median, mean, stdev
from common import get_absolute_path, WALL_TIME, CPU_TIME, MAX_MEMORY, APOLLO
from databases import Memgraph, Neo
from clients import QueryClient
@@ -28,10 +28,10 @@ class _QuerySuite:
    FORMAT = ["{:>24}", "{:>28}", "{:>16}", "{:>18}", "{:>22}",
              "{:>16}", "{:>16}", "{:>16}"]
    FULL_FORMAT = "".join(FORMAT) + "\n"
    summary = FULL_FORMAT.format(
            "group_name", "scenario_name", "parsing_time",
            "planning_time", "plan_execution_time",
            WALL_TIME, CPU_TIME, MAX_MEMORY)
    headers = ["group_name", "scenario_name", "parsing_time",
               "planning_time", "plan_execution_time",
               WALL_TIME, CPU_TIME, MAX_MEMORY]
    summary = summary_raw = FULL_FORMAT.format(*headers)

    def __init__(self, args):
        argp = ArgumentParser("MemgraphRunnerArgumentParser")
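For illustration, a small self-contained sketch of what FULL_FORMAT.format(*headers) renders; the plain strings "wall_time", "cpu_time" and "max_memory" below are assumed stand-ins for the WALL_TIME, CPU_TIME and MAX_MEMORY constants imported from common.

# Right-align every header into its fixed-width column; data rows appended
# with the same format string line up under the header row.
FORMAT = ["{:>24}", "{:>28}", "{:>16}", "{:>18}", "{:>22}",
          "{:>16}", "{:>16}", "{:>16}"]
FULL_FORMAT = "".join(FORMAT) + "\n"
headers = ["group_name", "scenario_name", "parsing_time",
           "planning_time", "plan_execution_time",
           "wall_time", "cpu_time", "max_memory"]
print(FULL_FORMAT.format(*headers), end="")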
@@ -55,21 +55,7 @@ class _QuerySuite:
                     time.time() - start_time))
            return r_val

        def add_measurement(dictionary, iteration, key):
            if key in dictionary:
                measurement = {"target": key,
                               "value": float(dictionary[key]),
                               "unit": "s",
                               "type": "time",
                               "iteration": iteration}
                measurements.append(measurement)
                try:
                    measurement_lists[key].append(float(dictionary[key]))
                except:
                    pass

        measurements = []
        measurement_lists = defaultdict(list)
        measurements = defaultdict(list)

        # Run the whole test three times because memgraph is sometimes
        # consistently slow and with this hack we get a good median
@@ -107,29 +93,42 @@ class _QuerySuite:
            run_result = execute("run")

            if self.args.perf:
                self.perf_proc.terminate()
                self.perf_proc.terminate()
                self.perf_proc.wait()

            add_measurement(run_result, iteration, CPU_TIME)
            add_measurement(run_result, iteration, MAX_MEMORY)
            measurements["cpu_time"].append(run_result["cpu_time"])
            measurements["max_memory"].append(run_result["max_memory"])

            assert len(run_result["groups"]) == 1, \
                    "Multiple groups in run step not yet supported"

            group = run_result["groups"][0]
            add_measurement(group, iteration, WALL_TIME)
            for measurement in ["parsing_time",
                                "plan_execution_time",
                                "planning_time"] :
            measurements["wall_time"].append(group["wall_time"])

            for key in ["parsing_time", "plan_execution_time",
                        "planning_time"]:
                for i in range(len(group.get("metadatas", []))):
                    add_measurement(group["metadatas"][i], iteration,
                                    measurement)
                    if not key in group["metadatas"][i]: continue
                    measurements[key].append(group["metadatas"][i][key])

            execute("iterteardown")

        # TODO value outlier detection and warning across iterations
        execute("teardown")
        runner.stop()

        self.append_scenario_summary(group_name, scenario_name,
                                     measurement_lists, num_iterations)
                                     measurements, num_iterations)

        # calculate mean, median and stdev of measurements
        for key in measurements:
            samples = measurements[key]
            measurements[key] = {"mean": mean(samples),
                                 "median": median(samples),
                                 "stdev": stdev(samples),
                                 "count": len(samples)}
        measurements["group_name"] = group_name
        measurements["scenario_name"] = scenario_name

        return measurements

    def append_scenario_summary(self, group_name, scenario_name,
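A self-contained sketch of the aggregation step above, on made-up sample values: raw per-iteration samples are collected per metric, then each metric is collapsed into a mean/median/stdev/count summary.

from collections import defaultdict
from statistics import mean, median, stdev

measurements = defaultdict(list)
for sample in (0.012, 0.011, 0.014):   # hypothetical wall_time samples
    measurements["wall_time"].append(sample)

# Collapse each metric's sample list into summary statistics.
summary = {key: {"mean": mean(samples),
                 "median": median(samples),
                 "stdev": stdev(samples),
                 "count": len(samples)}
           for key, samples in measurements.items()}
print(summary["wall_time"]["median"])   # -> 0.012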
@@ -1,59 +1,46 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import argparse
import json
import os
import sys

def convert2float(val):
    try:
        return float(val)
    except:
        return val

def parse_file(fname):
def load_file(fname):
    with open(fname) as f:
        data = f.readlines()
    ret = []
    for row in data:
        row = row.strip()
        if row == "": continue
        ret.append(list(map(convert2float, row.split())))
    return ret
        data = f.read()
    try:
        return json.loads(data)
    except json.decoder.JSONDecodeError:
        return {"results": [], "headers": []}

def strip_integers(row):
    return list(filter(lambda x: type(x) == str, row))
    return {k: v for k, v in row.items() if type(v) == str}

def find_item(data, header, row):
    headers = data[0]
    row = strip_integers(row)
    pos_x = -1
    for i in range(len(data)):
        s = strip_integers(data[i])
        if s != row: continue
        pos_x = i
        break
    if pos_x == -1: return None
    pos_y = -1
    for j in range(len(headers)):
        if headers[j] != header: continue
        pos_y = j
        break
    if pos_y == -1: return None
    return data[pos_x][pos_y]
def find_item(results_prev, header_cur, row_cur):
    row_cur = strip_integers(row_cur)
    row_prev = None
    for result in results_prev:
        s = strip_integers(result)
        if s == row_cur:
            row_prev = result
            break
    if row_prev is None: return None
    if not header_cur in row_prev: return None
    return row_prev[header_cur]

def compare_values(data_cur, data_prev):
    ret = []
    headers = data_cur[0]
    for i in range(len(data_cur)):
def compare_values(headers_cur, results_cur, headers_prev, results_prev):
    ret = [list(map(lambda x: " ".join(x.split("_")).capitalize(),
                    headers_cur))]
    for row_cur in results_cur:
        ret.append([])
        row_cur = data_cur[i]
        performance_change = False
        for j in range(len(row_cur)):
            item_cur = row_cur[j]
        for header in headers_cur:
            item_cur = row_cur[header]
            if type(item_cur) == str:
                item = " ".join(item_cur.split("_")).capitalize()
            else:
                item_prev = find_item(data_prev, headers[j], row_cur)
                if j != len(row_cur) - 1:
                value_cur = item_cur["median"]
                item_prev = find_item(results_prev, header, row_cur)
                if header != "max_memory":
                    fmt = "{:.3f}ms"
                    scale = 1000.0
                    treshold = 0.050
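As a usage illustration (not part of the commit), a self-contained, slightly simplified sketch mirroring the new dict-based helpers above; the sample rows are invented but follow the result layout used in this diff (string group/scenario identifiers plus per-metric stats dicts).

def strip_integers(row):
    # Keep only the string-valued entries (the group/scenario identifiers).
    return {k: v for k, v in row.items() if type(v) == str}

def find_item(results_prev, header_cur, row_cur):
    # Match rows by their string-valued keys, then look up one metric.
    row_cur = strip_integers(row_cur)
    for result in results_prev:
        if strip_integers(result) == row_cur:
            return result.get(header_cur)
    return None

results_prev = [{"group_name": "match", "scenario_name": "vertex_lookup",
                 "wall_time": {"median": 0.012}}]
row_cur = {"group_name": "match", "scenario_name": "vertex_lookup",
           "wall_time": {"median": 0.011}}

prev = find_item(results_prev, "wall_time", row_cur)
print(prev["median"] if prev else "new scenario")   # -> 0.012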
@@ -61,27 +48,29 @@ def compare_values(data_cur, data_prev):
                    fmt = "{:.2f}MiB"
                    scale = 1.0 / 1024.0
                    treshold = 0.025
                # TODO: add statistics check
                if item_prev != None:
                    if item_prev != 0.0:
                        diff = (item_cur - item_prev) / item_prev
                    value_prev = item_prev["median"]
                    if value_prev != 0.0:
                        diff = (value_cur - value_prev) / value_prev
                    else:
                        diff = 0.0
                    if diff < -treshold and item_cur > 0.0005:
                    if diff < -treshold and value_cur > 0.0005:
                        performance_change = True
                        sign = " {icon arrow-down color=green}"
                    elif diff > treshold and item_cur > 0.0005:
                    elif diff > treshold and value_cur > 0.0005:
                        performance_change = True
                        sign = " {icon arrow-up color=red}"
                    else:
                        sign = ""
                    fmt += " //({:+.2%})//{}"
                    item = fmt.format(item_cur * scale, diff, sign)
                    item = fmt.format(value_cur * scale, diff, sign)
                else:
                    fmt += " //(new)// {{icon plus color=blue}}"
                    item = fmt.format(item_cur * scale)
                    item = fmt.format(value_cur * scale)
                    performance_change = True
            ret[-1].append(item)
        if performance_change == False and i > 0: ret.pop()
        if not performance_change: ret.pop()
    return ret

def generate_remarkup(data):
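A compact, self-contained sketch of the comparison rule applied above, using the same thresholds as the diff (5% for time metrics, 2.5% for memory) but plain labels instead of the Remarkup arrow icons; the numbers are made up.

def classify(value_cur, value_prev, is_memory=False):
    # Time is reported in ms, memory in MiB; change is relative to the previous run.
    fmt, scale, threshold = (("{:.2f}MiB", 1.0 / 1024.0, 0.025) if is_memory
                             else ("{:.3f}ms", 1000.0, 0.050))
    diff = (value_cur - value_prev) / value_prev if value_prev != 0.0 else 0.0
    if diff < -threshold and value_cur > 0.0005:
        sign = "improved"
    elif diff > threshold and value_cur > 0.0005:
        sign = "regressed"
    else:
        sign = "unchanged"
    return "{} ({:+.2%}, {})".format(fmt.format(value_cur * scale), diff, sign)

# Example: wall time went from 10.0 ms to 11.0 ms, a +10% regression.
print(classify(0.011, 0.010))   # -> 11.000ms (+10.00%, regressed)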
@@ -113,15 +102,21 @@ if __name__ == "__main__":

    args = parser.parse_args()

    data_cur, data_prev = [], []
    for i, current in enumerate(args.current):
        off = 0 if i == 0 else 1
        data_cur += parse_file(current)[off:]
    for i, previous in enumerate(args.previous):
        off = 0 if i == 0 else 1
        data_prev += parse_file(previous)[off:]
    headers_cur, headers_prev = None, None
    results_cur, results_prev = [], []
    for current in args.current:
        data = load_file(current)
        if headers_cur is None:
            headers_cur = data["headers"]
        results_cur += data["results"]
    for previous in args.previous:
        data = load_file(previous)
        if headers_prev is None:
            headers_prev = data["headers"]
        results_prev += data["results"]

    markup = generate_remarkup(compare_values(data_cur, data_prev))
    markup = generate_remarkup(compare_values(headers_cur, results_cur,
                                              headers_prev, results_prev))

    if args.output == "":
        sys.stdout.write(markup)
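Finally, a minimal sketch of the merge behaviour in the new __main__ block: headers are taken from the first summary and results are concatenated across all of them. The two dicts below are invented stand-ins for parsed .harness_summary files.

summaries = [
    {"headers": ["group_name", "scenario_name", "wall_time"],
     "results": [{"group_name": "match", "scenario_name": "a",
                  "wall_time": {"median": 0.01}}]},
    {"headers": ["group_name", "scenario_name", "wall_time"],
     "results": [{"group_name": "match", "scenario_name": "b",
                  "wall_time": {"median": 0.02}}]},
]

headers, results = None, []
for data in summaries:
    if headers is None:
        headers = data["headers"]
    results += data["results"]

print(headers, len(results))   # -> ['group_name', 'scenario_name', 'wall_time'] 2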