Use median instead of avg in harness

Reviewers: mferencevic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D725
This commit is contained in:
Mislav Bradac 2017-08-30 13:12:46 +02:00
parent fe6d64066b
commit e68f7ea536

View File

@@ -13,6 +13,7 @@ from collections import OrderedDict
from collections import defaultdict from collections import defaultdict
import tempfile import tempfile
import shutil import shutil
from statistics import median
try: try:
import jail import jail
@@ -200,7 +201,7 @@ class _QuerySuite:
measurements = [] measurements = []
measurement_sums = defaultdict(float) measurement_lists = defaultdict(list)
def add_measurement(dictionary, iteration, key): def add_measurement(dictionary, iteration, key):
if key in dictionary: if key in dictionary:
@@ -211,7 +212,7 @@ class _QuerySuite:
"iteration": iteration} "iteration": iteration}
measurements.append(measurement) measurements.append(measurement)
try: try:
measurement_sums[key] += float(dictionary[key]) measurement_lists[key].append(float(dictionary[key]))
except: except:
pass pass
@@ -255,19 +256,20 @@ class _QuerySuite:
execute("teardown") execute("teardown")
runner.stop() runner.stop()
self.append_scenario_summary(group_name, scenario_name, self.append_scenario_summary(group_name, scenario_name,
measurement_sums, num_iterations) measurement_lists, num_iterations)
return measurements return measurements
def append_scenario_summary(self, group_name, scenario_name, def append_scenario_summary(self, group_name, scenario_name,
measurement_sums, num_iterations): measurement_lists, num_iterations):
self.summary += self.FORMAT[0].format(group_name) self.summary += self.FORMAT[0].format(group_name)
self.summary += self.FORMAT[1].format(scenario_name) self.summary += self.FORMAT[1].format(scenario_name)
for i, key in enumerate(("query_parsing_time", "query_planning_time", for i, key in enumerate(("query_parsing_time", "query_planning_time",
"query_plan_execution_time", WALL_TIME, CPU_TIME)): "query_plan_execution_time", WALL_TIME, CPU_TIME)):
if key not in measurement_sums: if key not in measurement_lists:
time = "-" time = "-"
else: else:
time = "{:.10f}".format(measurement_sums[key] / num_iterations) # Median is used instead of avg to avoid effect of outliers.
time = "{:.10f}".format(median(measurement_lists[key]))
self.summary += self.FORMAT[i + 2].format(time) self.summary += self.FORMAT[i + 2].format(time)
self.summary += "\n" self.summary += "\n"