From e68f7ea53613b46b5e1a2aaf57e01b93dc921b3c Mon Sep 17 00:00:00 2001 From: Mislav Bradac Date: Wed, 30 Aug 2017 13:12:46 +0200 Subject: [PATCH] Use median instead of avg in harness Reviewers: mferencevic Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D725 --- tests/macro_benchmark/harness/harness.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/macro_benchmark/harness/harness.py b/tests/macro_benchmark/harness/harness.py index 41787b3c8..73f0fbb30 100755 --- a/tests/macro_benchmark/harness/harness.py +++ b/tests/macro_benchmark/harness/harness.py @@ -13,6 +13,7 @@ from collections import OrderedDict from collections import defaultdict import tempfile import shutil +from statistics import median try: import jail @@ -200,7 +201,7 @@ class _QuerySuite: measurements = [] - measurement_sums = defaultdict(float) + measurement_lists = defaultdict(list) def add_measurement(dictionary, iteration, key): if key in dictionary: @@ -211,7 +212,7 @@ class _QuerySuite: "iteration": iteration} measurements.append(measurement) try: - measurement_sums[key] += float(dictionary[key]) + measurement_lists[key].append(float(dictionary[key])) except: pass @@ -255,19 +256,20 @@ class _QuerySuite: execute("teardown") runner.stop() self.append_scenario_summary(group_name, scenario_name, - measurement_sums, num_iterations) + measurement_lists, num_iterations) return measurements def append_scenario_summary(self, group_name, scenario_name, - measurement_sums, num_iterations): + measurement_lists, num_iterations): self.summary += self.FORMAT[0].format(group_name) self.summary += self.FORMAT[1].format(scenario_name) for i, key in enumerate(("query_parsing_time", "query_planning_time", "query_plan_execution_time", WALL_TIME, CPU_TIME)): - if key not in measurement_sums: + if key not in measurement_lists: time = "-" else: - time = "{:.10f}".format(measurement_sums[key] / num_iterations) + # Median is 
used instead of the average to reduce the effect of outliers. + time = "{:.10f}".format(median(measurement_lists[key])) self.summary += self.FORMAT[i + 2].format(time) self.summary += "\n"