Mirror of https://github.com/google/benchmark.git
compare.py: sort the results (#1168)
Currently, the tooling simply keeps whatever benchmark order was present in the input. That is fine for now, but once benchmarks can optionally be run interleaved, it will be rather suboptimal. Now that a family index and a per-family instance index have been introduced, we can define a canonical order for the benchmarks and sort them accordingly. One caveat: for aggregates we assume they already appear in order, and hopefully that order won't get disturbed.
Parent: 0c1da0a713
Commit: 6e32352c79
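In effect, the canonical order is: family index first, then per-family instance index, then all per-repetition iterations of an instance followed by its aggregates, with repetitions ordered by repetition index. As a rough illustration (a hypothetical sketch, not the code in this commit — the commit builds the same order from chained stable sorts in tools/gbench/util.py below), the ordering can be expressed as a single tuple key:

def canonical_order_key(benchmark):
    # Hypothetical helper; entries missing a key sort first via the
    # -1 fallback, mirroring the fallbacks in sort_benchmark_results.
    return (
        benchmark.get('family_index', -1),
        benchmark.get('per_family_instance_index', -1),
        benchmark.get('run_type') == 'aggregate',  # False < True: aggregates last
        benchmark.get('repetition_index', -1),
    )

def sort_results(result):
    result['benchmarks'] = sorted(result['benchmarks'], key=canonical_order_key)
    return result

Because Python's sort is stable, sorting repeatedly from the innermost key to the outermost key, as the commit does, produces exactly the same order as this one-shot tuple key.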
tools/compare.py
@@ -238,10 +238,10 @@ def main():
     options_contender = ['--benchmark_filter=%s' % filter_contender]
 
     # Run the benchmarks and report the results
-    json1 = json1_orig = gbench.util.run_or_load_benchmark(
-        test_baseline, benchmark_options + options_baseline)
-    json2 = json2_orig = gbench.util.run_or_load_benchmark(
-        test_contender, benchmark_options + options_contender)
+    json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
+        test_baseline, benchmark_options + options_baseline))
+    json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
+        test_contender, benchmark_options + options_contender))
 
     # Now, filter the benchmarks so that the difference report can work
     if filter_baseline and filter_contender:
tools/gbench/Inputs/test4_run.json (new file, 96 lines)
@@ -0,0 +1,96 @@
{
  "benchmarks": [
    {
      "name": "99 family 0 instance 0 repetition 0",
      "run_type": "iteration",
      "family_index": 0,
      "per_family_instance_index": 0,
      "repetition_index": 0
    },
    {
      "name": "98 family 0 instance 0 repetition 1",
      "run_type": "iteration",
      "family_index": 0,
      "per_family_instance_index": 0,
      "repetition_index": 1
    },
    {
      "name": "97 family 0 instance 0 aggregate",
      "run_type": "aggregate",
      "family_index": 0,
      "per_family_instance_index": 0,
      "aggregate_name": "9 aggregate"
    },

    {
      "name": "96 family 0 instance 1 repetition 0",
      "run_type": "iteration",
      "family_index": 0,
      "per_family_instance_index": 1,
      "repetition_index": 0
    },
    {
      "name": "95 family 0 instance 1 repetition 1",
      "run_type": "iteration",
      "family_index": 0,
      "per_family_instance_index": 1,
      "repetition_index": 1
    },
    {
      "name": "94 family 0 instance 1 aggregate",
      "run_type": "aggregate",
      "family_index": 0,
      "per_family_instance_index": 1,
      "aggregate_name": "9 aggregate"
    },

    {
      "name": "93 family 1 instance 0 repetition 0",
      "run_type": "iteration",
      "family_index": 1,
      "per_family_instance_index": 0,
      "repetition_index": 0
    },
    {
      "name": "92 family 1 instance 0 repetition 1",
      "run_type": "iteration",
      "family_index": 1,
      "per_family_instance_index": 0,
      "repetition_index": 1
    },
    {
      "name": "91 family 1 instance 0 aggregate",
      "run_type": "aggregate",
      "family_index": 1,
      "per_family_instance_index": 0,
      "aggregate_name": "9 aggregate"
    },

    {
      "name": "90 family 1 instance 1 repetition 0",
      "run_type": "iteration",
      "family_index": 1,
      "per_family_instance_index": 1,
      "repetition_index": 0
    },
    {
      "name": "89 family 1 instance 1 repetition 1",
      "run_type": "iteration",
      "family_index": 1,
      "per_family_instance_index": 1,
      "repetition_index": 1
    },
    {
      "name": "88 family 1 instance 1 aggregate",
      "run_type": "aggregate",
      "family_index": 1,
      "per_family_instance_index": 1,
      "aggregate_name": "9 aggregate"
    }
  ]
}
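Note that the fixture's numeric name prefixes run downwards (99 → 88) while the file itself is in canonical order, so an implementation that merely sorted by name would produce the exact reverse of the expected order and fail the test below. A quick check (hypothetical snippet, path relative to the repository root):

import json

with open('tools/gbench/Inputs/test4_run.json') as f:
    names = [b['name'] for b in json.load(f)['benchmarks']]

# File order is the canonical order; lexicographic sorting of the
# names yields exactly its reverse.
assert sorted(names) == list(reversed(names))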
tools/gbench/report.py
@@ -1,9 +1,11 @@
-import unittest
 """report.py - Utilities for reporting statistics about benchmark results
 """
+
+import unittest
 import os
 import re
 import copy
+import random
 
 from scipy.stats import mannwhitneyu
 
@@ -912,6 +914,49 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
         assert_measurements(self, out, expected)
 
 
+class TestReportSorting(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        def load_result():
+            import json
+            testInputs = os.path.join(
+                os.path.dirname(
+                    os.path.realpath(__file__)),
+                'Inputs')
+            testOutput = os.path.join(testInputs, 'test4_run.json')
+            with open(testOutput, 'r') as f:
+                json = json.load(f)
+            return json
+
+        cls.json = load_result()
+
+    def test_json_diff_report_pretty_printing(self):
+        import util
+
+        expected_names = [
+            "99 family 0 instance 0 repetition 0",
+            "98 family 0 instance 0 repetition 1",
+            "97 family 0 instance 0 aggregate",
+            "96 family 0 instance 1 repetition 0",
+            "95 family 0 instance 1 repetition 1",
+            "94 family 0 instance 1 aggregate",
+            "93 family 1 instance 0 repetition 0",
+            "92 family 1 instance 0 repetition 1",
+            "91 family 1 instance 0 aggregate",
+            "90 family 1 instance 1 repetition 0",
+            "89 family 1 instance 1 repetition 1",
+            "88 family 1 instance 1 aggregate"
+        ]
+
+        for n in range(len(self.json['benchmarks']) ** 2):
+            random.shuffle(self.json['benchmarks'])
+            sorted_benchmarks = util.sort_benchmark_results(self.json)[
+                'benchmarks']
+            self.assertEqual(len(expected_names), len(sorted_benchmarks))
+            for out, expected in zip(sorted_benchmarks, expected_names):
+                self.assertEqual(out['name'], expected)
+
+
 def assert_utest(unittest_instance, lhs, rhs):
     if lhs['utest']:
         unittest_instance.assertAlmostEqual(
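A note on the loop bound: len(self.json['benchmarks']) ** 2 shuffles (144 for this 12-entry fixture) samples input permutations probabilistically rather than enumerating all 12! of them; any sampled order that the sort mishandles fails the test, which keeps it cheap while still exercising many starting orders.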
tools/gbench/util.py
@@ -5,6 +5,7 @@ import os
 import tempfile
 import subprocess
 import sys
+import functools
 
 # Input file type enumeration
 IT_Invalid = 0
@@ -119,6 +120,23 @@ def load_benchmark_results(fname):
         return json.load(f)
 
 
+def sort_benchmark_results(result):
+    benchmarks = result['benchmarks']
+
+    # From inner key to the outer key!
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1)
+
+    result['benchmarks'] = benchmarks
+    return result
+
+
 def run_benchmark(exe_name, benchmark_flags):
     """
     Run a benchmark specified by 'exe_name' with the specified
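A quick way to see the resulting order (a hypothetical snippet, assuming it runs from tools/gbench so that util is importable):

import util  # tools/gbench/util.py

result = {'benchmarks': [
    {'name': 'agg', 'run_type': 'aggregate', 'family_index': 0,
     'per_family_instance_index': 0, 'aggregate_name': 'mean'},
    {'name': 'rep1', 'run_type': 'iteration', 'family_index': 0,
     'per_family_instance_index': 0, 'repetition_index': 1},
    {'name': 'rep0', 'run_type': 'iteration', 'family_index': 0,
     'per_family_instance_index': 0, 'repetition_index': 0},
]}

print([b['name'] for b in util.sort_benchmark_results(result)['benchmarks']])
# -> ['rep0', 'rep1', 'agg']: repetitions in order, then the aggregate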