Add a "compare_bench.py" tooling script. (#266)

This patch adds the compare_bench.py utility which can be used to compare the result of benchmarks.
The program is invoked like:

$ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]...
Where <old-benchmark> and <new-benchmark> either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
This commit is contained in:
Eric 2016-08-09 12:33:57 -06:00 committed by GitHub
parent de4ead7a53
commit 5eac66249c
6 changed files with 396 additions and 0 deletions

30
tools/compare_bench.py Executable file
View File

@ -0,0 +1,30 @@
#!/usr/bin/env python
"""
compare_bench.py - Compare two benchmarks or their results and report the
difference.
"""
import sys
import gbench
from gbench import util, report
def main():
# Parse the command line flags
def usage():
print('compare_bench.py <test1> <test2> [benchmark options]...')
exit(1)
if '--help' in sys.argv or len(sys.argv) < 3:
usage()
tests = sys.argv[1:3]
bench_opts = sys.argv[3:]
bench_opts = list(bench_opts)
# Run the benchmarks and report the results
json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
output_lines = gbench.report.generate_difference_report(json1, json2)
print 'Comparing %s to %s' % (tests[0], tests[1])
for ln in output_lines:
print(ln)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
}
]
}

View File

@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 25,
"cpu_time": 25,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 20833333,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 90,
"cpu_time": 90,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 110,
"cpu_time": 110,
"time_unit": "ns"
}
]
}

8
tools/gbench/__init__.py Normal file
View File

@ -0,0 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = []

136
tools/gbench/report.py Normal file
View File

@ -0,0 +1,136 @@
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
class BenchmarkColor(object):
def __init__(self, name, code):
self.name = name
self.code = code
def __repr__(self):
return '%s%r' % (self.__class__.__name__,
(self.name, self.code))
def __format__(self, format):
return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
def color_format(use_color, fmt_str, *args, **kwargs):
"""
Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
is False then all color codes in 'args' and 'kwargs' are replaced with
the empty string.
"""
assert use_color is True or use_color is False
if not use_color:
args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for arg in args]
kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()}
return fmt_str.format(*args, **kwargs)
def find_longest_name(benchmark_list):
"""
Return the length of the longest benchmark name in a given list of
benchmark JSON objects
"""
longest_name = 1
for bc in benchmark_list:
if len(bc['name']) > longest_name:
longest_name = len(bc['name'])
return longest_name
def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
return float(new_val - old_val) / abs(old_val)
def generate_difference_report(json1, json2, use_color=True):
"""
Calculate and report the difference between each test of two benchmarks
runs specified as 'json1' and 'json2'.
"""
first_col_width = find_longest_name(json1['benchmarks']) + 5
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
first_line = "{:<{}s} Time CPU".format(
'Benchmark', first_col_width)
output_strs = [first_line, '-' * len(first_line)]
for bn in json1['benchmarks']:
other_bench = find_test(bn['name'])
if not other_bench:
continue
def get_color(res):
if res > 0.05:
return BC_FAIL
elif res > -0.07:
return BC_WHITE
else:
return BC_CYAN
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color, fmt_str,
BC_HEADER, bn['name'], first_col_width,
get_color(tres), tres, get_color(cpures), cpures,
endc=BC_ENDC)]
return output_strs
###############################################################################
# Unit tests
import unittest
class TestReportDifference(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
testOutput1 = os.path.join(testInputs, 'test1_run1.json')
testOutput2 = os.path.join(testInputs, 'test1_run2.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_basic(self):
expect_lines = [
['BM_SameTimes', '+0.00', '+0.00'],
['BM_2xFaster', '-0.50', '-0.50'],
['BM_2xSlower', '+1.00', '+1.00'],
['BM_10PercentFaster', '-0.10', '-0.10'],
['BM_10PercentSlower', '+0.10', '+0.10']
]
json1, json2 = self.load_results()
output_lines = generate_difference_report(json1, json2, use_color=False)
print output_lines
self.assertEqual(len(output_lines), len(expect_lines))
for i in xrange(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 3)
self.assertEqual(parts, expect_lines[i])
if __name__ == '__main__':
unittest.main()

130
tools/gbench/util.py Normal file
View File

@ -0,0 +1,130 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
def is_executable_file(filename):
"""
Return 'True' if 'filename' names a valid file which is likely
an executable. A file is considered an executable if it starts with the
magic bytes for a EXE, Mach O, or ELF file.
"""
if not os.path.isfile(filename):
return False
with open(filename, 'r') as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
return magic_bytes in [
'\xfe\xed\xfa\xce', # MH_MAGIC
'\xce\xfa\xed\xfe', # MH_CIGAM
'\xfe\xed\xfa\xcf', # MH_MAGIC_64
'\xcf\xfa\xed\xfe', # MH_CIGAM_64
'\xca\xfe\xba\xbe', # FAT_MAGIC
'\xbe\xba\xfe\xca' # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == 'MZ'
else:
return magic_bytes == '\x7FELF'
def is_json_file(filename):
"""
Returns 'True' if 'filename' names a valid JSON output file.
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
json.load(f)
return True
except:
pass
return False
def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
string represeting the error.
"""
ftype = IT_Invalid
err_msg = None
if not os.path.exists(filename):
err_msg = "'%s' does not exist" % filename
elif not os.path.isfile(filename):
err_msg = "'%s' does not name a file" % filename
elif is_executable_file(filename):
ftype = IT_Executable
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file"
return ftype, err_msg
def check_input_file(filename):
"""
Classify the file named by 'filename' and return the classification.
If the file is classified as 'IT_Invalid' print an error message and exit
the program.
"""
ftype, msg = classify_input_file(filename)
if ftype == IT_Invalid:
print "Invalid input file: %s" % msg
sys.exit(1)
return ftype
def load_benchmark_results(fname):
"""
Read benchmark output from a file and return the JSON object.
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
with open(fname, 'r') as f:
return json.load(f)
def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
thandle, tname = tempfile.mkstemp()
os.close(thandle)
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
json_res = load_benchmark_results(tname)
os.unlink(tname)
return json_res
def run_or_load_benchmark(filename, benchmark_flags):
"""
Get the results for a specified benchmark. If 'filename' specifies
an executable benchmark then the results are generated by running the
benchmark. Otherwise 'filename' must name a valid JSON output file,
which is loaded and the result returned.
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
return load_benchmark_results(filename)
elif ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
else:
assert False # This branch is unreachable