mirror of
https://github.com/google/benchmark.git
synced 2025-03-14 03:10:22 +08:00
Add a "compare_bench.py" tooling script. (#266)
This patch adds the compare_bench.py utility which can be used to compare the result of benchmarks. The program is invoked like: $ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]... Where <old-benchmark> and <new-benchmark> either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
This commit is contained in:
parent
de4ead7a53
commit
5eac66249c
30
tools/compare_bench.py
Executable file
30
tools/compare_bench.py
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
compare_bench.py - Compare two benchmarks or their results and report the
|
||||
difference.
|
||||
"""
|
||||
import sys
|
||||
import gbench
|
||||
from gbench import util, report
|
||||
|
||||
def main():
    """Entry point: parse argv, obtain both benchmark results, print the
    difference report to stdout.

    Usage: compare_bench.py <test1> <test2> [benchmark options]...
    where each test is either a benchmark executable or a JSON output file.
    """
    # Parse the command line flags
    def usage():
        print('compare_bench.py <test1> <test2> [benchmark options]...')
        exit(1)
    if '--help' in sys.argv or len(sys.argv) < 3:
        usage()
    tests = sys.argv[1:3]
    # Everything after the two test names is forwarded to the benchmarks.
    # (sys.argv slicing already yields a fresh list; no extra copy needed.)
    bench_opts = sys.argv[3:]
    # Run the benchmarks and report the results
    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
    output_lines = gbench.report.generate_difference_report(json1, json2)
    # Was a Python 2 print statement; the function form works on 2 and 3
    # and matches the print() calls used elsewhere in this script.
    print('Comparing %s to %s' % (tests[0], tests[1]))
    for ln in output_lines:
        print(ln)


if __name__ == '__main__':
    main()
|
46
tools/gbench/Inputs/test1_run1.json
Normal file
46
tools/gbench/Inputs/test1_run1.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2016-08-02 17:44:46",
|
||||
"num_cpus": 4,
|
||||
"mhz_per_cpu": 4228,
|
||||
"cpu_scaling_enabled": false,
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
||||
{
|
||||
"name": "BM_SameTimes",
|
||||
"iterations": 1000,
|
||||
"real_time": 10,
|
||||
"cpu_time": 10,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 50,
|
||||
"cpu_time": 50,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 50,
|
||||
"cpu_time": 50,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
}
|
||||
]
|
||||
}
|
46
tools/gbench/Inputs/test1_run2.json
Normal file
46
tools/gbench/Inputs/test1_run2.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2016-08-02 17:44:46",
|
||||
"num_cpus": 4,
|
||||
"mhz_per_cpu": 4228,
|
||||
"cpu_scaling_enabled": false,
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
||||
{
|
||||
"name": "BM_SameTimes",
|
||||
"iterations": 1000,
|
||||
"real_time": 10,
|
||||
"cpu_time": 10,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 25,
|
||||
"cpu_time": 25,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xSlower",
|
||||
"iterations": 20833333,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 90,
|
||||
"cpu_time": 90,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 110,
|
||||
"cpu_time": 110,
|
||||
"time_unit": "ns"
|
||||
}
|
||||
]
|
||||
}
|
8
tools/gbench/__init__.py
Normal file
8
tools/gbench/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""Google Benchmark tooling"""
|
||||
|
||||
__author__ = 'Eric Fiselier'
|
||||
__email__ = 'eric@efcs.ca'
|
||||
__versioninfo__ = (0, 5, 0)
|
||||
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
|
||||
|
||||
__all__ = []
|
136
tools/gbench/report.py
Normal file
136
tools/gbench/report.py
Normal file
@ -0,0 +1,136 @@
|
||||
"""report.py - Utilities for reporting statistics about benchmark results
|
||||
"""
|
||||
import os
|
||||
|
||||
class BenchmarkColor(object):
    """A named terminal color.

    Formatting an instance (str.format / format()) yields its raw escape
    code, so instances can be passed directly as format arguments.
    """

    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '{0}{1!r}'.format(self.__class__.__name__,
                                 (self.name, self.code))

    def __format__(self, format):
        # Ignore the format spec; always substitute the escape code.
        return self.code
|
||||
|
||||
# Benchmark Colors Enumeration
# ANSI terminal escape sequences used by color_format() and the difference
# report below. BC_NONE carries the empty string, so substituting it for
# any other color renders as "no color" when coloring is disabled.
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
|
||||
|
||||
def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if use_color:
        return fmt_str.format(*args, **kwargs)

    def strip_color(value):
        # With color disabled every BenchmarkColor renders via BC_NONE,
        # whose escape code is the empty string.
        return BC_NONE if isinstance(value, BenchmarkColor) else value

    plain_args = [strip_color(value) for value in args]
    plain_kwargs = dict((key, strip_color(value))
                        for key, value in kwargs.items())
    return fmt_str.format(*plain_args, **plain_kwargs)
|
||||
|
||||
|
||||
def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects. Never returns less than 1, even for an empty
    list.
    """
    # Seed with 1 so the empty-list result matches the original minimum.
    return max([1] + [len(bench['name']) for bench in benchmark_list])
|
||||
|
||||
|
||||
def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    # NOTE(review): raises ZeroDivisionError when old_val == 0 — presumably
    # benchmark times are always positive; confirm with callers.
    delta = new_val - old_val
    return delta / float(abs(old_val))
|
||||
|
||||
|
||||
def generate_difference_report(json1, json2, use_color=True):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'. Returns the report as a list of
    strings: a header line, a separator, then one row per benchmark present
    in both runs.
    """
    first_col_width = find_longest_name(json1['benchmarks']) + 5

    def lookup_other(name):
        # Locate the same-named benchmark in the second run, or None.
        for candidate in json2['benchmarks']:
            if candidate['name'] == name:
                return candidate
        return None

    def get_color(res):
        # Red when noticeably slower, cyan when noticeably faster,
        # white for changes inside the noise band.
        if res > 0.05:
            return BC_FAIL
        if res > -0.07:
            return BC_WHITE
        return BC_CYAN

    first_line = "{:<{}s} Time CPU".format(
        'Benchmark', first_col_width)
    output_strs = [first_line, '-' * len(first_line)]
    row_fmt = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
    for bench in json1['benchmarks']:
        partner = lookup_other(bench['name'])
        if not partner:
            # Benchmarks missing from the second run are skipped.
            continue
        tres = calculate_change(bench['real_time'], partner['real_time'])
        cpures = calculate_change(bench['cpu_time'], partner['cpu_time'])
        output_strs.append(color_format(use_color, row_fmt,
                                        BC_HEADER, bench['name'],
                                        first_col_width,
                                        get_color(tres), tres,
                                        get_color(cpures), cpures,
                                        endc=BC_ENDC))
    return output_strs
|
||||
|
||||
###############################################################################
# Unit tests

import unittest


class TestReportDifference(unittest.TestCase):
    def load_results(self):
        """Load the two checked-in example benchmark outputs from Inputs/."""
        import json
        testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_basic(self):
        """Each report row must carry the expected time and CPU deltas."""
        expect_lines = [
            ['BM_SameTimes', '+0.00', '+0.00'],
            ['BM_2xFaster', '-0.50', '-0.50'],
            ['BM_2xSlower', '+1.00', '+1.00'],
            ['BM_10PercentFaster', '-0.10', '-0.10'],
            ['BM_10PercentSlower', '+0.10', '+0.10']
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        # The report starts with a title row and a dashed separator; drop
        # them so we only compare the per-benchmark rows (comparing the
        # full list against the 5 expected rows could never pass).
        output_lines = output_lines_with_header[2:]
        # Python 2 'print output_lines' replaced with the print() function.
        print(output_lines)
        self.assertEqual(len(output_lines), len(expect_lines))
        # range() instead of Python-2-only xrange().
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 3)
            self.assertEqual(parts, expect_lines[i])


if __name__ == '__main__':
    unittest.main()
|
130
tools/gbench/util.py
Normal file
130
tools/gbench/util.py
Normal file
@ -0,0 +1,130 @@
|
||||
"""util.py - General utilities for running, loading, and processing benchmarks
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2

# Bytes needed to identify an executable: 'MZ' (2 bytes) on Windows,
# a 4-byte magic number (ELF / Mach-O) everywhere else.
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4


def is_executable_file(filename):
    """
    Return 'True' if 'filename' names a valid file which is likely
    an executable. A file is considered an executable if it starts with the
    magic bytes for a EXE, Mach O, or ELF file.
    """
    if not os.path.isfile(filename):
        return False
    # Read in binary mode: magic numbers are raw bytes, and a text-mode
    # read can fail to decode (Python 3) or translate newlines (Windows)
    # on arbitrary binaries.
    with open(filename, 'rb') as f:
        magic_bytes = f.read(_num_magic_bytes)
    if sys.platform == 'darwin':
        return magic_bytes in [
            b'\xfe\xed\xfa\xce',  # MH_MAGIC
            b'\xce\xfa\xed\xfe',  # MH_CIGAM
            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
            b'\xbe\xba\xfe\xca'   # FAT_CIGAM
        ]
    elif sys.platform.startswith('win'):
        return magic_bytes == b'MZ'
    else:
        return magic_bytes == b'\x7fELF'
|
||||
|
||||
|
||||
def is_json_file(filename):
    """
    Returns 'True' if 'filename' names a valid JSON output file.
    'False' otherwise.
    """
    try:
        with open(filename, 'r') as f:
            json.load(f)
        return True
    # Catch only the failures that mean "not a readable JSON file":
    # missing/unreadable file or a parse error (json raises ValueError,
    # of which JSONDecodeError is a subclass). A bare 'except' here would
    # also swallow KeyboardInterrupt and SystemExit.
    except (IOError, OSError, ValueError):
        pass
    return False
|
||||
|
||||
|
||||
def classify_input_file(filename):
    """
    Return a tuple (type, msg) where 'type' specifies the classified type
    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
    string representing the error.
    """
    ftype = IT_Invalid
    err_msg = None
    if not os.path.exists(filename):
        err_msg = "'%s' does not exist" % filename
    elif not os.path.isfile(filename):
        err_msg = "'%s' does not name a file" % filename
    elif is_executable_file(filename):
        ftype = IT_Executable
    elif is_json_file(filename):
        ftype = IT_JSON
    else:
        # BUG FIX: the '% filename' argument was missing, so callers were
        # shown the literal '%s' instead of the offending file name.
        err_msg = ("'%s' does not name a valid benchmark executable or "
                   "JSON file" % filename)
    return ftype, err_msg
|
||||
|
||||
|
||||
def check_input_file(filename):
    """
    Classify the file named by 'filename' and return the classification.
    If the file is classified as 'IT_Invalid' print an error message and exit
    the program.
    """
    ftype, msg = classify_input_file(filename)
    if ftype == IT_Invalid:
        # Was a Python 2 print statement; the function form also parses
        # under Python 3 and matches print() usage elsewhere in the module.
        print("Invalid input file: %s" % msg)
        sys.exit(1)
    return ftype
|
||||
|
||||
|
||||
def load_benchmark_results(fname):
    """
    Read benchmark output from a file and return the JSON object.
    REQUIRES: 'fname' names a file containing JSON benchmark output.
    """
    with open(fname, 'r') as results_file:
        contents = results_file.read()
    return json.loads(contents)
|
||||
|
||||
|
||||
def run_benchmark(exe_name, benchmark_flags):
    """
    Run a benchmark specified by 'exe_name' with the specified
    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
    real time console output.
    RETURNS: A JSON object representing the benchmark output
    """
    thandle, tname = tempfile.mkstemp()
    os.close(thandle)
    try:
        cmd = [exe_name] + benchmark_flags
        print("RUNNING: %s" % ' '.join(cmd))
        exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
        if exitCode != 0:
            print('TEST FAILED...')
            sys.exit(exitCode)
        json_res = load_benchmark_results(tname)
    finally:
        # Always remove the temp file — previously it leaked whenever the
        # benchmark failed, because sys.exit() raised before os.unlink().
        os.unlink(tname)
    return json_res
|
||||
|
||||
|
||||
def run_or_load_benchmark(filename, benchmark_flags):
    """
    Get the results for a specified benchmark. If 'filename' specifies
    an executable benchmark then the results are generated by running the
    benchmark. Otherwise 'filename' must name a valid JSON output file,
    which is loaded and the result returned.
    """
    # check_input_file() exits the program on an invalid input, so only
    # the two valid classifications can reach this point.
    ftype = check_input_file(filename)
    if ftype == IT_Executable:
        return run_benchmark(filename, benchmark_flags)
    if ftype == IT_JSON:
        return load_benchmark_results(filename)
    assert False  # This branch is unreachable
|
Loading…
Reference in New Issue
Block a user