mirror of
https://github.com/google/benchmark.git
synced 2025-03-14 03:10:22 +08:00
Add a "compare_bench.py" tooling script. (#266)
This patch adds the compare_bench.py utility which can be used to compare the result of benchmarks. The program is invoked like: $ compare_bench.py <old-benchmark> <new-benchmark> [benchmark options]... Where <old-benchmark> and <new-benchmark> either specify a benchmark executable file, or a JSON output file. The type of the input file is automatically detected. If a benchmark executable is specified then the benchmark is run to obtain the results. Otherwise the results are simply loaded from the output file.
This commit is contained in:
parent
de4ead7a53
commit
5eac66249c
30
tools/compare_bench.py
Executable file
30
tools/compare_bench.py
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
compare_bench.py - Compare two benchmarks or their results and report the
|
||||
difference.
|
||||
"""
|
||||
import sys
|
||||
import gbench
|
||||
from gbench import util, report
|
||||
|
||||
def main():
    """Entry point: parse argv, obtain both benchmark results, print the
    difference report to stdout.

    Usage: compare_bench.py <test1> <test2> [benchmark options]...
    where each test is either a benchmark executable or a JSON output file.
    """
    # Parse the command line flags
    def usage():
        print('compare_bench.py <test1> <test2> [benchmark options]...')
        exit(1)
    if '--help' in sys.argv or len(sys.argv) < 3:
        usage()
    tests = sys.argv[1:3]
    # Everything after the two test names is forwarded to the benchmarks.
    # (sys.argv slicing already yields a fresh list; no extra copy needed.)
    bench_opts = sys.argv[3:]
    # Run the benchmarks and report the results
    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
    output_lines = gbench.report.generate_difference_report(json1, json2)
    # Was a Python 2 print statement; the function form works on 2 and 3
    # and matches the print() calls used elsewhere in this script.
    print('Comparing %s to %s' % (tests[0], tests[1]))
    for ln in output_lines:
        print(ln)


if __name__ == '__main__':
    main()
|
46
tools/gbench/Inputs/test1_run1.json
Normal file
46
tools/gbench/Inputs/test1_run1.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2016-08-02 17:44:46",
|
||||
"num_cpus": 4,
|
||||
"mhz_per_cpu": 4228,
|
||||
"cpu_scaling_enabled": false,
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
||||
{
|
||||
"name": "BM_SameTimes",
|
||||
"iterations": 1000,
|
||||
"real_time": 10,
|
||||
"cpu_time": 10,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 50,
|
||||
"cpu_time": 50,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 50,
|
||||
"cpu_time": 50,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
}
|
||||
]
|
||||
}
|
46
tools/gbench/Inputs/test1_run2.json
Normal file
46
tools/gbench/Inputs/test1_run2.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"context": {
|
||||
"date": "2016-08-02 17:44:46",
|
||||
"num_cpus": 4,
|
||||
"mhz_per_cpu": 4228,
|
||||
"cpu_scaling_enabled": false,
|
||||
"library_build_type": "release"
|
||||
},
|
||||
"benchmarks": [
|
||||
{
|
||||
"name": "BM_SameTimes",
|
||||
"iterations": 1000,
|
||||
"real_time": 10,
|
||||
"cpu_time": 10,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 25,
|
||||
"cpu_time": 25,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_2xSlower",
|
||||
"iterations": 20833333,
|
||||
"real_time": 100,
|
||||
"cpu_time": 100,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentFaster",
|
||||
"iterations": 1000,
|
||||
"real_time": 90,
|
||||
"cpu_time": 90,
|
||||
"time_unit": "ns"
|
||||
},
|
||||
{
|
||||
"name": "BM_10PercentSlower",
|
||||
"iterations": 1000,
|
||||
"real_time": 110,
|
||||
"cpu_time": 110,
|
||||
"time_unit": "ns"
|
||||
}
|
||||
]
|
||||
}
|
8
tools/gbench/__init__.py
Normal file
8
tools/gbench/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""Google Benchmark tooling"""
|
||||
|
||||
__author__ = 'Eric Fiselier'
|
||||
__email__ = 'eric@efcs.ca'
|
||||
__versioninfo__ = (0, 5, 0)
|
||||
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
|
||||
|
||||
__all__ = []
|
136
tools/gbench/report.py
Normal file
136
tools/gbench/report.py
Normal file
@ -0,0 +1,136 @@
|
||||
"""report.py - Utilities for reporting statistics about benchmark results
|
||||
"""
|
||||
import os
|
||||
|
||||
class BenchmarkColor(object):
    """A named terminal color.

    Formatting an instance (str.format / format()) yields its raw escape
    code, so instances can be passed directly as format arguments.
    """

    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '{0}{1!r}'.format(self.__class__.__name__,
                                 (self.name, self.code))

    def __format__(self, format):
        # Ignore the format spec; always substitute the escape code.
        return self.code
|
||||
|
||||
# Benchmark Colors Enumeration
# ANSI terminal escape sequences used by color_format() and the difference
# report below. BC_NONE carries the empty string, so substituting it for
# any other color renders as "no color" when coloring is disabled.
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
|
||||
|
||||
def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if use_color:
        return fmt_str.format(*args, **kwargs)

    def strip_color(value):
        # With color disabled every BenchmarkColor renders via BC_NONE,
        # whose escape code is the empty string.
        return BC_NONE if isinstance(value, BenchmarkColor) else value

    plain_args = [strip_color(value) for value in args]
    plain_kwargs = dict((key, strip_color(value))
                        for key, value in kwargs.items())
    return fmt_str.format(*plain_args, **plain_kwargs)
|
||||
|
||||
|
||||
def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects. Never returns less than 1, even for an empty
    list.
    """
    # Seed with 1 so the empty-list result matches the original minimum.
    return max([1] + [len(bench['name']) for bench in benchmark_list])
|
||||
|
||||
|
||||
def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    # NOTE(review): raises ZeroDivisionError when old_val == 0 — presumably
    # benchmark times are always positive; confirm with callers.
    delta = new_val - old_val
    return delta / float(abs(old_val))
|
||||
|
||||
|
||||
def generate_difference_report(json1, json2, use_color=True):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'. Returns the report as a list of
    strings: a header line, a separator, then one row per benchmark present
    in both runs.
    """
    first_col_width = find_longest_name(json1['benchmarks']) + 5

    def lookup_other(name):
        # Locate the same-named benchmark in the second run, or None.
        for candidate in json2['benchmarks']:
            if candidate['name'] == name:
                return candidate
        return None

    def get_color(res):
        # Red when noticeably slower, cyan when noticeably faster,
        # white for changes inside the noise band.
        if res > 0.05:
            return BC_FAIL
        if res > -0.07:
            return BC_WHITE
        return BC_CYAN

    first_line = "{:<{}s} Time CPU".format(
        'Benchmark', first_col_width)
    output_strs = [first_line, '-' * len(first_line)]
    row_fmt = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
    for bench in json1['benchmarks']:
        partner = lookup_other(bench['name'])
        if not partner:
            # Benchmarks missing from the second run are skipped.
            continue
        tres = calculate_change(bench['real_time'], partner['real_time'])
        cpures = calculate_change(bench['cpu_time'], partner['cpu_time'])
        output_strs.append(color_format(use_color, row_fmt,
                                        BC_HEADER, bench['name'],
                                        first_col_width,
                                        get_color(tres), tres,
                                        get_color(cpures), cpures,
                                        endc=BC_ENDC))
    return output_strs
|
||||
|
||||
###############################################################################
# Unit tests

import unittest


class TestReportDifference(unittest.TestCase):
    def load_results(self):
        """Load the two checked-in example benchmark outputs from Inputs/."""
        import json
        testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
        testOutput1 = os.path.join(testInputs, 'test1_run1.json')
        testOutput2 = os.path.join(testInputs, 'test1_run2.json')
        with open(testOutput1, 'r') as f:
            json1 = json.load(f)
        with open(testOutput2, 'r') as f:
            json2 = json.load(f)
        return json1, json2

    def test_basic(self):
        """Each report row must carry the expected time and CPU deltas."""
        expect_lines = [
            ['BM_SameTimes', '+0.00', '+0.00'],
            ['BM_2xFaster', '-0.50', '-0.50'],
            ['BM_2xSlower', '+1.00', '+1.00'],
            ['BM_10PercentFaster', '-0.10', '-0.10'],
            ['BM_10PercentSlower', '+0.10', '+0.10']
        ]
        json1, json2 = self.load_results()
        output_lines_with_header = generate_difference_report(
            json1, json2, use_color=False)
        # The report starts with a title row and a dashed separator; drop
        # them so we only compare the per-benchmark rows (comparing the
        # full list against the 5 expected rows could never pass).
        output_lines = output_lines_with_header[2:]
        # Python 2 'print output_lines' replaced with the print() function.
        print(output_lines)
        self.assertEqual(len(output_lines), len(expect_lines))
        # range() instead of Python-2-only xrange().
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 3)
            self.assertEqual(parts, expect_lines[i])


if __name__ == '__main__':
    unittest.main()
|
130
tools/gbench/util.py
Normal file
130
tools/gbench/util.py
Normal file
@ -0,0 +1,130 @@
|
||||
"""util.py - General utilities for running, loading, and processing benchmarks
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2

# Bytes needed to identify an executable: 'MZ' (2 bytes) on Windows,
# a 4-byte magic number (ELF / Mach-O) everywhere else.
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4


def is_executable_file(filename):
    """
    Return 'True' if 'filename' names a valid file which is likely
    an executable. A file is considered an executable if it starts with the
    magic bytes for a EXE, Mach O, or ELF file.
    """
    if not os.path.isfile(filename):
        return False
    # Read in binary mode: magic numbers are raw bytes, and a text-mode
    # read can fail to decode (Python 3) or translate newlines (Windows)
    # on arbitrary binaries.
    with open(filename, 'rb') as f:
        magic_bytes = f.read(_num_magic_bytes)
    if sys.platform == 'darwin':
        return magic_bytes in [
            b'\xfe\xed\xfa\xce',  # MH_MAGIC
            b'\xce\xfa\xed\xfe',  # MH_CIGAM
            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
            b'\xbe\xba\xfe\xca'   # FAT_CIGAM
        ]
    elif sys.platform.startswith('win'):
        return magic_bytes == b'MZ'
    else:
        return magic_bytes == b'\x7fELF'
|
||||
|
||||
|
||||
def is_json_file(filename):
    """
    Returns 'True' if 'filename' names a valid JSON output file.
    'False' otherwise.
    """
    try:
        with open(filename, 'r') as f:
            json.load(f)
        return True
    # Catch only the failures that mean "not a readable JSON file":
    # missing/unreadable file or a parse error (json raises ValueError,
    # of which JSONDecodeError is a subclass). A bare 'except' here would
    # also swallow KeyboardInterrupt and SystemExit.
    except (IOError, OSError, ValueError):
        pass
    return False
|
||||
|
||||
|
||||
def classify_input_file(filename):
    """
    Return a tuple (type, msg) where 'type' specifies the classified type
    of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
    string representing the error.
    """
    ftype = IT_Invalid
    err_msg = None
    if not os.path.exists(filename):
        err_msg = "'%s' does not exist" % filename
    elif not os.path.isfile(filename):
        err_msg = "'%s' does not name a file" % filename
    elif is_executable_file(filename):
        ftype = IT_Executable
    elif is_json_file(filename):
        ftype = IT_JSON
    else:
        # BUG FIX: the '% filename' argument was missing, so callers were
        # shown the literal '%s' instead of the offending file name.
        err_msg = ("'%s' does not name a valid benchmark executable or "
                   "JSON file" % filename)
    return ftype, err_msg
|
||||
|
||||
|
||||
def check_input_file(filename):
    """
    Classify the file named by 'filename' and return the classification.
    If the file is classified as 'IT_Invalid' print an error message and exit
    the program.
    """
    ftype, msg = classify_input_file(filename)
    if ftype == IT_Invalid:
        # Was a Python 2 print statement; the function form also parses
        # under Python 3 and matches print() usage elsewhere in the module.
        print("Invalid input file: %s" % msg)
        sys.exit(1)
    return ftype
|
||||
|
||||
|
||||
def load_benchmark_results(fname):
    """
    Read benchmark output from a file and return the JSON object.
    REQUIRES: 'fname' names a file containing JSON benchmark output.
    """
    with open(fname, 'r') as results_file:
        contents = results_file.read()
    return json.loads(contents)
|
||||
|
||||
|
||||
def run_benchmark(exe_name, benchmark_flags):
    """
    Run a benchmark specified by 'exe_name' with the specified
    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
    real time console output.
    RETURNS: A JSON object representing the benchmark output
    """
    thandle, tname = tempfile.mkstemp()
    os.close(thandle)
    try:
        cmd = [exe_name] + benchmark_flags
        print("RUNNING: %s" % ' '.join(cmd))
        exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
        if exitCode != 0:
            print('TEST FAILED...')
            sys.exit(exitCode)
        json_res = load_benchmark_results(tname)
    finally:
        # Always remove the temp file — previously it leaked whenever the
        # benchmark failed, because sys.exit() raised before os.unlink().
        os.unlink(tname)
    return json_res
|
||||
|
||||
|
||||
def run_or_load_benchmark(filename, benchmark_flags):
    """
    Get the results for a specified benchmark. If 'filename' specifies
    an executable benchmark then the results are generated by running the
    benchmark. Otherwise 'filename' must name a valid JSON output file,
    which is loaded and the result returned.
    """
    # check_input_file() exits the program on an invalid input, so only
    # the two valid classifications can reach this point.
    ftype = check_input_file(filename)
    if ftype == IT_Executable:
        return run_benchmark(filename, benchmark_flags)
    if ftype == IT_JSON:
        return load_benchmark_results(filename)
    assert False  # This branch is unreachable
|
Loading…
Reference in New Issue
Block a user