From 51ba4727bf69a5370a12a42bbb0adb2f33170d5f Mon Sep 17 00:00:00 2001
From: florijan <florijan@memgraph.io>
Date: Wed, 6 Sep 2017 10:31:26 +0200
Subject: [PATCH] Gbench plot tool added

Summary: See script docs.

Reviewers: buda, teon.banek, mislav.bradac

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D755
---
 tools/plot_gbench_json | 128 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100755 tools/plot_gbench_json

diff --git a/tools/plot_gbench_json b/tools/plot_gbench_json
new file mode 100755
index 000000000..0b34b5121
--- /dev/null
+++ b/tools/plot_gbench_json
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+A tool for plotting Google benchmark results using matplotlib. Requires
+Python3, matplotlib and gbench data in JSON format.
+
+Does a few nice things for you:
+    1. Can be used with file input (cmd line arg) or reading from stdin
+    2. Groups benchmarks into multiple plots based on benchmark name.
+       This is currently implemented to work well with template based
+       benchmarks, it might require mods.
+    3. Automatically detects the need for log-scale on both axes.
+
+Missing features:
+    1. Proper support for benchmarks that use two arguments.
+    2. Proper handling for all types of benchmark structures, name parsing
+       in this implementation is made for template-based benches.
+    3. Plotting to image files. This implementation plots to the GUI (you can
+       save images there).
+
+Usage:
+    # Generate benchmark data in json format using:
+    > ./my_bench --benchmark_out_format=json --benchmark_out=data.json
+    # Use that data with plotter:
+    > ./plot_gbench_json data.json
+
+Alternatively you can route stuff and avoid using an intermediary file:
+    sh > ./my_bench --benchmark_out_format=json --benchmark_out=/dev/stderr 2>&1 >/dev/null | grep "^[{} ]" | plot_gbench_json
+
+Maybe there is a nicer way to route it?
+"""
+
+import re
+import fileinput
+import json
+from collections import defaultdict
+
+from matplotlib import pyplot as plt
+
+
+def convert_num(string):
+    """
+    Converts stuff like "100" and "3k" to numbers. Returns a float; raises
+    ValueError for any trailing non-digit suffix other than "k" (x1000).
+    """
+    # raw string: "\D" in a plain literal is an invalid escape sequence
+    suffix_re = re.search(r"\D+$", string)
+    if not suffix_re:
+        return float(string)
+
+    suffix = string[suffix_re.start():]
+    number = float(string[:suffix_re.start()])
+
+    if suffix == "k":
+        number *= 1000
+    else:
+        raise ValueError("Unknown number suffix: " + suffix)
+    return number
+
+
+def is_exponential_growth(numbers):
+    """
+    Tries to determine if the given numbers progress more in logarithmic than
+    in linear fashion. Assumes monotonic increase; False if any number <= 0.
+    """
+    if any(n <= 0 for n in numbers):
+        return False  # 0 breaks n2 / n1, and a log axis can't show <= 0
+    diffs = [n2 - n1 for (n1, n2) in zip(numbers, numbers[1:])]
+    factors = [n2 / n1 for (n1, n2) in zip(numbers, numbers[1:])]
+    # constant diff => linear, constant factor => exp; which is more constant?
+    diff_rms = [(d - (sum(diffs) / len(diffs))) ** 2 for d in diffs]
+    factor_rms = [(f - (sum(factors) / len(factors))) ** 2 for f in factors]
+    return sum(factor_rms) < sum(diff_rms)
+
+
+def main():
+    """Plots gbench JSON (from file args or stdin), one figure per group."""
+    data = json.loads("".join(fileinput.input()).strip())
+
+    # structure: {bench_name: [(x, y, time_unit), ...]}
+    benchmarks = defaultdict(list)
+    for bench in data["benchmarks"]:
+        name, x = bench["name"].rsplit("/", 1)
+        benchmarks[name].append((convert_num(x), bench["real_time"],
+                                bench["time_unit"]))
+
+    # group benchmarks on name prefix (text before the first non-word char)
+    # one group will be one plot with possibly multiple lines
+    benchmarks_groups = defaultdict(dict)
+    for name, points in benchmarks.items():
+        name_split = re.split(r"\W", name, maxsplit=1)
+        if len(name_split) == 2:
+            group, element = name_split
+            benchmarks_groups[group][element] = points
+        else:
+            benchmarks_groups["__all_benchmarks__"][name] = points
+
+    # validate all the time units per group (one plot)
+    for measurements in benchmarks_groups.values():
+        units = set()
+        for measurement in measurements.values():
+            units.update(k[2] for k in measurement)
+        if len(units) > 1:
+            raise ValueError(
+                "Multiple time units in a single plot: %r" % units)
+
+    # plot all groups
+    for group_name, measurements in benchmarks_groups.items():
+        plt.figure()
+        log_x, log_y = False, False
+        for line, values in measurements.items():
+            x, y, _ = zip(*values)
+            log_x |= is_exponential_growth(x)
+            log_y |= is_exponential_growth(y)
+            plt.plot(x, y, label=line)
+        if log_x:
+            plt.xscale("log")
+        if log_y:
+            plt.yscale("log")
+        plt.title(group_name)
+        plt.legend()
+        plt.grid()
+        plt.show()
+
+
+if __name__ == "__main__":  # run only when executed as a script
+    main()