diff --git a/tests/mgbench/compare_results.py b/tests/mgbench/compare_results.py
index 2179bb408..f10ec8bab 100755
--- a/tests/mgbench/compare_results.py
+++ b/tests/mgbench/compare_results.py
@@ -14,7 +14,6 @@
 import argparse
 import json
 
-
 FIELDS = [
     {
         "name": "throughput",
@@ -85,39 +84,32 @@ def compare_results(results_from, results_to, fields):
                 if group == "__import__":
                     continue
                 for scenario, summary_to in scenarios.items():
-                    summary_from = recursive_get(
-                        results_from, dataset, variant, group, scenario,
-                        value={})
-                    if len(summary_from) > 0 and \
-                            summary_to["count"] != summary_from["count"] or \
-                            summary_to["num_workers"] != \
-                            summary_from["num_workers"]:
+                    summary_from = recursive_get(results_from, dataset, variant, group, scenario, value={})
+                    if (
+                        len(summary_from) > 0
+                        and summary_to["count"] != summary_from["count"]
+                        or summary_to["num_workers"] != summary_from["num_workers"]
+                    ):
                         raise Exception("Incompatible results!")
-                    testcode = "/".join([dataset, variant, group, scenario,
-                                         "{:02d}".format(
-                                             summary_to["num_workers"])])
+                    testcode = "/".join([dataset, variant, group, scenario, "{:02d}".format(summary_to["num_workers"])])
                     row = {}
                     performance_changed = False
                     for field in fields:
                         key = field["name"]
                         if key in summary_to:
-                            row[key] = compute_diff(
-                                summary_from.get(key, None),
-                                summary_to[key])
+                            row[key] = compute_diff(summary_from.get(key, None), summary_to[key])
                         elif key in summary_to["database"]:
                             row[key] = compute_diff(
-                                recursive_get(summary_from, "database", key,
-                                              value=None),
-                                summary_to["database"][key])
+                                recursive_get(summary_from, "database", key, value=None), summary_to["database"][key]
+                            )
                         else:
                             row[key] = compute_diff(
-                                recursive_get(summary_from, "metadata", key,
-                                              "average", value=None),
-                                summary_to["metadata"][key]["average"])
-                        if "diff" not in row[key] or \
-                                ("diff_treshold" in field and
-                                 abs(row[key]["diff"]) >=
-                                 field["diff_treshold"]):
+                                recursive_get(summary_from, "metadata", key, "average", value=None),
+                                summary_to["metadata"][key]["average"],
+                            )
+                        if "diff" not in row[key] or (
+                            "diff_treshold" in field and abs(row[key]["diff"]) >= field["diff_treshold"]
+                        ):
                             performance_changed = True
                     if performance_changed:
                         ret[testcode] = row
@@ -130,8 +122,9 @@ def generate_remarkup(fields, data):
         ret += "<table>\n"
         ret += "  <tr>\n"
         ret += "    <th>Testcode</th>\n"
-        ret += "\n".join(map(lambda x: "    <th>{}</th>".format(
-            x["name"].replace("_", " ").capitalize()), fields)) + "\n"
+        ret += (
+            "\n".join(map(lambda x: "    <th>{}</th>".format(x["name"].replace("_", " ").capitalize()), fields)) + "\n"
+        )
         ret += "  </tr>\n"
         for testcode in sorted(data.keys()):
             ret += "  <tr>\n"
@@ -147,12 +140,9 @@ def generate_remarkup(fields, data):
                     else:
                         color = "red"
                     sign = "{{icon {} color={}}}".format(arrow, color)
-                    ret += "    <td>{:.3f}{} //({:+.2%})// {}</td>\n".format(
-                        value, field["unit"], diff, sign)
+                    ret += "    <td>{:.3f}{} //({:+.2%})// {}</td>\n".format(value, field["unit"], diff, sign)
                 else:
-                    ret += "    <td>{:.3f}{} //(new)// " \
-                           "{{icon plus color=blue}}</td>\n".format(
-                               value, field["unit"])
+                    ret += "    <td>{:.3f}{} //(new)// " "{{icon plus color=blue}}</td>\n".format(value, field["unit"])
             ret += "  </tr>\n"
         ret += "</table>\n"
     else:
@@ -161,11 +151,14 @@ def generate_remarkup(fields, data):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Compare results of multiple benchmark runs.")
-    parser.add_argument("--compare", action="append", nargs=2,
-                        metavar=("from", "to"),
-                        help="compare results between `from` and `to` files")
+    parser = argparse.ArgumentParser(description="Compare results of multiple benchmark runs.")
+    parser.add_argument(
+        "--compare",
+        action="append",
+        nargs=2,
+        metavar=("from", "to"),
+        help="compare results between `from` and `to` files",
+    )
     parser.add_argument("--output", default="", help="output file name")
     args = parser.parse_args()