#!/usr/bin/env python3

# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.

import argparse
import json


def load_results(fname):
    with open(fname) as f:
        return json.load(f)


def compute_diff(value_from, value_to):
    """Return the new value, plus its relative change against the reference value."""
    if value_from is None:
        return {"value": value_to}
    diff = (value_to - value_from) / value_from
    return {"value": value_to, "diff": diff}


def recursive_get(data, *args, value=None):
    """Walk nested dicts along `args`, returning `value` as soon as a key is missing."""
    for arg in args:
        if arg not in data:
            return value
        data = data[arg]
    return data


def compare_results(results_from, results_to, fields, ignored, different_vendors):
    """Compare two result files; return {testcode: row} for scenarios whose performance changed."""
    ret = {}
    for dataset, variants in results_to.items():
        if dataset == "__run_configuration__":
            continue
        for variant, groups in variants.items():
            for group, scenarios in groups.items():
                if group == "__import__":
                    continue
                for scenario, summary_to in scenarios.items():
                    if scenario in ignored:
                        continue
                    # Scenarios missing from the reference run yield an empty dict
                    # instead of raising a KeyError.
                    summary_from = recursive_get(
                        results_from,
                        dataset,
                        variant,
                        group,
                        scenario,
                        "without_fine_grained_authorization",
                        value={},
                    )
                    summary_to = summary_to["without_fine_grained_authorization"]
                    # Only check compatibility when there is a reference summary
                    # to compare against.
                    if len(summary_from) > 0 and (
                        (summary_to["count"] != summary_from["count"] and not different_vendors)
                        or summary_to["num_workers"] != summary_from["num_workers"]
                    ):
                        raise ValueError("Incompatible results!")
                    testcode = "/".join(
                        [
                            dataset,
                            variant,
                            group,
                            scenario,
                            "{:02d}".format(summary_to["num_workers"]),
                        ]
                    )
                    row = {}
                    performance_changed = False
                    for field in fields:
                        key = field["name"]
                        if key in summary_to:
                            row[key] = compute_diff(summary_from.get(key), summary_to[key])
                        elif key in summary_to["database"]:
                            row[key] = compute_diff(
                                recursive_get(summary_from, "database", key, value=None),
                                summary_to["database"][key],
                            )
                        elif summary_to.get("latency_stats") is not None and key in summary_to["latency_stats"]:
                            row[key] = compute_diff(
                                recursive_get(summary_from, "latency_stats", key, value=None),
                                summary_to["latency_stats"][key],
                            )
                        elif not different_vendors:
                            row[key] = compute_diff(
                                recursive_get(summary_from, "metadata", key, "average", value=None),
                                summary_to["metadata"][key]["average"],
                            )
                        # A row is reported when a value is new (no reference) or
                        # its relative change exceeds the field's threshold.
                        if row.get(key) is not None and (
                            "diff" not in row[key]
                            or ("diff_threshold" in field and abs(row[key]["diff"]) >= field["diff_threshold"])
                        ):
                            performance_changed = True
                    if performance_changed:
                        ret[testcode] = row
    return ret
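
# Illustrative sanity checks for the helpers above (the values are made up, not
# from a real benchmark run): compute_diff() reports the new value plus the
# change relative to the reference, and recursive_get() walks nested dicts with
# a fallback default.
#
#   >>> compute_diff(100.0, 120.0)
#   {'value': 120.0, 'diff': 0.2}
#   >>> compute_diff(None, 120.0)  # no reference value -> no "diff" key
#   {'value': 120.0}
#   >>> recursive_get({"a": {"b": 1}}, "a", "b", value=None)
#   1
#   >>> recursive_get({"a": {}}, "a", "b", value=42)
#   42
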
def generate_remarkup(fields, data, results_from=None, results_to=None):
    """Render the comparison data as a standalone HTML report."""
    ret = "<html>\n"
    ret += """
    <style>
        table, th, td {
            border: 1px solid black;
        }
    </style>
    """
    ret += "<h1>Benchmark comparison</h1>\n"
    if results_from and results_to:
        ret += """
        <h2>Benchmark configuration</h2>
        <table>
            <tr>
                <th>Configuration</th>
                <th>Reference vendor</th>
                <th>Vendor</th>
            </tr>
            <tr>
                <td>Vendor name</td>
                <td>{}</td>
                <td>{}</td>
            </tr>
            <tr>
                <td>Vendor condition</td>
                <td>{}</td>
                <td>{}</td>
            </tr>
            <tr>
                <td>Number of workers</td>
                <td>{}</td>
                <td>{}</td>
            </tr>
            <tr>
                <td>Single threaded runtime (sec)</td>
                <td>{}</td>
                <td>{}</td>
            </tr>
            <tr>
                <td>Platform</td>
                <td>{}</td>
                <td>{}</td>
            </tr>
        </table>
        """.format(
            results_from["vendor"],
            results_to["vendor"],
            results_from["condition"],
            results_to["condition"],
            results_from["num_workers_for_benchmark"],
            results_to["num_workers_for_benchmark"],
            results_from["single_threaded_runtime_sec"],
            results_to["single_threaded_runtime_sec"],
            results_from["platform"],
            results_to["platform"],
        )
        ret += """
        <h2>How to read benchmark results</h2>
        <b>Throughput and latency values:</b>
        <p>
        If vendor <b>{}</b> is faster than the reference vendor <b>{}</b>, the results
        for throughput and latency are shown in <b style="color:green">green</b>,
        otherwise in <b style="color:red">red</b>. The percentage difference is
        relative to the reference vendor <b>{}</b>.
        </p>
        <b>Memory usage:</b>
        <p>
        If vendor <b>{}</b> uses less memory than the reference vendor <b>{}</b>, the
        result is shown in <b style="color:green">green</b>, otherwise in
        <b style="color:red">red</b>. The percentage difference for memory is
        relative to the reference vendor <b>{}</b>.
        </p>
        """.format(
            results_to["vendor"],
            results_from["vendor"],
            results_from["vendor"],
            results_to["vendor"],
            results_from["vendor"],
            results_from["vendor"],
        )
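
    # How the cell colors in the results table below are chosen: a cell is green
    # when the sign of the change matches the direction that is "better" for that
    # field, i.e. an XNOR of `diff >= 0` and `positive_diff_better` from the field
    # definitions in __main__:
    #
    #   positive_diff_better=True,  diff >= 0 -> green  (e.g. throughput went up)
    #   positive_diff_better=True,  diff <  0 -> red
    #   positive_diff_better=False, diff >= 0 -> red    (e.g. memory went up)
    #   positive_diff_better=False, diff <  0 -> green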
    ret += "<h2>Benchmark results</h2>\n"
    if len(data) > 0:
        ret += "<table>\n"
        ret += "    <tr>\n"
        ret += "        <th>Testcode</th>\n"
        ret += (
            "\n".join(
                map(
                    lambda x: "        <th>{}</th>".format(x["name"].replace("_", " ").capitalize()),
                    fields,
                )
            )
            + "\n"
        )
        ret += "    </tr>\n"
        for testcode in sorted(data.keys()):
            ret += "    <tr>\n"
            ret += "        <td>{}</td>\n".format(testcode)
            for field in fields:
                result = data[testcode].get(field["name"])
                if result is not None:
                    value = result["value"] * field["scaling"]
                    if "diff" in result:
                        diff = result["diff"]
                        if not (field["positive_diff_better"] ^ (diff >= 0)):
                            color = "green"
                        else:
                            color = "red"
                        ret += '        <td style="color: {}">{:.3f}{} ({:+.2%})</td>\n'.format(
                            color, value, field["unit"], diff
                        )
                    else:
                        ret += "        <td>{:.3f}{} //(new)//</td>\n".format(value, field["unit"])
                else:
                    # Emit an empty cell so the columns stay aligned when a field
                    # is missing for a testcode.
                    ret += "        <td></td>\n"
            ret += "    </tr>\n"
        ret += "</table>\n"
        ret += "</html>\n"
    else:
        ret += "No performance change detected.\n"
    return ret


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compare results of multiple benchmark runs.")
    parser.add_argument(
        "--compare",
        action="append",
        nargs=2,
        metavar=("from", "to"),
        help="compare results between `from` and `to` files",
    )
    parser.add_argument("--output", default="", help="output file name")
    # The file is read line by line, each line naming one test to exclude.
    parser.add_argument("--exclude_tests_file", help="file listing test names to be excluded")
    parser.add_argument(
        "--different-vendors",
        action="store_true",
        default=False,
        help="compare results from different vendors; skips the metadata, duration and count checks",
    )
    parser.add_argument(
        "--difference-threshold",
        type=float,
        default=0.02,
        help="difference threshold for memory and throughput, e.g. 0.02 means 2%%",
    )
    args = parser.parse_args()

    fields = [
        {
            "name": "throughput",
            "positive_diff_better": True,
            "scaling": 1,
            "unit": "QPS",
            "diff_threshold": 0.05,  # 5%
        },
        {
            "name": "duration",
            "positive_diff_better": False,
            "scaling": 1,
            "unit": "s",
        },
        {
            "name": "parsing_time",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "planning_time",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "plan_execution_time",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "memory",
            "positive_diff_better": False,
            "scaling": 1 / 1024 / 1024,
            "unit": "MiB",
            "diff_threshold": 0.02,  # 2%
        },
        {
            "name": "max",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "p99",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "p90",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "p75",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "p50",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
        {
            "name": "mean",
            "positive_diff_better": False,
            "scaling": 1000,
            "unit": "ms",
        },
    ]

    if args.compare is None or len(args.compare) == 0:
        raise ValueError("You must specify at least one pair of files!")

    if args.exclude_tests_file:
        with open(args.exclude_tests_file, "r") as f:
            ignored = [line.rstrip("\n") for line in f]
    else:
        ignored = []

    if args.different_vendors:
        # Metadata-based fields are not comparable across vendors, so drop them.
        ignore_on_different_vendors = {"duration", "parsing_time", "planning_time", "plan_execution_time"}
        fields = [field for field in fields if field["name"] not in ignore_on_different_vendors]

    if args.difference_threshold > 0.01:
        # A custom threshold overrides the per-field defaults set above.
        for field in fields:
            if "diff_threshold" in field:
                field["diff_threshold"] = args.difference_threshold

    data = {}
    for file_from, file_to in args.compare:
        results_from = load_results(file_from)
        results_to = load_results(file_to)
        data.update(compare_results(results_from, results_to, fields, ignored, args.different_vendors))
    # The report configuration is taken from the last compared pair.
    results_from_config = results_from.get("__run_configuration__")
    results_to_config = results_to.get("__run_configuration__")

    remarkup = generate_remarkup(fields, data, results_from=results_from_config, results_to=results_to_config)
    if args.output:
        with open(args.output, "w") as f:
            f.write(remarkup)
    else:
        print(remarkup, end="")
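
# Example invocation (file names are hypothetical, and this assumes the script is
# saved as compare_results.py; each JSON file is the output of one benchmark run):
#
#   python3 compare_results.py --compare reference.json current.json --output report.html
#
# Multiple --compare pairs may be given; their rows are merged into one report,
# and omitting --output prints the generated HTML to stdout.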