186 lines
5.6 KiB
Python
186 lines
5.6 KiB
Python
# Copyright 2022 Memgraph Ltd.
|
|
#
|
|
# Use of this software is governed by the Business Source License
|
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
|
# License, and you may not use this file except in compliance with the Business Source License.
|
|
#
|
|
# As of the Change Date specified in that file, in accordance with
|
|
# the Business Source License, use of this software will be governed
|
|
# by the Apache License, Version 2.0, included in the file
|
|
# licenses/APL.txt.
|
|
|
|
####################################
|
|
# Benchmark datastructures analyzer
|
|
####################################
|
|
# This script uses the output from dataset benchmark tests to plot charts
|
|
# comparing the results of different datastructures on the same operation.
|
|
#
|
|
# Note: Naming the tests is very important in order for this script to recognize
|
|
# which operation is being performed and on which DS, so it should come in this
|
|
# form: BM_Benchmark<Operation><Datastructure>/<RunArgument>
|
|
# where <RunArgument> is added automatically by the Google Benchmark framework.
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
class Operation(Enum):
    """Operations a benchmark can exercise; each value is the lower-case operation name."""

    CONTAINS = "contains"
    FIND = "find"
    INSERT = "insert"
    RANDOM = "random"
    REMOVE = "remove"

    @classmethod
    def to_list(cls) -> List[str]:
        """Return the string values of all members, in definition order."""
        return [member.value for member in cls]

    @staticmethod
    def get(s: str) -> Optional["Operation"]:
        """
        Look up a member by name, ignoring case.

        Args:
            s: member name in any casing, e.g. "insert" or "INSERT".

        Returns:
            The matching member, or None when no member is named ``s.upper()``.
        """
        try:
            return Operation[s.upper()]
        except KeyError:
            # Lookup by name (``Operation[...]``) reports a miss with KeyError;
            # ValueError is only raised by lookup by value (``Operation(...)``).
            return None

    def __str__(self):
        """Return the member's string value."""
        return str(self.value)
|
|
|
|
|
|
@dataclass(frozen=True)
class BenchmarkRow:
    """
    Immutable record describing one entry of the benchmark output.

    Instances are built positionally by get_row_data, so the field order below
    is part of the interface.
    """

    # Benchmark name without the "BM_Benchmark" prefix and without the "/<run argument>" part
    name: str
    # Part of the name that follows the operation word
    datastructure: str
    # Operation recognised in the benchmark name
    operation: Operation
    # Values copied from the entry's "real_time", "cpu_time" and "iterations" keys
    # NOTE(review): the benchmark output may report times as floats -- confirm before relying on int
    real_time: int
    cpu_time: int
    iterations: int
    # Unit of the two time values, copied from the entry's "time_unit" key
    time_unit: str
    # Text after the first "/" in the benchmark name, or None when there is none
    run_arg: Optional[Any]
|
|
|
|
|
|
class GoogleBenchmarkResult:
    """Accumulates benchmark rows of a single operation, grouped by data structure name."""

    def __init__(self):
        # Operation shared by every stored row; stays None until the first row arrives
        self._operation = None
        # Data structure name -> rows, in insertion order
        self._datastructures: Dict[str, List[BenchmarkRow]] = {}

    def add_result(self, row: BenchmarkRow) -> None:
        """
        Store ``row`` under its data structure name.

        The first row fixes the operation of this result; every later row must
        carry that same operation, otherwise the assertion fails.
        """
        if self._operation is None:
            self._operation = row.operation
        assert self._operation is row.operation
        self._datastructures.setdefault(row.datastructure, []).append(row)

    @property
    def operation(self) -> Optional[Operation]:
        """Operation of the stored rows, or None while no row has been added."""
        return self._operation

    @property
    def datastructures(self) -> Dict[str, List[BenchmarkRow]]:
        """Mapping from data structure name to the rows added for it."""
        return self._datastructures
|
|
|
|
|
|
def get_operation(s: str) -> Operation:
    """
    Recognise the operation named in a benchmark name.

    The match is case-insensitive. When several operation names occur in ``s``
    the one that starts earliest wins, because the operation is expected to
    come before the data structure name. A data structure whose name happens
    to contain another operation word (e.g. "InsertUnionFind") is therefore
    still attributed to the leading operation.

    Args:
        s: benchmark name without the "BM_Benchmark" prefix.

    Returns:
        The recognised Operation.

    Raises:
        SystemExit: with status 1, after printing a message, when no operation
            name occurs in ``s``.
    """
    lowered = s.lower()
    best = None
    best_pos = -1
    for candidate in Operation:
        pos = lowered.find(candidate.value.lower())
        if pos == -1:
            continue
        # Strict '<' keeps the earlier-declared member on the (theoretical) tie
        if best is None or pos < best_pos:
            best, best_pos = candidate, pos

    if best is None:
        print("Operation not found!")
        sys.exit(1)
    return best
|
|
|
|
|
|
def get_row_data(line: Dict[str, Any]) -> BenchmarkRow:
    """
    Build a BenchmarkRow from one entry of the benchmark output.

    Naming is very important: after the "BM_Benchmark" prefix must come an
    Operation name, and then the data structure to test, optionally followed
    by "/<run argument>".

    Args:
        line: benchmark entry; the keys "name", "real_time", "cpu_time",
            "iterations" and "time_unit" are read.

    Returns:
        The parsed row. ``run_arg`` is the text between the first and second
        "/" of the name, or None when the name contains no "/".

    Raises:
        KeyError: if one of the keys listed above is missing.
        IndexError: if the name does not contain "BM_Benchmark".
        SystemExit: propagated from get_operation when no operation is recognised.
    """
    # maxsplit=1: only the first prefix occurrence separates prefix from payload
    full_name = line["name"].split("BM_Benchmark", 1)[1]
    name_with_run_arg = full_name.split("/")
    test_name = name_with_run_arg[0]
    operation = get_operation(test_name)

    # get_operation matches case-insensitively, so locate the operation word the
    # same way and keep everything after it. Splitting on the capitalized word
    # truncated names that repeat it ("FindUnionFind" -> "Union") and raised
    # IndexError when the casing differed.
    op_start = test_name.lower().find(operation.value.lower())
    datastructure = test_name[op_start + len(operation.value):]

    run_arg = name_with_run_arg[1] if len(name_with_run_arg) > 1 else None

    return BenchmarkRow(
        test_name,
        datastructure,
        operation,
        line["real_time"],
        line["cpu_time"],
        line["iterations"],
        line["time_unit"],
        run_arg,
    )
|
|
|
|
|
|
def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]:
    """
    Load the benchmark JSON file named by ``args.log_file``.

    Args:
        args: parsed command-line namespace; only ``log_file`` is read.

    Returns:
        A GoogleBenchmarkResult holding one row per entry of the top-level
        "benchmarks" list, or None (after printing a message) when no log file
        was given or the file does not exist.

    Raises:
        AssertionError: if the JSON document has no "benchmarks" key.
    """
    if args.log_file is None:
        # Path(None) would raise TypeError; report it like any other missing file
        print("Error no log file given!")
        return None

    file_path = Path(args.log_file)
    if not file_path.exists():
        # The f prefix was missing, so "{file_path}" used to be printed literally
        print(f"Error file {file_path} not found!")
        return None

    with file_path.open("r", encoding="utf-8") as file:
        data = json.load(file)

    assert "benchmarks" in data, "There must be a benchmark list inside"
    res = GoogleBenchmarkResult()
    for benchmark in data["benchmarks"]:
        res.add_result(get_row_data(benchmark))
    return res
|
|
|
|
|
|
def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None:
    """
    Draw one line per data structure for the operation held by ``results``.

    Each line plots the rows' ``real_time`` on the x axis against their
    ``run_arg`` on the y axis.

    Args:
        results: collected rows; ``results.operation`` must not be None.
        save: when true the figure is written to "<operation>.png" in the
            current directory and closed, otherwise it is shown interactively.

    Raises:
        AssertionError: if ``results`` holds no operation.
    """
    colors = ["red", "green", "blue", "yellow", "purple", "brown"]
    assert results.operation is not None
    fig = plt.figure()

    plotted = 0
    time_unit = None
    for ds, benchmarks in results.datastructures.items():
        if not benchmarks:
            print(f"Nothing to do for {ds}...")
            continue

        x_axis = [elem.real_time for elem in benchmarks]
        y_axis = [elem.run_arg for elem in benchmarks]
        # Wrap around the palette; popping from it raised IndexError once more
        # data structures than colors had to be drawn.
        color = colors[plotted % len(colors)]
        plotted += 1
        plt.plot(x_axis, y_axis, marker="", color=color, linewidth=2, label=f"{ds}")
        # The label uses the unit of the most recently plotted data structure
        time_unit = benchmarks[0].time_unit

    if plotted:
        # Decorate once, after all lines exist, instead of on every iteration
        plt.title(f"Benchmark results for operation {results.operation.value}")
        plt.xlabel(f"Time [{time_unit}]")
        plt.grid(True)
        plt.legend()
        plt.draw()

    if save:
        plt.savefig(f"{results.operation.value}.png")
        plt.close(fig)
    else:
        plt.show()
|
|
|
|
|
|
def _str_to_bool(value: str) -> bool:
    """Convert a command-line value such as "true", "No" or "0" to a bool."""
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    # The empty string stays falsy, as it was with the former ``type=bool``
    if normalized in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args() -> argparse.Namespace:
    """
    Parse the command line.

    Options:
        --log_file: path of the benchmark JSON output (required).
        --save: boolean, default True; whether the chart is saved to a file
            instead of being shown.

    Returns:
        Namespace with the attributes ``log_file`` (str) and ``save`` (bool).
    """
    parser = argparse.ArgumentParser(description="Process benchmark results.")
    # Without a log file there is nothing to process, so let argparse report it
    parser.add_argument("--log_file", type=str, required=True)
    # ``type=bool`` calls bool() on the raw string, which made "--save False"
    # evaluate to True; parse the text explicitly instead.
    parser.add_argument("--save", type=_str_to_bool, default=True)
    return parser.parse_args()
|
|
|
|
|
|
def main():
    """
    Script entry point.

    Parses the command line, loads the benchmark log and plots it. Exits with
    status 1 after printing a message when the log could not be loaded.
    """
    args = parse_args()
    res = get_benchmark_res(args)
    if res is not None:
        plot_operation(res, args.save)
        return

    print("Failed to get results from log file!")
    sys.exit(1)


if __name__ == "__main__":
    main()
|