memgraph/tools/plot/benchmark_datastructures.py
# Copyright 2022 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
####################################
# Benchmark data structures analyzer
####################################
# This script uses the output of the data structure benchmark tests to plot
# charts comparing the results of different data structures on the same
# operation.
#
# Note: Naming the tests is very important for this script to recognize which
# operation is being performed and on which data structure, so names must come
# in this form: BM_Benchmark<Operation><Datastructure>/<RunArgument>
# where <RunArgument> is appended automatically by the google benchmark
# framework.
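#
# For illustration, a hypothetical benchmark name and how it is parsed:
# "BM_BenchmarkInsertSkipList/1000" yields operation "insert",
# datastructure "SkipList" and run_arg "1000".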
import argparse
import json
import sys
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt


class Operation(Enum):
    CONTAINS = "contains"
    FIND = "find"
    INSERT = "insert"
    RANDOM = "random"
    REMOVE = "remove"

    @classmethod
    def to_list(cls) -> List[str]:
        return [c.value for c in cls]
    @staticmethod
    def get(s: str) -> Optional["Operation"]:
        try:
            return Operation[s.upper()]
        except KeyError:
            # Enum name lookup raises KeyError (not ValueError) on a miss.
            return None

    def __str__(self):
        return str(self.value)


@dataclass(frozen=True)
class BenchmarkRow:
    name: str
    datastructure: str
    operation: Operation
    # google benchmark reports timings as floating-point values.
    real_time: float
    cpu_time: float
    iterations: int
    time_unit: str
    run_arg: Optional[Any]


class GoogleBenchmarkResult:
    def __init__(self):
        self._operation: Optional[Operation] = None
        self._datastructures: Dict[str, List[BenchmarkRow]] = {}

    def add_result(self, row: BenchmarkRow) -> None:
        if self._operation is None:
            self._operation = row.operation
        assert self._operation is row.operation
        self._datastructures.setdefault(row.datastructure, []).append(row)

    @property
    def operation(self) -> Optional[Operation]:
        return self._operation

    @property
    def datastructures(self) -> Dict[str, List[BenchmarkRow]]:
        return self._datastructures


def get_operation(s: str) -> Operation:
    for op in Operation.to_list():
        if op.lower() in s.lower():
            operation_enum = Operation.get(op)
            # op comes straight from Operation.to_list(), so the lookup cannot fail.
            assert operation_enum is not None
            return operation_enum
    print(f"Operation not found in benchmark name {s}!")
    sys.exit(1)
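
# For instance, get_operation("InsertSkipList") matches the "insert" substring
# and returns Operation.INSERT; a name containing no known operation terminates
# the script.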


def get_row_data(line: Dict[str, Any]) -> BenchmarkRow:
    """
    Parse a single google benchmark JSON entry.

    Naming is important: after the BM_Benchmark prefix the operation name must
    come first, followed by the data structure under test.
    """
    full_name = line["name"].split("BM_Benchmark")[1]
    name_with_run_arg = full_name.split("/")
    operation = get_operation(name_with_run_arg[0])
    datastructure = name_with_run_arg[0].split(operation.value.capitalize())[1]
    run_arg = None
    if len(name_with_run_arg) > 1:
        run_arg = name_with_run_arg[1]
    return BenchmarkRow(
        name_with_run_arg[0],
        datastructure,
        operation,
        line["real_time"],
        line["cpu_time"],
        line["iterations"],
        line["time_unit"],
        run_arg,
    )
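
# For reference, a sketch of the google benchmark JSON entry shape this
# function consumes (field subset; the test name below is hypothetical):
#   {
#       "name": "BM_BenchmarkInsertSkipList/1000",
#       "iterations": 100000,
#       "real_time": 123.4,
#       "cpu_time": 120.1,
#       "time_unit": "ns"
#   }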


def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]:
    file_path = Path(args.log_file)
    if not file_path.exists():
        print(f"Error: file {file_path} not found!")
        return None
    with file_path.open("r") as file:
        data = json.load(file)
        res = GoogleBenchmarkResult()
        assert "benchmarks" in data, "There must be a benchmark list inside"
        for benchmark in data["benchmarks"]:
            res.add_result(get_row_data(benchmark))
        return res


def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None:
    # Each data structure gets the next color; the palette supports up to six series.
    colors = ["red", "green", "blue", "yellow", "purple", "brown"]
    assert results.operation is not None
    fig = plt.figure()
    for ds, benchmarks in results.datastructures.items():
        if benchmarks:
            # Plot a line chart: real time on the x-axis, run argument on the y-axis.
            x_axis = [elem.real_time for elem in benchmarks]
            y_axis = [elem.run_arg for elem in benchmarks]
            plt.plot(x_axis, y_axis, marker="", color=colors.pop(0), linewidth=2, label=f"{ds}")
            plt.title(f"Benchmark results for operation {results.operation.value}")
            plt.xlabel(f"Time [{benchmarks[0].time_unit}]")
            plt.ylabel("Run argument")
            plt.grid(True)
            plt.legend()
            plt.draw()
        else:
            print(f"Nothing to do for {ds}...")
    if save:
        plt.savefig(f"{results.operation.value}.png")
        plt.close(fig)
    else:
        plt.show()


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Process benchmark results.")
    parser.add_argument("--log_file", type=str, required=True, help="Path to the google benchmark JSON output.")
    # argparse's type=bool treats any non-empty string as True, so use
    # BooleanOptionalAction (Python 3.9+) to get proper --save/--no-save flags.
    parser.add_argument("--save", action=argparse.BooleanOptionalAction, default=True)
    return parser.parse_args()


def main():
    args = parse_args()
    res = get_benchmark_res(args)
    if res is None:
        print("Failed to get results from log file!")
        sys.exit(1)
    plot_operation(res, args.save)


if __name__ == "__main__":
    main()
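
# Example invocation (hypothetical file name). The input JSON is what google
# benchmark writes when the benchmark binary is run with
# --benchmark_out=bench.json (the file output format defaults to JSON):
#   python3 benchmark_datastructures.py --log_file bench.json --no-save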