From 500691318a005c712ae4a3ca89e4b6e743d92bd4 Mon Sep 17 00:00:00 2001 From: jbajic <jure.bajic@memgraph.com> Date: Mon, 28 Nov 2022 14:15:41 +0100 Subject: [PATCH] Add analyze script --- tests/benchmark/data_structures_contains.cpp | 10 +- tests/benchmark/data_structures_find.cpp | 8 +- tests/benchmark/data_structures_insert.cpp | 8 +- tests/benchmark/data_structures_remove.cpp | 8 +- tools/plot/benchmark_datastructures.py | 172 +++++++++++++++++++ tools/plot/benchmark_datastructures.sh | 16 ++ 6 files changed, 205 insertions(+), 17 deletions(-) create mode 100644 tools/plot/benchmark_datastructures.py create mode 100755 tools/plot/benchmark_datastructures.sh diff --git a/tests/benchmark/data_structures_contains.cpp b/tests/benchmark/data_structures_contains.cpp index 08596c7a7..bf1a74a8d 100644 --- a/tests/benchmark/data_structures_contains.cpp +++ b/tests/benchmark/data_structures_contains.cpp @@ -103,7 +103,7 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) { tlx::btree_map<storage::v3::PrimaryKey, storage::v3::LexicographicallyOrderedVertex> bpp_tree; PrepareData(bpp_tree, state.range(0)); - // So we can also have elements that does don't exist + // So we can also have elements that don't exist std::mt19937 i_generator(std::random_device{}()); std::uniform_int_distribution<int64_t> i_distribution(0, state.range(0) * 2); int64_t found_elems{0}; @@ -118,13 +118,13 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) { state.SetItemsProcessed(found_elems); } -BENCHMARK(BM_BenchmarkContainsSkipList)->Arg(1000); +BENCHMARK(BM_BenchmarkContainsSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkContainsStdMap)->Arg(1000); +BENCHMARK(BM_BenchmarkContainsStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkContainsStdSet)->Arg(1000); +BENCHMARK(BM_BenchmarkContainsStdSet)->RangeMultiplier(10)->Range(1000, 
1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkContainsBppTree)->Arg(1000); +BENCHMARK(BM_BenchmarkContainsBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark diff --git a/tests/benchmark/data_structures_find.cpp b/tests/benchmark/data_structures_find.cpp index 042066d68..d3b3bdd60 100644 --- a/tests/benchmark/data_structures_find.cpp +++ b/tests/benchmark/data_structures_find.cpp @@ -117,13 +117,13 @@ static void BM_BenchmarkFindBppTree(::benchmark::State &state) { state.SetItemsProcessed(found_elems); } -BENCHMARK(BM_BenchmarkFindSkipList)->Arg(1000); +BENCHMARK(BM_BenchmarkFindSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkFindStdMap)->Arg(1000); +BENCHMARK(BM_BenchmarkFindStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkFindStdSet)->Arg(1000); +BENCHMARK(BM_BenchmarkFindStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkFindBppTree)->Arg(1000); +BENCHMARK(BM_BenchmarkFindBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark diff --git a/tests/benchmark/data_structures_insert.cpp b/tests/benchmark/data_structures_insert.cpp index 4d427985b..8882b444e 100644 --- a/tests/benchmark/data_structures_insert.cpp +++ b/tests/benchmark/data_structures_insert.cpp @@ -99,13 +99,13 @@ static void BM_BenchmarkInsertBppTree(::benchmark::State &state) { } } -BENCHMARK(BM_BenchmarkInsertSkipList)->Arg(1000); +BENCHMARK(BM_BenchmarkInsertSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkInsertStdMap)->Arg(1000); +BENCHMARK(BM_BenchmarkInsertStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkInsertStdSet)->Arg(1000); 
+BENCHMARK(BM_BenchmarkInsertStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkInsertBppTree)->Arg(1000); +BENCHMARK(BM_BenchmarkInsertBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark diff --git a/tests/benchmark/data_structures_remove.cpp b/tests/benchmark/data_structures_remove.cpp index 3fbd54a38..5296a9130 100644 --- a/tests/benchmark/data_structures_remove.cpp +++ b/tests/benchmark/data_structures_remove.cpp @@ -125,13 +125,13 @@ static void BM_BenchmarkRemoveBppTree(::benchmark::State &state) { state.SetItemsProcessed(removed_elems); } -BENCHMARK(BM_BenchmarkRemoveSkipList)->Arg(1000); +BENCHMARK(BM_BenchmarkRemoveSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkRemoveStdMap)->Arg(1000); +BENCHMARK(BM_BenchmarkRemoveStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkRemoveStdSet)->Arg(1000); +BENCHMARK(BM_BenchmarkRemoveStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); -BENCHMARK(BM_BenchmarkRemoveBppTree)->Arg(1000); +BENCHMARK(BM_BenchmarkRemoveBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond); } // namespace memgraph::benchmark diff --git a/tools/plot/benchmark_datastructures.py b/tools/plot/benchmark_datastructures.py new file mode 100644 index 000000000..29fa61be7 --- /dev/null +++ b/tools/plot/benchmark_datastructures.py @@ -0,0 +1,172 @@ +# Copyright 2022 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. 
+# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import argparse +import json +import sys +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, Optional + +import matplotlib.pyplot as plt + + +class Operation(Enum): + CONTAINS = "contains" + FIND = "find" + INSERT = "insert" + RANDOM = "random" + REMOVE = "remove" + + @classmethod + def to_list(cls) -> List[str]: + return list(map(lambda c: c.value, cls)) + + @staticmethod + def get(s: str) -> Optional["Operation"]: + try: + return Operation[s.upper()] + except KeyError: # Enum lookup by name raises KeyError for an unknown member name + return None + + def __str__(self): + return str(self.value) + + +@dataclass(frozen=True) +class BenchmarkRow: + name: str + datastructure: str + operation: Operation + real_time: int + cpu_time: int + iterations: int + time_unit: str + run_arg: Optional[Any] + + +class GoogleBenchmarkResult: + def __init__(self): + self._operation = None + self._datastructures: Dict[str, List[BenchmarkRow]] = dict() + + def add_result(self, row: BenchmarkRow) -> None: + if self._operation is None: + self._operation = row.operation + assert self._operation is row.operation + if row.datastructure not in self._datastructures: + self._datastructures[row.datastructure] = [row] + else: + self._datastructures[row.datastructure].append(row) + + @property + def operation(self) -> Optional[Operation]: + return self._operation + + @property + def datastructures(self) -> Dict[str, List[BenchmarkRow]]: + return self._datastructures + + +def get_operation(s: str) -> Operation: + for op in Operation.to_list(): + if op.lower() in s.lower(): + operation_enum = Operation.get(op) + if operation_enum is not None: + return operation_enum + else: + print("Operation not found!") + sys.exit(1) + print("Operation not found!") + sys.exit(1) + + 
+def get_row_data(line: Dict[str, Any]) -> BenchmarkRow: + """ + Naming is very important, first must come an Operation name, and then a data + structure to test. + """ + full_name = line["name"].split("BM_Benchmark")[1] + name_with_run_arg = full_name.split("/") + operation = get_operation(name_with_run_arg[0]) + datastructure = name_with_run_arg[0].split(operation.value.capitalize())[1] + + run_arg = None + if len(name_with_run_arg) > 1: + run_arg = name_with_run_arg[1] + + return BenchmarkRow( + name_with_run_arg[0], + datastructure, + operation, + line["real_time"], + line["cpu_time"], + line["iterations"], + line["time_unit"], + run_arg, + ) + + +def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]: + file_path = Path(args.log_file) + if not file_path.exists(): + print(f"Error file {file_path} not found!") + return None + with file_path.open("r") as file: + data = json.load(file) + res = GoogleBenchmarkResult() + assert "benchmarks" in data, "There must be a benchmark list inside" + for benchmark in data["benchmarks"]: + res.add_result(get_row_data(benchmark)) + return res + + +def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None: + colors = ["red", "green", "blue", "yellow", "purple", "brown"] + assert results.operation is not None + fig = plt.figure() + for ds, benchmarks in results.datastructures.items(): + if benchmarks: + # Print line chart + x_axis = [elem.real_time for elem in benchmarks] + y_axis = [elem.run_arg for elem in benchmarks] + plt.plot(x_axis, y_axis, marker="", color=colors.pop(0), linewidth="2", label=f"{ds}") + plt.title(f"Benchmark results for operation {results.operation.value}") + plt.xlabel(f"Time [{benchmarks[0].time_unit}]") + plt.legend() + else: + print(f"Nothing to do for {ds}...") + if save: + plt.savefig(f"{results.operation.value}.png") + plt.close(fig) + else: + plt.show() + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Process benchmark results.") + 
parser.add_argument("--log_file", type=str, required=True) + parser.add_argument("--save", type=lambda v: v.lower() not in ("0", "false", "no", "off", ""), default=True) + return parser.parse_args() + + +def main(): + args = parse_args() + res = get_benchmark_res(args) + if res is None: + print("Failed to get results from log file!") + sys.exit(1) + plot_operation(res, args.save) + + +if __name__ == "__main__": + main() diff --git a/tools/plot/benchmark_datastructures.sh b/tools/plot/benchmark_datastructures.sh new file mode 100755 index 000000000..450b38466 --- /dev/null +++ b/tools/plot/benchmark_datastructures.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -euox pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +WORKSPACE_DIR=${SCRIPT_DIR}/../../ + +BENCHMARK_FILES=$(find ${WORKSPACE_DIR}/tests/benchmark -type f -iname "data_structures_*") +echo $BENCHMARK_FILES +for bench_file in ${BENCHMARK_FILES}; do + base_name=$(basename $bench_file) + name=${base_name%%.*} + echo "Running $name" + ${WORKSPACE_DIR}/build/tests/benchmark/${name} --benchmark_format=json --benchmark_out=${name}_output.json + python3 ${WORKSPACE_DIR}/tools/plot/benchmark_datastructures.py --log_file=${name}_output.json +done