From 500691318a005c712ae4a3ca89e4b6e743d92bd4 Mon Sep 17 00:00:00 2001
From: jbajic <jure.bajic@memgraph.com>
Date: Mon, 28 Nov 2022 14:15:41 +0100
Subject: [PATCH] Add analyze script

---
 tests/benchmark/data_structures_contains.cpp |  10 +-
 tests/benchmark/data_structures_find.cpp     |   8 +-
 tests/benchmark/data_structures_insert.cpp   |   8 +-
 tests/benchmark/data_structures_remove.cpp   |   8 +-
 tools/plot/benchmark_datastructures.py       | 172 +++++++++++++++++++
 tools/plot/benchmark_datastructures.sh       |  16 ++
 6 files changed, 205 insertions(+), 17 deletions(-)
 create mode 100644 tools/plot/benchmark_datastructures.py
 create mode 100755 tools/plot/benchmark_datastructures.sh

diff --git a/tests/benchmark/data_structures_contains.cpp b/tests/benchmark/data_structures_contains.cpp
index 08596c7a7..bf1a74a8d 100644
--- a/tests/benchmark/data_structures_contains.cpp
+++ b/tests/benchmark/data_structures_contains.cpp
@@ -103,7 +103,7 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) {
   tlx::btree_map<storage::v3::PrimaryKey, storage::v3::LexicographicallyOrderedVertex> bpp_tree;
   PrepareData(bpp_tree, state.range(0));
 
-  // So we can also have elements that does don't exist
+  // So we can also have elements that don't exist
   std::mt19937 i_generator(std::random_device{}());
   std::uniform_int_distribution<int64_t> i_distribution(0, state.range(0) * 2);
   int64_t found_elems{0};
@@ -118,13 +118,13 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) {
   state.SetItemsProcessed(found_elems);
 }
 
-BENCHMARK(BM_BenchmarkContainsSkipList)->Arg(1000);
+BENCHMARK(BM_BenchmarkContainsSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkContainsStdMap)->Arg(1000);
+BENCHMARK(BM_BenchmarkContainsStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkContainsStdSet)->Arg(1000);
+BENCHMARK(BM_BenchmarkContainsStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkContainsBppTree)->Arg(1000);
+BENCHMARK(BM_BenchmarkContainsBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
 }  // namespace memgraph::benchmark
 
diff --git a/tests/benchmark/data_structures_find.cpp b/tests/benchmark/data_structures_find.cpp
index 042066d68..d3b3bdd60 100644
--- a/tests/benchmark/data_structures_find.cpp
+++ b/tests/benchmark/data_structures_find.cpp
@@ -117,13 +117,13 @@ static void BM_BenchmarkFindBppTree(::benchmark::State &state) {
   state.SetItemsProcessed(found_elems);
 }
 
-BENCHMARK(BM_BenchmarkFindSkipList)->Arg(1000);
+BENCHMARK(BM_BenchmarkFindSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkFindStdMap)->Arg(1000);
+BENCHMARK(BM_BenchmarkFindStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkFindStdSet)->Arg(1000);
+BENCHMARK(BM_BenchmarkFindStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkFindBppTree)->Arg(1000);
+BENCHMARK(BM_BenchmarkFindBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
 }  // namespace memgraph::benchmark
 
diff --git a/tests/benchmark/data_structures_insert.cpp b/tests/benchmark/data_structures_insert.cpp
index 4d427985b..8882b444e 100644
--- a/tests/benchmark/data_structures_insert.cpp
+++ b/tests/benchmark/data_structures_insert.cpp
@@ -99,13 +99,13 @@ static void BM_BenchmarkInsertBppTree(::benchmark::State &state) {
   }
 }
 
-BENCHMARK(BM_BenchmarkInsertSkipList)->Arg(1000);
+BENCHMARK(BM_BenchmarkInsertSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkInsertStdMap)->Arg(1000);
+BENCHMARK(BM_BenchmarkInsertStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkInsertStdSet)->Arg(1000);
+BENCHMARK(BM_BenchmarkInsertStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkInsertBppTree)->Arg(1000);
+BENCHMARK(BM_BenchmarkInsertBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
 }  // namespace memgraph::benchmark
 
diff --git a/tests/benchmark/data_structures_remove.cpp b/tests/benchmark/data_structures_remove.cpp
index 3fbd54a38..5296a9130 100644
--- a/tests/benchmark/data_structures_remove.cpp
+++ b/tests/benchmark/data_structures_remove.cpp
@@ -125,13 +125,13 @@ static void BM_BenchmarkRemoveBppTree(::benchmark::State &state) {
   state.SetItemsProcessed(removed_elems);
 }
 
-BENCHMARK(BM_BenchmarkRemoveSkipList)->Arg(1000);
+BENCHMARK(BM_BenchmarkRemoveSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkRemoveStdMap)->Arg(1000);
+BENCHMARK(BM_BenchmarkRemoveStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkRemoveStdSet)->Arg(1000);
+BENCHMARK(BM_BenchmarkRemoveStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
-BENCHMARK(BM_BenchmarkRemoveBppTree)->Arg(1000);
+BENCHMARK(BM_BenchmarkRemoveBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
 
 }  // namespace memgraph::benchmark
 
diff --git a/tools/plot/benchmark_datastructures.py b/tools/plot/benchmark_datastructures.py
new file mode 100644
index 000000000..29fa61be7
--- /dev/null
+++ b/tools/plot/benchmark_datastructures.py
@@ -0,0 +1,172 @@
+# Copyright 2022 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import argparse
+import json
+import sys
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import matplotlib.pyplot as plt
+
+
+class Operation(Enum):
+    """Operation kinds recognised in benchmark names; values are the lowercase spellings."""
+    CONTAINS = "contains"
+    FIND = "find"
+    INSERT = "insert"
+    RANDOM = "random"
+    REMOVE = "remove"
+
+    @classmethod
+    def to_list(cls) -> List[str]:
+        return [member.value for member in cls]
+
+    @staticmethod
+    def get(s: str) -> Optional["Operation"]:
+        # Enum lookup by name raises KeyError (not ValueError) for unknown names,
+        # so consult the members mapping to really return None in that case.
+        return Operation.__members__.get(s.upper())
+
+    def __str__(self):
+        return str(self.value)
+
+
+@dataclass(frozen=True)
+class BenchmarkRow:
+    name: str  # benchmark name without the "BM_Benchmark" prefix and the "/<arg>" suffix
+    datastructure: str  # part of the name that follows the capitalized operation
+    operation: Operation
+    real_time: int  # JSON "real_time" field; NOTE(review): value may be a float - confirm
+    cpu_time: int  # JSON "cpu_time" field
+    iterations: int
+    time_unit: str
+    run_arg: Optional[Any]  # text after the first "/" of the benchmark name, None if absent
+
+
+class GoogleBenchmarkResult:
+    """Benchmark rows of a single operation, grouped by data structure name."""
+
+    def __init__(self):
+        self._operation = None
+        self._datastructures: Dict[str, List[BenchmarkRow]] = {}
+
+    def add_result(self, row: BenchmarkRow) -> None:
+        # The first row fixes the operation; every later row must carry the same one.
+        if self._operation is None:
+            self._operation = row.operation
+        assert self._operation is row.operation
+        self._datastructures.setdefault(row.datastructure, []).append(row)
+
+    @property
+    def operation(self) -> Optional[Operation]:
+        return self._operation
+
+    @property
+    def datastructures(self) -> Dict[str, List[BenchmarkRow]]:
+        return self._datastructures
+
+
+def get_operation(s: str) -> Operation:
+    """
+    Return the first Operation whose value occurs (case-insensitively) in s.
+    Prints an error and exits with status 1 when no operation matches.
+    """
+    lowered = s.lower()
+    for candidate in Operation:
+        if candidate.value.lower() in lowered:
+            return candidate
+    print("Operation not found!")
+    sys.exit(1)
+
+
+def get_row_data(line: Dict[str, Any]) -> BenchmarkRow:
+    """
+    Build a BenchmarkRow from one benchmark entry. The name must look like
+    BM_Benchmark<Operation><DataStructure>[/<run_arg>], e.g. BM_BenchmarkFindStdMap/1000.
+    """
+    full_name = line["name"].split("BM_Benchmark")[1]  # IndexError if the prefix is missing
+    name_with_run_arg = full_name.split("/")
+    operation = get_operation(name_with_run_arg[0])
+    datastructure = name_with_run_arg[0].split(operation.value.capitalize())[1]  # text after e.g. "Find"
+
+    run_arg = None
+    if len(name_with_run_arg) > 1:
+        run_arg = name_with_run_arg[1]  # kept as a string, not converted to a number
+
+    return BenchmarkRow(
+        name_with_run_arg[0],
+        datastructure,
+        operation,
+        line["real_time"],
+        line["cpu_time"],
+        line["iterations"],
+        line["time_unit"],
+        run_arg,
+    )
+
+
+def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]:
+    """Load the benchmark JSON named by args.log_file; return None when the file does not exist."""
+    file_path = Path(args.log_file)
+    if not file_path.exists():
+        print(f"Error file {file_path} not found!")  # f-prefix was missing, the placeholder was printed verbatim
+        return None
+    data = json.loads(file_path.read_text())
+    assert "benchmarks" in data, "There must be a benchmark list inside"
+    res = GoogleBenchmarkResult()
+    for benchmark in data["benchmarks"]:
+        res.add_result(get_row_data(benchmark))
+    return res
+
+
+def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None:
+    """Draw one line per data structure (time on x, run argument on y), then save a PNG or show it."""
+    colors = ["red", "green", "blue", "yellow", "purple", "brown"]
+    assert results.operation is not None
+    fig = plt.figure()
+    for ds, benchmarks in results.datastructures.items():
+        if benchmarks:
+            colors.append(colors.pop(0))  # rotate: a 7th series reuses a color instead of raising IndexError
+            x_axis, y_axis = [row.real_time for row in benchmarks], [row.run_arg for row in benchmarks]
+            plt.plot(x_axis, y_axis, marker="", color=colors[-1], linewidth="2", label=f"{ds}")
+            plt.title(f"Benchmark results for operation {results.operation.value}")
+            plt.xlabel(f"Time [{benchmarks[0].time_unit}]")
+            plt.legend()
+        else:
+            print(f"Nothing to do for {ds}...")
+    if save:
+        plt.savefig(f"{results.operation.value}.png")
+        plt.close(fig)
+    else:
+        plt.show()
+
+
+def parse_args() -> argparse.Namespace:  # type=bool turned every non-empty --save value, even "False", into True
+    parser = argparse.ArgumentParser(description="Process benchmark results.")
+    parser.add_argument("--log_file", "--log-file", dest="log_file", type=str)  # hyphen alias used by the .sh wrapper
+    parser.add_argument("--save", type=lambda v: v.lower() not in ("", "0", "false", "no"), default=True)
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    res = get_benchmark_res(args)
+    if res is None:  # the log file was not found
+        print("Failed to get results from log file!")
+        sys.exit(1)
+    plot_operation(res, args.save)  # saves a PNG when args.save is truthy, otherwise shows the plot
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/plot/benchmark_datastructures.sh b/tools/plot/benchmark_datastructures.sh
new file mode 100755
index 000000000..450b38466
--- /dev/null
+++ b/tools/plot/benchmark_datastructures.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -euox pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+WORKSPACE_DIR="${SCRIPT_DIR}/../../"
+# Quote the pattern so the shell cannot expand it against *_output.json files left in the current directory.
+BENCHMARK_FILES=$(find "${WORKSPACE_DIR}/tests/benchmark" -type f -iname 'data_structures_*')
+echo $BENCHMARK_FILES
+for bench_file in ${BENCHMARK_FILES}; do
+    base_name=$(basename "$bench_file")
+    name=${base_name%%.*}
+    echo "Running $name"  # must follow the assignment: with `set -u` an unset $name aborts the script
+    "${WORKSPACE_DIR}/build/tests/benchmark/${name}" --benchmark_format=json --benchmark_out="${name}_output.json"
+    python3 "${WORKSPACE_DIR}/tools/plot/benchmark_datastructures.py" --log_file="${name}_output.json"
+done