Add analyze script

This commit is contained in:
jbajic 2022-11-28 14:15:41 +01:00
parent 36a7abb170
commit 500691318a
6 changed files with 205 additions and 17 deletions

View File

@ -103,7 +103,7 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) {
tlx::btree_map<storage::v3::PrimaryKey, storage::v3::LexicographicallyOrderedVertex> bpp_tree;
PrepareData(bpp_tree, state.range(0));
// So we can also have elements that does don't exist
// So we can also have elements that don't exist
std::mt19937 i_generator(std::random_device{}());
std::uniform_int_distribution<int64_t> i_distribution(0, state.range(0) * 2);
int64_t found_elems{0};
@ -118,13 +118,13 @@ static void BM_BenchmarkContainsBppTree(::benchmark::State &state) {
state.SetItemsProcessed(found_elems);
}
BENCHMARK(BM_BenchmarkContainsSkipList)->Arg(1000);
BENCHMARK(BM_BenchmarkContainsSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkContainsStdMap)->Arg(1000);
BENCHMARK(BM_BenchmarkContainsStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkContainsStdSet)->Arg(1000);
BENCHMARK(BM_BenchmarkContainsStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkContainsBppTree)->Arg(1000);
BENCHMARK(BM_BenchmarkContainsBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
} // namespace memgraph::benchmark

View File

@ -117,13 +117,13 @@ static void BM_BenchmarkFindBppTree(::benchmark::State &state) {
state.SetItemsProcessed(found_elems);
}
BENCHMARK(BM_BenchmarkFindSkipList)->Arg(1000);
BENCHMARK(BM_BenchmarkFindSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkFindStdMap)->Arg(1000);
BENCHMARK(BM_BenchmarkFindStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkFindStdSet)->Arg(1000);
BENCHMARK(BM_BenchmarkFindStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkFindBppTree)->Arg(1000);
BENCHMARK(BM_BenchmarkFindBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
} // namespace memgraph::benchmark

View File

@ -99,13 +99,13 @@ static void BM_BenchmarkInsertBppTree(::benchmark::State &state) {
}
}
BENCHMARK(BM_BenchmarkInsertSkipList)->Arg(1000);
BENCHMARK(BM_BenchmarkInsertSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkInsertStdMap)->Arg(1000);
BENCHMARK(BM_BenchmarkInsertStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkInsertStdSet)->Arg(1000);
BENCHMARK(BM_BenchmarkInsertStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkInsertBppTree)->Arg(1000);
BENCHMARK(BM_BenchmarkInsertBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
} // namespace memgraph::benchmark

View File

@ -125,13 +125,13 @@ static void BM_BenchmarkRemoveBppTree(::benchmark::State &state) {
state.SetItemsProcessed(removed_elems);
}
BENCHMARK(BM_BenchmarkRemoveSkipList)->Arg(1000);
BENCHMARK(BM_BenchmarkRemoveSkipList)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkRemoveStdMap)->Arg(1000);
BENCHMARK(BM_BenchmarkRemoveStdMap)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkRemoveStdSet)->Arg(1000);
BENCHMARK(BM_BenchmarkRemoveStdSet)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
BENCHMARK(BM_BenchmarkRemoveBppTree)->Arg(1000);
BENCHMARK(BM_BenchmarkRemoveBppTree)->RangeMultiplier(10)->Range(1000, 1000000)->Unit(::benchmark::kMillisecond);
} // namespace memgraph::benchmark

View File

@ -0,0 +1,172 @@
# Copyright 2022 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import argparse
import json
import sys
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional
import matplotlib.pyplot as plt
class Operation(Enum):
    """Kinds of benchmarked operations recognised in benchmark names."""

    CONTAINS = "contains"
    FIND = "find"
    INSERT = "insert"
    RANDOM = "random"
    REMOVE = "remove"

    @classmethod
    def to_list(cls) -> List[str]:
        """Return the string values of all members, in definition order."""
        return [member.value for member in cls]

    @staticmethod
    def get(s: str) -> Optional["Operation"]:
        """Return the member whose name equals ``s`` upper-cased, or None.

        The lookup is by member *name* (e.g. ``"find"`` -> ``Operation.FIND``).
        """
        try:
            return Operation[s.upper()]
        except KeyError:
            # Subscript lookup on an Enum raises KeyError (not ValueError)
            # when no member has the requested name.
            return None

    def __str__(self):
        """Render the member as its plain string value."""
        return str(self.value)
@dataclass(frozen=True)
class BenchmarkRow:
    """Immutable record describing one entry of a benchmark result list."""

    # Benchmark name with the "BM_Benchmark" prefix and the "/<arg>" suffix removed.
    name: str
    # Part of the name that follows the operation, e.g. "StdMap".
    datastructure: str
    # Operation detected in the benchmark name.
    operation: Operation
    # Timings are copied verbatim from the JSON entry; Google Benchmark
    # reports them as floating-point numbers, hence float rather than int.
    real_time: float
    cpu_time: float
    iterations: int
    # Unit of real_time/cpu_time as reported in the JSON entry.
    time_unit: str
    # Text after "/" in the benchmark name, or None when there is no "/".
    run_arg: Optional[Any]
class GoogleBenchmarkResult:
    """Accumulates benchmark rows of one operation, grouped by data structure."""

    def __init__(self):
        # Operation shared by every stored row; taken from the first row added.
        self._operation = None
        # Rows keyed by data structure name, in insertion order.
        self._datastructures: Dict[str, List[BenchmarkRow]] = {}

    def add_result(self, row: BenchmarkRow) -> None:
        """Store ``row`` under its data structure.

        The first row fixes the operation of this result; every later row is
        asserted to carry that same operation.
        """
        if self._operation is None:
            self._operation = row.operation
        assert self._operation is row.operation
        self._datastructures.setdefault(row.datastructure, []).append(row)

    @property
    def operation(self) -> Optional[Operation]:
        """Operation of the stored rows, or None while no row has been added."""
        return self._operation

    @property
    def datastructures(self) -> Dict[str, List[BenchmarkRow]]:
        """Mapping from data structure name to the rows recorded for it."""
        return self._datastructures
def get_operation(s: str) -> Operation:
    """Return the first Operation whose value occurs in ``s`` (case-insensitive).

    Prints "Operation not found!" and exits the process with status 1 when no
    operation value is contained in ``s`` or the matching value cannot be
    resolved to an enum member.
    """
    haystack = s.lower()
    for candidate in Operation.to_list():
        if candidate.lower() not in haystack:
            continue
        resolved = Operation.get(candidate)
        if resolved is None:
            # Same outcome as running out of candidates: report and exit.
            break
        return resolved
    print("Operation not found!")
    sys.exit(1)
def get_row_data(line: Dict[str, Any]) -> BenchmarkRow:
    """Build a BenchmarkRow from one entry of the benchmark result list.

    The entry's "name" is expected to look like
    ``BM_Benchmark<Operation><DataStructure>[/<run_arg>]``: the operation name
    must come first, followed by the data structure under test.
    """
    after_prefix = line["name"].split("BM_Benchmark")[1]
    pieces = after_prefix.split("/")
    bench_name = pieces[0]
    operation = get_operation(bench_name)
    # Whatever follows the capitalised operation name is the data structure.
    datastructure = bench_name.split(operation.value.capitalize())[1]
    # The run argument is optional; it is the text after the first "/".
    run_arg = pieces[1] if len(pieces) > 1 else None
    return BenchmarkRow(
        name=bench_name,
        datastructure=datastructure,
        operation=operation,
        real_time=line["real_time"],
        cpu_time=line["cpu_time"],
        iterations=line["iterations"],
        time_unit=line["time_unit"],
        run_arg=run_arg,
    )
def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]:
    """Load the JSON log named by ``args.log_file`` into a GoogleBenchmarkResult.

    Args:
        args: Parsed command-line arguments; only ``log_file`` is read.

    Returns:
        The collected result, or None (after printing an error) when the log
        file does not exist.

    Raises:
        AssertionError: If the JSON document has no "benchmarks" key.
    """
    file_path = Path(args.log_file)
    if not file_path.exists():
        # f-string so the message names the missing path instead of printing
        # the literal "{file_path}" placeholder.
        print(f"Error file {file_path} not found!")
        return None
    with file_path.open("r") as file:
        data = json.load(file)
    res = GoogleBenchmarkResult()
    assert "benchmarks" in data, "There must be a benchmark list inside"
    for benchmark in data["benchmarks"]:
        res.add_result(get_row_data(benchmark))
    return res
def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None:
    """Draw one line per data structure for the operation held in ``results``.

    Each line plots the rows' ``real_time`` on the x axis against their
    ``run_arg`` on the y axis.

    Args:
        results: Benchmark rows grouped by data structure; its operation must
            be set (asserted).
        save: When true, write the figure to ``<operation>.png`` and close it;
            otherwise show it interactively.
    """
    from itertools import cycle

    # Cycle through the palette so that more than six data structures reuse
    # colours instead of failing on an exhausted list.
    colors = cycle(["red", "green", "blue", "yellow", "purple", "brown"])
    assert results.operation is not None
    fig = plt.figure()
    for ds, benchmarks in results.datastructures.items():
        if not benchmarks:
            print(f"Nothing to do for {ds}...")
            continue
        # Print line chart
        x_axis = [elem.real_time for elem in benchmarks]
        y_axis = [elem.run_arg for elem in benchmarks]
        plt.plot(x_axis, y_axis, marker="", color=next(colors), linewidth=2, label=f"{ds}")
        plt.title(f"Benchmark results for operation {results.operation.value}")
        plt.xlabel(f"Time [{benchmarks[0].time_unit}]")
        plt.legend()
    if save:
        plt.savefig(f"{results.operation.value}.png")
        plt.close(fig)
    else:
        plt.show()
def _str_to_bool(value: str) -> bool:
    """Convert a command-line token such as "true" or "false" to a bool."""
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string stays falsy, as it was with the previous ``type=bool``.
    if normalized in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args() -> argparse.Namespace:
    """Parse the command line of the benchmark plotting script.

    Returns:
        Namespace with ``log_file`` (path of the benchmark JSON output) and
        ``save`` (bool, defaults to True).
    """
    parser = argparse.ArgumentParser(description="Process benchmark results.")
    # Accept both spellings; the value is always stored as ``log_file``.
    parser.add_argument("--log_file", "--log-file", dest="log_file", type=str, required=True)
    # ``type=bool`` would turn every non-empty string (even "False") into
    # True, so the value is parsed explicitly.
    parser.add_argument("--save", type=_str_to_bool, default=True)
    return parser.parse_args()
def main():
    """Parse arguments, load the benchmark log and plot its results.

    Exits with status 1 when the results cannot be loaded.
    """
    cli_args = parse_args()
    results = get_benchmark_res(cli_args)
    if results is not None:
        plot_operation(results, cli_args.save)
        return
    print("Failed to get results from log file!")
    sys.exit(1)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,16 @@
#!/bin/bash
# Runs every data-structure benchmark binary and plots its JSON output.
set -euox pipefail

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
WORKSPACE_DIR=${SCRIPT_DIR}/../../

# Quote the pattern so the shell cannot expand it against the current
# directory before find sees it.
BENCHMARK_FILES=$(find "${WORKSPACE_DIR}/tests/benchmark" -type f -iname 'data_structures_*')
echo $BENCHMARK_FILES
for bench_file in ${BENCHMARK_FILES}; do
  base_name=$(basename "${bench_file}")
  # Drop everything from the first dot to get the benchmark binary name.
  name=${base_name%%.*}
  # Announce only after name is assigned; with `set -u` an earlier reference
  # aborts the script on the first iteration.
  echo "Running ${name}"
  "${WORKSPACE_DIR}/build/tests/benchmark/${name}" --benchmark_format=json --benchmark_out="${name}_output.json"
  # The plotting script declares this option as --log_file (underscore).
  python3 "${WORKSPACE_DIR}/tools/plot/benchmark_datastructures.py" --log_file="${name}_output.json"
done