memgraph/tests/mgbench/graph_bench.py
2023-09-22 19:05:16 +02:00

309 lines
9.3 KiB
Python

#!/usr/bin/env python3
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import argparse
import json
import subprocess
from pathlib import Path
def parse_arguments():
    """Parse and return command-line arguments for the benchmark runner.

    Returns:
        argparse.Namespace with the parsed options. `--vendor`, `--realistic`
        and `--mixed` use action="append", so they are either None or a list
        of string lists.
    """
    parser = argparse.ArgumentParser(
        description="Run graph database benchmarks on supported databases(Memgraph and Neo4j)",
    )
    parser.add_argument(
        "--vendor",
        nargs=2,
        action="append",
        metavar=("vendor_name", "vendor_binary"),
        help="Forward name and paths to vendors binary"
        "Example: --vendor memgraph /path/to/binary --vendor neo4j /path/to/binary",
    )
    parser.add_argument(
        "--dataset-name",
        default="",
        help="Dataset name you wish to execute",
    )
    parser.add_argument(
        "--dataset-size",
        default="",
        help="Pick a dataset variant you wish to execute",
    )
    parser.add_argument("--dataset-group", default="", help="Select a group of queries")
    parser.add_argument(
        "--realistic",
        nargs=5,
        action="append",
        metavar=("num_of_queries", "write", "read", "update", "analytical"),
        help="Forward config for group run",
    )
    parser.add_argument(
        "--mixed",
        nargs=6,
        action="append",
        metavar=(
            "num_of_queries",
            "write",
            "read",
            "update",
            "analytical",
            "query_percentage",
        ),
        help="Forward config for query",
    )
    parser.add_argument(
        "--num-workers-for-benchmark",
        type=int,
        default=12,
        help="number of workers used to execute the benchmark",
    )
    parser.add_argument(
        "--query-count-lower-bound",
        type=int,
        default=300,
        # BUG FIX: help text was copy-pasted from --num-workers-for-benchmark
        # and wrongly described this option as a worker count.
        help="Lower bound on the number of queries executed per benchmark query (works only for isolated run)",
    )
    parser.add_argument(
        "--single-threaded-runtime-sec",
        type=int,
        default=30,
        help="Duration of single threaded benchmark per query (works only for isolated run)",
    )
    return parser.parse_args()
def _result_file_name(vendor, workers, dataset, dataset_size, suffix):
    """Build the exported-results file name shared by every configuration."""
    return f"{vendor}_{workers}_{dataset}_{dataset_size}_{suffix}"


def run_full_benchmarks(
    vendor,
    binary,
    dataset,
    dataset_size,
    dataset_group,
    realistic,
    mixed,
    workers,
    query_count_lower_bound,
    single_threaded_runtime_sec,
):
    """Invoke benchmark.py once per benchmark configuration.

    Always runs the three isolated configurations (cold, hot, vulcanic
    warm-up); additionally runs one cold and one hot configuration per
    entry of either `realistic` or `mixed`.

    Args:
        vendor: Vendor name forwarded to benchmark.py (e.g. "memgraph").
        binary: Path to the vendor binary.
        dataset, dataset_size, dataset_group: Select the query workload.
        realistic: Optional list of (count, write, read, update, analytical)
            tuples; mutually exclusive with `mixed`.
        mixed: Optional list of (count, write, read, update, analytical,
            query_percentage) tuples; mutually exclusive with `realistic`.
        workers: Number of benchmark workers.
        query_count_lower_bound: Minimum query count (isolated runs only).
        single_threaded_runtime_sec: Per-query single-threaded duration.

    Raises:
        AssertionError: If both `realistic` and `mixed` are supplied.
        subprocess.CalledProcessError: If any benchmark invocation fails
            (check=True).
    """
    # Isolated runs are always executed, once per warm-up condition.
    configurations = [
        # Basic isolated test cold
        [
            "--export-results",
            _result_file_name(vendor, workers, dataset, dataset_size, "cold_isolated.json"),
        ],
        # Basic isolated test hot
        [
            "--export-results",
            _result_file_name(vendor, workers, dataset, dataset_size, "hot_isolated.json"),
            "--warm-up",
            "hot",
        ],
        # Basic isolated test vulcanic
        [
            "--export-results",
            _result_file_name(vendor, workers, dataset, dataset_size, "vulcanic_isolated.json"),
            "--warm-up",
            "vulcanic",
        ],
    ]

    assert not realistic or not mixed, "Cannot run both realistic and mixed workload, please select one!"

    if realistic:
        # Configurations for the full (realistic) workload: one cold run
        # followed by one hot run per workload mix.
        for count, write, read, update, analytical in realistic:
            for condition in ("cold", "hot"):
                suffix = "{}_realistic_{}_{}_{}_{}_{}.json".format(
                    condition, count, write, read, update, analytical
                )
                config = [
                    "--export-results",
                    _result_file_name(vendor, workers, dataset, dataset_size, suffix),
                ]
                if condition == "hot":
                    config += ["--warm-up", "hot"]
                config += ["--workload-realistic", count, write, read, update, analytical]
                configurations.append(config)

    if mixed:
        # Configurations for the per-query (mixed) workload: one cold run
        # followed by one hot run per workload mix.
        for count, write, read, update, analytical, query in mixed:
            for condition in ("cold", "hot"):
                suffix = "{}_mixed_{}_{}_{}_{}_{}_{}.json".format(
                    condition, count, write, read, update, analytical, query
                )
                config = [
                    "--export-results",
                    _result_file_name(vendor, workers, dataset, dataset_size, suffix),
                ]
                if condition == "hot":
                    config += ["--warm-up", "hot"]
                config += ["--workload-mixed", count, write, read, update, analytical, query]
                configurations.append(config)

    # Arguments common to every benchmark.py invocation.
    default_args = [
        "python3",
        "benchmark.py",
        "vendor-native",
        "--vendor-binary",
        binary,
        "--vendor-name",
        vendor,
        "--num-workers-for-benchmark",
        str(workers),
        "--single-threaded-runtime-sec",
        str(single_threaded_runtime_sec),
        "--query-count-lower-bound",
        str(query_count_lower_bound),
        "--no-authorization",
        f"{dataset}/{dataset_size}/{dataset_group}/*",
    ]

    for config in configurations:
        full_config = default_args + config
        print(full_config)
        subprocess.run(args=full_config, check=True)
def collect_all_results(vendor_name, dataset, dataset_size, dataset_group, workers):
    """Merge per-configuration result files into a single *_summary.json.

    Globs the current working directory for the JSON files produced by
    run_full_benchmarks, suffixes each query key with its warm-up condition
    ("cold", "hot" or "vulcanic"), and writes the merged summary next to the
    inputs. Files with "summary" in their name are skipped so reruns do not
    fold a previous summary back in.

    Args:
        vendor_name: Vendor whose result files should be collected.
        dataset, dataset_size, dataset_group: Workload selectors; must match
            the values used when the result files were produced.
        workers: Worker count used in the result file names.
    """
    working_directory = Path().absolute()
    print(working_directory)
    results = sorted(
        working_directory.glob(f"{vendor_name}_{workers}_{dataset}_{dataset_size}_*.json")
    )
    summary = {dataset: {dataset_size: {dataset_group: {}}}}
    known_conditions = ("hot", "cold", "vulcanic")
    for file in results:
        if "summary" in file.name:
            continue
        # BUG FIX: the file handle was previously opened and never closed;
        # use a context manager so it is released deterministically.
        with file.open() as f:
            data = json.load(f)
        condition = data["__run_configuration__"]["condition"]
        # Unknown conditions are ignored, matching the original branch chain.
        if condition not in known_conditions:
            continue
        for key, value in data[dataset][dataset_size][dataset_group].items():
            summary[dataset][dataset_size][dataset_group][key + "_" + condition] = value
    print(summary)
    json_object = json.dumps(summary, indent=4)
    print(json_object)
    summary_name = f"{vendor_name}_{workers}_{dataset}_{dataset_size}_summary.json"
    with open(summary_name, "w") as f:
        json.dump(summary, f)
if __name__ == "__main__":
    args = parse_arguments()
    supported_vendors = {"memgraph", "neo4j"}
    for vendor_name, vendor_binary in args.vendor:
        binary_path = Path(vendor_binary)
        # Guard clause: reject unsupported vendors or missing binaries before
        # any benchmark work is started.
        if vendor_name.lower() not in supported_vendors or not binary_path.is_file():
            raise Exception(
                "Check that vendor: {} is supported and you are passing right path: {} to binary.".format(
                    vendor_name, binary_path
                )
            )
        run_full_benchmarks(
            vendor_name,
            vendor_binary,
            args.dataset_name,
            args.dataset_size,
            args.dataset_group,
            args.realistic,
            args.mixed,
            args.num_workers_for_benchmark,
            args.query_count_lower_bound,
            args.single_threaded_runtime_sec,
        )
        collect_all_results(
            vendor_name,
            args.dataset_name,
            args.dataset_size,
            args.dataset_group,
            args.num_workers_for_benchmark,
        )