#!/usr/bin/env python3

# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
|
|
|
|
|
2022-11-28 15:47:22 +08:00
|
|
|
import argparse
|
|
|
|
import json
|
|
|
|
import subprocess
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
def parse_arguments():
    """Parse the command-line options of the benchmark runner.

    The options are read from ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed options. ``vendor`` is a list with one
        ``[vendor_name, vendor_binary]`` pair per ``--vendor`` occurrence.
        ``realistic`` and ``mixed`` are lists of string lists (one inner list
        per occurrence of the flag) or ``None`` when the flag is not given.
    """
    parser = argparse.ArgumentParser(
        description="Run graph database benchmarks on supported databases(Memgraph and Neo4j)",
    )
    parser.add_argument(
        "--vendor",
        nargs=2,
        action="append",
        # Without at least one vendor there is nothing to run; let argparse
        # report that instead of handing ``None`` to the caller.
        required=True,
        metavar=("vendor_name", "vendor_binary"),
        # The two literals are concatenated, so the first one must end with a
        # separator to keep the rendered help readable.
        help="Forward name and paths to vendors binary. "
        "Example: --vendor memgraph /path/to/binary --vendor neo4j /path/to/binary",
    )

    parser.add_argument(
        "--dataset-name",
        default="",
        help="Dataset name you wish to execute",
    )

    parser.add_argument(
        "--dataset-size",
        default="",
        help="Pick a dataset variant you wish to execute",
    )

    parser.add_argument("--dataset-group", default="", help="Select a group of queries")

    parser.add_argument(
        "--realistic",
        nargs=5,
        action="append",
        metavar=("num_of_queries", "write", "read", "update", "analytical"),
        help="Forward config for group run",
    )

    parser.add_argument(
        "--mixed",
        nargs=6,
        action="append",
        metavar=(
            "num_of_queries",
            "write",
            "read",
            "update",
            "analytical",
            "query_percentage",
        ),
        help="Forward config for query",
    )

    parser.add_argument(
        "--num-workers-for-benchmark",
        type=int,
        default=12,
        help="number of workers used to execute the benchmark",
    )

    parser.add_argument(
        "--query-count-lower-bound",
        type=int,
        default=300,
        help="Lower bound for the query count (works only for isolated run)",
    )

    parser.add_argument(
        "--single-threaded-runtime-sec",
        type=int,
        default=30,
        help="Duration of single threaded benchmark per query (works only for isolated run)",
    )

    return parser.parse_args()
|
|
|
|
|
|
|
|
|
2023-04-19 14:21:55 +08:00
|
|
|
def run_full_benchmarks(
    vendor,
    binary,
    dataset,
    dataset_size,
    dataset_group,
    realistic,
    mixed,
    workers,
    query_count_lower_bound,
    single_threaded_runtime_sec,
):
    """Run ``benchmark.py`` once per benchmark configuration for one vendor.

    Three isolated runs (cold, hot, vulcanic) are always executed. For every
    entry of ``realistic`` or ``mixed`` an additional cold and hot run is
    executed. Each run exports its results to a JSON file whose name starts
    with ``<vendor>_<workers>_<dataset>_<dataset_size>``.

    Args:
        vendor: vendor name forwarded as ``--vendor-name``.
        binary: vendor binary path forwarded as ``--vendor-binary``.
        dataset: dataset name.
        dataset_size: dataset variant.
        dataset_group: query group of the dataset.
        realistic: iterable of 5-item sequences
            ``(count, write, read, update, analytical)`` or a falsy value.
        mixed: iterable of 6-item sequences
            ``(count, write, read, update, analytical, query)`` or a falsy value.
        workers: forwarded as ``--num-workers-for-benchmark``.
        query_count_lower_bound: forwarded as ``--query-count-lower-bound``.
        single_threaded_runtime_sec: forwarded as ``--single-threaded-runtime-sec``.

    Raises:
        AssertionError: if both ``realistic`` and ``mixed`` are given.
        subprocess.CalledProcessError: if a benchmark run exits with non-zero status.
    """
    # Every exported file of this invocation shares the same name prefix.
    prefix = "_".join([vendor, str(workers), dataset, dataset_size])

    configurations = [
        # Basic isolated test cold
        ["--export-results", prefix + "_cold_isolated.json"],
        # Basic isolated test hot
        ["--export-results", prefix + "_hot_isolated.json", "--warm-up", "hot"],
        # Basic isolated test vulcanic
        ["--export-results", prefix + "_vulcanic_isolated.json", "--warm-up", "vulcanic"],
    ]

    assert not (realistic and mixed), "Cannot run both realistic and mixed workload, please select one!"

    # Configurations for full workload
    for count, write, read, update, analytical in realistic or ():
        workload = ["--workload-realistic", count, write, read, update, analytical]
        cold_name = prefix + "_cold_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical)
        hot_name = prefix + "_hot_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical)
        configurations.append(["--export-results", cold_name] + workload)
        configurations.append(["--export-results", hot_name, "--warm-up", "hot"] + workload)

    # Configurations for workload per query
    for count, write, read, update, analytical, query in mixed or ():
        workload = ["--workload-mixed", count, write, read, update, analytical, query]
        cold_name = prefix + "_cold_mixed_{}_{}_{}_{}_{}_{}.json".format(
            count, write, read, update, analytical, query
        )
        hot_name = prefix + "_hot_mixed_{}_{}_{}_{}_{}_{}.json".format(
            count, write, read, update, analytical, query
        )
        configurations.append(["--export-results", cold_name] + workload)
        configurations.append(["--export-results", hot_name, "--warm-up", "hot"] + workload)

    # Arguments shared by every run; the last item selects the queries to run.
    base_command = [
        "python3",
        "benchmark.py",
        "vendor-native",
        "--vendor-binary",
        binary,
        "--vendor-name",
        vendor,
        "--num-workers-for-benchmark",
        str(workers),
        "--single-threaded-runtime-sec",
        str(single_threaded_runtime_sec),
        "--query-count-lower-bound",
        str(query_count_lower_bound),
        "--no-authorization",
        "/".join([dataset, dataset_size, dataset_group, "*"]),
    ]

    for extra_args in configurations:
        command = base_command + extra_args
        print(command)
        subprocess.run(args=command, check=True)
|
|
|
|
|
|
|
|
|
2023-04-19 14:21:55 +08:00
|
|
|
def collect_all_results(vendor_name, dataset, dataset_size, dataset_group, workers):
    """Merge the result files of one vendor run into a single summary file.

    Every ``<vendor_name>_<workers>_<dataset>_<dataset_size>_*.json`` file in
    the current working directory is read, except files whose name contains
    ``summary``. For runs whose ``__run_configuration__.condition`` is
    ``hot``, ``cold`` or ``vulcanic``, each entry found under
    ``data[dataset][dataset_size][dataset_group]`` is copied into the summary
    under ``<key>_<condition>``. Files with any other condition are ignored.
    Files are processed in sorted order, so for an identical key and
    condition the later file wins.

    The summary is printed and written to
    ``<vendor_name>_<workers>_<dataset>_<dataset_size>_summary.json``.

    Args:
        vendor_name: vendor prefix of the result files.
        dataset: dataset name used in file names and as top-level result key.
        dataset_size: dataset variant used in file names and as second-level key.
        dataset_group: query group used as third-level result key.
        workers: worker count used in the file names.
    """
    working_directory = Path().absolute()
    print(working_directory)

    prefix = vendor_name + "_" + str(workers) + "_" + dataset + "_" + dataset_size
    results = sorted(working_directory.glob(prefix + "_*.json"))

    group_summary = {}
    summary = {dataset: {dataset_size: {dataset_group: group_summary}}}

    for file in results:
        # A summary written by a previous call matches the glob as well.
        if "summary" in file.name:
            continue

        # Close each result file as soon as it has been parsed.
        with file.open() as f:
            data = json.load(f)

        condition = data["__run_configuration__"]["condition"]
        if condition not in ("hot", "cold", "vulcanic"):
            continue

        for key, value in data[dataset][dataset_size][dataset_group].items():
            group_summary[key + "_" + condition] = value

    print(summary)
    print(json.dumps(summary, indent=4))

    with open(prefix + "_summary.json", "w") as f:
        json.dump(summary, f)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the full benchmark suite for every requested
    # vendor and collect the exported results into one summary per vendor.
    args = parse_arguments()

    realistic = args.realistic
    mixed = args.mixed

    # args.vendor can be None when --vendor was never passed; fail with a
    # clear message instead of a TypeError from iterating None.
    if not args.vendor:
        raise Exception("No vendor selected, pass at least one: --vendor <vendor_name> <vendor_binary>")

    vendor_names = {"memgraph", "neo4j"}
    for vendor_name, vendor_binary in args.vendor:
        path = Path(vendor_binary)
        # Reject unsupported vendors and binary paths that are not files
        # before starting any run for this vendor.
        if vendor_name.lower() not in vendor_names or not path.is_file():
            raise Exception(
                "Check that vendor: {} is supported and you are passing right path: {} to binary.".format(
                    vendor_name, path
                )
            )

        run_full_benchmarks(
            vendor_name,
            vendor_binary,
            args.dataset_name,
            args.dataset_size,
            args.dataset_group,
            realistic,
            mixed,
            args.num_workers_for_benchmark,
            args.query_count_lower_bound,
            args.single_threaded_runtime_sec,
        )
        collect_all_results(
            vendor_name, args.dataset_name, args.dataset_size, args.dataset_group, args.num_workers_for_benchmark
        )
|