memgraph/tests/macro_benchmark/harness.py

289 lines
11 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import os
import time
import itertools
import json
import subprocess
from argparse import ArgumentParser
from collections import OrderedDict
from common import get_absolute_path
from query_suite import QuerySuite, QueryParallelSuite
from long_running_suite import LongRunningSuite
log = logging.getLogger(__name__)
# The `jail` module is only importable inside the Apollo environment; fall
# back to the local stand-in when it is missing. Only ImportError is handled
# so that Ctrl-C or unrelated failures during import are not silently masked.
try:
    import jail
    APOLLO = True
except ImportError:
    import jail_faker as jail
    APOLLO = False
class Loader:
    """
    Loads file contents. Supported types are:
        .py - executable that prints out Cypher queries
        .cypher - contains Cypher queries in textual form
        .json - contains a configuration

    A Loader object is callable.
    A call to it returns an iterator that yields loaded data
    (Cypher queries, configuration). In that sense one
    Loader is reusable. The iterator approach makes it possible
    to generate different queries each time when executing a .py file.
    """

    def __init__(self, file_path):
        """ Remembers the path of the file to load; nothing is read yet. """
        self.file_path = file_path

    def _queries(self, data):
        """
        Helper function for breaking down and filtering queries.

        Newlines are replaced with spaces, the text is split on ';' and
        each piece is stripped. Pieces that end up empty (for example the
        remainder after the final ';') are dropped.
        """
        for query in map(str.strip, data.replace("\n", " ").split(";")):
            if query:
                yield query

    def __call__(self):
        """
        Returns an iterator over the data found in file_path.

        For .cypher and .py files the iterator yields queries one by one,
        for .json files it yields the single parsed document.

        Raises:
            Exception: if the file extension is not supported.
        """
        log.debug("Generating queries from file_path: %s",
                  self.file_path)
        _, extension = os.path.splitext(self.file_path)
        if extension == ".cypher":
            with open(self.file_path) as f:
                return self._queries(f.read())
        if extension == ".py":
            # The script is re-executed on every call, so it may print
            # different queries each time. UTF-8 is a superset of ASCII, so
            # plain ASCII output decodes exactly as before.
            output = subprocess.check_output(["python3", self.file_path])
            return self._queries(output.decode("utf-8"))
        if extension == ".json":
            with open(self.file_path) as f:
                return iter([json.load(f)])
        raise Exception("Unsupported filetype {} ".format(extension))

    def __repr__(self):
        return "(Loader<%s>)" % self.file_path
def load_scenarios(args, known_keys, suite_groups):
    """
    Scans through folder structure starting with the scenarios root and
    loads query scenarios.

    Expected folder structure is:
        groups_root/
            groupname1/
                config.json
                common.py
                setup.FILE_TYPE
                teardown.FILE_TYPE
                itersetup.FILE_TYPE
                iterteardown.FILE_TYPE
                scenario1.config.json
                scenario1.run.FILE_TYPE-------(mandatory)
                scenario1.setup.FILE_TYPE
                scenario1.teardown.FILE_TYPE
                scenario1.itersetup.FILE_TYPE
                scenario1.iterteardown.FILE_TYPE
                scenario2...
                ...
            groupname2/
                ...

    Per query configs (setup, teardown, itersetup, iterteardown)
    override group configs for that scenario. Group configs must have one
    extension (.FILE_TYPE) and
    scenario configs must have 2 extensions (.scenario_name.FILE_TYPE).
    Each suite doesn't need to implement all query steps and filetypes.
    See documentation in each suite for supported ones.

    Args:
        args: list of command line arguments; only
            "--query-scenarios-root" is consumed here (it defaults to
            the "groups" folder resolved by get_absolute_path), everything
            else is ignored. If None, sys.argv is parsed instead.
        known_keys: iterable of config names (the part of the file name
            right before the extension) a scenario is allowed to contain.
        suite_groups: container of group (folder) names to load, all
            other folders found in the root are skipped.

    Return:
        OrderedDict {group: [(scenario_name, {config_name: Loader}), ...]}

    Raises:
        Exception: if a group or scenario contains a config name that is
            not in known_keys.
    """
    argp = ArgumentParser("QuerySuite.scenarios argument parser")
    argp.add_argument("--query-scenarios-root",
                      default=get_absolute_path("groups"), dest="root")
    # Parse the arguments handed in by the caller rather than silently
    # re-parsing sys.argv (parse_known_args(None) still uses sys.argv).
    args, _ = argp.parse_known_args(args)
    log.info("Loading query scenarios from root: %s", args.root)

    allowed_keys = set(known_keys)

    def fill_config_dict(config_dict, base, config_files):
        """ Adds a Loader per config file to config_dict and validates it. """
        for config_file in config_files:
            log.debug("Processing config file %s", config_file)
            # The config name is the token right before the extension:
            # "setup.cypher" -> "setup", "scenario1.run.py" -> "run".
            config_name = config_file.split(".")[-2]
            config_dict[config_name] = Loader(os.path.join(base, config_file))
        # Validate that the scenario does not contain any illegal keys
        # (defense against typos in file naming).
        unknown_keys = set(config_dict) - allowed_keys
        if unknown_keys:
            raise Exception("Unknown QuerySuite config elements: '%r'" %
                            unknown_keys)

    def dir_content(root, predicate):
        """ Names of entries in root whose full path satisfies predicate. """
        return [p for p in os.listdir(root)
                if predicate(os.path.join(root, p))]

    def is_visible_file(path):
        """ True for regular files that are not hidden (.gitignore, ...). """
        return (os.path.isfile(path)
                and not os.path.basename(path).startswith("."))

    group_scenarios = OrderedDict()
    for group in dir_content(args.root, os.path.isdir):
        if group not in suite_groups:
            continue
        log.info("Loading group: '%s'", group)
        group_dir = os.path.join(args.root, group)
        group_scenarios[group] = []
        files = dir_content(group_dir, is_visible_file)

        # Process group default config (file names with a single dot).
        group_config = {}
        fill_config_dict(group_config, group_dir,
                         [f for f in files if f.count(".") == 1])

        # Group files on scenario (file names with exactly two dots).
        # groupby needs its input sorted so that files of one scenario
        # are adjacent.
        scenario_file_names = sorted(f for f in files if f.count(".") == 2)
        for scenario_name, scenario_files in itertools.groupby(
                scenario_file_names, lambda x: x.split(".")[0]):
            log.info("Loading scenario: '%s'", scenario_name)
            # Start from the group defaults, scenario files override them.
            scenario = dict(group_config)
            fill_config_dict(scenario, group_dir, scenario_files)
            group_scenarios[group].append((scenario_name, scenario))
            log.debug("Loaded config for scenario '%s'\n%r", scenario_name,
                      scenario)
    return group_scenarios
def _summary_to_data_points(summary):
    """
    Converts the textual summary table into Apollo data point lines.

    The first non-blank line of the summary is treated as the header row
    and the first two columns of every other row as group and scenario.
    Every remaining column yields one "group.scenario.header value" line.
    An empty summary yields an empty string.
    """
    rows = [line.split() for line in summary.split("\n") if line.strip()]
    if not rows:
        return ""
    headers = rows[0]
    lines = []
    for row in rows[1:]:
        group, scenario = row[0:2]
        for header, value in zip(headers[2:], row[2:]):
            lines.append("{}.{}.{} {}\n".format(
                group, scenario, header, value))
    return "".join(lines)


def main():
    """
    Entry point of the macro benchmark harness.

    Parses the command line, instantiates the requested suite and runner,
    loads and filters the scenarios, executes them and finally prints the
    suite summary and writes it to ".harness_summary" and ".apollo_data".

    Raises:
        Exception: if the suite or runner is not registered, or if a
            requested group is not registered and --no-strict is not set.
    """
    argp = ArgumentParser(description=__doc__)
    # positional, mandatory args
    argp.add_argument("suite", help="Suite to run.")
    argp.add_argument("runner", help="Engine to use.")
    # named, optional arguments
    argp.add_argument("--groups", nargs="+", help="Groups to run. If none are"
                      " provided, all available grups are run.")
    argp.add_argument("--scenarios", nargs="+", help="Scenarios to run. If "
                      "none are provided, all available are run.")
    argp.add_argument("--logging", default="INFO", choices=["INFO", "DEBUG"],
                      help="Logging level")
    argp.add_argument("--additional-run-fields", default={}, type=json.loads,
                      help="Additional fields to add to the 'run', in JSON")
    argp.add_argument("--no-strict", default=False, action="store_true",
                      help="Ignores nonexisting groups instead of raising an "
                      "exception")
    # Arguments not recognized here are forwarded to the suite, the
    # scenario loader and the runner.
    args, remaining_args = argp.parse_known_args()

    if args.logging:
        logging.basicConfig(level=args.logging)
        logging.getLogger("requests").setLevel(logging.WARNING)
        logging.getLogger("urllib3").setLevel(logging.WARNING)
        logging.getLogger("neo4j.bolt").setLevel(logging.WARNING)
    log.info("Memgraph benchmark suite harness")
    log.info("Executing for suite '%s', runner '%s'", args.suite, args.runner)

    # Create suites.
    suites = {"QuerySuite": QuerySuite,
              "QueryParallelSuite": QueryParallelSuite,
              "LongRunningSuite": LongRunningSuite}
    if args.suite not in suites:
        raise Exception(
            "Suite '{}' isn't registered. Registered suites are: {}".format(
                args.suite, suites))
    suite = suites[args.suite](remaining_args)

    # Load scenarios.
    group_scenarios = load_scenarios(
        remaining_args, suite.KNOWN_KEYS, suite.groups())
    log.info("Loaded %d groups, with a total of %d scenarios",
             len(group_scenarios),
             sum(len(x) for x in group_scenarios.values()))

    # Create runner.
    runners = suite.runners()
    if args.runner not in runners:
        raise Exception("Runner '{}' not registered for suite '{}'".format(
            args.runner, args.suite))
    runner = runners[args.runner](remaining_args)

    # Validate groups (if provided).
    available_groups = suite.groups()
    if args.groups:
        groups = []
        for group in args.groups:
            if group in available_groups:
                groups.append(group)
                continue
            msg = "Group '{}' isn't registered for suite '{}'".format(
                group, args.suite)
            if not args.no_strict:
                raise Exception(msg)
            log.warning(msg)
    else:
        # No groups provided, use all suite groups.
        groups = available_groups

    # Filter scenarios.
    # TODO enable scenario filtering on regex
    filtered_scenarios = OrderedDict()
    for group, scenarios in group_scenarios.items():
        if group not in groups:
            log.info("Skipping group '%s'", group)
            continue
        for scenario_name, scenario in scenarios:
            if args.scenarios and scenario_name not in args.scenarios:
                continue
            filtered_scenarios[(group, scenario_name)] = scenario

    if not filtered_scenarios:
        log.info("No scenarios to execute")
        return

    # Run scenarios.
    log.info("Executing %d scenarios", len(filtered_scenarios))
    results = []
    for (group, scenario_name), scenario in sorted(filtered_scenarios.items()):
        log.info("Executing group.scenario '%s.%s' with elements %s",
                 group, scenario_name, list(scenario.keys()))
        for iter_result in suite.run(scenario, group, scenario_name, runner):
            iter_result["group"] = group
            iter_result["scenario"] = scenario_name
            results.append(iter_result)

    # Save results.
    run = dict()
    run["suite"] = args.suite
    run["runner"] = runner.__class__.__name__
    run["runner_config"] = vars(runner.args)
    run.update(args.additional_run_fields)
    # Currently this output is not used anywhere, and has a tendency to
    # create huge files, so `results` and `run` are collected but not stored.
    # TODO(dgleich): Revise this in the near future.
    # for result in results:
    #     jail.store_data(result)

    # Print summary.
    print("\n\nMacro benchmark summary:")
    print("{}\n".format(suite.summary))
    with open(get_absolute_path(".harness_summary"), "w") as f:
        print(suite.summary, file=f)

    # Export data points.
    with open(get_absolute_path(".apollo_data"), "w") as f:
        f.write(_summary_to_data_points(suite.summary))


if __name__ == "__main__":
    main()