Add first parallel benchmark

Reviewers: buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D557
This commit is contained in:
Mislav Bradac 2017-07-17 19:14:42 +02:00
parent 1ad3b7f906
commit fee59a7ec2
8 changed files with 96 additions and 53 deletions

View File

@ -0,0 +1,4 @@
{
"iterations": 5,
"num_client_workers": 16
}

View File

@ -0,0 +1 @@
# Emit the benchmark workload: the same count query repeated 128 times.
query = "MATCH (n) RETURN count(n), count(n.x);"
print(query * 128)

View File

@ -0,0 +1 @@
# Emit the benchmark workload: the same aggregation query repeated 128 times.
query = "MATCH (n) RETURN min(n.x), max(n.x), avg(n.x);"
print(query * 128)

View File

@ -0,0 +1,9 @@
# Generate CREATE statements for VERTEX_COUNT vertices, terminating every
# BATCH_SIZE-th statement (and the final one) with ";" so the client can
# submit them in batches instead of one giant query.
BATCH_SIZE = 100
VERTEX_COUNT = 10000
for index in range(VERTEX_COUNT):
    print("CREATE (n%d {x: %d})" % (index, index))
    # End a batch after every BATCH_SIZE creates; always end the last one.
    batch_boundary = index != 0 and index % BATCH_SIZE == 0
    last_vertex = index + 1 == VERTEX_COUNT
    if batch_boundary or last_vertex:
        print(";")

View File

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
"""
A python script that launches the memgraph client,
@ -24,21 +24,27 @@ Note that 'metadata' are only valid if the return_code is 0
"""
import sys
import os
# tests/stress dir, that's the place of common.py.
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(
os.path.realpath(__file__)))), "stress"))
import time
import json
from argparse import ArgumentParser
from contextlib import redirect_stderr
import io
from multiprocessing import Pool
from common import connection_argument_parser, execute_till_success, \
argument_session
from functools import partial
from neo4j.v1 import GraphDatabase, basic_auth
# string constants
RETURN_CODE = "return_code"
ERROR_MSG = "error_msg"
WALL_TIME = "wall_time"
def _prepare_for_json(obj):
if isinstance(obj, dict):
return {k: _prepare_for_json(v) for k, v in obj.items()}
@ -53,14 +59,16 @@ def _print_dict(d):
print(json.dumps(_prepare_for_json(d), indent=2))
def main():
argp = ArgumentParser("Bolt client execution process")
# positional args
argp.add_argument("db_uri")
# named, optional
argp.add_argument("--encrypt", action="store_true")
def _run_query(args, query):
    """Run a single query in its own session and return its summary metadata.

    Opens a session from the connection arguments, executes the query via
    execute_till_success (which retries on failure), and returns only the
    metadata element of its (data, retry_count, metadata) result.
    """
    with argument_session(args) as session:
        _, _, metadata = execute_till_success(session, query)
        return metadata
# parse args, ensure that stdout is not polluted by argument parsing
def main():
argp = connection_argument_parser()
argp.add_argument("--num-workers", type=int, default=1)
# Parse args and ensure that stdout is not polluted by argument parsing.
try:
f = io.StringIO()
with redirect_stderr(f):
@ -71,32 +79,20 @@ def main():
queries = sys.stdin.read().split("\n")
driver = GraphDatabase.driver(
args.db_uri,
auth=basic_auth("", ""),
encrypted=args.encrypt)
session = driver.session()
# execute the queries
# Execute the queries.
metadatas = []
start = time.time()
for query in queries:
result = session.run(query)
metadatas.append(result.summary().metadata)
end = time.time()
delta_time = end - start
with Pool(args.num_workers) as pool:
start = time.time()
metadatas = list(pool.map(partial(_run_query, args), queries))
end = time.time()
delta_time = end - start
_print_dict({
RETURN_CODE: 0,
WALL_TIME: (None if not queries else
delta_time / float(len(queries))),
WALL_TIME: (None if not queries else delta_time),
"metadatas": metadatas
})
session.close()
driver.close()
if __name__ == '__main__':
main()

View File

@ -21,7 +21,7 @@ from perf import Perf
log = logging.getLogger(__name__)
class QuerySuite:
class _QuerySuite:
"""
Executes a Query-based benchmark scenario. Query-based scenarios
consist of setup steps (Cypher queries) executed before the benchmark,
@ -179,9 +179,10 @@ class QuerySuite:
scenario_config = scenario.get("config")
scenario_config = next(scenario_config()) if scenario_config else {}
def execute(config_name):
def execute(config_name, num_client_workers=1):
queries = scenario.get(config_name)
return runner.execute(queries()) if queries else None
return runner.execute(queries(), num_client_workers) if queries \
else None
measurements = []
@ -206,7 +207,7 @@ class QuerySuite:
for _ in range(min(scenario_config.get("iterations", 1),
scenario_config.get("warmup", 3))):
execute("itersetup")
execute("run")
execute("run", scenario_config.get("num_client_workers", 1))
execute("iterteardown")
if self.perf:
@ -221,16 +222,16 @@ class QuerySuite:
# most likely run faster
execute("itersetup")
# TODO measure CPU time (expose it from the runner)
run_result = execute("run")
assert len(run_result.get("metadatas", [])), \
"Scenario run must have exactly one query"
run_result = execute("run",
scenario_config.get("num_client_workers", 1))
add_measurement(run_result, iteration, WALL_TIME)
add_measurement(run_result["metadatas"][0], iteration,
"query_parsing_time")
add_measurement(run_result["metadatas"][0], iteration,
"query_plan_execution_time")
add_measurement(run_result["metadatas"][0], iteration,
"query_planning_time")
if len(run_result.get("metadatas", [])) == 1:
add_measurement(run_result["metadatas"][0], iteration,
"query_parsing_time")
add_measurement(run_result["metadatas"][0], iteration,
"query_plan_execution_time")
add_measurement(run_result["metadatas"][0], iteration,
"query_planning_time")
execute("iterteardown")
if self.perf:
@ -258,14 +259,38 @@ class QuerySuite:
def runners(self):
""" Which runners can execute a QuerySuite scenario """
return ["MemgraphRunner"]
assert False, "This is a base class, use one of derived suites"
def groups(self):
""" Which groups can be executed by a QuerySuite scenario """
assert False, "This is a base class, use one of derived suites"
return ["create", "match", "expression", "aggregation", "return",
"update", "delete", "hardcoded"]
class QuerySuite(_QuerySuite):
def __init__(self, args):
_QuerySuite.__init__(self, args)
def runners(self):
return ["MemgraphRunner"]
def groups(self):
return ["create", "match", "expression", "aggregation", "return",
"update", "delete"]
class QueryParallelSuite(_QuerySuite):
def __init__(self, args):
_QuerySuite.__init__(self, args)
def runners(self):
return ["MemgraphRunner"]
def groups(self):
return ["aggregation_parallel"]
class MemgraphRunner:
"""
Knows how to start and stop Memgraph (backend) and some client frontend
@ -287,7 +312,7 @@ class MemgraphRunner:
default=os.path.join(os.path.dirname(__file__),
"../../../build/memgraph"))
argp.add_argument("--MemgraphRunnerConfig", required=False)
argp.add_argument("--MemgraphRunnerURI", default="bolt://localhost:7687")
argp.add_argument("--MemgraphRunnerURI", default="localhost:7687")
argp.add_argument("--MemgraphRunnerEncryptBolt", action="store_true")
self.args, _ = argp.parse_known_args(args)
@ -304,12 +329,13 @@ class MemgraphRunner:
time.sleep(1.0)
return self.memgraph_bin.get_pid()
def execute(self, queries):
def execute(self, queries, num_client_workers):
log.debug("MemgraphRunner.execute('%s')", str(queries))
client_args = [path.join(path.dirname(__file__), "bolt_client.py")]
client_args.append(self.args.MemgraphRunnerURI)
if (self.args.MemgraphRunnerEncryptBolt):
client_args.append("--encrypt")
client_args += ["--endpoint", self.args.MemgraphRunnerURI]
client_args += ["--num-workers", str(num_client_workers)]
if self.args.MemgraphRunnerEncryptBolt:
client_args.append("--ssl-enabled")
queries_fd, queries_path = tempfile.mkstemp()
try:
queries_file = os.fdopen(queries_fd, "w")
@ -370,7 +396,7 @@ def main():
log.info("Executing for suite '%s', runner '%s'", args.suite, args.runner)
# Create suite
suites = {"QuerySuite": QuerySuite}
suites = {"QuerySuite": QuerySuite, "QueryParallelSuite": QueryParallelSuite}
if args.suite not in suites:
raise Exception(
"Suite '{}' isn't registered. Registered suites are: {}".format(
@ -413,7 +439,7 @@ def main():
continue
filtered_scenarios[(group, scenario_name)] = scenario
if (len(filtered_scenarios) == 0):
if len(filtered_scenarios) == 0:
log.info("No scenarios to execute")
return

View File

@ -68,7 +68,7 @@ if __name__ == "__main__":
received = {k: v for (k, v) in data.items()}
else:
received = data
# get expected elements
expected = expected[0]

View File

@ -77,7 +77,13 @@ def execute_till_success(session, query, max_retries=1000):
no_failures = 0
while True:
try:
return session.run(query).data(), no_failures
result = session.run(query)
# neo4j.Address object can't be pickled so we need to convert it to
# str in metadata dictionary. This is important so that we can use
# this function in multiprocessing.Pool.map.
metadata = {k: str(v) for k, v in
result.summary().metadata.items()}
return result.data(), no_failures, metadata
except Exception:
no_failures += 1
if no_failures >= max_retries:
@ -146,7 +152,7 @@ def connection_argument_parser():
help='DBMS instance username.')
parser.add_argument('--password', type=int, default='1234',
help='DBMS instance password.')
parser.add_argument('--ssl-enabled', action='store_false',
parser.add_argument('--ssl-enabled', action='store_true',
help="Is SSL enabled?")
return parser