Fix delete and aggregation benchmarks

Reviewers: buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D539
This commit is contained in:
Mislav Bradac 2017-07-11 17:45:34 +02:00
parent 955962d036
commit ec32ae8bad
10 changed files with 46 additions and 23 deletions

View File

@ -1,3 +1,3 @@
{
"iterations": 20
"iterations": 5
}

View File

@ -1,8 +1,9 @@
BATCH_SIZE = 50
VERTEX_COUNT = 500
BATCH_SIZE = 100
VERTEX_COUNT = 50000
for i in range(VERTEX_COUNT):
print("CREATE (n%d {x: %d})" % (i, i))
# batch CREATEs because we can't execute all at once
if i != 0 and i % BATCH_SIZE == 0:
if (i != 0 and i % BATCH_SIZE == 0) or \
(i + 1 == VERTEX_COUNT):
print(";")

View File

@ -1,3 +1,3 @@
{
"iterations": 20
"iterations": 3
}

View File

@ -1,3 +1,3 @@
from setup import create_edges
create_edges(400, 200)
create_edges(50000, 1000)

View File

@ -1,3 +1,3 @@
from setup import create_vertices
create_vertices(200)
create_vertices(1000)

View File

@ -3,7 +3,7 @@
from random import randint
BATCH_SIZE = 50
BATCH_SIZE = 100
def create_vertices(vertex_count):
@ -16,10 +16,19 @@ def create_vertices(vertex_count):
def create_edges(edge_count, vertex_count):
""" vertex_count is the number of already existing vertices in the graph """
matches = []
merges = []
for edge in range(edge_count):
print("MATCH (a {id: %d}), (b {id: %d}) MERGE (a)-[:Type]->(b)" % (
randint(0, vertex_count - 1), randint(0, vertex_count - 1)))
print(";")
matches.append("MATCH (a%d {id: %d}), (b%d {id: %d})" %
(edge, randint(0, vertex_count - 1),
edge, randint(0, vertex_count - 1)))
merges.append("CREATE (a%d)-[:Type]->(b%d)" % (edge, edge))
if (edge != 0 and edge % BATCH_SIZE == 0) or \
((edge + 1) == edge_count):
print(" ".join(matches + merges))
print(";")
matches = []
merges = []
if __name__ == '__main__':

View File

@ -1,3 +1,3 @@
from setup import create_vertices
create_vertices(200)
create_vertices(50000)

View File

@ -1,4 +1,4 @@
from setup import create_vertices, create_edges
create_vertices(200)
create_edges(400, 200)
create_vertices(1000)
create_edges(50000, 1000)

View File

@ -57,7 +57,6 @@ def main():
argp = ArgumentParser("Bolt client execution process")
# positional args
argp.add_argument("db_uri")
argp.add_argument("queries", nargs="*")
# named, optional
argp.add_argument("--encrypt", action="store_true")
@ -70,6 +69,8 @@ def main():
_print_dict({RETURN_CODE: 1, ERROR_MSG: "Invalid cmd-line arguments"})
sys.exit(1)
queries = sys.stdin.read().split("\n")
driver = GraphDatabase.driver(
args.db_uri,
auth=basic_auth("", ""),
@ -80,7 +81,7 @@ def main():
# execute the queries
metadatas = []
start = time.time()
for query in args.queries:
for query in queries:
result = session.run(query)
metadatas.append(result.summary().metadata)
end = time.time()
@ -88,8 +89,8 @@ def main():
_print_dict({
RETURN_CODE: 0,
WALL_TIME: (None if not args.queries else
delta_time / float(len(args.queries))),
WALL_TIME: (None if not queries else
delta_time / float(len(queries))),
"metadatas": metadatas
})

View File

@ -12,6 +12,7 @@ from subprocess import check_output
from argparse import ArgumentParser
from collections import OrderedDict
from collections import defaultdict
import tempfile
import jail_faker as jail
from bolt_client import WALL_TIME
@ -60,7 +61,8 @@ class QuerySuite():
def _queries(self, data):
""" Helper function for breaking down and filtering queries"""
for element in filter(None, map(str.strip, data.split(";"))):
for element in filter(
None, map(str.strip, data.replace("\n", " ").split(";"))):
yield element
def __call__(self):
@ -200,7 +202,7 @@ class QuerySuite():
# warmup phase
for _ in range(min(scenario_config.get("iterations", 1),
scenario_config.get("warmup", 5))):
scenario_config.get("warmup", 3))):
execute("itersetup")
execute("run")
execute("iterteardown")
@ -217,7 +219,7 @@ class QuerySuite():
# most likely run faster
execute("itersetup")
# TODO measure CPU time (expose it from the runner)
run_result = execute('run')
run_result = execute("run")
assert len(run_result.get("metadatas", [])), \
"Scenario run must have exactly one query"
add_measurement(run_result, iteration, WALL_TIME)
@ -304,12 +306,22 @@ class MemgraphRunner:
log.debug("MemgraphRunner.execute('%s')", str(queries))
client_args = [path.join(path.dirname(__file__), "bolt_client.py")]
client_args.append(self.args.MemgraphRunnerURI)
client_args += queries
if (self.args.MemgraphRunnerEncryptBolt):
client_args.append("--encrypt")
queries_fd, queries_path = tempfile.mkstemp()
try:
queries_file = os.fdopen(queries_fd, "w")
queries_file.write("\n".join(queries))
queries_file.close()
except:
queries_file.close()
os.remove(queries_path)
raise Exception("Writing queries to temporary file failed")
# TODO make the timeout configurable per query or something
return_code = self.bolt_client.run_and_wait(
"python3", client_args, timeout=120)
"python3", client_args, timeout=120, stdin=queries_path)
os.remove(queries_path)
if return_code != 0:
with open(self.bolt_client.get_stderr()) as f:
stderr = f.read()