Harness MATCH tests refactored

Summary:
1. Test setup rewritten to take about 8 seconds. Note that edges are created using:
`MATCH (a) WITH a MATCH (b) WITH a, b WHERE rand() < X CREATE (a)-[:ET]->(b)`
where `X` is a threshold calculated so that the desired edge count is the expectation (a short sketch of the calculation follows this list). This seems to be the only feasible way of generating a large number of edges, since the execution time of this query does not depend on the edge count, but on the vertex count.

2. The new `assert` function is used to verify graph state (see the second sketch after this list). I recommend doing this in all the harness tests (I don't think we currently have anything better).

3. All tests rewritten to take around 200ms per iteration.

4. Tests use SKIP to avoid sending data to the client while still ensuring that the appropriate operations get executed (shown in the second sketch after this list). This currently seems like the best way of removing unwanted side effects.
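
To make the expectation in item 1 concrete, here is a minimal sketch of the threshold
calculation, assuming the constants from the new `setup.py` below (the helper name
`edge_creation_query` is made up for this example):

```python
# Sketch only: how the rand() threshold X yields EDGE_COUNT edges in expectation.
# Constants mirror the new setup.py; edge_creation_query() is a hypothetical helper.
VERTEX_COUNT = 1500
EDGE_COUNT = VERTEX_COUNT * 15  # 22500 desired edges


def edge_creation_query():
    # MATCH (a) ... MATCH (b) enumerates all VERTEX_COUNT^2 ordered vertex pairs;
    # each pair passes the rand() filter with probability x, so the expected
    # number of created edges is x * VERTEX_COUNT^2 == EDGE_COUNT.
    x = EDGE_COUNT / VERTEX_COUNT ** 2  # 22500 / 2250000 == 0.01
    return ("MATCH (a) WITH a MATCH (b) WITH a, b "
            "WHERE rand() < %f CREATE (a)-[:EdgeType]->(b);" % x)


print(edge_creation_query())
```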

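And, for items 2 and 4, a minimal sketch in the style of the generator scripts,
using queries that also appear in the diff below (the vertex count 1500 comes from
the new `setup.py`):

```python
# Sketch only: verification and measurement queries, printed like the setup scripts.
# assert() makes setup fail loudly if the created graph does not match expectations.
print("MATCH (n) RETURN assert(count(n) = %d);" % 1500)
# A SKIP larger than any possible result size forces the MATCH/expand operators to
# execute fully while sending nothing back to the client, so result transfer does
# not distort the timing.
print("MATCH (n)-[r1]->(m)-[r2]->(n) RETURN r1 SKIP 100000000")
```
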
The harness will cost us our sanity, and it doesn't even provide the good-quality regression testing we really need :(

Reviewers: buda, mislav.bradac, mferencevic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D752
Author: florijan
Date: 2017-09-05 13:54:28 +02:00
Commit: be16409da2
Parent: 020afd7f4b
15 changed files with 57 additions and 39 deletions

@@ -1,3 +1,3 @@
 {
-    "iterations": 3
+    "iterations": 10
 }

@@ -0,0 +1 @@
+MATCH (n)-[r1]->(m)-[r2]->(n) RETURN r1 SKIP 100000000

@@ -1 +0,0 @@
-print("MATCH (n)-[r1]->(m)-[r2]->(n) RETURN *")

@@ -0,0 +1 @@
+MATCH (n1)-[r1]->(n2)-[r2]->(n3)-[r3]->(n4)<-[r4]-(n5) RETURN n5 SKIP 100000 LIMIT 1

@@ -1 +0,0 @@
-print("MATCH (n1)-[r1]->(n2)<-[r2]-(n3)-[r3]->(n4) RETURN *")

@@ -0,0 +1 @@
+UNWIND range(0, 5) AS i MATCH (n)-[r]->(m) RETURN r SKIP 10000000

@@ -1 +0,0 @@
-print("MATCH (n)-[r]->(m) RETURN *")

@@ -1,3 +0,0 @@
-from setup import ID, rint, VERTEX_COUNT
-print("MATCH (n)-[r]->(m) WHERE n.%s = %d AND m.%s = %d RETURN *" % (
-    ID, rint(VERTEX_COUNT), ID, rint(VERTEX_COUNT)))

@@ -8,25 +8,29 @@ from random import randint
 def rint(upper_bound_exclusive):
     return randint(0, upper_bound_exclusive - 1)
 
 
-VERTEX_COUNT = 10000
-EDGE_COUNT = VERTEX_COUNT * 3
+VERTEX_COUNT = 1500
+EDGE_COUNT = VERTEX_COUNT * 15
 
 # numbers of *different* labels, edge types and properties
 LABEL_COUNT = 10
-EDGE_TYPE_COUNT = 10
-MAX_LABELS = 3  # maximum number of labels in a vertex
+MAX_LABELS = 5  # maximum number of labels in a vertex
 MAX_PROPS = 4  # maximum number of properties in a vertex/edge
+MAX_PROP_VALUE = 1000
 
 # some consts used in mutiple files
+LABEL_INDEX = "LabelIndex"
 LABEL_PREFIX = "Label"
-PROP_PREFIX = "Property"
+PROP_PREFIX = "Prop"
 ID = "id"
 
 
 def labels():
-    return "".join(":%s%d" % (LABEL_PREFIX, rint(LABEL_COUNT))
-                   for _ in range(randint(1, MAX_LABELS - 1)))
+    labels = ":" + LABEL_INDEX
+    for _ in range(rint(MAX_LABELS)):
+        labels += ":" + LABEL_PREFIX + str(rint(LABEL_COUNT))
+    return labels
 
 
 def properties(id):
@@ -34,22 +38,22 @@ def properties(id):
     Note that if PropX is generated, then all the PropY where Y < X
     are generated. Thus most labels have Prop0, and least have PropMAX_PROPS.
     """
-    return "{%s: %d, %s}" % (ID, id, ",".join(
-        ["%s%d: %d" % (PROP_PREFIX, prop_ind, rint(100))
-         for prop_ind in range(randint(1, MAX_PROPS - 1))]))
+    props = {"%s%d" % (PROP_PREFIX, i): rint(MAX_PROP_VALUE)
+             for i in range(rint(MAX_PROPS))}
+    props[ID] = id
+    return "{" + ", ".join("%s: %s" % kv for kv in props.items()) + "}"
 
 
 def vertex(vertex_index):
     return "(%s %s)" % (labels(), properties(vertex_index))
 
 
-def edge(edge_index):
-    return "[:EdgeType%d %s]" % (rint(EDGE_TYPE_COUNT), properties(edge_index))
-
-
 def main():
+    # create an index to speed setup up
+    print("CREATE INDEX ON :%s(%s);" % (LABEL_INDEX, ID))
     # we batch CREATEs because to speed creation up
-    BATCH_SIZE = 50
+    BATCH_SIZE = 30
 
     # create vertices
     for vertex_index in range(VERTEX_COUNT):
@@ -57,14 +61,14 @@ def main():
         if (vertex_index != 0 and vertex_index % BATCH_SIZE == 0) or \
                 vertex_index + 1 == VERTEX_COUNT:
             print(";")
+    print("MATCH (n) RETURN assert(count(n) = %d);" % VERTEX_COUNT)
 
     # create edges
-    for edge_index in range(EDGE_COUNT):
-        print("MATCH (a {%s: %d}), (b {%s: %d}) MERGE (a)-%s->(b)" % (
-            ID, randint(0, VERTEX_COUNT - 1),
-            ID, randint(0, VERTEX_COUNT - 1),
-            edge(edge_index)))
-        print(";")
+    print("MATCH (a) WITH a MATCH (b) WITH a, b WHERE rand() < %f "
+          " CREATE (a)-[:EdgeType]->(b);" % (EDGE_COUNT / VERTEX_COUNT ** 2))
+    print("MATCH (n)-[r]->() WITH count(r) AS c "
+          "RETURN assert(c >= %d AND c <= %d);" % (
+              EDGE_COUNT * 0.98, EDGE_COUNT * 1.02))
 
 
 if __name__ == "__main__":

@@ -0,0 +1,5 @@
+from setup import LABEL_INDEX, ID, VERTEX_COUNT, rint
+
+print("UNWIND range(0, 10000) AS i "
+      "MATCH (n:%s {%s: %d}) RETURN n SKIP 1000000" % (
+          LABEL_INDEX, ID, rint(VERTEX_COUNT)))

@@ -1,2 +1,5 @@
-from setup import LABEL_COUNT, rint
-print("MATCH (n:Label%d) RETURN n" % rint(LABEL_COUNT))
+from setup import LABEL_COUNT, LABEL_PREFIX
+
+for i in range(LABEL_COUNT):
+    print("UNWIND range(0, 30) AS i MATCH (n:%s%d) "
+          "RETURN n SKIP 1000000;" % (LABEL_PREFIX, i))

@@ -1,3 +1,5 @@
-from setup import LABEL_PREFIX, PROP_PREFIX, MAX_PROPS, LABEL_COUNT, rint
-print("MATCH (n:%s%d {%s%d: %d}) RETURN n" % (
-    LABEL_PREFIX, rint(LABEL_COUNT), PROP_PREFIX, rint(MAX_PROPS), rint(10)))
+from setup import LABEL_PREFIX, PROP_PREFIX, MAX_PROPS, MAX_PROP_VALUE, LABEL_COUNT, rint
+
+for i in range(LABEL_COUNT):
+    print("UNWIND range(0, 50) AS i MATCH (n:%s%d {%s%d: %d}) RETURN n SKIP 10000;" % (
+        LABEL_PREFIX, i, PROP_PREFIX, rint(MAX_PROPS), rint(MAX_PROP_VALUE)))

@@ -1,3 +1,4 @@
-from setup import PROP_PREFIX, MAX_PROPS, rint
-print("MATCH (n {%s%d: %d}) RETURN n" % (
-    PROP_PREFIX, rint(MAX_PROPS), rint(10)))
+from setup import PROP_PREFIX, MAX_PROPS, rint, MAX_PROP_VALUE
+
+print("UNWIND range(0, 50) AS i MATCH (n {%s%d: %d}) RETURN n SKIP 10000" % (
+    PROP_PREFIX, rint(MAX_PROPS), rint(MAX_PROP_VALUE)))

@@ -217,8 +217,14 @@ class _QuerySuite:
         def execute(config_name, num_client_workers=1):
             queries = scenario.get(config_name)
-            return runner.execute(queries(), num_client_workers) if queries \
-                else None
+            start_time = time.time()
+            if queries:
+                r_val = runner.execute(queries(), num_client_workers)
+            else:
+                r_val = None
+            log.info("\t%s done in %.2f seconds" % (config_name,
+                     time.time() - start_time))
+            return r_val
 
         measurements = []

@@ -193,7 +193,7 @@ binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
 # macro benchmark tests
 MACRO_BENCHMARK_ARGS = (
     "QuerySuite MemgraphRunner "
-    "--groups aggregation 1000_create unwind_create dense_expand "
+    "--groups aggregation 1000_create unwind_create dense_expand match "
    "--no-strict")
 macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
 harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",