be16409da2
Summary: 1. Test setup rewritten to take circa 8 seconds. Note that edges are created by using: `MATCH (a) WITH a MATCH (b) WITH b WHERE rand() < X CREATE (a)-[:ET]->(b)` where `X` is a threshold calculated so the desired edge count is the expectation. This seems to be the only feasible way of generating a large number of edges since query execution does not depend on edge count, but on vertex count. 2. Using the new `assert` function to verify graph state. I recommend doing that in all the harness tests (I don't think we currently have something better). 3. All tests rewritten to take around 200ms per iteration. 4. Tests are using SKIP to avoid sending data to the client, but ensure that appropriate operations get executed. This currently seems like the best way of removing unwanted side-effects. Harness will cost us our sanity. And it doesn't even provide the good-quality regression testing we really need :( Reviewers: buda, mislav.bradac, mferencevic Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D752
76 lines
2.1 KiB
Python
76 lines
2.1 KiB
Python
"""
|
|
Generates a random graph with some configurable statistics.
|
|
"""
|
|
|
|
from random import randint
|
|
|
|
|
|
def rint(upper_bound_exclusive):
    """Return a random integer N with 0 <= N < upper_bound_exclusive.

    ``randint`` includes both of its endpoints, so the upper endpoint is
    lowered by one to make the given bound exclusive.
    """
    inclusive_max = upper_bound_exclusive - 1
    return randint(0, inclusive_max)
|
|
|
|
# overall size of the generated graph
VERTEX_COUNT = 1500
EDGE_COUNT = 15 * VERTEX_COUNT

# number of *different* labels that can be generated
LABEL_COUNT = 10

# exclusive upper bound on the number of random labels per vertex
MAX_LABELS = 5
# exclusive upper bound on the number of random properties per vertex
MAX_PROPS = 4
# exclusive upper bound on a random property value
MAX_PROP_VALUE = 1000

# some consts used in multiple files
LABEL_INDEX = "LabelIndex"
LABEL_PREFIX = "Label"
PROP_PREFIX = "Prop"
ID = "id"
|
|
|
|
|
|
|
|
def labels():
    """Build the label part of a vertex pattern, e.g. ":LabelIndex:Label3".

    The result always starts with LABEL_INDEX and is followed by
    rint(MAX_LABELS) labels of the form LABEL_PREFIX + rint(LABEL_COUNT).
    Each label is drawn independently, so the same one may appear twice.
    """
    extra_count = rint(MAX_LABELS)
    random_names = [LABEL_PREFIX + str(rint(LABEL_COUNT))
                    for _ in range(extra_count)]
    return "".join(":" + name for name in [LABEL_INDEX] + random_names)
|
|
|
|
|
|
def properties(id):
    """Build a property map literal such as "{Prop0: 17, Prop1: 3, id: 42}".

    rint(MAX_PROPS) random properties are generated, named PROP_PREFIX
    followed by consecutive indices starting at 0. Hence whenever PropX is
    present, every PropY with Y < X is present too, and Prop0 is the most
    common one. Each value is rint(MAX_PROP_VALUE). The ID property is
    always appended last and holds the given `id`.
    """
    generated_count = rint(MAX_PROPS)
    props = {}
    for index in range(generated_count):
        props["%s%d" % (PROP_PREFIX, index)] = rint(MAX_PROP_VALUE)
    props[ID] = id

    pairs = ["%s: %s" % (key, value) for key, value in props.items()]
    return "{" + ", ".join(pairs) + "}"
|
|
|
|
|
|
def vertex(vertex_index):
    """Return a vertex pattern "(<labels> <properties>)" for a CREATE clause.

    The labels come from labels() and the property map from properties(),
    with `vertex_index` stored as the vertex's ID property.
    """
    label_part = labels()
    property_part = properties(vertex_index)
    return "(" + label_part + " " + property_part + ")"
|
|
|
|
|
|
def main():
    """Print to stdout the queries that build the random graph.

    Emitted in order: an index creation, the batched vertex CREATEs, an
    assertion on the vertex count, a single edge-creating query and an
    assertion that the edge count is within 2% of EDGE_COUNT.
    """
    # create an index to speed setup up
    print("CREATE INDEX ON :%s(%s);" % (LABEL_INDEX, ID))

    # CREATE clauses are batched into a single query to speed creation up
    BATCH_SIZE = 30

    # create vertices
    for vertex_index in range(VERTEX_COUNT):
        print("CREATE %s" % vertex(vertex_index))
        created = vertex_index + 1
        # close the query after every BATCH_SIZE vertices and after the
        # last vertex; counting created vertices (not the index) keeps the
        # first batch at exactly BATCH_SIZE instead of BATCH_SIZE + 1
        if created % BATCH_SIZE == 0 or created == VERTEX_COUNT:
            print(";")
    print("MATCH (n) RETURN assert(count(n) = %d);" % VERTEX_COUNT)

    # create edges: each ordered vertex pair gets an edge with a probability
    # chosen so that the expected number of edges equals EDGE_COUNT
    print("MATCH (a) WITH a MATCH (b) WITH a, b WHERE rand() < %f "
          " CREATE (a)-[:EdgeType]->(b);" % (EDGE_COUNT / VERTEX_COUNT ** 2))
    # the edge count is random, so only a +/- 2% window is asserted
    print("MATCH (n)-[r]->() WITH count(r) AS c "
          "RETURN assert(c >= %d AND c <= %d);" % (
              EDGE_COUNT * 0.98, EDGE_COUNT * 1.02))
|
|
|
|
|
|
# emit the queries only when run as a script, not when imported
if __name__ == "__main__":
    main()
|