From be16409da2e28657ad628ba2cf3f7951a7534596 Mon Sep 17 00:00:00 2001
From: florijan <florijan@memgraph.io>
Date: Tue, 5 Sep 2017 13:54:28 +0200
Subject: [PATCH] Harness MATCH tests refactored

Summary:
1. Test setup rewritten to take approximately 8 seconds. Note that edges are created by using:
`MATCH (a) WITH a MATCH (b) WITH a, b WHERE rand() < X CREATE (a)-[:ET]->(b)`
Where `X` is a threshold calculated so that the desired edge count is the expected value. This seems to be the only feasible way of generating a large number of edges, since query execution does not depend on edge count, but on vertex count.

2. Using the new `assert` function to verify graph state. I recommend doing that in all the harness tests (I don't think we currently have something better).

3. All tests rewritten to take around 200ms per iteration.

4. Tests are using SKIP to avoid sending data to the client, while ensuring that the appropriate operations still get executed. This currently seems like the best way of removing unwanted side-effects.

Harness will cost us our sanity. And it doesn't even provide good quality regression testing we really need :(

Reviewers: buda, mislav.bradac, mferencevic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D752
---
 .../harness/groups/match/config.json          |  2 +-
 .../groups/match/pattern_cycle.run.cypher     |  1 +
 .../harness/groups/match/pattern_cycle.run.py |  1 -
 .../groups/match/pattern_long.run.cypher      |  1 +
 .../harness/groups/match/pattern_long.run.py  |  1 -
 .../groups/match/pattern_short.run.cypher     |  1 +
 .../harness/groups/match/pattern_short.run.py |  1 -
 .../harness/groups/match/pattern_where.run.py |  3 --
 .../harness/groups/match/setup.py             | 46 ++++++++++---------
 .../groups/match/vertex_on_index.run.py       |  5 ++
 .../groups/match/vertex_on_label.run.py       |  7 ++-
 .../match/vertex_on_label_property.run.py     |  8 ++--
 .../groups/match/vertex_on_property.run.py    |  7 +--
 tests/macro_benchmark/harness/harness.py      | 10 +++-
 tools/apollo/generate                         |  2 +-
 15 files changed, 57 insertions(+), 39 deletions(-)
 create mode 100644 tests/macro_benchmark/harness/groups/match/pattern_cycle.run.cypher
 delete mode 100644 tests/macro_benchmark/harness/groups/match/pattern_cycle.run.py
 create mode 100644 tests/macro_benchmark/harness/groups/match/pattern_long.run.cypher
 delete mode 100644 tests/macro_benchmark/harness/groups/match/pattern_long.run.py
 create mode 100644 tests/macro_benchmark/harness/groups/match/pattern_short.run.cypher
 delete mode 100644 tests/macro_benchmark/harness/groups/match/pattern_short.run.py
 delete mode 100644 tests/macro_benchmark/harness/groups/match/pattern_where.run.py
 create mode 100644 tests/macro_benchmark/harness/groups/match/vertex_on_index.run.py

diff --git a/tests/macro_benchmark/harness/groups/match/config.json b/tests/macro_benchmark/harness/groups/match/config.json
index 7f86768bd..fb3b3f6fb 100644
--- a/tests/macro_benchmark/harness/groups/match/config.json
+++ b/tests/macro_benchmark/harness/groups/match/config.json
@@ -1,3 +1,3 @@
 {
-    "iterations": 3
+    "iterations": 10
 }
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.cypher b/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.cypher
new file mode 100644
index 000000000..fa8875c9f
--- /dev/null
+++ b/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.cypher
@@ -0,0 +1 @@
+MATCH (n)-[r1]->(m)-[r2]->(n) RETURN r1 SKIP 100000000
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.py b/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.py
deleted file mode 100644
index 861823945..000000000
--- a/tests/macro_benchmark/harness/groups/match/pattern_cycle.run.py
+++ /dev/null
@@ -1 +0,0 @@
-print("MATCH (n)-[r1]->(m)-[r2]->(n) RETURN *")
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_long.run.cypher b/tests/macro_benchmark/harness/groups/match/pattern_long.run.cypher
new file mode 100644
index 000000000..7125ec5fe
--- /dev/null
+++ b/tests/macro_benchmark/harness/groups/match/pattern_long.run.cypher
@@ -0,0 +1 @@
+MATCH (n1)-[r1]->(n2)-[r2]->(n3)-[r3]->(n4)<-[r4]-(n5) RETURN n5 SKIP 100000 LIMIT 1
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_long.run.py b/tests/macro_benchmark/harness/groups/match/pattern_long.run.py
deleted file mode 100644
index c5a2a3768..000000000
--- a/tests/macro_benchmark/harness/groups/match/pattern_long.run.py
+++ /dev/null
@@ -1 +0,0 @@
-print("MATCH (n1)-[r1]->(n2)<-[r2]-(n3)-[r3]->(n4) RETURN *")
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_short.run.cypher b/tests/macro_benchmark/harness/groups/match/pattern_short.run.cypher
new file mode 100644
index 000000000..4a22980f5
--- /dev/null
+++ b/tests/macro_benchmark/harness/groups/match/pattern_short.run.cypher
@@ -0,0 +1 @@
+UNWIND range(0, 5) AS i MATCH (n)-[r]->(m) RETURN r SKIP 10000000
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_short.run.py b/tests/macro_benchmark/harness/groups/match/pattern_short.run.py
deleted file mode 100644
index aa7fa257d..000000000
--- a/tests/macro_benchmark/harness/groups/match/pattern_short.run.py
+++ /dev/null
@@ -1 +0,0 @@
-print("MATCH (n)-[r]->(m) RETURN *")
diff --git a/tests/macro_benchmark/harness/groups/match/pattern_where.run.py b/tests/macro_benchmark/harness/groups/match/pattern_where.run.py
deleted file mode 100644
index 322a9ec29..000000000
--- a/tests/macro_benchmark/harness/groups/match/pattern_where.run.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from setup import ID, rint, VERTEX_COUNT
-print("MATCH (n)-[r]->(m) WHERE n.%s = %d AND m.%s = %d RETURN *" % (
-    ID, rint(VERTEX_COUNT), ID, rint(VERTEX_COUNT)))
diff --git a/tests/macro_benchmark/harness/groups/match/setup.py b/tests/macro_benchmark/harness/groups/match/setup.py
index e231333cc..0cc0521b0 100644
--- a/tests/macro_benchmark/harness/groups/match/setup.py
+++ b/tests/macro_benchmark/harness/groups/match/setup.py
@@ -8,25 +8,29 @@ from random import randint
 def rint(upper_bound_exclusive):
     return randint(0, upper_bound_exclusive - 1)
 
-VERTEX_COUNT = 10000
-EDGE_COUNT = VERTEX_COUNT * 3
+VERTEX_COUNT = 1500
+EDGE_COUNT = VERTEX_COUNT * 15
 
 # numbers of *different* labels, edge types and properties
 LABEL_COUNT = 10
-EDGE_TYPE_COUNT = 10
 
-MAX_LABELS = 3  # maximum number of labels in a vertex
+MAX_LABELS = 5  # maximum number of labels in a vertex
 MAX_PROPS = 4   # maximum number of properties in a vertex/edge
+MAX_PROP_VALUE = 1000
 
 # some consts used in mutiple files
+LABEL_INDEX = "LabelIndex"
 LABEL_PREFIX = "Label"
-PROP_PREFIX = "Property"
+PROP_PREFIX = "Prop"
 ID = "id"
 
 
+
 def labels():
-    return "".join(":%s%d" % (LABEL_PREFIX, rint(LABEL_COUNT))
-                   for _ in range(randint(1, MAX_LABELS - 1)))
+    labels = ":" + LABEL_INDEX
+    for _ in range(rint(MAX_LABELS)):
+        labels += ":" + LABEL_PREFIX + str(rint(LABEL_COUNT))
+    return labels
 
 
 def properties(id):
@@ -34,22 +38,22 @@ def properties(id):
     Note that if PropX is generated, then all the PropY where Y < X
     are generated. Thus most labels have Prop0, and least have PropMAX_PROPS.
     """
-    return "{%s: %d, %s}" % (ID, id, ",".join(
-        ["%s%d: %d" % (PROP_PREFIX, prop_ind, rint(100))
-         for prop_ind in range(randint(1, MAX_PROPS - 1))]))
+    props = {"%s%d" % (PROP_PREFIX, i): rint(MAX_PROP_VALUE)
+             for i in range(rint(MAX_PROPS))}
+    props[ID] = id
+    return "{" + ", ".join("%s: %s" % kv for kv in props.items()) + "}"
 
 
 def vertex(vertex_index):
     return "(%s %s)" % (labels(), properties(vertex_index))
 
 
-def edge(edge_index):
-    return "[:EdgeType%d %s]" % (rint(EDGE_TYPE_COUNT), properties(edge_index))
-
-
 def main():
+    # create an index to speed setup up
+    print("CREATE INDEX ON :%s(%s);" % (LABEL_INDEX, ID))
+
     # we batch CREATEs because to speed creation up
-    BATCH_SIZE = 50
+    BATCH_SIZE = 30
 
     # create vertices
     for vertex_index in range(VERTEX_COUNT):
@@ -57,14 +61,14 @@ def main():
         if (vertex_index != 0 and vertex_index % BATCH_SIZE == 0) or \
                 vertex_index + 1 == VERTEX_COUNT:
             print(";")
+    print("MATCH (n) RETURN assert(count(n) = %d);" % VERTEX_COUNT)
 
     # create edges
-    for edge_index in range(EDGE_COUNT):
-        print("MATCH (a {%s: %d}), (b {%s: %d}) MERGE (a)-%s->(b)" % (
-            ID, randint(0, VERTEX_COUNT - 1),
-            ID, randint(0, VERTEX_COUNT - 1),
-            edge(edge_index)))
-        print(";")
+    print("MATCH (a) WITH a MATCH (b) WITH a, b WHERE rand() < %f "
+          " CREATE (a)-[:EdgeType]->(b);" % (EDGE_COUNT / VERTEX_COUNT ** 2))
+    print("MATCH (n)-[r]->() WITH count(r) AS c "
+          "RETURN assert(c >= %d AND c <= %d);" % (
+            EDGE_COUNT * 0.98, EDGE_COUNT * 1.02))
 
 
 if __name__ == "__main__":
diff --git a/tests/macro_benchmark/harness/groups/match/vertex_on_index.run.py b/tests/macro_benchmark/harness/groups/match/vertex_on_index.run.py
new file mode 100644
index 000000000..39501e3f0
--- /dev/null
+++ b/tests/macro_benchmark/harness/groups/match/vertex_on_index.run.py
@@ -0,0 +1,5 @@
+from setup import LABEL_INDEX, ID, VERTEX_COUNT, rint
+
+print("UNWIND range(0, 10000) AS i "
+      "MATCH (n:%s {%s: %d}) RETURN n SKIP 1000000" % (
+        LABEL_INDEX, ID, rint(VERTEX_COUNT)))
diff --git a/tests/macro_benchmark/harness/groups/match/vertex_on_label.run.py b/tests/macro_benchmark/harness/groups/match/vertex_on_label.run.py
index f39a24605..d51f7dab3 100644
--- a/tests/macro_benchmark/harness/groups/match/vertex_on_label.run.py
+++ b/tests/macro_benchmark/harness/groups/match/vertex_on_label.run.py
@@ -1,2 +1,5 @@
-from setup import LABEL_COUNT, rint
-print("MATCH (n:Label%d) RETURN n" % rint(LABEL_COUNT))
+from setup import LABEL_COUNT, LABEL_PREFIX
+
+for i in range(LABEL_COUNT):
+    print("UNWIND range(0, 30) AS i MATCH (n:%s%d) "
+          "RETURN n SKIP 1000000;" % (LABEL_PREFIX, i))
diff --git a/tests/macro_benchmark/harness/groups/match/vertex_on_label_property.run.py b/tests/macro_benchmark/harness/groups/match/vertex_on_label_property.run.py
index fb2c61533..13c4b8e19 100644
--- a/tests/macro_benchmark/harness/groups/match/vertex_on_label_property.run.py
+++ b/tests/macro_benchmark/harness/groups/match/vertex_on_label_property.run.py
@@ -1,3 +1,5 @@
-from setup import LABEL_PREFIX, PROP_PREFIX, MAX_PROPS, LABEL_COUNT, rint
-print("MATCH (n:%s%d {%s%d: %d}) RETURN n" % (
-    LABEL_PREFIX, rint(LABEL_COUNT), PROP_PREFIX, rint(MAX_PROPS), rint(10)))
+from setup import LABEL_PREFIX, PROP_PREFIX, MAX_PROPS, MAX_PROP_VALUE, LABEL_COUNT, rint
+
+for i in range(LABEL_COUNT):
+    print("UNWIND range(0, 50) AS i MATCH (n:%s%d {%s%d: %d}) RETURN n SKIP 10000;" % (
+        LABEL_PREFIX, i, PROP_PREFIX, rint(MAX_PROPS), rint(MAX_PROP_VALUE)))
diff --git a/tests/macro_benchmark/harness/groups/match/vertex_on_property.run.py b/tests/macro_benchmark/harness/groups/match/vertex_on_property.run.py
index f996d8575..17d5554c5 100644
--- a/tests/macro_benchmark/harness/groups/match/vertex_on_property.run.py
+++ b/tests/macro_benchmark/harness/groups/match/vertex_on_property.run.py
@@ -1,3 +1,4 @@
-from setup import PROP_PREFIX, MAX_PROPS, rint
-print("MATCH (n {%s%d: %d}) RETURN n" % (
-    PROP_PREFIX, rint(MAX_PROPS), rint(10)))
+from setup import PROP_PREFIX, MAX_PROPS, rint, MAX_PROP_VALUE
+
+print("UNWIND range(0, 50) AS i MATCH (n {%s%d: %d}) RETURN n SKIP 10000" % (
+    PROP_PREFIX, rint(MAX_PROPS), rint(MAX_PROP_VALUE)))
diff --git a/tests/macro_benchmark/harness/harness.py b/tests/macro_benchmark/harness/harness.py
index 1493399bf..7fc456e85 100755
--- a/tests/macro_benchmark/harness/harness.py
+++ b/tests/macro_benchmark/harness/harness.py
@@ -217,8 +217,14 @@ class _QuerySuite:
 
         def execute(config_name, num_client_workers=1):
             queries = scenario.get(config_name)
-            return runner.execute(queries(), num_client_workers) if queries \
-                else None
+            start_time = time.time()
+            if queries:
+                r_val = runner.execute(queries(), num_client_workers)
+            else:
+                r_val = None
+            log.info("\t%s done in %.2f seconds" % (config_name,
+                                                    time.time() - start_time))
+            return r_val
 
         measurements = []
 
diff --git a/tools/apollo/generate b/tools/apollo/generate
index 9e1b5243b..74072b871 100755
--- a/tools/apollo/generate
+++ b/tools/apollo/generate
@@ -193,7 +193,7 @@ binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
 # macro benchmark tests
 MACRO_BENCHMARK_ARGS = (
     "QuerySuite MemgraphRunner "
-    "--groups aggregation 1000_create unwind_create dense_expand "
+    "--groups aggregation 1000_create unwind_create dense_expand match "
     "--no-strict")
 macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
 harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",