From 7be66f0c540ed0d926e0751736e2d6ff90e8d916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= <benjamin.antal@memgraph.io>
Date: Wed, 1 Feb 2023 14:24:04 +0100
Subject: [PATCH] Add unwind based dataset creator

---
 tests/mgbench/dataset_creator_unwind.py       | 139 ++++++++++++++++++
 .../accesscontrol_large.shard_configuration   |   6 +-
 .../accesscontrol_medium.shard_configuration  |   6 +-
 .../accesscontrol_small.shard_configuration   |   6 +-
 4 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 tests/mgbench/dataset_creator_unwind.py

diff --git a/tests/mgbench/dataset_creator_unwind.py b/tests/mgbench/dataset_creator_unwind.py
new file mode 100644
index 000000000..c9f9a12df
--- /dev/null
+++ b/tests/mgbench/dataset_creator_unwind.py
@@ -0,0 +1,139 @@
+# Copyright 2022 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import argparse
+import random
+
+import helpers
+
+# Explanation of datasets:
+#   - empty_only_index: contains index; contains no data
+#   - small: contains index; contains data (small dataset)
+#
+# Data model is as follows:
+#
+#                               ┌──────────────┐
+#                               │ Permission   │
+#         ┌────────────────┐    │  Schema:uuid │   ┌────────────┐
+#         │:IS_FOR_IDENTITY├────┤  Index:name  ├───┤:IS_FOR_FILE│
+#         └┬───────────────┘    └──────────────┘   └────────────┤
+#          │                                                    │
+#   ┌──────▼──────────────┐                                  ┌──▼────────────────┐
+#   │  Identity           │                                  │ File              │
+#   │   Schema:uuid       │                                  │  Schema:uuid      │
+#   │   Index:email       │                                  │  Index:name       │
+#   └─────────────────────┘                                  │  Index:platformId │
+#                                                            └───────────────────┘
+#
+#   - File: attributes: ["uuid", "name", "platformId"]
+#   - Permission: attributes: ["uuid", "name"]
+#   - Identity: attributes: ["uuid", "email"]
+#
+# Indexes:
+#   - File: [File(uuid), File(platformId), File(name)]
+#   - Permission: [Permission(uuid), Permission(name)]
+#   - Identity: [Identity(uuid), Identity(email)]
+#
+# Edges:
+#   - (:Permission)-[:IS_FOR_FILE]->(:File)
+#   - (:Permission)-[:IS_FOR_IDENTITY]->(:Identity)
+#
+# AccessControl specific: uuid is the schema
+
+
+def main():
+    """Generate a Cypher file that bulk-loads the access control dataset through UNWIND queries."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--number_of_identities", type=int, default=10)
+    parser.add_argument("--number_of_files", type=int, default=10)
+    parser.add_argument("--percentage_of_permissions", type=float, default=1.0)
+    parser.add_argument("--filename", default="dataset.cypher")
+
+    args = parser.parse_args()
+    number_of_identities = args.number_of_identities
+    number_of_files = args.number_of_files
+    percentage_of_permissions = args.percentage_of_permissions
+    filename = args.filename
+
+    # Report bad arguments through argparse: plain asserts are stripped when running with `python -O`.
+    if number_of_identities < 0 or number_of_files < 0:
+        parser.error("--number_of_identities and --number_of_files must be non-negative")
+    if not 0.0 < percentage_of_permissions <= 1.0:
+        parser.error("--percentage_of_permissions must be in the range (0.0, 1.0]")
+    if filename == "":
+        parser.error("--filename must not be empty")
+
+    with open(filename, "w") as f:
+        f.write("MATCH (n) DETACH DELETE n;\n")
+
+        # Create the indexes
+        f.write("CREATE INDEX ON :File;\n")
+        f.write("CREATE INDEX ON :Permission;\n")
+        f.write("CREATE INDEX ON :Identity;\n")
+        f.write("CREATE INDEX ON :File(platformId);\n")
+        f.write("CREATE INDEX ON :File(name);\n")
+        f.write("CREATE INDEX ON :Permission(name);\n")
+        f.write("CREATE INDEX ON :Identity(email);\n")
+
+        # Create extra index: in distributed, this will be the schema
+        f.write("CREATE INDEX ON :File(uuid);\n")
+        f.write("CREATE INDEX ON :Permission(uuid);\n")
+        f.write("CREATE INDEX ON :Identity(uuid);\n")
+
+        uuid = 1
+
+        # Create the nodes File
+        f.write("UNWIND [")
+        for index in range(0, number_of_files):
+            if index != 0:
+                f.write(",")
+            f.write(f'\n  {{uuid: {uuid}, platformId: "platform_id", name: "name_file_{uuid}"}}')
+            uuid += 1
+        f.write("\n] AS props CREATE (:File {uuid: props.uuid, platformId: props.platformId, name: props.name});\n")
+
+        # Create the nodes Identity. The address is stored as `email` (not `name`) so the nodes match
+        # the documented data model and are covered by the :Identity(email) index created above.
+        f.write("UNWIND [")
+        for index in range(0, number_of_identities):
+            if index != 0:
+                f.write(",")
+            f.write(f'\n  {{uuid: {uuid}, email: "mail_{uuid}@something.com"}}')
+            uuid += 1
+        f.write("\n] AS props CREATE (:Identity {uuid: props.uuid, email: props.email});\n")
+
+        # Create the nodes Permission, each one linked to the File and the Identity it was drawn for
+        f.write("UNWIND [")
+        wrote_anything = False
+        for outer_index in range(0, number_of_files):
+            file_uuid = outer_index + 1
+            for inner_index in range(0, number_of_identities):
+                identity_uuid = number_of_files + inner_index + 1
+                if random.random() <= percentage_of_permissions:
+                    if wrote_anything:
+                        f.write(",")
+                    f.write(
+                        f'\n  {{permUuid: {uuid}, permName: "name_permission_{uuid}", fileUuid: {file_uuid}, identityUuid: {identity_uuid}}}'
+                    )
+                    wrote_anything = True
+                    uuid += 1
+        f.write(
+            """
+\n] AS props
+MATCH (file:File {uuid:props.fileUuid}), (identity:Identity {uuid: props.identityUuid})
+CREATE (permission:Permission {uuid: props.permUuid, name: props.permName})
+CREATE (permission)-[: IS_FOR_FILE]->(file)
+CREATE (permission)-[: IS_FOR_IDENTITY]->(identity);
+"""
+        )
+
+
+if __name__ == "__main__":
+    main()  # Generate the dataset only when run as a script, not when imported
diff --git a/tests/mgbench/splitfiles/accesscontrol_large.shard_configuration b/tests/mgbench/splitfiles/accesscontrol_large.shard_configuration
index 34dca66be..d2138ec93 100644
--- a/tests/mgbench/splitfiles/accesscontrol_large.shard_configuration
+++ b/tests/mgbench/splitfiles/accesscontrol_large.shard_configuration
@@ -1,8 +1,12 @@
-4
+8
 uuid
 email
 name
 platformId
+permUuid
+permName
+fileUuid
+identityUuid
 2
 IS_FOR_IDENTITY
 IS_FOR_FILE
diff --git a/tests/mgbench/splitfiles/accesscontrol_medium.shard_configuration b/tests/mgbench/splitfiles/accesscontrol_medium.shard_configuration
index a807e783f..f05ee8993 100644
--- a/tests/mgbench/splitfiles/accesscontrol_medium.shard_configuration
+++ b/tests/mgbench/splitfiles/accesscontrol_medium.shard_configuration
@@ -1,8 +1,12 @@
-4
+8
 uuid
 email
 name
 platformId
+permUuid
+permName
+fileUuid
+identityUuid
 2
 IS_FOR_IDENTITY
 IS_FOR_FILE
diff --git a/tests/mgbench/splitfiles/accesscontrol_small.shard_configuration b/tests/mgbench/splitfiles/accesscontrol_small.shard_configuration
index 9c11b6258..2cce1ccef 100644
--- a/tests/mgbench/splitfiles/accesscontrol_small.shard_configuration
+++ b/tests/mgbench/splitfiles/accesscontrol_small.shard_configuration
@@ -1,8 +1,12 @@
-4
+8
 uuid
 email
 name
 platformId
+permUuid
+permName
+fileUuid
+identityUuid
 2
 IS_FOR_IDENTITY
 IS_FOR_FILE