Rename User->File

Use an argument parser (argparse) instead of hard-coded variables in the script
This commit is contained in:
jeremy 2022-10-18 15:47:13 +02:00
parent f063c1b1ad
commit 58243f4a26
2 changed files with 88 additions and 66 deletions

View File

@ -10,8 +10,8 @@
# licenses/APL.txt. # licenses/APL.txt.
import random import random
import helpers import helpers
import argparse
# Explanation of datasets: # Explanation of datasets:
# - empty_only_index: contains index; contains no data # - empty_only_index: contains index; contains no data
@ -22,73 +22,95 @@ import helpers
# ┌──────────────┐ # ┌──────────────┐
# │ Permission │ # │ Permission │
# ┌────────────────┐ │ Schema:uuid │ ┌────────────┐ # ┌────────────────┐ │ Schema:uuid │ ┌────────────┐
# │:IS_FOR_IDENTITY├────┤ Index:name ├───┤:IS_FOR_USER # │:IS_FOR_IDENTITY├────┤ Index:name ├───┤:IS_FOR_FILE
# └┬───────────────┘ └──────────────┘ └────────────┤ # └┬───────────────┘ └──────────────┘ └────────────┤
# │ │ # │ │
# ┌──────▼──────────────┐ ┌──▼─────────── # ┌──────▼──────────────┐ ┌──▼────────────────
# │ Identity │ │ User # │ Identity │ │ File
# │ Schema:uuid │ │ Schema:uuid # │ Schema:uuid │ │ Schema:uuid
# │ Index:platformId │ │ Index:email # │ Index:email │ │ Index:name
# │ Index:name │ └──────────────┘ # └─────────────────────┘ │ Index:platformId │
# ─────────────────────┘ # └───────────────────┘
# #
# # - File: attributes: ["uuid", "name", "platformId"]
# - User: attributes: ["uuid", "name", "platformId"]
# - Permission: attributes: ["uuid", "name"] # - Permission: attributes: ["uuid", "name"]
# - Identity: attributes: ["uuid", "email"] # - Identity: attributes: ["uuid", "email"]
# #
# Indexes: # Indexes:
# - User: [User(uuid), User(platformId), User(name)] # - File: [File(uuid), File(platformId), File(name)]
# - Permission: [Permission(uuid), Permission(name)] # - Permission: [Permission(uuid), Permission(name)]
# - Identity: [Identity(uuid), Identity(email)] # - Identity: [Identity(uuid), Identity(email)]
# #
# Edges: # Edges:
# - (:Permission)-[:IS_FOR_USER]->(:User) # - (:Permission)-[:IS_FOR_FILE]->(:File)
# - (:Permission)-[:IS_FOR_IDENTITY]->(:Identity) # - (:Permission)-[:IS_FOR_IDENTITY]->(:Identity)
# #
# Distributed specific: uuid is the schema # AccessControl specific: uuid is the schema
filename = "dataset.cypher" if __name__ == "__main__":
f = open(filename, "x") parser = argparse.ArgumentParser()
parser.add_argument("--number_of_identities", type=int, default=10)
parser.add_argument("--number_of_files", type=int, default=10)
parser.add_argument("--percentage_of_permissions", type=float, default=1.0)
parser.add_argument("--filename", default="dataset.cypher")
f.write("MATCH (n) DETACH DELETE n;\n") args = parser.parse_args()
# Create the indexes number_of_identities = args.number_of_identities
f.write("CREATE INDEX ON :User;\n") number_of_files = args.number_of_files
f.write("CREATE INDEX ON :Permission;\n") percentage_of_permissions = args.percentage_of_permissions
f.write("CREATE INDEX ON :Identity;\n") filename = args.filename
f.write("CREATE INDEX ON :User(platformId);\n")
f.write("CREATE INDEX ON :User(name);\n")
f.write("CREATE INDEX ON :Permission(name);\n")
f.write("CREATE INDEX ON :Identity(email);\n")
# Create extra index: in distributed, this will be the schema assert number_of_identities > 0
f.write("CREATE INDEX ON :User(uuid);\n") assert number_of_files > 0
f.write("CREATE INDEX ON :Permission(uuid);\n") assert percentage_of_permissions > 0.0 and percentage_of_permissions <= 1.0
f.write("CREATE INDEX ON :Identity(uuid);\n") assert filename != ""
platform_ids = [f"somePlatformId_{id}" for id in range(10)] f = open(filename, "w")
# This is the number of clusters to change if you want a bigger dataset f.write("MATCH (n) DETACH DELETE n;\n")
number_of_clusters = 3000000
for index in range(1, number_of_clusters + 1): # Create the indexes
platform_id = platform_ids[random.randint(0, len(platform_ids) - 1)] f.write("CREATE INDEX ON :File;\n")
user_uuid = index f.write("CREATE INDEX ON :Permission;\n")
platform_uuid = number_of_clusters + index f.write("CREATE INDEX ON :Identity;\n")
identity_uuid = 2 * number_of_clusters + index f.write("CREATE INDEX ON :File(platformId);\n")
f.write("CREATE INDEX ON :File(name);\n")
f.write("CREATE INDEX ON :Permission(name);\n")
f.write("CREATE INDEX ON :Identity(email);\n")
# Create the nodes # Create extra index: in distributed, this will be the schema
f.write(f'CREATE (:User {{uuid: {user_uuid}, platformId: "{platform_id}", name: "name_user_{user_uuid}"}});\n') f.write("CREATE INDEX ON :File(uuid);\n")
f.write(f'CREATE (:Permission {{uuid: {platform_uuid}, name: "name_permission_{platform_uuid}"}});\n') f.write("CREATE INDEX ON :Permission(uuid);\n")
f.write(f'CREATE (:Permission {{uuid: {identity_uuid}, name: "mail_{identity_uuid}@something.com"}});\n') f.write("CREATE INDEX ON :Identity(uuid);\n")
# Create the edges uuid = 1
f.write(
f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (user:User {{uuid: {user_uuid}}}) CREATE (permission)-[e: IS_FOR_USER]->(user);\n"
)
f.write(
f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (identity:Identity {{uuid: {identity_uuid}}}) CREATE (permission)-[e: IS_FOR_IDENTITY]->(identity);\n"
)
f.close() files = []
# Create the nodes File
for index in range(0, number_of_files):
f.write(f'CREATE (:File {{uuid: {uuid}, platformId: platform_id, name: "name_file_{uuid}"}});\n')
uuid += 1
identities = []
# Create the nodes Identity
for index in range(0, number_of_identities):
f.write(f'CREATE (:Identity {{uuid: {uuid}, name: "mail_{uuid}@something.com"}});\n')
uuid += 1
for outer_index in range(0, number_of_files):
for inner_index in range(0, number_of_identities):
file_uuid = outer_index
identity_uuid = number_of_files + inner_index
if random.random() <= percentage_of_permissions:
f.write(f'CREATE (:Permission {{uuid: {uuid}, name: "name_permission_{uuid}"}});\n')
f.write(
f"MATCH (permission:Permission {{uuid: {uuid}}}), (file:File {{uuid: {file_uuid}}}) CREATE (permission)-[e: IS_FOR_FILE]->(file);\n"
)
f.write(
f"MATCH (permission:Permission {{uuid: {uuid}}}), (identity:Identity {{uuid: {identity_uuid}}}) CREATE (permission)-[e: IS_FOR_IDENTITY]->(identity);\n"
)
uuid += 1
f.close()

View File

@ -282,7 +282,7 @@ class Pokec(Dataset):
return ("MATCH (n {id: $id}) RETURN n", {"id": self._get_random_vertex()}) return ("MATCH (n {id: $id}) RETURN n", {"id": self._get_random_vertex()})
class Distributed(Dataset): class AccessControl(Dataset):
# Explanation of datasets: # Explanation of datasets:
# - empty_only_index: contains index; contains no data # - empty_only_index: contains index; contains no data
@ -290,21 +290,21 @@ class Distributed(Dataset):
# #
# See dataset_creator.py to understand the datamodel and generate a dataset # See dataset_creator.py to understand the datamodel and generate a dataset
NAME = "distributed" NAME = "accesscontrol"
VARIANTS = ["empty_only_index", "small", "medium", "large"] VARIANTS = ["empty_only_index", "small", "medium", "large"]
DEFAULT_VARIANT = "empty_only_index" DEFAULT_VARIANT = "empty_only_index"
URLS = { URLS = {
"empty_only_index": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_empty_only_index.setup.cypher.gz", "empty_only_index": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_empty_only_index.setup.cypher.gz",
"small": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_small.setup.cypher.gz", "small": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_small.setup.cypher.gz",
"medium": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_medium.setup.cypher.gz", "medium": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_medium.setup.cypher.gz",
"large": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_large.setup.cypher.gz", "large": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_large.setup.cypher.gz",
} }
SIZES = { SIZES = {
"empty_only_index": { "empty_only_index": {
"vertices": 0, "vertices": 0,
"edges": -1, # not used "edges": -1, # not used
"uuid_ranges": { "uuid_ranges": {
"User": {"first_uuid": 0, "last_uuid": 0}, "File": {"first_uuid": 0, "last_uuid": 0},
"Permission": {"first_uuid": 0, "last_uuid": 0}, "Permission": {"first_uuid": 0, "last_uuid": 0},
"Identity": {"first_uuid": 0, "last_uuid": 0}, "Identity": {"first_uuid": 0, "last_uuid": 0},
}, },
@ -313,7 +313,7 @@ class Distributed(Dataset):
"vertices": 30, "vertices": 30,
"edges": -1, # not used "edges": -1, # not used
"uuid_ranges": { "uuid_ranges": {
"User": {"first_uuid": 1, "last_uuid": 10}, "File": {"first_uuid": 1, "last_uuid": 10},
"Permission": {"first_uuid": 11, "last_uuid": 20}, "Permission": {"first_uuid": 11, "last_uuid": 20},
"Identity": {"first_uuid": 21, "last_uuid": 30}, "Identity": {"first_uuid": 21, "last_uuid": 30},
}, },
@ -322,7 +322,7 @@ class Distributed(Dataset):
"vertices": 30000, "vertices": 30000,
"edges": -1, # not used "edges": -1, # not used
"uuid_ranges": { "uuid_ranges": {
"User": {"first_uuid": 1, "last_uuid": 10000}, "File": {"first_uuid": 1, "last_uuid": 10000},
"Permission": {"first_uuid": 10001, "last_uuid": 20000}, "Permission": {"first_uuid": 10001, "last_uuid": 20000},
"Identity": {"first_uuid": 10001, "last_uuid": 30000}, "Identity": {"first_uuid": 10001, "last_uuid": 30000},
}, },
@ -331,7 +331,7 @@ class Distributed(Dataset):
"vertices": 3000000, "vertices": 3000000,
"edges": -1, # not used "edges": -1, # not used
"uuid_ranges": { "uuid_ranges": {
"User": {"first_uuid": 1, "last_uuid": 1000000}, "File": {"first_uuid": 1, "last_uuid": 1000000},
"Permission": {"first_uuid": 100001, "last_uuid": 2000000}, "Permission": {"first_uuid": 100001, "last_uuid": 2000000},
"Identity": {"first_uuid": 1000001, "last_uuid": 3000000}, "Identity": {"first_uuid": 1000001, "last_uuid": 3000000},
}, },
@ -339,7 +339,7 @@ class Distributed(Dataset):
} }
def _get_random_uuid(self, type): def _get_random_uuid(self, type):
assert type in ["User", "Permission", "Identity"] assert type in ["File", "Permission", "Identity"]
first_uuid = Dataset.get_size(self)["uuid_ranges"][type]["first_uuid"] first_uuid = Dataset.get_size(self)["uuid_ranges"][type]["first_uuid"]
last_uuid = Dataset.get_size(self)["uuid_ranges"][type]["last_uuid"] last_uuid = Dataset.get_size(self)["uuid_ranges"][type]["last_uuid"]
@ -353,17 +353,17 @@ class Distributed(Dataset):
def benchmark__create__vertex(self): def benchmark__create__vertex(self):
self.next_value_idx += 1 self.next_value_idx += 1
query = (f"CREATE (:User {{uuid: {self.next_value_idx}}});", {}) query = (f"CREATE (:File {{uuid: {self.next_value_idx}}});", {})
return query return query
def benchmark__create__edges(self): def benchmark__create__edges(self):
permission_uuid = self._get_random_uuid("Permission") permission_uuid = self._get_random_uuid("Permission")
user_uuid = self._get_random_uuid("User") file_uuid = self._get_random_uuid("File")
query = ( query = (
"MATCH (permission:Permission {uuid: $permission_uuid}), (user:User {uuid: $user_uuid}) " "MATCH (permission:Permission {uuid: $permission_uuid}), (file:File {uuid: $file_uuid}) "
"CREATE (permission)-[:IS_FOR_USER]->(user)", "CREATE (permission)-[:IS_FOR_FILE]->(file)",
{"permission_uuid": permission_uuid, "user_uuid": user_uuid}, {"permission_uuid": permission_uuid, "file_uuid": file_uuid},
) )
return query return query
@ -375,10 +375,10 @@ class Distributed(Dataset):
def benchmark__match__match_on_labelled_vertices(self): def benchmark__match__match_on_labelled_vertices(self):
self.next_value_idx += 1 self.next_value_idx += 1
query = ("MATCH (n:User) RETURN *", {}) query = ("MATCH (n:File) RETURN *", {})
return query return query
def benchmark__match__match_all_verteices_with_edges(self): def benchmark__match__match_all_verteices_with_edges(self):
self.next_value_idx += 1 self.next_value_idx += 1
query = ("MATCH (permission:Permission)-[e:IS_FOR_USER]->(user:User) RETURN *", {}) query = ("MATCH (permission:Permission)-[e:IS_FOR_FILE]->(file:File) RETURN *", {})
return query return query