Rename User->File
Use an argument parser instead of simple hard-coded variables in the script
This commit is contained in:
parent
f063c1b1ad
commit
58243f4a26
@ -10,8 +10,8 @@
|
||||
# licenses/APL.txt.
|
||||
|
||||
import random
|
||||
|
||||
import helpers
|
||||
import argparse
|
||||
|
||||
# Explanation of datasets:
|
||||
# - empty_only_index: contains index; contains no data
|
||||
@ -22,73 +22,95 @@ import helpers
|
||||
# ┌──────────────┐
|
||||
# │ Permission │
|
||||
# ┌────────────────┐ │ Schema:uuid │ ┌────────────┐
|
||||
# │:IS_FOR_IDENTITY├────┤ Index:name ├───┤:IS_FOR_USER│
|
||||
# │:IS_FOR_IDENTITY├────┤ Index:name ├───┤:IS_FOR_FILE│
|
||||
# └┬───────────────┘ └──────────────┘ └────────────┤
|
||||
# │ │
|
||||
# ┌──────▼──────────────┐ ┌──▼───────────┐
|
||||
# │ Identity │ │ User │
|
||||
# │ Schema:uuid │ │ Schema:uuid │
|
||||
# │ Index:platformId │ │ Index:email │
|
||||
# │ Index:name │ └──────────────┘
|
||||
# └─────────────────────┘
|
||||
# ┌──────▼──────────────┐ ┌──▼────────────────┐
|
||||
# │ Identity │ │ File │
|
||||
# │ Schema:uuid │ │ Schema:uuid │
|
||||
# │ Index:email │ │ Index:name │
|
||||
# └─────────────────────┘ │ Index:platformId │
|
||||
# └───────────────────┘
|
||||
#
|
||||
#
|
||||
# - User: attributes: ["uuid", "name", "platformId"]
|
||||
# - File: attributes: ["uuid", "name", "platformId"]
|
||||
# - Permission: attributes: ["uuid", "name"]
|
||||
# - Identity: attributes: ["uuid", "email"]
|
||||
#
|
||||
# Indexes:
|
||||
# - User: [User(uuid), User(platformId), User(name)]
|
||||
# - File: [File(uuid), File(platformId), File(name)]
|
||||
# - Permission: [Permission(uuid), Permission(name)]
|
||||
# - Identity: [Identity(uuid), Identity(email)]
|
||||
#
|
||||
# Edges:
|
||||
# - (:Permission)-[:IS_FOR_USER]->(:User)
|
||||
# - (:Permission)-[:IS_FOR_FILE]->(:File)
|
||||
# - (:Permission)-[:IS_FOR_IDENTITY]->(:Identity)
|
||||
#
|
||||
# Distributed specific: uuid is the schema
|
||||
# AccessControl specific: uuid is the schema
|
||||
|
||||
filename = "dataset.cypher"
|
||||
f = open(filename, "x")
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--number_of_identities", type=int, default=10)
|
||||
parser.add_argument("--number_of_files", type=int, default=10)
|
||||
parser.add_argument("--percentage_of_permissions", type=float, default=1.0)
|
||||
parser.add_argument("--filename", default="dataset.cypher")
|
||||
|
||||
f.write("MATCH (n) DETACH DELETE n;\n")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Create the indexes
|
||||
f.write("CREATE INDEX ON :User;\n")
|
||||
f.write("CREATE INDEX ON :Permission;\n")
|
||||
f.write("CREATE INDEX ON :Identity;\n")
|
||||
f.write("CREATE INDEX ON :User(platformId);\n")
|
||||
f.write("CREATE INDEX ON :User(name);\n")
|
||||
f.write("CREATE INDEX ON :Permission(name);\n")
|
||||
f.write("CREATE INDEX ON :Identity(email);\n")
|
||||
number_of_identities = args.number_of_identities
|
||||
number_of_files = args.number_of_files
|
||||
percentage_of_permissions = args.percentage_of_permissions
|
||||
filename = args.filename
|
||||
|
||||
# Create extra index: in distributed, this will be the schema
|
||||
f.write("CREATE INDEX ON :User(uuid);\n")
|
||||
f.write("CREATE INDEX ON :Permission(uuid);\n")
|
||||
f.write("CREATE INDEX ON :Identity(uuid);\n")
|
||||
assert number_of_identities > 0
|
||||
assert number_of_files > 0
|
||||
assert percentage_of_permissions > 0.0 and percentage_of_permissions <= 1.0
|
||||
assert filename != ""
|
||||
|
||||
platform_ids = [f"somePlatformId_{id}" for id in range(10)]
|
||||
f = open(filename, "w")
|
||||
|
||||
# This is the number of clusters to change if you want a bigger dataset
|
||||
number_of_clusters = 3000000
|
||||
f.write("MATCH (n) DETACH DELETE n;\n")
|
||||
|
||||
for index in range(1, number_of_clusters + 1):
|
||||
platform_id = platform_ids[random.randint(0, len(platform_ids) - 1)]
|
||||
user_uuid = index
|
||||
platform_uuid = number_of_clusters + index
|
||||
identity_uuid = 2 * number_of_clusters + index
|
||||
# Create the indexes
|
||||
f.write("CREATE INDEX ON :File;\n")
|
||||
f.write("CREATE INDEX ON :Permission;\n")
|
||||
f.write("CREATE INDEX ON :Identity;\n")
|
||||
f.write("CREATE INDEX ON :File(platformId);\n")
|
||||
f.write("CREATE INDEX ON :File(name);\n")
|
||||
f.write("CREATE INDEX ON :Permission(name);\n")
|
||||
f.write("CREATE INDEX ON :Identity(email);\n")
|
||||
|
||||
# Create the nodes
|
||||
f.write(f'CREATE (:User {{uuid: {user_uuid}, platformId: "{platform_id}", name: "name_user_{user_uuid}"}});\n')
|
||||
f.write(f'CREATE (:Permission {{uuid: {platform_uuid}, name: "name_permission_{platform_uuid}"}});\n')
|
||||
f.write(f'CREATE (:Permission {{uuid: {identity_uuid}, name: "mail_{identity_uuid}@something.com"}});\n')
|
||||
# Create extra index: in distributed, this will be the schema
|
||||
f.write("CREATE INDEX ON :File(uuid);\n")
|
||||
f.write("CREATE INDEX ON :Permission(uuid);\n")
|
||||
f.write("CREATE INDEX ON :Identity(uuid);\n")
|
||||
|
||||
# Create the edges
|
||||
f.write(
|
||||
f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (user:User {{uuid: {user_uuid}}}) CREATE (permission)-[e: IS_FOR_USER]->(user);\n"
|
||||
)
|
||||
f.write(
|
||||
f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (identity:Identity {{uuid: {identity_uuid}}}) CREATE (permission)-[e: IS_FOR_IDENTITY]->(identity);\n"
|
||||
)
|
||||
uuid = 1
|
||||
|
||||
f.close()
|
||||
files = []
|
||||
# Create the nodes File
|
||||
for index in range(0, number_of_files):
|
||||
f.write(f'CREATE (:File {{uuid: {uuid}, platformId: platform_id, name: "name_file_{uuid}"}});\n')
|
||||
uuid += 1
|
||||
|
||||
identities = []
|
||||
# Create the nodes Identity
|
||||
for index in range(0, number_of_identities):
|
||||
f.write(f'CREATE (:Identity {{uuid: {uuid}, name: "mail_{uuid}@something.com"}});\n')
|
||||
uuid += 1
|
||||
|
||||
for outer_index in range(0, number_of_files):
|
||||
for inner_index in range(0, number_of_identities):
|
||||
file_uuid = outer_index
|
||||
identity_uuid = number_of_files + inner_index
|
||||
|
||||
if random.random() <= percentage_of_permissions:
|
||||
f.write(f'CREATE (:Permission {{uuid: {uuid}, name: "name_permission_{uuid}"}});\n')
|
||||
f.write(
|
||||
f"MATCH (permission:Permission {{uuid: {uuid}}}), (file:File {{uuid: {file_uuid}}}) CREATE (permission)-[e: IS_FOR_FILE]->(file);\n"
|
||||
)
|
||||
f.write(
|
||||
f"MATCH (permission:Permission {{uuid: {uuid}}}), (identity:Identity {{uuid: {identity_uuid}}}) CREATE (permission)-[e: IS_FOR_IDENTITY]->(identity);\n"
|
||||
)
|
||||
uuid += 1
|
||||
|
||||
f.close()
|
||||
|
@ -282,7 +282,7 @@ class Pokec(Dataset):
|
||||
return ("MATCH (n {id: $id}) RETURN n", {"id": self._get_random_vertex()})
|
||||
|
||||
|
||||
class Distributed(Dataset):
|
||||
class AccessControl(Dataset):
|
||||
|
||||
# Explanation of datasets:
|
||||
# - empty_only_index: contains index; contains no data
|
||||
@ -290,21 +290,21 @@ class Distributed(Dataset):
|
||||
#
|
||||
# See dataset_creator.py to understand the datamodel and generate a dataset
|
||||
|
||||
NAME = "distributed"
|
||||
NAME = "accesscontrol"
|
||||
VARIANTS = ["empty_only_index", "small", "medium", "large"]
|
||||
DEFAULT_VARIANT = "empty_only_index"
|
||||
URLS = {
|
||||
"empty_only_index": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_empty_only_index.setup.cypher.gz",
|
||||
"small": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_small.setup.cypher.gz",
|
||||
"medium": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_medium.setup.cypher.gz",
|
||||
"large": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_large.setup.cypher.gz",
|
||||
"empty_only_index": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_empty_only_index.setup.cypher.gz",
|
||||
"small": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_small.setup.cypher.gz",
|
||||
"medium": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_medium.setup.cypher.gz",
|
||||
"large": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/accesscontrol_large.setup.cypher.gz",
|
||||
}
|
||||
SIZES = {
|
||||
"empty_only_index": {
|
||||
"vertices": 0,
|
||||
"edges": -1, # not used
|
||||
"uuid_ranges": {
|
||||
"User": {"first_uuid": 0, "last_uuid": 0},
|
||||
"File": {"first_uuid": 0, "last_uuid": 0},
|
||||
"Permission": {"first_uuid": 0, "last_uuid": 0},
|
||||
"Identity": {"first_uuid": 0, "last_uuid": 0},
|
||||
},
|
||||
@ -313,7 +313,7 @@ class Distributed(Dataset):
|
||||
"vertices": 30,
|
||||
"edges": -1, # not used
|
||||
"uuid_ranges": {
|
||||
"User": {"first_uuid": 1, "last_uuid": 10},
|
||||
"File": {"first_uuid": 1, "last_uuid": 10},
|
||||
"Permission": {"first_uuid": 11, "last_uuid": 20},
|
||||
"Identity": {"first_uuid": 21, "last_uuid": 30},
|
||||
},
|
||||
@ -322,7 +322,7 @@ class Distributed(Dataset):
|
||||
"vertices": 30000,
|
||||
"edges": -1, # not used
|
||||
"uuid_ranges": {
|
||||
"User": {"first_uuid": 1, "last_uuid": 10000},
|
||||
"File": {"first_uuid": 1, "last_uuid": 10000},
|
||||
"Permission": {"first_uuid": 10001, "last_uuid": 20000},
|
||||
"Identity": {"first_uuid": 10001, "last_uuid": 30000},
|
||||
},
|
||||
@ -331,7 +331,7 @@ class Distributed(Dataset):
|
||||
"vertices": 3000000,
|
||||
"edges": -1, # not used
|
||||
"uuid_ranges": {
|
||||
"User": {"first_uuid": 1, "last_uuid": 1000000},
|
||||
"File": {"first_uuid": 1, "last_uuid": 1000000},
|
||||
"Permission": {"first_uuid": 100001, "last_uuid": 2000000},
|
||||
"Identity": {"first_uuid": 1000001, "last_uuid": 3000000},
|
||||
},
|
||||
@ -339,7 +339,7 @@ class Distributed(Dataset):
|
||||
}
|
||||
|
||||
def _get_random_uuid(self, type):
|
||||
assert type in ["User", "Permission", "Identity"]
|
||||
assert type in ["File", "Permission", "Identity"]
|
||||
|
||||
first_uuid = Dataset.get_size(self)["uuid_ranges"][type]["first_uuid"]
|
||||
last_uuid = Dataset.get_size(self)["uuid_ranges"][type]["last_uuid"]
|
||||
@ -353,17 +353,17 @@ class Distributed(Dataset):
|
||||
|
||||
def benchmark__create__vertex(self):
|
||||
self.next_value_idx += 1
|
||||
query = (f"CREATE (:User {{uuid: {self.next_value_idx}}});", {})
|
||||
query = (f"CREATE (:File {{uuid: {self.next_value_idx}}});", {})
|
||||
return query
|
||||
|
||||
def benchmark__create__edges(self):
|
||||
permission_uuid = self._get_random_uuid("Permission")
|
||||
user_uuid = self._get_random_uuid("User")
|
||||
file_uuid = self._get_random_uuid("File")
|
||||
|
||||
query = (
|
||||
"MATCH (permission:Permission {uuid: $permission_uuid}), (user:User {uuid: $user_uuid}) "
|
||||
"CREATE (permission)-[:IS_FOR_USER]->(user)",
|
||||
{"permission_uuid": permission_uuid, "user_uuid": user_uuid},
|
||||
"MATCH (permission:Permission {uuid: $permission_uuid}), (file:File {uuid: $file_uuid}) "
|
||||
"CREATE (permission)-[:IS_FOR_FILE]->(file)",
|
||||
{"permission_uuid": permission_uuid, "file_uuid": file_uuid},
|
||||
)
|
||||
|
||||
return query
|
||||
@ -375,10 +375,10 @@ class Distributed(Dataset):
|
||||
|
||||
def benchmark__match__match_on_labelled_vertices(self):
|
||||
self.next_value_idx += 1
|
||||
query = ("MATCH (n:User) RETURN *", {})
|
||||
query = ("MATCH (n:File) RETURN *", {})
|
||||
return query
|
||||
|
||||
def benchmark__match__match_all_verteices_with_edges(self):
|
||||
self.next_value_idx += 1
|
||||
query = ("MATCH (permission:Permission)-[e:IS_FOR_USER]->(user:User) RETURN *", {})
|
||||
query = ("MATCH (permission:Permission)-[e:IS_FOR_FILE]->(file:File) RETURN *", {})
|
||||
return query
|
||||
|
Loading…
Reference in New Issue
Block a user