Add new dataset for mgbench
This commit is contained in:
parent
4cb3b064c4
commit
a5dc818e19
94
tests/mgbench/dataset_creator.py
Normal file
94
tests/mgbench/dataset_creator.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# Copyright 2021 Memgraph Ltd.
|
||||||
|
#
|
||||||
|
# Use of this software is governed by the Business Source License
|
||||||
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
|
# License, and you may not use this file except in compliance with the Business Source License.
|
||||||
|
#
|
||||||
|
# As of the Change Date specified in that file, in accordance with
|
||||||
|
# the Business Source License, use of this software will be governed
|
||||||
|
# by the Apache License, Version 2.0, included in the file
|
||||||
|
# licenses/APL.txt.
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
import helpers
|
||||||
|
|
||||||
|
# Explanation of datasets:
|
||||||
|
# - empty_only_index: contains index; contains no data
|
||||||
|
# - small: contains index; contains data (small dataset)
|
||||||
|
#
|
||||||
|
# The data model is as follows:
|
||||||
|
#
|
||||||
|
# ┌──────────────┐
|
||||||
|
# │ Permission │
|
||||||
|
# ┌────────────────┐ │ Schema:uuid │ ┌────────────┐
|
||||||
|
# │:IS_FOR_IDENTITY├────┤ Index:name ├───┤:IS_FOR_USER│
|
||||||
|
# └┬───────────────┘ └──────────────┘ └────────────┤
|
||||||
|
# │ │
|
||||||
|
# ┌──────▼──────────────┐ ┌──▼───────────┐
|
||||||
|
# │ Identity │ │ User │
|
||||||
|
# │ Schema:uuid │ │ Schema:uuid │
|
||||||
|
# │ Index:platformId │ │ Index:email │
|
||||||
|
# │ Index:name │ └──────────────┘
|
||||||
|
# └─────────────────────┘
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# - User: attributes: ["uuid", "name", "platformId"]
|
||||||
|
# - Permission: attributes: ["uuid", "name"]
|
||||||
|
# - Identity: attributes: ["uuid", "email"]
|
||||||
|
#
|
||||||
|
# Indexes:
|
||||||
|
# - User: [User(uuid), User(platformId), User(name)]
|
||||||
|
# - Permission: [Permission(uuid), Permission(name)]
|
||||||
|
# - Identity: [Identity(uuid), Identity(email)]
|
||||||
|
#
|
||||||
|
# Edges:
|
||||||
|
# - (:Permission)-[:IS_FOR_USER]->(:User)
|
||||||
|
# - (:Permission)-[:IS_FOR_IDENTITY]->(:Identity)
|
||||||
|
#
|
||||||
|
# Distributed specific: uuid is the schema
|
||||||
|
|
||||||
|
# Output file for the generated Cypher statements.
filename = "dataset.cypher"

# Pool of platform ids; every generated User gets one picked at random.
platform_ids = [f"somePlatformId_{platform_idx}" for platform_idx in range(10)]

# This is the number of clusters to change if you want a bigger dataset.
# One cluster is one User, one Permission and one Identity plus the two edges
# connecting the Permission to the other two nodes.
number_of_clusters = 3000000

# Mode "x" makes open() raise FileExistsError instead of overwriting a dataset
# left over from a previous run. The context manager guarantees the file is
# flushed and closed even if a write fails half-way through.
with open(filename, "x") as f:
    # Start from an empty database.
    f.write("MATCH (n) DETACH DELETE n;\n")

    # Create the indexes
    f.write("CREATE INDEX ON :User;\n")
    f.write("CREATE INDEX ON :Permission;\n")
    f.write("CREATE INDEX ON :Identity;\n")
    f.write("CREATE INDEX ON :User(platformId);\n")
    f.write("CREATE INDEX ON :User(name);\n")
    f.write("CREATE INDEX ON :Permission(name);\n")
    f.write("CREATE INDEX ON :Identity(email);\n")

    # Create extra index: in distributed, this will be the schema
    f.write("CREATE INDEX ON :User(uuid);\n")
    f.write("CREATE INDEX ON :Permission(uuid);\n")
    f.write("CREATE INDEX ON :Identity(uuid);\n")

    for index in range(1, number_of_clusters + 1):
        platform_id = random.choice(platform_ids)

        # uuid layout, with N = number_of_clusters:
        #   User:       1 .. N
        #   Permission: N + 1 .. 2N   (held in platform_uuid)
        #   Identity:   2N + 1 .. 3N
        user_uuid = index
        platform_uuid = number_of_clusters + index
        identity_uuid = 2 * number_of_clusters + index

        # Create the nodes
        f.write(f'CREATE (:User {{uuid: {user_uuid}, platformId: "{platform_id}", name: "name_user_{user_uuid}"}});\n')
        f.write(f'CREATE (:Permission {{uuid: {platform_uuid}, name: "name_permission_{platform_uuid}"}});\n')
        # The third node must be an :Identity carrying an `email` property:
        # the IS_FOR_IDENTITY edge below matches on (:Identity {uuid: ...}) and
        # the index created above is on :Identity(email).
        f.write(f'CREATE (:Identity {{uuid: {identity_uuid}, email: "mail_{identity_uuid}@something.com"}});\n')

        # Create the edges
        f.write(
            f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (user:User {{uuid: {user_uuid}}}) CREATE (permission)-[e: IS_FOR_USER]->(user);\n"
        )
        f.write(
            f"MATCH (permission:Permission {{uuid: {platform_uuid}}}), (identity:Identity {{uuid: {identity_uuid}}}) CREATE (permission)-[e: IS_FOR_IDENTITY]->(identity);\n"
        )
|
@ -45,13 +45,10 @@ class Dataset:
|
|||||||
variant = self.DEFAULT_VARIANT
|
variant = self.DEFAULT_VARIANT
|
||||||
if variant not in self.VARIANTS:
|
if variant not in self.VARIANTS:
|
||||||
raise ValueError("Invalid test variant!")
|
raise ValueError("Invalid test variant!")
|
||||||
if (self.FILES and variant not in self.FILES) and \
|
if (self.FILES and variant not in self.FILES) and (self.URLS and variant not in self.URLS):
|
||||||
(self.URLS and variant not in self.URLS):
|
raise ValueError("The variant doesn't have a defined URL or " "file path!")
|
||||||
raise ValueError("The variant doesn't have a defined URL or "
|
|
||||||
"file path!")
|
|
||||||
if variant not in self.SIZES:
|
if variant not in self.SIZES:
|
||||||
raise ValueError("The variant doesn't have a defined dataset "
|
raise ValueError("The variant doesn't have a defined dataset " "size!")
|
||||||
"size!")
|
|
||||||
self._variant = variant
|
self._variant = variant
|
||||||
if self.FILES is not None:
|
if self.FILES is not None:
|
||||||
self._file = self.FILES.get(variant, None)
|
self._file = self.FILES.get(variant, None)
|
||||||
@ -63,8 +60,7 @@ class Dataset:
|
|||||||
self._url = None
|
self._url = None
|
||||||
self._size = self.SIZES[variant]
|
self._size = self.SIZES[variant]
|
||||||
if "vertices" not in self._size or "edges" not in self._size:
|
if "vertices" not in self._size or "edges" not in self._size:
|
||||||
raise ValueError("The size defined for this variant doesn't "
|
raise ValueError("The size defined for this variant doesn't " "have the number of vertices and/or edges!")
|
||||||
"have the number of vertices and/or edges!")
|
|
||||||
self._num_vertices = self._size["vertices"]
|
self._num_vertices = self._size["vertices"]
|
||||||
self._num_edges = self._size["edges"]
|
self._num_edges = self._size["edges"]
|
||||||
|
|
||||||
@ -76,8 +72,7 @@ class Dataset:
|
|||||||
cached_input, exists = directory.get_file("dataset.cypher")
|
cached_input, exists = directory.get_file("dataset.cypher")
|
||||||
if not exists:
|
if not exists:
|
||||||
print("Downloading dataset file:", self._url)
|
print("Downloading dataset file:", self._url)
|
||||||
downloaded_file = helpers.download_file(
|
downloaded_file = helpers.download_file(self._url, directory.get_path())
|
||||||
self._url, directory.get_path())
|
|
||||||
print("Unpacking and caching file:", downloaded_file)
|
print("Unpacking and caching file:", downloaded_file)
|
||||||
helpers.unpack_and_move_file(downloaded_file, cached_input)
|
helpers.unpack_and_move_file(downloaded_file, cached_input)
|
||||||
print("Using cached dataset file:", cached_input)
|
print("Using cached dataset file:", cached_input)
|
||||||
@ -137,18 +132,17 @@ class Pokec(Dataset):
|
|||||||
# Arango benchmarks
|
# Arango benchmarks
|
||||||
|
|
||||||
def benchmark__arango__single_vertex_read(self):
|
def benchmark__arango__single_vertex_read(self):
|
||||||
return ("MATCH (n:User {id : $id}) RETURN n",
|
return ("MATCH (n:User {id : $id}) RETURN n", {"id": self._get_random_vertex()})
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__arango__single_vertex_write(self):
|
def benchmark__arango__single_vertex_write(self):
|
||||||
return ("CREATE (n:UserTemp {id : $id}) RETURN n",
|
return ("CREATE (n:UserTemp {id : $id}) RETURN n", {"id": random.randint(1, self._num_vertices * 10)})
|
||||||
{"id": random.randint(1, self._num_vertices * 10)})
|
|
||||||
|
|
||||||
def benchmark__arango__single_edge_write(self):
|
def benchmark__arango__single_edge_write(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return ("MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
return (
|
||||||
"CREATE (n)-[e:Temp]->(m) RETURN e",
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "CREATE (n)-[e:Temp]->(m) RETURN e",
|
||||||
{"from": vertex_from, "to": vertex_to})
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__arango__aggregate(self):
|
def benchmark__arango__aggregate(self):
|
||||||
return ("MATCH (n:User) RETURN n.age, COUNT(*)", {})
|
return ("MATCH (n:User) RETURN n.age, COUNT(*)", {})
|
||||||
@ -157,92 +151,94 @@ class Pokec(Dataset):
|
|||||||
return ("MATCH (n:User) WHERE n.age >= 18 RETURN n.age, COUNT(*)", {})
|
return ("MATCH (n:User) WHERE n.age >= 18 RETURN n.age, COUNT(*)", {})
|
||||||
|
|
||||||
def benchmark__arango__expansion_1(self):
|
def benchmark__arango__expansion_1(self):
|
||||||
return ("MATCH (s:User {id: $id})-->(n:User) "
|
return ("MATCH (s:User {id: $id})-->(n:User) " "RETURN n.id", {"id": self._get_random_vertex()})
|
||||||
"RETURN n.id",
|
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__arango__expansion_1_with_filter(self):
|
def benchmark__arango__expansion_1_with_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-->(n:User) " "WHERE n.age >= 18 " "RETURN n.id",
|
||||||
"RETURN n.id",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__expansion_2(self):
|
def benchmark__arango__expansion_2(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->(n:User) "
|
return ("MATCH (s:User {id: $id})-->()-->(n:User) " "RETURN DISTINCT n.id", {"id": self._get_random_vertex()})
|
||||||
"RETURN DISTINCT n.id",
|
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__arango__expansion_2_with_filter(self):
|
def benchmark__arango__expansion_2_with_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-->()-->(n:User) " "WHERE n.age >= 18 " "RETURN DISTINCT n.id",
|
||||||
"RETURN DISTINCT n.id",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__expansion_3(self):
|
def benchmark__arango__expansion_3(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->()-->(n:User) "
|
return (
|
||||||
"RETURN DISTINCT n.id",
|
"MATCH (s:User {id: $id})-->()-->()-->(n:User) " "RETURN DISTINCT n.id",
|
||||||
{"id": self._get_random_vertex()})
|
{"id": self._get_random_vertex()},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__arango__expansion_3_with_filter(self):
|
def benchmark__arango__expansion_3_with_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->()-->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-->()-->()-->(n:User) " "WHERE n.age >= 18 " "RETURN DISTINCT n.id",
|
||||||
"RETURN DISTINCT n.id",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__expansion_4(self):
|
def benchmark__arango__expansion_4(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->()-->()-->(n:User) "
|
return (
|
||||||
"RETURN DISTINCT n.id",
|
"MATCH (s:User {id: $id})-->()-->()-->()-->(n:User) " "RETURN DISTINCT n.id",
|
||||||
{"id": self._get_random_vertex()})
|
{"id": self._get_random_vertex()},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__arango__expansion_4_with_filter(self):
|
def benchmark__arango__expansion_4_with_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-->()-->()-->()-->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-->()-->()-->()-->(n:User) " "WHERE n.age >= 18 " "RETURN DISTINCT n.id",
|
||||||
"RETURN DISTINCT n.id",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__neighbours_2(self):
|
def benchmark__arango__neighbours_2(self):
|
||||||
return ("MATCH (s:User {id: $id})-[*1..2]->(n:User) "
|
return ("MATCH (s:User {id: $id})-[*1..2]->(n:User) " "RETURN DISTINCT n.id", {"id": self._get_random_vertex()})
|
||||||
"RETURN DISTINCT n.id",
|
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__arango__neighbours_2_with_filter(self):
|
def benchmark__arango__neighbours_2_with_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-[*1..2]->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-[*1..2]->(n:User) " "WHERE n.age >= 18 " "RETURN DISTINCT n.id",
|
||||||
"RETURN DISTINCT n.id",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__neighbours_2_with_data(self):
|
def benchmark__arango__neighbours_2_with_data(self):
|
||||||
return ("MATCH (s:User {id: $id})-[*1..2]->(n:User) "
|
return (
|
||||||
"RETURN DISTINCT n.id, n",
|
"MATCH (s:User {id: $id})-[*1..2]->(n:User) " "RETURN DISTINCT n.id, n",
|
||||||
{"id": self._get_random_vertex()})
|
{"id": self._get_random_vertex()},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__arango__neighbours_2_with_data_and_filter(self):
|
def benchmark__arango__neighbours_2_with_data_and_filter(self):
|
||||||
return ("MATCH (s:User {id: $id})-[*1..2]->(n:User) "
|
return (
|
||||||
"WHERE n.age >= 18 "
|
"MATCH (s:User {id: $id})-[*1..2]->(n:User) " "WHERE n.age >= 18 " "RETURN DISTINCT n.id, n",
|
||||||
"RETURN DISTINCT n.id, n",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__arango__shortest_path(self):
|
def benchmark__arango__shortest_path(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return ("MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
return (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*bfs..15]->(m) "
|
"MATCH p=(n)-[*bfs..15]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to})
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__arango__shortest_path_with_filter(self):
|
def benchmark__arango__shortest_path_with_filter(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return ("MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
return (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) "
|
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to})
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
|
||||||
# Our benchmark queries
|
# Our benchmark queries
|
||||||
|
|
||||||
def benchmark__create__edge(self):
|
def benchmark__create__edge(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return ("MATCH (a:User {id: $from}), (b:User {id: $to}) "
|
return (
|
||||||
"CREATE (a)-[:TempEdge]->(b)",
|
"MATCH (a:User {id: $from}), (b:User {id: $to}) " "CREATE (a)-[:TempEdge]->(b)",
|
||||||
{"from": vertex_from, "to": vertex_to})
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__create__pattern(self):
|
def benchmark__create__pattern(self):
|
||||||
return ("CREATE ()-[:TempEdge]->()", {})
|
return ("CREATE ()-[:TempEdge]->()", {})
|
||||||
@ -251,9 +247,12 @@ class Pokec(Dataset):
|
|||||||
return ("CREATE ()", {})
|
return ("CREATE ()", {})
|
||||||
|
|
||||||
def benchmark__create__vertex_big(self):
|
def benchmark__create__vertex_big(self):
|
||||||
return ("CREATE (:L1:L2:L3:L4:L5:L6:L7 {p1: true, p2: 42, "
|
return (
|
||||||
"p3: \"Here is some text that is not extremely short\", "
|
"CREATE (:L1:L2:L3:L4:L5:L6:L7 {p1: true, p2: 42, "
|
||||||
"p4:\"Short text\", p5: 234.434, p6: 11.11, p7: false})", {})
|
'p3: "Here is some text that is not extremely short", '
|
||||||
|
'p4:"Short text", p5: 234.434, p6: 11.11, p7: false})',
|
||||||
|
{},
|
||||||
|
)
|
||||||
|
|
||||||
def benchmark__aggregation__count(self):
|
def benchmark__aggregation__count(self):
|
||||||
return ("MATCH (n) RETURN count(n), count(n.age)", {})
|
return ("MATCH (n) RETURN count(n), count(n.age)", {})
|
||||||
@ -262,29 +261,124 @@ class Pokec(Dataset):
|
|||||||
return ("MATCH (n) RETURN min(n.age), max(n.age), avg(n.age)", {})
|
return ("MATCH (n) RETURN min(n.age), max(n.age), avg(n.age)", {})
|
||||||
|
|
||||||
def benchmark__match__pattern_cycle(self):
|
def benchmark__match__pattern_cycle(self):
|
||||||
return ("MATCH (n:User {id: $id})-[e1]->(m)-[e2]->(n) "
|
return ("MATCH (n:User {id: $id})-[e1]->(m)-[e2]->(n) " "RETURN e1, m, e2", {"id": self._get_random_vertex()})
|
||||||
"RETURN e1, m, e2",
|
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__match__pattern_long(self):
|
def benchmark__match__pattern_long(self):
|
||||||
return ("MATCH (n1:User {id: $id})-[e1]->(n2)-[e2]->"
|
return (
|
||||||
"(n3)-[e3]->(n4)<-[e4]-(n5) "
|
"MATCH (n1:User {id: $id})-[e1]->(n2)-[e2]->" "(n3)-[e3]->(n4)<-[e4]-(n5) " "RETURN n5 LIMIT 1",
|
||||||
"RETURN n5 LIMIT 1",
|
{"id": self._get_random_vertex()},
|
||||||
{"id": self._get_random_vertex()})
|
)
|
||||||
|
|
||||||
def benchmark__match__pattern_short(self):
|
def benchmark__match__pattern_short(self):
|
||||||
return ("MATCH (n:User {id: $id})-[e]->(m) "
|
return ("MATCH (n:User {id: $id})-[e]->(m) " "RETURN m LIMIT 1", {"id": self._get_random_vertex()})
|
||||||
"RETURN m LIMIT 1",
|
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__match__vertex_on_label_property(self):
|
def benchmark__match__vertex_on_label_property(self):
|
||||||
return ("MATCH (n:User) WITH n WHERE n.id = $id RETURN n",
|
return ("MATCH (n:User) WITH n WHERE n.id = $id RETURN n", {"id": self._get_random_vertex()})
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__match__vertex_on_label_property_index(self):
|
def benchmark__match__vertex_on_label_property_index(self):
|
||||||
return ("MATCH (n:User {id: $id}) RETURN n",
|
return ("MATCH (n:User {id: $id}) RETURN n", {"id": self._get_random_vertex()})
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
def benchmark__match__vertex_on_property(self):
|
def benchmark__match__vertex_on_property(self):
|
||||||
return ("MATCH (n {id: $id}) RETURN n",
|
return ("MATCH (n {id: $id}) RETURN n", {"id": self._get_random_vertex()})
|
||||||
{"id": self._get_random_vertex()})
|
|
||||||
|
|
||||||
|
class Distributed(Dataset):
    """Benchmark dataset built around the User / Permission / Identity model.

    Explanation of datasets:
     - empty_only_index: contains index; contains no data
     - small/medium/large: contains index; contains data (respectively
       small/medium/large dataset)

    See dataset_creator.py to understand the datamodel and generate a dataset.
    """

    NAME = "distributed"
    VARIANTS = ["empty_only_index", "small", "medium", "large"]
    DEFAULT_VARIANT = "empty_only_index"
    URLS = {
        "empty_only_index": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_empty_only_index.setup.cypher.gz",
        "small": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_small.setup.cypher.gz",
        "medium": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_medium.setup.cypher.gz",
        "large": "https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/distributed_large.setup.cypher.gz",
    }
    # The uuid ranges follow the layout used by dataset_creator.py for a
    # dataset with N clusters (vertices == 3 * N):
    #   User:       1 .. N
    #   Permission: N + 1 .. 2N
    #   Identity:   2N + 1 .. 3N
    # Both bounds are inclusive because they are fed to random.randint().
    SIZES = {
        "empty_only_index": {
            "vertices": 0,
            "edges": -1,  # not used
            "uuid_ranges": {
                "User": {"first_uuid": 0, "last_uuid": 0},
                "Permission": {"first_uuid": 0, "last_uuid": 0},
                "Identity": {"first_uuid": 0, "last_uuid": 0},
            },
        },
        "small": {
            "vertices": 30,
            "edges": -1,  # not used
            "uuid_ranges": {
                "User": {"first_uuid": 1, "last_uuid": 10},
                "Permission": {"first_uuid": 11, "last_uuid": 20},
                "Identity": {"first_uuid": 21, "last_uuid": 30},
            },
        },
        "medium": {
            "vertices": 30000,
            "edges": -1,  # not used
            "uuid_ranges": {
                "User": {"first_uuid": 1, "last_uuid": 10000},
                "Permission": {"first_uuid": 10001, "last_uuid": 20000},
                "Identity": {"first_uuid": 20001, "last_uuid": 30000},
            },
        },
        "large": {
            "vertices": 3000000,
            "edges": -1,  # not used
            "uuid_ranges": {
                "User": {"first_uuid": 1, "last_uuid": 1000000},
                "Permission": {"first_uuid": 1000001, "last_uuid": 2000000},
                "Identity": {"first_uuid": 2000001, "last_uuid": 3000000},
            },
        },
    }

    def _get_random_uuid(self, type):
        """Return a random uuid from the range configured for a node label.

        `type` must be one of "User", "Permission" or "Identity". The value is
        drawn uniformly from the inclusive [first_uuid, last_uuid] range of the
        selected variant.
        """
        assert type in ["User", "Permission", "Identity"]

        uuid_range = self.get_size()["uuid_ranges"][type]
        return random.randint(uuid_range["first_uuid"], uuid_range["last_uuid"])

    def __init__(self, variant=None):
        """Initialise the dataset and the counter used for fresh vertex uuids."""
        super().__init__(variant)
        # Start above the number of vertices in the dataset so that vertices
        # created by the benchmark get uuids the dataset does not already use.
        self.next_value_idx = self.get_size()["vertices"] + 1

    def benchmark__create__vertex(self):
        """Return a query creating one User with a previously unused uuid."""
        self.next_value_idx += 1
        query = (f"CREATE (:User {{uuid: {self.next_value_idx}}});", {})
        return query

    def benchmark__create__edges(self):
        """Return a query linking a random Permission to a random User."""
        permission_uuid = self._get_random_uuid("Permission")
        user_uuid = self._get_random_uuid("User")

        query = (
            "MATCH (permission:Permission {uuid: $permission_uuid}), (user:User {uuid: $user_uuid}) "
            "CREATE (permission)-[:IS_FOR_USER]->(user)",
            {"permission_uuid": permission_uuid, "user_uuid": user_uuid},
        )

        return query

    def benchmark__match__match_all_vertices(self):
        """Return a query reading every vertex."""
        query = ("MATCH (n) RETURN *", {})
        return query

    def benchmark__match__match_on_labelled_vertices(self):
        """Return a query reading every vertex labelled User."""
        query = ("MATCH (n:User) RETURN *", {})
        return query

    def benchmark__match__match_all_verteices_with_edges(self):
        """Return a query reading every Permission-[IS_FOR_USER]->User pattern."""
        query = ("MATCH (permission:Permission)-[e:IS_FOR_USER]->(user:User) RETURN *", {})
        return query
|
||||||
|
Loading…
Reference in New Issue
Block a user