709 lines
27 KiB
Python
709 lines
27 KiB
Python
import inspect
|
|
import random
|
|
from pathlib import Path
|
|
|
|
import helpers
|
|
from benchmark_context import BenchmarkContext
|
|
from workloads.base import Workload
|
|
from workloads.importers.importer_ldbc_bi import ImporterLDBCBI
|
|
|
|
|
|
class LDBC_BI(Workload):
    """Benchmark workload definition for the ``ldbc_bi`` dataset.

    The class-level tables below map each supported variant (scale factor)
    or vendor name to the remote resources the workload needs.
    """

    # Dataset identifier; also used as a directory name under .cache/datasets.
    NAME = "ldbc_bi"
    # Supported dataset variants and the one used by default.
    VARIANTS = ["sf1", "sf3", "sf10"]
    DEFAULT_VARIANT = "sf1"

    # Per-variant URL of the gzipped Cypher dump of the dataset.
    URL_FILE = {
        "sf1": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf1.cypher.gz",
        "sf3": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf3.cypher.gz",
        "sf10": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf10.cypher.gz",
    }

    # Per-variant URL of the CSV archive; handed to the importer as ``csv_dict``.
    URL_CSV = {
        "sf1": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf1-composite-projected-fk.tar.zst",
        "sf3": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf3-composite-projected-fk.tar.zst",
        "sf10": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf10-composite-projected-fk.tar.zst",
    }

    # Vertex/edge counts per variant.
    # NOTE(review): the sf3 and sf10 entries (1/1) look like placeholders — confirm the real sizes.
    SIZES = {
        "sf1": {"vertices": 2997352, "edges": 17196776},
        "sf3": {"vertices": 1, "edges": 1},
        "sf10": {"vertices": 1, "edges": 1},
    }

    # No local index files; index definitions are referenced by URL below.
    LOCAL_INDEX_FILES = None

    # Per-vendor URL of the Cypher file with index definitions.
    URL_INDEX_FILE = {
        "memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/memgraph_bi_index.cypher",
        "neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/neo4j_bi_index.cypher",
    }

    # Per-variant URL of the zipped query-parameter files (the same archive for every variant).
    QUERY_PARAMETERS = {
        "sf1": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
        "sf3": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
        "sf10": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
    }
|
|
|
|
def custom_import(self) -> bool:
    """Run the dataset import through the dedicated ``ImporterLDBCBI``.

    The importer receives the benchmark context, dataset name, selected
    variant, index file and the per-variant CSV URL table.

    Returns:
        Whatever ``ImporterLDBCBI.execute_import()`` returns.
    """
    return ImporterLDBCBI(
        benchmark_context=self.benchmark_context,
        dataset_name=self.NAME,
        variant=self._variant,
        index_file=self._file_index,
        csv_dict=self.URL_CSV,
    ).execute_import()
|
|
|
|
def _prepare_parameters_directory(self):
    """Make sure the query-parameter files are available locally.

    Uses ``.cache/datasets/<NAME>/<variant>/parameters`` (relative to the
    current working directory). When that directory already contains
    anything, it is reused; otherwise the archive listed in
    ``QUERY_PARAMETERS`` for the current variant is downloaded into the
    parent directory and unpacked with ``helpers.unpack_zip``.

    Returns:
        The ``parameters-<variant>`` path below either the cached directory
        or the directory returned by ``helpers.unpack_zip``.
    """
    parameters = Path() / ".cache" / "datasets" / self.NAME / self._variant / "parameters"
    parameters.mkdir(parents=True, exist_ok=True)

    # mkdir above guarantees the directory exists, so only emptiness needs checking.
    if any(parameters.iterdir()):
        print("Files downloaded.")
    else:
        print("Downloading files")
        downloaded_file = helpers.download_file(self.QUERY_PARAMETERS[self._variant], parameters.parent.absolute())
        # f-string rather than "+" so the message works whether the helper
        # returns a str or a Path (the value is wrapped in Path() right below).
        print(f"Unpacking the file...{downloaded_file}")
        parameters = helpers.unpack_zip(Path(downloaded_file))

    return parameters / ("parameters-" + self._variant)
|
|
|
|
def _get_query_parameters(self) -> dict:
    """Pick one random parameter row for the calling benchmark query.

    The query id is derived from the *caller's* function name (read from the
    call stack), so this must be invoked directly from a
    ``benchmark__..._query_<N>[_suffix]`` method. The first ``bi-<N>.csv`` or
    ``bi-<N>a.csv`` file found in ``self._parameters_dir`` is used: its first
    line is a ``|``-separated header of ``name:TYPE`` fields, and one of the
    remaining rows is drawn at random.

    Returns:
        Mapping of parameter name to converted value; empty when no
        parameter file matches the caller's query id.

    Raises:
        ValueError: if the matching file contains no data rows.
    """
    caller_name = inspect.stack()[1].function
    name_parts = caller_name.split("_")
    # Query methods may carry a suffix after the id ("..._query_1_analytical"),
    # so the last token is not necessarily the id: take the last purely
    # numeric token and only fall back to the last token when there is none.
    query_id = next((part for part in reversed(name_parts) if part.isdigit()), name_parts[-1])
    accepted_ids = (query_id, query_id + "a")

    parameters = {}
    for file in self._parameters_dir.glob("bi-*.csv"):
        # "bi-2a.csv" -> "2a"
        if file.name.split("-")[1][0:-4] not in accepted_ids:
            continue

        with file.open("r") as source:
            # Blank lines (e.g. a trailing one) are not parameter rows.
            rows = [line for line in source.read().split("\n") if line]
        if len(rows) < 2:
            raise ValueError(f"Parameter file {file} contains no data rows")

        header = rows[0].split("|")
        data = rows[random.randint(1, len(rows) - 1)].split("|")

        for i, column in enumerate(header):
            key, value_type = column.split(":")
            raw = data[i]
            if value_type == "DATETIME":
                # Drop time zone: the trailing six characters
                # (presumably an offset suffix — confirm against the data files).
                parameters[key] = raw[0:-6]
            elif value_type == "DATE":
                parameters[key] = raw + "T00:00:00"
            elif value_type == "INT":
                parameters[key] = int(raw)
            elif value_type == "STRING[]":
                parameters[key] = raw.split(";")
            else:
                parameters[key] = raw
        # Only the first matching file is used.
        break

    return parameters
|
|
|
|
def __init__(self, variant=None, benchmark_context: BenchmarkContext = None):
    """Initialise the workload and locate the query-parameter files.

    Args:
        variant: Dataset variant, forwarded unchanged to the base class.
        benchmark_context: Benchmark configuration, forwarded to the base class.
    """
    super().__init__(variant, benchmark_context=benchmark_context)
    # Resolved once at construction time; this may download and unpack the
    # parameter archive. _get_query_parameters later globs this directory.
    self._parameters_dir = self._prepare_parameters_directory()
|
|
|
|
def benchmark__bi__query_1_analytical(self):
    """Return ``(query, parameters)`` for BI query 1.

    The query groups messages created before ``$datetime`` by year, comment
    flag and length category. The two vendor texts differ only in the
    date-time constructor (``localDateTime`` for memgraph, ``DateTime``
    otherwise).

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    if self._vendor == "memgraph":
        query = """
        MATCH (message:Message)
        WHERE message.creationDate < localDateTime($datetime)
        WITH count(message) AS totalMessageCountInt
        WITH toFloat(totalMessageCountInt) AS totalMessageCount
        MATCH (message:Message)
        WHERE message.creationDate < localDateTime($datetime)
        AND message.content IS NOT NULL
        WITH
            totalMessageCount,
            message,
            message.creationDate.year AS year
        WITH
            totalMessageCount,
            year,
            message:Comment AS isComment,
            CASE
                WHEN message.length < 40 THEN 0
                WHEN message.length < 80 THEN 1
                WHEN message.length < 160 THEN 2
                ELSE 3
            END AS lengthCategory,
            count(message) AS messageCount,
            sum(message.length) / toFloat(count(message)) AS averageMessageLength,
            sum(message.length) AS sumMessageLength
        RETURN
            year,
            isComment,
            lengthCategory,
            messageCount,
            averageMessageLength,
            sumMessageLength,
            messageCount / totalMessageCount AS percentageOfMessages
        ORDER BY
            year DESC,
            isComment ASC,
            lengthCategory ASC
        """
    else:
        query = """
        MATCH (message:Message)
        WHERE message.creationDate < DateTime($datetime)
        WITH count(message) AS totalMessageCountInt
        WITH toFloat(totalMessageCountInt) AS totalMessageCount
        MATCH (message:Message)
        WHERE message.creationDate < DateTime($datetime)
        AND message.content IS NOT NULL
        WITH
            totalMessageCount,
            message,
            message.creationDate.year AS year
        WITH
            totalMessageCount,
            year,
            message:Comment AS isComment,
            CASE
                WHEN message.length < 40 THEN 0
                WHEN message.length < 80 THEN 1
                WHEN message.length < 160 THEN 2
                ELSE 3
            END AS lengthCategory,
            count(message) AS messageCount,
            sum(message.length) / toFloat(count(message)) AS averageMessageLength,
            sum(message.length) AS sumMessageLength
        RETURN
            year,
            isComment,
            lengthCategory,
            messageCount,
            averageMessageLength,
            sumMessageLength,
            messageCount / totalMessageCount AS percentageOfMessages
        ORDER BY
            year DESC,
            isComment ASC,
            lengthCategory ASC
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_2_analytical(self):
    """Return ``(query, parameters)`` for BI query 2.

    For tags of class ``$tagClass`` the query counts messages in two
    consecutive 100-day windows starting at ``$date`` and ranks tags by the
    absolute difference. The memgraph text uses ``localDateTime`` and the
    ``day`` duration key; the other text uses ``DateTime`` and ``days``.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    if self._vendor == "memgraph":
        query = """
        MATCH (tag:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
        OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag)
        WHERE localDateTime($date) <= message1.creationDate
        AND message1.creationDate < localDateTime($date) + duration({day: 100})
        WITH tag, count(message1) AS countWindow1
        OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag)
        WHERE localDateTime($date) + duration({day: 100}) <= message2.creationDate
        AND message2.creationDate < localDateTime($date) + duration({day: 200})
        WITH
            tag,
            countWindow1,
            count(message2) AS countWindow2
        RETURN
            tag.name,
            countWindow1,
            countWindow2,
            abs(countWindow1 - countWindow2) AS diff
        ORDER BY
            diff DESC,
            tag.name ASC
        LIMIT 100
        """
    else:
        query = """
        MATCH (tag:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
        OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag)
        WHERE DateTime($date) <= message1.creationDate
        AND message1.creationDate < DateTime($date) + duration({days: 100})
        WITH tag, count(message1) AS countWindow1
        OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag)
        WHERE DateTime($date) + duration({days: 100}) <= message2.creationDate
        AND message2.creationDate < DateTime($date) + duration({days: 200})
        WITH
            tag,
            countWindow1,
            count(message2) AS countWindow2
        RETURN
            tag.name,
            countWindow1,
            countWindow2,
            abs(countWindow1 - countWindow2) AS diff
        ORDER BY
            diff DESC,
            tag.name ASC
        LIMIT 100
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_3_analytical(self):
    """Return ``(query, parameters)`` for BI query 3.

    The query ranks forums moderated by persons located in ``$country`` by
    the number of distinct messages tagged with a tag of class ``$tagClass``.
    The same text is used for every vendor.
    """
    query_text = """
        MATCH
            (:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-
            (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]->
            (post:Post)<-[:REPLY_OF*0..]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
        RETURN
            forum.id as id,
            forum.title,
            person.id,
            count(DISTINCT message) AS messageCount
        ORDER BY
            messageCount DESC,
            id ASC
        LIMIT 20
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_5_analytical(self):
    """Return ``(query, parameters)`` for BI query 5.

    The query scores creators of messages tagged ``$tag`` as
    ``1*messageCount + 2*replyCount + 10*likeCount``. The same text is used
    for every vendor.
    """
    query_text = """
        MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person)
        OPTIONAL MATCH (message)<-[likes:LIKES]-(:Person)
        WITH person, message, count(likes) AS likeCount
        OPTIONAL MATCH (message)<-[:REPLY_OF]-(reply:Comment)
        WITH person, message, likeCount, count(reply) AS replyCount
        WITH person, count(message) AS messageCount, sum(likeCount) AS likeCount, sum(replyCount) AS replyCount
        RETURN
            person.id,
            replyCount,
            likeCount,
            messageCount,
            1*messageCount + 2*replyCount + 10*likeCount AS score
        ORDER BY
            score DESC,
            person.id ASC
        LIMIT 100
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_6_analytical(self):
    """Return ``(query, parameters)`` for BI query 6.

    For creators of messages tagged ``$tag`` the query counts the distinct
    likes received by the people who liked those messages
    (``authorityScore``). The same text is used for every vendor.
    """
    query_text = """
        MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person)
        OPTIONAL MATCH (message1)<-[:LIKES]-(person2:Person)
        OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message2:Message)<-[like:LIKES]-(person3:Person)
        RETURN
            person1.id as id,
            count(DISTINCT like) AS authorityScore
        ORDER BY
            authorityScore DESC,
            id ASC
        LIMIT 100
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_7_analytical(self):
    """Return ``(query, parameters)`` for BI query 7.

    The query counts distinct comments per related tag among replies to
    messages tagged ``$tag``.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    # NOTE(review): the memgraph text uses OPTIONAL MATCH + "tag IS NOT NULL"
    # and returns the relatedTag node, while the other text filters with
    # "WHERE NOT (comment)-[:HAS_TAG]->(tag)" and returns relatedTag.name.
    # These do not look equivalent — confirm this is intentional.
    if self._vendor == "memgraph":
        query = """
        MATCH
            (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message),
            (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag)
        OPTIONAL MATCH (comment)-[:HAS_TAG]->(tag)
        WHERE tag IS NOT NULL
        RETURN
            relatedTag,
            count(DISTINCT comment) AS count
        ORDER BY
            relatedTag.name ASC,
            count DESC
        LIMIT 100
        """
    else:
        query = """
        MATCH
            (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message),
            (message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag)
        WHERE NOT (comment)-[:HAS_TAG]->(tag)
        RETURN
            relatedTag.name,
            count(DISTINCT comment) AS count
        ORDER BY
            relatedTag.name ASC,
            count DESC
        LIMIT 100
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_9_analytical(self):
    """Return ``(query, parameters)`` for BI query 9.

    The query counts, per person, the distinct posts (threads) and distinct
    reply messages created between ``$startDate`` and ``$endDate``. The two
    vendor texts differ only in the date-time constructor.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    if self._vendor == "memgraph":
        query = """
        MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message)
        WHERE post.creationDate >= localDateTime($startDate)
        AND post.creationDate <= localDateTime($endDate)
        AND reply.creationDate >= localDateTime($startDate)
        AND reply.creationDate <= localDateTime($endDate)
        RETURN
            person.id as id,
            person.firstName,
            person.lastName,
            count(DISTINCT post) AS threadCount,
            count(DISTINCT reply) AS messageCount
        ORDER BY
            messageCount DESC,
            id ASC
        LIMIT 100
        """
    else:
        query = """
        MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message)
        WHERE post.creationDate >= DateTime($startDate)
        AND post.creationDate <= DateTime($endDate)
        AND reply.creationDate >= DateTime($startDate)
        AND reply.creationDate <= DateTime($endDate)
        RETURN
            person.id as id,
            person.firstName,
            person.lastName,
            count(DISTINCT post) AS threadCount,
            count(DISTINCT reply) AS messageCount
        ORDER BY
            messageCount DESC,
            id ASC
        LIMIT 100
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_11_analytical(self):
    """Return ``(query, parameters)`` for BI query 11.

    The query counts triples of persons located in ``$country`` that are
    pairwise connected by KNOWS edges created between ``$startDate`` and
    ``$endDate``. The same text is used for every vendor.
    """
    query_text = """
        MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country:Country {name: $country}),
            (a)-[k1:KNOWS]-(b:Person)
        WHERE a.id < b.id
        AND localDateTime($startDate) <= k1.creationDate AND k1.creationDate <= localDateTime($endDate)
        WITH DISTINCT country, a, b
        MATCH (b)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country)
        WITH DISTINCT country, a, b
        MATCH (b)-[k2:KNOWS]-(c:Person),
            (c)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country)
        WHERE b.id < c.id
        AND localDateTime($startDate) <= k2.creationDate AND k2.creationDate <= localDateTime($endDate)
        WITH DISTINCT a, b, c
        MATCH (c)-[k3:KNOWS]-(a)
        WHERE localDateTime($startDate) <= k3.creationDate AND k3.creationDate <= localDateTime($endDate)
        WITH DISTINCT a, b, c
        RETURN count(*) AS count
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_12_analytical(self):
    """Return ``(query, parameters)`` for BI query 12.

    The query builds a histogram of persons by how many messages they wrote
    that have content, are shorter than ``$lengthThreshold``, are newer than
    ``$startDate`` and sit in a thread whose post language is in
    ``$languages``. The same text is used for every vendor.
    """
    query_text = """
        MATCH (person:Person)
        OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post)
        WHERE message.content IS NOT NULL
        AND message.length < $lengthThreshold
        AND message.creationDate > localDateTime($startDate)
        AND post.language IN $languages
        WITH
            person,
            count(message) AS messageCount
        RETURN
            messageCount,
            count(person) AS personCount
        ORDER BY
            personCount DESC,
            messageCount DESC
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_13_analytical(self):
    """Return ``(query, parameters)`` for BI query 13.

    The query finds "zombies" in ``$country`` (persons created before
    ``$endDate`` with ``messageCount / months < 1``) and scores each by the
    share of likes on their messages that come from other zombies. The two
    vendor texts differ only in the date-time constructor.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    if self._vendor == "memgraph":
        query = """
        MATCH (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person)
        WHERE zombie.creationDate < localDateTime($endDate)
        WITH country, zombie
        OPTIONAL MATCH (zombie)<-[:HAS_CREATOR]-(message:Message)
        WHERE message.creationDate < localDateTime($endDate)
        WITH
            country,
            zombie,
            count(message) AS messageCount
        WITH
            country,
            zombie,
            12 * (localDateTime($endDate).year - zombie.creationDate.year )
            + (localDateTime($endDate).month - zombie.creationDate.month)
            + 1 AS months,
            messageCount
        WHERE messageCount / months < 1
        WITH
            country,
            collect(zombie) AS zombies
        UNWIND zombies AS zombie
        OPTIONAL MATCH
            (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person)
        WHERE likerZombie IN zombies
        WITH
            zombie,
            count(likerZombie) AS zombieLikeCount
        OPTIONAL MATCH
            (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person)
        WHERE likerPerson.creationDate < localDateTime($endDate)
        WITH
            zombie,
            zombieLikeCount,
            count(likerPerson) AS totalLikeCount
        RETURN
            zombie.id,
            zombieLikeCount,
            totalLikeCount,
            CASE totalLikeCount
                WHEN 0 THEN 0.0
                ELSE zombieLikeCount / toFloat(totalLikeCount)
            END AS zombieScore
        ORDER BY
            zombieScore DESC,
            zombie.id ASC
        LIMIT 100
        """
    else:
        query = """
        MATCH (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person)
        WHERE zombie.creationDate < DateTime($endDate)
        WITH country, zombie
        OPTIONAL MATCH (zombie)<-[:HAS_CREATOR]-(message:Message)
        WHERE message.creationDate < DateTime($endDate)
        WITH
            country,
            zombie,
            count(message) AS messageCount
        WITH
            country,
            zombie,
            12 * (DateTime($endDate).year - zombie.creationDate.year )
            + (DateTime($endDate).month - zombie.creationDate.month)
            + 1 AS months,
            messageCount
        WHERE messageCount / months < 1
        WITH
            country,
            collect(zombie) AS zombies
        UNWIND zombies AS zombie
        OPTIONAL MATCH
            (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person)
        WHERE likerZombie IN zombies
        WITH
            zombie,
            count(likerZombie) AS zombieLikeCount
        OPTIONAL MATCH
            (zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person)
        WHERE likerPerson.creationDate < DateTime($endDate)
        WITH
            zombie,
            zombieLikeCount,
            count(likerPerson) AS totalLikeCount
        RETURN
            zombie.id,
            zombieLikeCount,
            totalLikeCount,
            CASE totalLikeCount
                WHEN 0 THEN 0.0
                ELSE zombieLikeCount / toFloat(totalLikeCount)
            END AS zombieScore
        ORDER BY
            zombieScore DESC,
            zombie.id ASC
        LIMIT 100
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_14_analytical(self):
    """Return ``(query, parameters)`` for BI query 14.

    For pairs of persons who know each other, one located in ``$country1``
    and one in ``$country2``, the query accumulates an interaction score
    and keeps the top pair per city of the first country. The same text is
    used for every vendor.
    """
    query_text = """
        MATCH
            (country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person),
            (country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person),
            (person1)-[:KNOWS]-(person2)
        WITH person1, person2, city1, 0 AS score
        OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2)
        WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score
        OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2)
        WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score
        OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2)
        WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score
        OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2)
        WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score
        ORDER BY
            city1.name ASC,
            score DESC,
            person1.id ASC,
            person2.id ASC
        WITH city1, collect({score: score, person1Id: person1.id, person2Id: person2.id})[0] AS top
        RETURN
            top.person1Id,
            top.person2Id,
            city1.name,
            top.score
        ORDER BY
            top.score DESC,
            top.person1Id ASC,
            top.person2Id ASC
        LIMIT 100
        """
    single_line = query_text.replace("\n", "")
    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    query_params = self._get_query_parameters()
    return (single_line, query_params)
|
|
|
|
def benchmark__bi__query_17_analytical(self):
    """Return ``(query, parameters)`` for BI query 17.

    The query counts, per ``person1``, distinct ``message2`` messages
    tagged ``$tag`` that were created more than ``$delta`` hours after
    ``message1`` in a different forum.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    # NOTE(review): the memgraph text replaces the "NOT (forum2)-[:HAS_MEMBER]->(person1)"
    # predicate with OPTIONAL MATCH + "person1 IS NULL"; these do not look
    # equivalent — confirm this is intentional.
    # NOTE(review): the memgraph text uses the duration key "hours" while
    # query 2 uses the singular "day" for memgraph — verify which key
    # spelling memgraph accepts.
    if self._vendor == "memgraph":
        query = """
        MATCH
            (tag:Tag {name: $tag}),
            (person1:Person)<-[:HAS_CREATOR]-(message1:Message)-[:REPLY_OF*0..]->(post1:Post)<-[:CONTAINER_OF]-(forum1:Forum),
            (message1)-[:HAS_TAG]->(tag),
            (forum1)<-[:HAS_MEMBER]->(person2:Person)<-[:HAS_CREATOR]-(comment:Comment)-[:HAS_TAG]->(tag),
            (forum1)<-[:HAS_MEMBER]->(person3:Person)<-[:HAS_CREATOR]-(message2:Message),
            (comment)-[:REPLY_OF]->(message2)-[:REPLY_OF*0..]->(post2:Post)<-[:CONTAINER_OF]-(forum2:Forum)
        MATCH (comment)-[:HAS_TAG]->(tag)
        MATCH (message2)-[:HAS_TAG]->(tag)
        OPTIONAL MATCH (forum2)-[:HAS_MEMBER]->(person1)
        WHERE forum1 <> forum2 AND message2.creationDate > message1.creationDate + duration({hours: $delta}) AND person1 IS NULL
        RETURN person1, count(DISTINCT message2) AS messageCount
        ORDER BY messageCount DESC, person1.id ASC
        LIMIT 10
        """
    else:
        query = """
        MATCH
            (tag:Tag {name: $tag}),
            (person1:Person)<-[:HAS_CREATOR]-(message1:Message)-[:REPLY_OF*0..]->(post1:Post)<-[:CONTAINER_OF]-(forum1:Forum),
            (message1)-[:HAS_TAG]->(tag),
            (forum1)<-[:HAS_MEMBER]->(person2:Person)<-[:HAS_CREATOR]-(comment:Comment)-[:HAS_TAG]->(tag),
            (forum1)<-[:HAS_MEMBER]->(person3:Person)<-[:HAS_CREATOR]-(message2:Message),
            (comment)-[:REPLY_OF]->(message2)-[:REPLY_OF*0..]->(post2:Post)<-[:CONTAINER_OF]-(forum2:Forum)
        MATCH (comment)-[:HAS_TAG]->(tag)
        MATCH (message2)-[:HAS_TAG]->(tag)
        WHERE forum1 <> forum2
        AND message2.creationDate > message1.creationDate + duration({hours: $delta})
        AND NOT (forum2)-[:HAS_MEMBER]->(person1)
        RETURN person1, count(DISTINCT message2) AS messageCount
        ORDER BY messageCount DESC, person1.id ASC
        LIMIT 10
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|
|
|
|
def benchmark__bi__query_18_analytical(self):
    """Return ``(query, parameters)`` for BI query 18.

    The query ranks pairs of distinct persons interested in ``$tag`` by
    their number of distinct mutual friends.

    Only the text for the active vendor is built and the parameters are
    drawn once; previously both tuples were built, which read the parameter
    file and drew a random row twice.
    """
    # NOTE(review): the memgraph text uses an OPTIONAL MATCH on the direct
    # KNOWS edge where the other text excludes such pairs with
    # "AND NOT (person1)-[:KNOWS]-(person2)"; these do not look equivalent —
    # confirm this is intentional.
    if self._vendor == "memgraph":
        query = """
        MATCH (tag:Tag {name: $tag})<-[:HAS_INTEREST]-(person1:Person)-[:KNOWS]-(mutualFriend:Person)-[:KNOWS]-(person2:Person)-[:HAS_INTEREST]->(tag)
        OPTIONAL MATCH (person1)-[:KNOWS]-(person2)
        WHERE person1 <> person2
        RETURN person1.id AS person1Id, person2.id AS person2Id, count(DISTINCT mutualFriend) AS mutualFriendCount
        ORDER BY mutualFriendCount DESC, person1Id ASC, person2Id ASC
        LIMIT 20
        """
    else:
        query = """
        MATCH (tag:Tag {name: $tag})<-[:HAS_INTEREST]-(person1:Person)-[:KNOWS]-(mutualFriend:Person)-[:KNOWS]-(person2:Person)-[:HAS_INTEREST]->(tag)
        WHERE person1 <> person2
        AND NOT (person1)-[:KNOWS]-(person2)
        RETURN person1.id AS person1Id, person2.id AS person2Id, count(DISTINCT mutualFriend) AS mutualFriendCount
        ORDER BY mutualFriendCount DESC, person1Id ASC, person2Id ASC
        LIMIT 20
        """

    # The parameter helper reads this method's name from the call stack,
    # so it has to be called directly from here.
    return (query.replace("\n", ""), self._get_query_parameters())
|