Fix and update mgbench (#1838)
This commit is contained in:
parent
a3d2474c5b
commit
56be736d30
@ -632,10 +632,12 @@ def run_isolated_workload_without_authorization(vendor_runner, client, queries,
|
|||||||
|
|
||||||
|
|
||||||
def setup_indices_and_import_dataset(client, vendor_runner, generated_queries, workload, storage_mode):
|
def setup_indices_and_import_dataset(client, vendor_runner, generated_queries, workload, storage_mode):
|
||||||
vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT)
|
if benchmark_context.vendor_name == "memgraph":
|
||||||
|
# Neo4j will get started just before import -> without this if statement it would try to start it twice
|
||||||
|
vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT)
|
||||||
log.info("Executing database index setup")
|
log.info("Executing database index setup")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
import_results = None
|
||||||
if generated_queries:
|
if generated_queries:
|
||||||
client.execute(queries=workload.indexes_generator(), num_workers=1)
|
client.execute(queries=workload.indexes_generator(), num_workers=1)
|
||||||
log.info("Finished setting up indexes.")
|
log.info("Finished setting up indexes.")
|
||||||
|
@ -127,8 +127,6 @@ def run_full_benchmarks(
|
|||||||
],
|
],
|
||||||
]
|
]
|
||||||
|
|
||||||
assert not realistic or not mixed, "Cannot run both realistic and mixed workload, please select one!"
|
|
||||||
|
|
||||||
if realistic:
|
if realistic:
|
||||||
# Configurations for full workload
|
# Configurations for full workload
|
||||||
for count, write, read, update, analytical in realistic:
|
for count, write, read, update, analytical in realistic:
|
||||||
|
99
tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh
Normal file
99
tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Currently only pokec dataset is modified to be used with memgraph on-disk storage
|
||||||
|
|
||||||
|
pushd () { command pushd "$@" > /dev/null; }
|
||||||
|
popd () { command popd "$@" > /dev/null; }
|
||||||
|
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
pushd "$SCRIPT_DIR"
|
||||||
|
|
||||||
|
# Help function
|
||||||
|
function show_help() {
|
||||||
|
echo "Usage: $0 [OPTIONS]"
|
||||||
|
echo "Options:"
|
||||||
|
echo " -n, --neo4j-path Path to Neo4j binary"
|
||||||
|
echo " -m, --memgraph-path Path to Memgraph binary"
|
||||||
|
echo " -w, --num-workers Number of workers for benchmark and import"
|
||||||
|
echo " -d, --dataset_size dataset_size (small, medium, large)"
|
||||||
|
echo " -h, --help Show this help message"
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Default values
|
||||||
|
neo4j_path="/usr/share/neo4j/bin/neo4j"
|
||||||
|
memgraph_path="../../build/memgraph"
|
||||||
|
num_workers=12
|
||||||
|
dataset_size="small"
|
||||||
|
|
||||||
|
# Parse command line arguments
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
key="$1"
|
||||||
|
case $key in
|
||||||
|
-n|--neo4j-path)
|
||||||
|
neo4j_path="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-m|--memgraph-path)
|
||||||
|
memgraph_path="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-w|--num-workers)
|
||||||
|
num_workers="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-d|--dataset_size)
|
||||||
|
dataset_size="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-h|--help)
|
||||||
|
show_help
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Invalid option: $1"
|
||||||
|
show_help
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ ! -d "pokec_${dataset_size}_results" ]; then
|
||||||
|
mkdir "pokec_${dataset_size}_results"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run Python: Mgbench - Neo4j
|
||||||
|
echo "Running Python: Mgbench - Neo4j"
|
||||||
|
python3 benchmark.py vendor-native \
|
||||||
|
--vendor-binary "$neo4j_path" \
|
||||||
|
--vendor-name neo4j \
|
||||||
|
--num-workers-for-benchmark "$num_workers" \
|
||||||
|
--num-workers-for-import "$num_workers" \
|
||||||
|
--no-load-query-counts \
|
||||||
|
--export-results "pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \
|
||||||
|
"pokec_disk/${dataset_size}/*/*" \
|
||||||
|
--vendor-specific "config=$neo4j_path/conf/neo4j.conf" \
|
||||||
|
--no-authorization
|
||||||
|
|
||||||
|
# Run Python: Mgbench - Memgraph - on-disk
|
||||||
|
echo "Running Python: Mgbench - Memgraph - on-disk"
|
||||||
|
python3 benchmark.py vendor-native \
|
||||||
|
--vendor-binary "$memgraph_path" \
|
||||||
|
--vendor-name memgraph \
|
||||||
|
--num-workers-for-benchmark "$num_workers" \
|
||||||
|
--num-workers-for-import "$num_workers" \
|
||||||
|
--no-load-query-counts \
|
||||||
|
--export-results-on-disk-txn "pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \
|
||||||
|
--export-results "pokec_${dataset_size}_results/on_disk_export_${dataset_size}_pokec.json" \
|
||||||
|
"pokec_disk/${dataset_size}/*/*" \
|
||||||
|
--no-authorization \
|
||||||
|
--vendor-specific "data-directory=benchmark_datadir" "storage-mode=ON_DISK_TRANSACTIONAL"
|
||||||
|
|
||||||
|
echo "Comparing results"
|
||||||
|
python3 compare_results.py --compare \
|
||||||
|
"pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \
|
||||||
|
"pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \
|
||||||
|
--output \
|
||||||
|
"pokec_${dataset_size}_results/neo4j_vs_mg_ondisk_results.html" \
|
||||||
|
--different-vendors
|
@ -634,7 +634,7 @@ class Neo4j(BaseRunner):
|
|||||||
exit_proc = subprocess.run(args=[self._neo4j_binary, "stop"], capture_output=True, check=True)
|
exit_proc = subprocess.run(args=[self._neo4j_binary, "stop"], capture_output=True, check=True)
|
||||||
return exit_proc.returncode, usage
|
return exit_proc.returncode, usage
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0, 0
|
||||||
|
|
||||||
def start_db_init(self, workload):
|
def start_db_init(self, workload):
|
||||||
if self._performance_tracking:
|
if self._performance_tracking:
|
||||||
|
@ -160,12 +160,7 @@ class Workload(ABC):
|
|||||||
raise ValueError("Vendor does not have INDEX for dataset!")
|
raise ValueError("Vendor does not have INDEX for dataset!")
|
||||||
|
|
||||||
def _set_local_files(self) -> None:
|
def _set_local_files(self) -> None:
|
||||||
if not self.disk_workload:
|
if self.disk_workload and self._vendor != "neo4j":
|
||||||
if self.LOCAL_FILE is not None:
|
|
||||||
self._local_file = self.LOCAL_FILE.get(self._variant, None)
|
|
||||||
else:
|
|
||||||
self._local_file = None
|
|
||||||
else:
|
|
||||||
if self.LOCAL_FILE_NODES is not None:
|
if self.LOCAL_FILE_NODES is not None:
|
||||||
self._local_file_nodes = self.LOCAL_FILE_NODES.get(self._variant, None)
|
self._local_file_nodes = self.LOCAL_FILE_NODES.get(self._variant, None)
|
||||||
else:
|
else:
|
||||||
@ -175,14 +170,14 @@ class Workload(ABC):
|
|||||||
self._local_file_edges = self.LOCAL_FILE_EDGES.get(self._variant, None)
|
self._local_file_edges = self.LOCAL_FILE_EDGES.get(self._variant, None)
|
||||||
else:
|
else:
|
||||||
self._local_file_edges = None
|
self._local_file_edges = None
|
||||||
|
else:
|
||||||
|
if self.LOCAL_FILE is not None:
|
||||||
|
self._local_file = self.LOCAL_FILE.get(self._variant, None)
|
||||||
|
else:
|
||||||
|
self._local_file = None
|
||||||
|
|
||||||
def _set_url_files(self) -> None:
|
def _set_url_files(self) -> None:
|
||||||
if not self.disk_workload:
|
if self.disk_workload and self._vendor != "neo4j":
|
||||||
if self.URL_FILE is not None:
|
|
||||||
self._url_file = self.URL_FILE.get(self._variant, None)
|
|
||||||
else:
|
|
||||||
self._url_file = None
|
|
||||||
else:
|
|
||||||
if self.URL_FILE_NODES is not None:
|
if self.URL_FILE_NODES is not None:
|
||||||
self._url_file_nodes = self.URL_FILE_NODES.get(self._variant, None)
|
self._url_file_nodes = self.URL_FILE_NODES.get(self._variant, None)
|
||||||
else:
|
else:
|
||||||
@ -191,6 +186,11 @@ class Workload(ABC):
|
|||||||
self._url_file_edges = self.URL_FILE_EDGES.get(self._variant, None)
|
self._url_file_edges = self.URL_FILE_EDGES.get(self._variant, None)
|
||||||
else:
|
else:
|
||||||
self._url_file_edges = None
|
self._url_file_edges = None
|
||||||
|
else:
|
||||||
|
if self.URL_FILE is not None:
|
||||||
|
self._url_file = self.URL_FILE.get(self._variant, None)
|
||||||
|
else:
|
||||||
|
self._url_file = None
|
||||||
|
|
||||||
def _set_local_index_file(self) -> None:
|
def _set_local_index_file(self) -> None:
|
||||||
if self.LOCAL_INDEX_FILE is not None:
|
if self.LOCAL_INDEX_FILE is not None:
|
||||||
@ -205,10 +205,10 @@ class Workload(ABC):
|
|||||||
self._url_index = None
|
self._url_index = None
|
||||||
|
|
||||||
def prepare(self, directory):
|
def prepare(self, directory):
|
||||||
if not self.disk_workload:
|
if self.disk_workload and self._vendor != "neo4j":
|
||||||
self._prepare_dataset_for_in_memory_workload(directory)
|
|
||||||
else:
|
|
||||||
self._prepare_dataset_for_on_disk_workload(directory)
|
self._prepare_dataset_for_on_disk_workload(directory)
|
||||||
|
else:
|
||||||
|
self._prepare_dataset_for_in_memory_workload(directory)
|
||||||
|
|
||||||
if self._local_index is not None:
|
if self._local_index is not None:
|
||||||
print("Using local index file:", self._local_index)
|
print("Using local index file:", self._local_index)
|
||||||
|
@ -13,7 +13,8 @@ import random
|
|||||||
|
|
||||||
from benchmark_context import BenchmarkContext
|
from benchmark_context import BenchmarkContext
|
||||||
from workloads.base import Workload
|
from workloads.base import Workload
|
||||||
from workloads.importers.disk_importer_pokec import ImporterPokec
|
from workloads.importers.disk_importer_pokec import DiskImporterPokec
|
||||||
|
from workloads.importers.importer_pokec import ImporterPokec
|
||||||
|
|
||||||
|
|
||||||
class Pokec(Workload):
|
class Pokec(Workload):
|
||||||
@ -22,6 +23,12 @@ class Pokec(Workload):
|
|||||||
DEFAULT_VARIANT = "small"
|
DEFAULT_VARIANT = "small"
|
||||||
FILE = None
|
FILE = None
|
||||||
|
|
||||||
|
URL_FILE = {
|
||||||
|
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_small_import.cypher",
|
||||||
|
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_medium_import.cypher",
|
||||||
|
"large": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_large.setup.cypher.gz",
|
||||||
|
}
|
||||||
|
|
||||||
URL_FILE_NODES = {
|
URL_FILE_NODES = {
|
||||||
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_small_import_nodes.cypher",
|
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_small_import_nodes.cypher",
|
||||||
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_medium_import_nodes.cypher",
|
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_medium_import_nodes.cypher",
|
||||||
@ -42,7 +49,7 @@ class Pokec(Workload):
|
|||||||
|
|
||||||
URL_INDEX_FILE = {
|
URL_INDEX_FILE = {
|
||||||
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/memgraph.cypher",
|
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/memgraph.cypher",
|
||||||
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/neo4j.cypher",
|
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/neo4j.cypher",
|
||||||
}
|
}
|
||||||
|
|
||||||
PROPERTIES_ON_EDGES = False
|
PROPERTIES_ON_EDGES = False
|
||||||
@ -51,15 +58,26 @@ class Pokec(Workload):
|
|||||||
super().__init__(variant, benchmark_context=benchmark_context, disk_workload=True)
|
super().__init__(variant, benchmark_context=benchmark_context, disk_workload=True)
|
||||||
|
|
||||||
def custom_import(self) -> bool:
|
def custom_import(self) -> bool:
|
||||||
importer = ImporterPokec(
|
if self._vendor == "neo4j":
|
||||||
benchmark_context=self.benchmark_context,
|
importer = ImporterPokec(
|
||||||
dataset_name=self.NAME,
|
benchmark_context=self.benchmark_context,
|
||||||
index_file=self._file_index,
|
dataset_name=self.NAME,
|
||||||
dataset_nodes_file=self._node_file,
|
index_file=self._file_index,
|
||||||
dataset_edges_file=self._edge_file,
|
dataset_file=self._file,
|
||||||
variant=self._variant,
|
variant=self._variant,
|
||||||
)
|
)
|
||||||
return importer.execute_import()
|
return importer.execute_import()
|
||||||
|
|
||||||
|
else:
|
||||||
|
importer = DiskImporterPokec(
|
||||||
|
benchmark_context=self.benchmark_context,
|
||||||
|
dataset_name=self.NAME,
|
||||||
|
index_file=self._file_index,
|
||||||
|
dataset_nodes_file=self._node_file,
|
||||||
|
dataset_edges_file=self._edge_file,
|
||||||
|
variant=self._variant,
|
||||||
|
)
|
||||||
|
return importer.execute_import()
|
||||||
|
|
||||||
# Helpers used to generate the queries
|
# Helpers used to generate the queries
|
||||||
def _get_random_vertex(self):
|
def _get_random_vertex(self):
|
||||||
@ -214,12 +232,22 @@ class Pokec(Workload):
|
|||||||
# OK
|
# OK
|
||||||
def benchmark__arango__allshortest_paths(self):
|
def benchmark__arango__allshortest_paths(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return (
|
memgraph = (
|
||||||
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
|
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to},
|
{"from": vertex_from, "to": vertex_to},
|
||||||
)
|
)
|
||||||
|
neo4j = (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
|
"MATCH p = allShortestPaths((n)-[*..2]->(m)) "
|
||||||
|
"RETURN [node in nodes(p) | node.id] AS path",
|
||||||
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
if self._vendor == "neo4j":
|
||||||
|
return neo4j
|
||||||
|
else:
|
||||||
|
return memgraph
|
||||||
|
|
||||||
# Our benchmark queries
|
# Our benchmark queries
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ from constants import *
|
|||||||
from runners import BaseRunner
|
from runners import BaseRunner
|
||||||
|
|
||||||
|
|
||||||
class ImporterPokec:
|
class DiskImporterPokec:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
benchmark_context: BenchmarkContext,
|
benchmark_context: BenchmarkContext,
|
||||||
|
@ -167,30 +167,62 @@ class Pokec(Workload):
|
|||||||
|
|
||||||
def benchmark__arango__shortest_path(self):
|
def benchmark__arango__shortest_path(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return (
|
memgraph = (
|
||||||
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*bfs..15]->(m) "
|
"MATCH p=(n)-[*bfs..15]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to},
|
{"from": vertex_from, "to": vertex_to},
|
||||||
)
|
)
|
||||||
|
neo4j = (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
|
"MATCH p=shortestPath((n)-[*..15]->(m)) "
|
||||||
|
"RETURN [n in nodes(p) | n.id] AS path",
|
||||||
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
if self._vendor == "memgraph":
|
||||||
|
return memgraph
|
||||||
|
else:
|
||||||
|
return neo4j
|
||||||
|
|
||||||
def benchmark__arango__shortest_path_with_filter(self):
|
def benchmark__arango__shortest_path_with_filter(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return (
|
memgraph = (
|
||||||
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) "
|
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to},
|
{"from": vertex_from, "to": vertex_to},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
neo4j = (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
|
"MATCH p=shortestPath((n)-[*..15]->(m)) "
|
||||||
|
"WHERE all(node in nodes(p) WHERE node.age >= 18) "
|
||||||
|
"RETURN [n in nodes(p) | n.id] AS path",
|
||||||
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
if self._vendor == "memgraph":
|
||||||
|
return memgraph
|
||||||
|
else:
|
||||||
|
return neo4j
|
||||||
|
|
||||||
def benchmark__arango__allshortest_paths(self):
|
def benchmark__arango__allshortest_paths(self):
|
||||||
vertex_from, vertex_to = self._get_random_from_to()
|
vertex_from, vertex_to = self._get_random_from_to()
|
||||||
return (
|
memgraph = (
|
||||||
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
|
"MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) "
|
||||||
"RETURN extract(n in nodes(p) | n.id) AS path",
|
"RETURN extract(n in nodes(p) | n.id) AS path",
|
||||||
{"from": vertex_from, "to": vertex_to},
|
{"from": vertex_from, "to": vertex_to},
|
||||||
)
|
)
|
||||||
|
neo4j = (
|
||||||
|
"MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m "
|
||||||
|
"MATCH p = allShortestPaths((n)-[*..2]->(m)) "
|
||||||
|
"RETURN [node in nodes(p) | node.id] AS path",
|
||||||
|
{"from": vertex_from, "to": vertex_to},
|
||||||
|
)
|
||||||
|
if self._vendor == "memgraph":
|
||||||
|
return memgraph
|
||||||
|
else:
|
||||||
|
return neo4j
|
||||||
|
|
||||||
# Our benchmark queries
|
# Our benchmark queries
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user