LDBC: Integrate with Apollo
Summary: Run neo4j and memgraph from the run_benchmark script. This makes the mg and neo scripts obsolete.
Reviewers: buda, teon.banek
Reviewed By: teon.banek
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D806
parent f2a82f4f58, commit 686dc452ee

tests/public_benchmark/ldbc/.gitignore (vendored)
@@ -1,8 +1,8 @@
datasets/
ldbc_snb_datagen/
ldbc_driver/
ldbc-snb-impls/
neo4j_csv_dataset_scale_*/
tmp/
ve3/
ve2/
maven/
hadoop/
*.out
@@ -3,16 +3,15 @@
## How to run the benchmark against Neo4j OR Memgraph?

cd memgraph/tests/public_benchmark/ldbc
./setup_system
./setup_dependencies
./setup_dataset [--scale-factor 1]
./neo [--run] OR ./mg [--run]
./setup
./build_dataset [--scale-factor 1]
# To run short reads by default, just call:
./run_benchmark
# To run update queries use the following.
./run_benchmark --properties-file ldbc-snb-impls-updates.properties
# You may need to increase the time compression when updating:
./run_benchmark --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./run_benchmark --create-index --run-db memgraph # or neo4j
# To run update queries, pass the properties file for updates and slow down
# the execution by setting a larger time compression ratio.
./run_benchmark --create-index --run-db memgraph \
    --properties-file ldbc-snb-impls-updates.properties \
    --time-compression-ratio 1.5
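run_benchmark can also drive the LDBC workload against a database that was started by hand, in which case the result file is tagged "external" instead of a database name. A hypothetical invocation (host and port are example values, 7687 being the default Bolt port):

    ./run_benchmark --host 127.0.0.1 --port 7687 --create-index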

## How to run a specific test?
@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/bash -e

# Generate SNB dataset.

@@ -7,18 +7,18 @@ function print_help () {
    echo "Optional arguments:"
    echo -e "  -h|--help -> Prints help."
    echo -e "  --scale-factor Positive_Integer -> Defines the dataset size."
    echo -e "  --neo4j-home Neo4j home directory, overrides NEO4J_HOME"
    echo -e "  --memgraph-home Memgraph home directory."
    echo -e "  --skip-generating Only transform generated dataset"
}

set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
base_dir="${script_dir}/../../.."
neo4j_dir="${base_dir}/libs/neo4j"

# Add Maven to PATH
export PATH=$PATH:${script_dir}/maven/bin

# Read the arguments.
scale_factor=1
skip_generating=false
memgraph_dir="${script_dir}/../../.."
while [[ $# -gt 0 ]]
do
    case $1 in

@@ -30,17 +30,6 @@ do
        scale_factor=$2
        shift
        ;;
    --neo4j-home)
        NEO4J_HOME=$2
        shift
        ;;
    --memgraph-home)
        memgraph_dir=$2
        shift
        ;;
    --skip-generating)
        skip_generating=true
        ;;
    *)
        # unknown option
        ;;

@@ -48,13 +37,15 @@ do
    shift # past argument or value
done

echo "Using scale_factor" $scale_factor
# Prepare the folder structure.
dataset_folder_prefix="neo4j_csv_dataset"
dataset_folder="${script_dir}/${dataset_folder_prefix}_scale_${scale_factor}"
mkdir -p ${dataset_folder}
dataset_dir="${script_dir}/datasets/scale_${scale_factor}"
if [ -d ${dataset_dir} ]; then
    rm -rf ${dataset_dir}
fi
mkdir -p ${dataset_dir}

# Define scale factor.
echo "Using scale_factor" $scale_factor
cat > ${script_dir}/ldbc_snb_datagen/params.ini <<EOF
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.${scale_factor}
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer

@@ -62,83 +53,84 @@ ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
EOF

ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
if [[ ${skip_generating} = false ]]; then
    # Generate the dataset.
    rm -rf ${dataset_folder}/*
    cd ${ldbc_snb_datagen_folder}
    export HADOOP_OPTS="$HADOOP_OPTS -Xmx20G"
    # Find installed Java binary.
    if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
        export JAVA_HOME=/usr/lib/jvm/default-java/jre
    elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
        export JAVA_HOME=/usr/lib/jvm/default-runtime/
    else
        echo "Unable to find JRE under /usr/lib/jvm"
        exit 1
    fi
    echo "Using JAVA_HOME" $JAVA_HOME
    HADOOP_HOME=/usr/local/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh || exit 1

    # Transform the dataset into Neo4j CSV format.
    cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
    mvn exec:java \
        -Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
        -Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_folder}" || exit 1
fi
echo "Using JAVA_HOME" $JAVA_HOME

rm -rf ${dataset_folder}/social_network
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_folder}/social_network
# Remove old generated dataset.
rm -rf ${ldbc_snb_datagen_folder}/social_network ${ldbc_snb_datagen_folder}/substitution_parameters

rm -rf ${dataset_folder}/substitution_parameters
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_folder}/substitution_parameters
# Generate the dataset.
ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
cd ${ldbc_snb_datagen_folder}
# Poorly documented hadoop heapsize flag (unit is 'm'), see: hadoop/libexec/hadoop-config.sh
# https://stackoverflow.com/questions/15609909/error-java-heap-space
export HADOOP_HEAPSIZE=8192
HADOOP_HOME=${script_dir}/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh

# Copy generated dataset.
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_dir}/

# Transform the dataset into Neo4j CSV format.
mkdir -p ${dataset_dir}/csv
cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
mvn exec:java \
    -Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
    -Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/csv"

csv_dataset="
    --nodes ${dataset_folder}/comment_0_0.csv \
    --nodes ${dataset_folder}/forum_0_0.csv \
    --nodes ${dataset_folder}/organisation_0_0.csv \
    --nodes ${dataset_folder}/person_0_0.csv \
    --nodes ${dataset_folder}/place_0_0.csv \
    --nodes ${dataset_folder}/post_0_0.csv \
    --nodes ${dataset_folder}/tag_0_0.csv \
    --nodes ${dataset_folder}/tagclass_0_0.csv \
    --relationships ${dataset_folder}/comment_hasCreator_person_0_0.csv \
    --relationships ${dataset_folder}/comment_hasTag_tag_0_0.csv \
    --relationships ${dataset_folder}/comment_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_folder}/comment_replyOf_comment_0_0.csv \
    --relationships ${dataset_folder}/comment_replyOf_post_0_0.csv \
    --relationships ${dataset_folder}/forum_containerOf_post_0_0.csv \
    --relationships ${dataset_folder}/forum_hasMember_person_0_0.csv \
    --relationships ${dataset_folder}/forum_hasModerator_person_0_0.csv \
    --relationships ${dataset_folder}/forum_hasTag_tag_0_0.csv \
    --relationships ${dataset_folder}/organisation_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_folder}/person_hasInterest_tag_0_0.csv \
    --relationships ${dataset_folder}/person_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_folder}/person_knows_person_0_0.csv \
    --relationships ${dataset_folder}/person_likes_comment_0_0.csv \
    --relationships ${dataset_folder}/person_likes_post_0_0.csv \
    --relationships ${dataset_folder}/person_studyAt_organisation_0_0.csv \
    --relationships ${dataset_folder}/person_workAt_organisation_0_0.csv \
    --relationships ${dataset_folder}/place_isPartOf_place_0_0.csv \
    --relationships ${dataset_folder}/post_hasCreator_person_0_0.csv \
    --relationships ${dataset_folder}/post_hasTag_tag_0_0.csv \
    --relationships ${dataset_folder}/post_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_folder}/tag_hasType_tagclass_0_0.csv \
    --relationships ${dataset_folder}/tagclass_isSubclassOf_tagclass_0_0.csv"
    --nodes ${dataset_dir}/csv/comment_0_0.csv \
    --nodes ${dataset_dir}/csv/forum_0_0.csv \
    --nodes ${dataset_dir}/csv/organisation_0_0.csv \
    --nodes ${dataset_dir}/csv/person_0_0.csv \
    --nodes ${dataset_dir}/csv/place_0_0.csv \
    --nodes ${dataset_dir}/csv/post_0_0.csv \
    --nodes ${dataset_dir}/csv/tag_0_0.csv \
    --nodes ${dataset_dir}/csv/tagclass_0_0.csv \
    --relationships ${dataset_dir}/csv/comment_hasCreator_person_0_0.csv \
    --relationships ${dataset_dir}/csv/comment_hasTag_tag_0_0.csv \
    --relationships ${dataset_dir}/csv/comment_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_dir}/csv/comment_replyOf_comment_0_0.csv \
    --relationships ${dataset_dir}/csv/comment_replyOf_post_0_0.csv \
    --relationships ${dataset_dir}/csv/forum_containerOf_post_0_0.csv \
    --relationships ${dataset_dir}/csv/forum_hasMember_person_0_0.csv \
    --relationships ${dataset_dir}/csv/forum_hasModerator_person_0_0.csv \
    --relationships ${dataset_dir}/csv/forum_hasTag_tag_0_0.csv \
    --relationships ${dataset_dir}/csv/organisation_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_dir}/csv/person_hasInterest_tag_0_0.csv \
    --relationships ${dataset_dir}/csv/person_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_dir}/csv/person_knows_person_0_0.csv \
    --relationships ${dataset_dir}/csv/person_likes_comment_0_0.csv \
    --relationships ${dataset_dir}/csv/person_likes_post_0_0.csv \
    --relationships ${dataset_dir}/csv/person_studyAt_organisation_0_0.csv \
    --relationships ${dataset_dir}/csv/person_workAt_organisation_0_0.csv \
    --relationships ${dataset_dir}/csv/place_isPartOf_place_0_0.csv \
    --relationships ${dataset_dir}/csv/post_hasCreator_person_0_0.csv \
    --relationships ${dataset_dir}/csv/post_hasTag_tag_0_0.csv \
    --relationships ${dataset_dir}/csv/post_isLocatedIn_place_0_0.csv \
    --relationships ${dataset_dir}/csv/tag_hasType_tagclass_0_0.csv \
    --relationships ${dataset_dir}/csv/tagclass_isSubclassOf_tagclass_0_0.csv"

# Convert to neo4j internal format.
if [[ ! -d "${NEO4J_HOME}" ]]; then
    NEO4J_HOME="/usr/share/neo4j"
fi
echo "Using NEO4J_HOME" ${NEO4J_HOME}
mkdir -p ${dataset_folder}/neo4j
cd ${dataset_folder}/neo4j
echo "Converting CSV dataset to '${dataset_folder}/neo4j/graph.db'"
rm -rf graph.db
${NEO4J_HOME}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"
neo4j_database_dir=${dataset_dir}/neo4j/databases
mkdir -p ${neo4j_database_dir}
cd ${neo4j_database_dir}
echo "Converting CSV dataset to '${neo4j_database_dir}/graph.db'"
${neo4j_dir}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"

# Convert to memgraph internal format.
echo "Using MEMGRAPH_HOME" ${memgraph_dir}
mkdir -p ${dataset_folder}/memgraph
cd ${dataset_folder}/memgraph
echo "Converting CSV dataset to '${dataset_folder}/memgraph/graph.snapshot'"
rm -rf graph.snapshot
${memgraph_dir}/tools/csv_to_snapshot -o graph.snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
memgraph_snapshot_dir=${dataset_dir}/memgraph/default
mkdir -p ${memgraph_snapshot_dir}
cd ${memgraph_snapshot_dir}
echo "Converting CSV dataset to '${memgraph_snapshot_dir}/snapshot'"
${base_dir}/tools/csv_to_snapshot -o snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"

echo "Done!"
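As a quick orientation, the tree that build_dataset leaves behind can be pieced together from the paths above. This is a sketch of the expected layout for scale factor 1 (an assumed example value), not output captured from the script:

    datasets/scale_1/social_network             # raw datagen output, used as updates_dir
    datasets/scale_1/substitution_parameters    # query substitution parameters
    datasets/scale_1/csv/                       # dataset converted to Neo4j CSV format
    datasets/scale_1/neo4j/databases/graph.db   # neo4j-import output
    datasets/scale_1/memgraph/default/snapshot  # csv_to_snapshot output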
tests/public_benchmark/ldbc/cleanup (new executable file)
@@ -0,0 +1,18 @@
#!/bin/bash

# go to script directory
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd ${script_dir}

# remove archives
rm *.tar.gz *.tar 2>/dev/null

# remove logo
rm plots/ldbc-logo.png 2>/dev/null

# remove directories
for folder in maven hadoop ldbc_snb_datagen ldbc_driver ldbc-snb-impls ve3; do
    if [ -d "$folder" ]; then
        rm -rf $folder
    fi
done
@@ -13,13 +13,13 @@
#dbms.directories.plugins=/var/lib/neo4j/plugins
#dbms.directories.certificates=/var/lib/neo4j/certificates
#dbms.directories.logs=/var/log/neo4j
dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.run=/var/run/neo4j

# This setting constrains all `LOAD CSV` import files to be under the `import` directory. Remove or comment it out to
# allow files to be loaded from anywhere in the filesystem; this introduces possible security problems. See the
# `LOAD CSV` section of the manual for details.
dbms.directories.import=/var/lib/neo4j/import
#dbms.directories.import=/var/lib/neo4j/import

# Whether requests to Neo4j are authenticated.
# To disable authentication, uncomment this line

@@ -32,8 +32,8 @@ dbms.security.auth_enabled=false
# calculated based on available system resources.
# Uncomment these lines to set specific initial and maximum
# heap size.
dbms.memory.heap.initial_size=2096m
dbms.memory.heap.max_size=4192m
#dbms.memory.heap.initial_size=2096m
#dbms.memory.heap.max_size=4192m

# The amount of memory to use for mapping the store files, in bytes (or
# kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g').

@@ -44,7 +44,7 @@ dbms.memory.heap.max_size=4192m
# The default page cache memory assumes the machine is dedicated to running
# Neo4j, and is heuristically set to 50% of RAM minus the max Java heap size.
#dbms.memory.pagecache.size=10g
dbms.query_cache_size=0
#dbms.query_cache_size=0

#*****************************************************************
# Network connector configuration

@@ -317,3 +317,6 @@ dbms.windows_service_name=neo4j
# Other Neo4j system properties
#********************************************************************
dbms.jvm.additional=-Dunsupported.dbms.udc.source=debian

# Disable Neo4j usage data collection
dbms.udc.enabled=false
tests/public_benchmark/ldbc/continuous_integration (new file)
@@ -0,0 +1,17 @@
# script used to run LDBC benchmarks on Apollo

# setup dependencies
TIMEOUT=1200 ./setup

# build dataset
TIMEOUT=3600 ./build_dataset

# run read benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png

# run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
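THREADS is expected to come from the environment (presumably set by the Apollo harness to match the slave machine), and the TIMEOUT prefix is presumably consumed by the harness as a per-step limit in seconds rather than by the scripts themselves. A hypothetical way to reproduce the read step by hand:

    export THREADS=8  # example value
    ./run_benchmark --run-db memgraph --create-index \
        --thread-count $THREADS --result-file-prefix read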
@@ -1,44 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>net.ellitron.ldbcsnbimpls</groupId>
  <artifactId>ldbc-snb-impls</artifactId>
  <packaging>pom</packaging>
  <version>0.1.0</version>
  <name>LDBC SNB Workload Implementations</name>
  <url>https://github.com/ellitron/ldbc-snb-impls</url>
  <description>
    A collection of workload implementations for the LDBC SNB benchmark driver
    (see https://github.com/ldbc/ldbc_driver).
  </description>
  <modules>
    <module>snb-interactive-core</module>
    <!--<module>snb-interactive-tools</module>-->
    <module>snb-interactive-neo4j</module>
    <!--<module>snb-interactive-titan</module>-->
    <!--<module>snb-interactive-torc</module>-->
  </modules>
  <dependencies>
    <dependency>
      <groupId>com.ldbc.driver</groupId>
      <artifactId>jeeves</artifactId>
      <version>0.3-SNAPSHOT</version>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
</project>
@@ -1,2 +0,0 @@
*
!.gitignore
@@ -1,131 +0,0 @@
#!/bin/bash

function print_help () {
    echo "Usage: $0 [OPTION]"
    echo "Optional arguments:"
    echo -e "  -h|--help -> Prints help."
    echo -e "  --scale-factor Positive_Integer -> Defines the dataset size."
    echo -e "  --transform-dataset -> Run just transform dataset (SNB -> Memgraph Snapshot)."
    echo -e "  --copy-dataset -> Just copy dataset into the Memgraph snapshots path."
    echo -e "  --run -> Just run Memgraph."
}

set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

memgraph_dir="${script_dir}/../../.."
# TODO: pass as an argument
memgraph_build_dir="${memgraph_dir}/build"
loadable_snapshot_path="${memgraph_build_dir}/snapshots/default/3000_01_01__01_01_01_00000"

# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
copy_dataset=false
run_memgraph=false
while [[ $# -gt 0 ]]
do
    case $1 in
    -h|--help)
        print_help
        exit 1
        ;;
    --scale-factor)
        scale_factor=$2
        shift
        ;;
    --transform-dataset)
        run_all=false
        transform_dataset=true
        ;;
    --copy-dataset)
        run_all=false
        copy_dataset=true
        ;;
    --run)
        run_all=false
        run_memgraph=true
        ;;
    *)
        # unknown option
        ;;
    esac
    shift # past argument or value
done

snapshot_path="${script_dir}/memgraph_snapshots/snb_scale_${scale_factor}.snapshot"
csv_folder="${script_dir}/neo4j_csv_dataset_scale_${scale_factor}"

# Transform dataset into MemGraph Snapshot.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
    rm ${snapshot_path}
    ${memgraph_dir}/tools/csv_to_snapshot -o ${snapshot_path} \
        --nodes ${csv_folder}/comment_0_0.csv \
        --nodes ${csv_folder}/forum_0_0.csv \
        --nodes ${csv_folder}/organisation_0_0.csv \
        --nodes ${csv_folder}/person_0_0.csv \
        --nodes ${csv_folder}/place_0_0.csv \
        --nodes ${csv_folder}/post_0_0.csv \
        --nodes ${csv_folder}/tag_0_0.csv \
        --nodes ${csv_folder}/tagclass_0_0.csv \
        --relationships ${csv_folder}/comment_hasCreator_person_0_0.csv \
        --relationships ${csv_folder}/comment_hasTag_tag_0_0.csv \
        --relationships ${csv_folder}/comment_isLocatedIn_place_0_0.csv \
        --relationships ${csv_folder}/comment_replyOf_comment_0_0.csv \
        --relationships ${csv_folder}/comment_replyOf_post_0_0.csv \
        --relationships ${csv_folder}/forum_containerOf_post_0_0.csv \
        --relationships ${csv_folder}/forum_hasMember_person_0_0.csv \
        --relationships ${csv_folder}/forum_hasModerator_person_0_0.csv \
        --relationships ${csv_folder}/forum_hasTag_tag_0_0.csv \
        --relationships ${csv_folder}/organisation_isLocatedIn_place_0_0.csv \
        --relationships ${csv_folder}/person_hasInterest_tag_0_0.csv \
        --relationships ${csv_folder}/person_isLocatedIn_place_0_0.csv \
        --relationships ${csv_folder}/person_knows_person_0_0.csv \
        --relationships ${csv_folder}/person_likes_comment_0_0.csv \
        --relationships ${csv_folder}/person_likes_post_0_0.csv \
        --relationships ${csv_folder}/person_studyAt_organisation_0_0.csv \
        --relationships ${csv_folder}/person_workAt_organisation_0_0.csv \
        --relationships ${csv_folder}/place_isPartOf_place_0_0.csv \
        --relationships ${csv_folder}/post_hasCreator_person_0_0.csv \
        --relationships ${csv_folder}/post_hasTag_tag_0_0.csv \
        --relationships ${csv_folder}/post_isLocatedIn_place_0_0.csv \
        --relationships ${csv_folder}/tag_hasType_tagclass_0_0.csv \
        --relationships ${csv_folder}/tagclass_isSubclassOf_tagclass_0_0.csv \
        --csv-delimiter "|" --array-delimiter ";"
    echo "Dataset transformed."
fi

# Copy the dataset.
if [[ ${run_all} = true ]] || [[ ${copy_dataset} = true ]] ; then
    cp ${snapshot_path} ${loadable_snapshot_path}
    echo "Dataset copied."
fi

# Run MemGraph.
if [[ ${run_all} = true ]] || [[ ${run_memgraph} = true ]] ; then
    ${memgraph_build_dir}/memgraph -flagfile ${memgraph_dir}/config/public_benchmark_ldbc.conf 2>&1 &
    memgraph_pid=$!
    sleep 200 # TODO: replace this with something that is going to work in all cases
              # not just in SNB scale 1 case

    # Create indexes.
    cd ${script_dir}
    if [ ! -d "ve3" ]; then
        virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
        source ve3/bin/activate
        pip install -r ${script_dir}/requirements_3.txt
    fi
    source ve3/bin/activate
    python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j

    # On Ctrl-C stop Memgraph.
    trap ctrl_c INT
    function ctrl_c() {
        kill -9 ${memgraph_pid}
        exit 0
    }
    while true; do
        sleep 1
    done
fi
@@ -1,95 +0,0 @@
#!/bin/bash

function print_help () {
    echo "Usage: $0 [OPTION]"
    echo "Optional arguments:"
    echo -e "  -h|--help -> Prints help."
    echo -e "  --scale-factor Positive_Integer -> Defines the dataset size."
    echo -e "  --transform-dataset -> Run just transform dataset (SNB -> Neo4j CSV)."
    echo -e "  --load-dataset -> Just load dataset into Neo4j."
    echo -e "  --run -> Just run Neo4j."
}

set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
load_dataset=false
run_neo=false
while [[ $# -gt 0 ]]
do
    case $1 in
    -h|--help)
        print_help
        exit 1
        ;;
    --scale-factor)
        scale_factor=$2
        shift
        ;;
    --transform-dataset)
        run_all=false
        transform_dataset=true
        ;;
    --load-dataset)
        run_all=false
        load_dataset=true
        ;;
    --run)
        run_all=false
        run_neo=true
        ;;
    *)
        # unknown option
        ;;
    esac
    shift # past argument or value
done

dataset_folder=${script_dir}/neo4j_csv_dataset_scale_${scale_factor}

# Transform the dataset into files on disk.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
    cd ${dataset_folder}
    chmod +x import.sh
    NEO4J_HOME=/usr/share/neo4j ./import.sh
fi

# Load the dataset into Neo4j.
if [[ ${run_all} = true ]] || [[ ${load_dataset} = true ]] ; then
    neo4j_data=${script_dir}/neo4j_home/data
    neo4j_graphdb=${neo4j_data}/databases/graph.db
    mkdir -p ${neo4j_graphdb}
    rm -rf ${neo4j_graphdb}/*
    cp -r ${dataset_folder}/graph.db/* ${neo4j_graphdb}/
fi

# Run Neo4j.
if [[ ${run_all} = true ]] || [[ ${run_neo} = true ]] ; then
    NEO4J_HOME=${script_dir}/neo4j_home NEO4J_CONF=${script_dir}/neo4j_config /usr/share/neo4j/bin/neo4j console 2>&1 &
    neo_pid=$!
    sleep 5

    # Create indexes.
    cd ${script_dir}
    if [ ! -d "ve3" ]; then
        virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
        source ve3/bin/activate
        pip install -r ${script_dir}/requirements_3.txt
    fi
    source ve3/bin/activate
    python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j

    # On Ctrl-C stop Neo4j.
    trap ctrl_c INT
    function ctrl_c() {
        kill -9 ${neo_pid}
        exit 0
    }
    while true; do
        sleep 1
    done
fi
@@ -1,2 +0,0 @@
*
!.gitignore
tests/public_benchmark/ldbc/requirements.txt (new file)
@@ -0,0 +1,3 @@
neo4j-driver==1.4.0
matplotlib==2.0.2
numpy==1.13.1
@@ -1,10 +0,0 @@
cycler==0.10.0
functools32==3.2.3.post2
matplotlib==2.0.2
numpy==1.13.1
pkg-resources==0.0.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.2
six==1.10.0
subprocess32==3.2.7
@@ -1,2 +0,0 @@
neo4j-driver==1.4.0
pkg-resources==0.0.0
@@ -1,81 +1,230 @@
#!/bin/bash
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Run the LDBC SNB interactive workload / benchmark.
# The benchmark is executed with:
#  * ldbc_driver -> workload executor
#  * ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''
Run the LDBC SNB interactive workload / benchmark.
The benchmark is executed with:
  * ldbc_driver -> workload executor
  * ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''

script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
import time

function print_help () {
    echo "Usage: $0 [OPTION]"
    echo "Optional arguments:"
    echo -e "  -h|--help -> Prints help."
    echo -e "  --host -> Database host."
    echo -e "  --port -> Database port."
    echo -e "  --time-compression-ratio |"
    echo -e "  --operation-count | -> https://github.com/ldbc/ldbc_driver/wiki/Driver-Configuration"
    echo -e "  --thread-count |"
    echo -e "  --result-file-prefix -> Result file prefix."
    echo -e "  --properties-file -> Properties file used to select queries"
}
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", ".."))

# Default parameters.
host=127.0.0.1
port=7687
time_compression_ratio=0.01
operation_count=200
thread_count=8
result_file_prefix="undefined"
properties_file="${script_dir}/ldbc-snb-impls-short-reads.properties"

# Read the arguments.
while [[ $# -gt 0 ]]
do
    case $1 in
    -h|--help)
        print_help
        exit 1
        ;;
    --host)
        host=$2
        shift
        ;;
    --port)
        port=$2
        shift
        ;;
    --result-file-prefix)
        result_file_prefix=$2
        shift
        ;;
    --properties-file)
        properties_file=$2
        shift
        ;;
    --time-compression-ratio)
        time_compression_ratio=$2
        shift
        ;;
    --operation-count)
        operation_count=$2
        shift
        ;;
    --thread-count)
        thread_count=$2
        shift
        ;;
    *)
        # unknown option
        ;;
    esac
    shift # past argument or value
done
def wait_for_server(port, delay=1.0):
    cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
    while subprocess.call(cmd) != 0:
        time.sleep(0.5)
    time.sleep(delay)

cd ${script_dir}/ldbc-snb-impls
mvn clean compile assembly:single

cd ${script_dir}/ldbc_driver
java -cp target/jeeves-0.3-SNAPSHOT.jar:${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/target/snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar com.ldbc.driver.Client -P ${script_dir}/ldbc_driver/configuration/ldbc_driver_default.properties -P ${properties_file} -p ldbc.snb.interactive.updates_dir ${script_dir}/ldbc_snb_datagen/social_network -p host ${host} -p port ${port} -db net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb -p ldbc.snb.interactive.parameters_dir ${script_dir}/ldbc_snb_datagen/substitution_parameters --time_compression_ratio ${time_compression_ratio} --operation_count ${operation_count} --thread_count ${thread_count}
class Memgraph:
    def __init__(self, dataset, port, num_workers):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.num_workers = str(num_workers)

cp ${script_dir}/ldbc_driver/results/LDBC-results.json ${script_dir}/results/${result_file_prefix}-LDBC-results.json
    def start(self):
        # find executable path
        binary = os.path.join(BASE_DIR, "build", "memgraph")
        if not os.path.exists(binary):
            binary = os.path.join(BASE_DIR, "build_release", "memgraph")

        # database args
        database_args = [binary, "--num-workers", self.num_workers,
                         "--snapshot-directory", os.path.join(self.dataset,
                                                              "memgraph"),
                         "--recover-on-startup", "true",
                         "--port", self.port]

        # database env
        env = {"MEMGRAPH_CONFIG": os.path.join(SCRIPT_DIR, "config",
                                               "memgraph.conf")}

        # start memgraph
        self.proc = subprocess.Popen(database_args, env=env)
        wait_for_server(self.port)

    def stop(self):
        self.proc.terminate()
        if self.proc.wait() != 0:
            raise Exception("Database exited with non-zero exit code!")


class Neo:
    def __init__(self, dataset, port):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.http_port = str(int(port) + 7474)
        self.home_dir = None

    def start(self):
        # create home directory
        self.home_dir = tempfile.mkdtemp(dir="/dev/shm")

        neo4j_dir = os.path.join(BASE_DIR, "libs", "neo4j")

        try:
            os.symlink(os.path.join(neo4j_dir, "lib"),
                       os.path.join(self.home_dir, "lib"))
            os.symlink(os.path.join(self.dataset, "neo4j"),
                       os.path.join(self.home_dir, "data"))
            conf_dir = os.path.join(self.home_dir, "conf")
            conf_file = os.path.join(conf_dir, "neo4j.conf")
            os.mkdir(conf_dir)
            shutil.copyfile(os.path.join(SCRIPT_DIR, "config", "neo4j.conf"),
                            conf_file)
            with open(conf_file, "a") as f:
                f.write("\ndbms.connector.bolt.listen_address=:" +
                        self.port + "\n")
                f.write("\ndbms.connector.http.listen_address=:" +
                        self.http_port + "\n")

            # environment
            env = {"NEO4J_HOME": self.home_dir}

            self.proc = subprocess.Popen([os.path.join(neo4j_dir, "bin",
                                                       "neo4j"),
                                          "console"], env=env, cwd=neo4j_dir)
        except:
            shutil.rmtree(self.home_dir)
            raise Exception("Couldn't run Neo4j!")

        wait_for_server(self.http_port, 2.0)

    def stop(self):
        self.proc.terminate()
        ret = self.proc.wait()
        if os.path.exists(self.home_dir):
            shutil.rmtree(self.home_dir)
        if ret != 0:
            raise Exception("Database exited with non-zero exit code!")


def parse_args():
    argp = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    argp.add_argument('--scale', type=int, default=1,
                      help='Dataset scale to use for benchmarking.')
    argp.add_argument('--host', default='127.0.0.1', help='Database host.')
    argp.add_argument('--port', default='7687', help='Database port.')
    argp.add_argument('--time-compression-ratio', type=float, default=0.01,
                      help='Compress/stretch durations between operation start '
                           'times to increase/decrease benchmark load. '
                           'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
                           'benchmark 10x faster. Default is 0.01.')
    argp.add_argument('--operation-count', type=int, default=200,
                      help='Number of operations to generate during benchmark '
                           'execution.')
    argp.add_argument('--thread-count', type=int, default=8,
                      help='Thread pool size to use for executing operation '
                           'handlers.')
    argp.add_argument('--time-unit', default='microseconds',
                      choices=('nanoseconds', 'microseconds', 'milliseconds',
                               'seconds', 'minutes'),
                      help='Time unit to use for measuring performance metrics')
    argp.add_argument('--result-file-prefix', default='',
                      help='Result file name prefix')
    argp.add_argument('--properties-file',
                      default=os.path.join(
                          SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
                      help='Properties file used to select queries')
    argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
                      help='Run the database before starting LDBC')
    argp.add_argument('--create-index', action='store_true', default=False,
                      help='Create index in the running database.')
    return argp.parse_args()


LDBC_INTERACTIVE_NEO4J = \
    os.path.join(SCRIPT_DIR,
                 'ldbc-snb-impls', 'snb-interactive-neo4j', 'target',
                 'snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar')
LDBC_DEFAULT_PROPERTIES = \
    os.path.join(SCRIPT_DIR, 'ldbc_driver', 'configuration',
                 'ldbc_driver_default.properties')


def create_index(port):
    index_file = os.path.join(SCRIPT_DIR, 'ldbc-snb-impls',
                              'snb-interactive-neo4j', 'scripts', 'indexCreation.neo4j')
    subprocess.check_call(('ve3/bin/python3', 'index_creation.py', index_file, port),
                          cwd=SCRIPT_DIR)
    time.sleep(1.0)


def main():
    args = parse_args()
    dataset = os.path.join(SCRIPT_DIR, "datasets", "scale_" + str(args.scale))

    db = None
    if args.run_db:
        if args.host != "127.0.0.1":
            raise Exception("Host parameter must point to localhost when "
                            "this script starts the database!")
        if args.run_db.lower() == 'memgraph':
            db = Memgraph(dataset, args.port, args.thread_count)
        elif args.run_db.lower() == 'neo4j':
            db = Neo(dataset, args.port)

    try:
        if db:
            db.start()
        if args.create_index:
            create_index(args.port)

        # Run LDBC driver.
        cp = 'target/jeeves-0.3-SNAPSHOT.jar:{}'.format(LDBC_INTERACTIVE_NEO4J)
        updates_dir = os.path.join(dataset, 'social_network')
        parameters_dir = os.path.join(dataset, 'substitution_parameters')
        java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
                    '-P', LDBC_DEFAULT_PROPERTIES,
                    '-P', os.path.join(os.getcwd(), args.properties_file),
                    '-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
                    '-p', 'host', args.host, '-p', 'port', args.port,
                    '-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
                    '-p', 'ldbc.snb.interactive.parameters_dir', parameters_dir,
                    '--time_compression_ratio', str(args.time_compression_ratio),
                    '--operation_count', str(args.operation_count),
                    '--thread_count', str(args.thread_count),
                    '--time_unit', args.time_unit.upper())
        subprocess.check_call(java_cmd, cwd=os.path.join(SCRIPT_DIR, 'ldbc_driver'))

        # Copy the results to results dir.
        ldbc_results = os.path.join(SCRIPT_DIR, 'ldbc_driver', 'results',
                                    'LDBC-results.json')
        results_dir = os.path.join(SCRIPT_DIR, 'results')
        results_name = []
        if args.result_file_prefix:
            results_name.append(args.result_file_prefix)
        if args.run_db:
            results_name.append(args.run_db)
        else:
            results_name.append("external")
        results_name.append("scale_" + str(args.scale))
        results_name = "-".join(results_name + ["LDBC", "results.json"])
        results_copy = os.path.join(results_dir, results_name)
        shutil.copyfile(ldbc_results, results_copy)

        print("Results saved to:", results_copy)

    finally:
        if db:
            db.stop()

    print("Done!")


if __name__ == '__main__':
    main()
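With the database lifecycle folded into the script, a single command now covers start, index creation, benchmark, and shutdown. A hypothetical end-to-end run (all flag values are examples):

    ./run_benchmark --run-db memgraph --create-index --scale 1 \
        --thread-count 8 --result-file-prefix read
    # expected to leave results/read-memgraph-scale_1-LDBC-results.json behind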
tests/public_benchmark/ldbc/setup (new executable file)
@@ -0,0 +1,82 @@
#!/bin/bash -e

script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Cleanup
cd ${script_dir}
./cleanup

# Find remote endpoints
if [ "$USER" == "apollo" ]; then
    deps_http_url="http://89.201.166.70:46744"
    deps_git_url="git://89.201.166.70:46745"
    deps_pypi_url="http://89.201.166.70:46746/root/pypi"
    deps_pypi_trusted="89.201.166.70"
else
    deps_http_url="http://deps.memgraph.io"
    deps_git_url="git://deps.memgraph.io"
    deps_pypi_url="http://deps.memgraph.io:3141/root/pypi"
    deps_pypi_trusted="deps.memgraph.io"
fi

# Download Maven
cd ${script_dir}
wget -nv $deps_http_url/ldbc/apache-maven-3.5.0-bin.tar.gz -O maven.tar.gz
tar -xzf maven.tar.gz
mv apache-maven-3.5.0 maven
rm maven.tar.gz
cd maven
sed -r "s@</settings>@<localRepository>${script_dir}/maven/.m2</localRepository>\n</settings>@g" -i conf/settings.xml
wget -nv $deps_http_url/ldbc/maven-cache.tar.gz
tar -xzf maven-cache.tar.gz
rm maven-cache.tar.gz
mvn=${script_dir}/maven/bin/mvn

# Download Hadoop
cd ${script_dir}
wget -nv $deps_http_url/ldbc/hadoop-2.7.4.tar.gz -O hadoop.tar.gz
tar -xzf hadoop.tar.gz
mv hadoop-2.7.4 hadoop
rm hadoop.tar.gz
# Edit Java Xmx settings because Hadoop has stupid default run scripts and they duplicate the -Xmx flag
# https://stackoverflow.com/questions/2740725/duplicated-java-runtime-options-what-is-the-order-of-preference
# "Depends on the JVM, perhaps the version...perhaps even how many paper clips you have on your desk at the time. It might not even work. Don't do that."
sed -r "s@-Xmx512m @@g" -i hadoop/etc/hadoop/hadoop-env.sh

# Download LDBC logo
cd ${script_dir}/plots
wget -nv $deps_http_url/ldbc/ldbc-logo.png

# Setup ldbc_snb_datagen
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_snb_datagen.git
git clone $deps_git_url/ldbc_snb_datagen.git
cd ldbc_snb_datagen
git checkout 46ccf9340c20d8cfde0e7e11c9297a4061117bd3
sed -r "s@#!/bin/bash@#!/bin/bash -e@g" -i run.sh

# Setup ldbc_driver
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_driver.git
git clone $deps_git_url/ldbc_driver.git
cd ldbc_driver
git checkout 1bb441394c3cd3e23d4df5a87689b9d1c5e6f48f
$mvn clean package -DskipTests
$mvn install -DskipTests

# Setup ldbc-snb-impls
cd ${script_dir}
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
git clone $deps_git_url/ldbc-snb-impls.git
cd ldbc-snb-impls
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
$mvn install
$mvn clean compile assembly:single

# Setup Python3 Virtualenv
cd ${script_dir}
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -i $deps_pypi_url \
    --trusted-host $deps_pypi_trusted -r requirements.txt
deactivate
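After setup finishes, the whole toolchain lives inside the benchmark directory instead of on the system. A hypothetical smoke check (assuming the standard bin layouts of the Maven and Hadoop distributions):

    ./maven/bin/mvn --version
    ./hadoop/bin/hadoop version
    ./ve3/bin/python3 --version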
@@ -1,50 +0,0 @@
#!/bin/bash

# Setup all dependencies

function command_fail {
    echo $1
    exit 1
}

script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Setup ldbc_snb_datagen
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_snb_datagen

# Setup ldbc_driver
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_driver.git
cd ${script_dir}/ldbc_driver
mvn clean package -DskipTests || exit 1
mvn install -DskipTests || exit 1

# Setup ldbc-snb-impls
cd ${script_dir}
git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
cp ${script_dir}/ldbc-snb-impls-pom.xml ${script_dir}/ldbc-snb-impls/pom.xml
cd ${script_dir}/ldbc-snb-impls
mvn install || exit 1

# Use set -e after we have called git clone, to avoid exiting if we already
# cloned something.
set -e

# Setup python virtual environment & Install dependencies
cd ${script_dir}
if ! which virtualenv > /dev/null 2>&1; then
    command_fail "Please install virtualenv!"
fi
if [ ! -d "ve3" ]; then
    virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
fi
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
deactivate
if [ ! -d "ve2" ]; then
    virtualenv -p python2 ve2 || command_fail "Virtualenv setup failed."
fi
source ve2/bin/activate
pip install -r ${script_dir}/requirements_2.txt
deactivate
@@ -1,37 +0,0 @@
#!/bin/bash

# System setup (root access is required)

# Working directories
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
mkdir -p ${script_dir}/tmp

# Install OS packages
cd ${script_dir}/tmp
if which apt-get > /dev/null 2>&1; then
    wget -O - http://debian.neo4j.org/neotechnology.gpg.key >> key.pgp || exit 1
    sudo apt-key add key.pgp || exit 1
    sudo echo 'deb http://debian.neo4j.org/repo stable/' | tee -a /etc/apt/sources.list.d/neo4j.list > /dev/null || exit 1
    sudo apt-get update || exit 1
    sudo apt-get install -y maven default-jdk neo4j || exit 1
else
    echo "Assuming that 'maven', 'jdk' and 'neo4j' are installed"
fi

# Install Hadoop
cd ${script_dir}/tmp
hadoop_version="hadoop-2.7.3"
hadoop_tar="${hadoop_version}.tar.gz"
hadoop_url="http://apache.mirrors.tds.net/hadoop/common/${hadoop_version}/${hadoop_tar}"
wget ${hadoop_url}
tar -xzf ${hadoop_tar}
# TODO: root access is required here -> run hadoop under a current user
echo "Moving hadoop to /usr/local/hadoop"
sudo mv ${hadoop_version} /usr/local/hadoop

# Performance Setup
# echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >/dev/null

# Cleanup
rm -rf ${script_dir}/tmp
@@ -260,6 +260,23 @@ if mode == "release":
    RUNS.append(generate_run("stress_large", commands = cmd, infile = infile,
                             slave_group = "remote_16c56g"))

# public_benchmark/ldbc tests
if mode == "release":
    ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
    neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
    csv_to_snapshot_path = os.path.join(BASE_DIR, "tools", "csv_to_snapshot")
    plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools", "plot_ldbc_latency")
    infile = create_archive("ldbc", [binary_release_path, ldbc_path,
                                     binary_release_link_path, neo4j_path, config_path,
                                     csv_to_snapshot_path, plot_ldbc_latency_path],
                            cwd = WORKSPACE_DIR)
    cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
    outfile_paths = "\./memgraph/tests/public_benchmark/ldbc/results/.+\n" \
                    "\./memgraph/tests/public_benchmark/ldbc/plots/.+\n"
    RUNS.append(generate_run("public_benchmark__ldbc", commands = cmd,
                             infile = infile, outfile_paths = outfile_paths,
                             slave_group = "remote_20c140g", enable_network = True))

# store ARCHIVES and RUNS
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)
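The outfile_paths regular expressions presumably name the files Apollo collects after the run; given the continuous_integration steps above, they would match files such as (names follow the run_benchmark and plotting naming scheme):

    ./memgraph/tests/public_benchmark/ldbc/results/read-memgraph-scale_1-LDBC-results.json
    ./memgraph/tests/public_benchmark/ldbc/plots/read-queries-scale_1.png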
@@ -8,38 +8,51 @@ Latency Barchart (Based on LDBC JSON output).
import json
import os
import numpy as np
from argparse import ArgumentParser
import string

import matplotlib
# Must set 'Agg' backend before importing pyplot
# This is so the script works on headless machines (without X11)
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
from argparse import ArgumentParser


SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
COLORS = {
    'memgraph': '#ff7300',
    'neo4j': '#008cc2'
COLORS = ['#ff7300', '#008cc2']  # TODO: add more colors!
LDBC_TIME_FACTORS = {
    "SECONDS": 1.0,
    "MILLISECONDS": 1000.0,
    "MICROSECONDS": 1000000.0,
    "NANOSECONDS": 1000000000.0
}
TIME_FACTORS = {
    "s": 1.0,
    "ms": 1000,
    "us": 1000000,
    "ns": 1000000000,
}


def parse_args():
    argp = ArgumentParser(description=__doc__)
    argp.add_argument("--vendor-references", nargs="+",
                      help="Short references that represent all the "
                           "vendors that are going to be "
                           "visualized on the plot.")
    argp.add_argument("--vendor-titles", nargs="+",
                      default=["Memgraph", "Market leader"],
                      help="Vendor titles that are going to appear "
                           "on the plot, e.g. legend titles.")
    argp.add_argument("--plot-title", default="{{Plot title placeholder}}",
    argp.add_argument("--plot-title", default="",
                      help="Plot title.")
    argp.add_argument("--logo-path", default=None,
                      help="Path to the logo that is going to be presented"
                           " instead of title.")
    argp.add_argument("--results-dir",
                      default=os.path.join(SCRIPT_DIR,
                                           "../tests/public_benchmark"
                                           "/ldbc/results"),
    argp.add_argument("--results", nargs="+", required=True,
                      help="Path to the folder with result files in format "
                           "{{vendor-reference}}-LDBC-results.json")
    argp.add_argument("--time-unit", choices=("s", "ms", "us", "ns"),
                      default="ms", help="The time unit that should be used.")
    argp.add_argument("--output", default="",
                      help="Save plot to file (instead of displaying it).")
    argp.add_argument("--max-label-width", default=11, type=int,
                      help="Maximum length of the x-axis labels (-1 is unlimited)")
    return argp.parse_args()

@@ -62,33 +75,43 @@ def main():
    args = parse_args()

    # Prepare the datastructure.
    vendors = {}
    for vendor_reference, vendor_title in zip(args.vendor_references,
                                              args.vendor_titles):
        vendors[vendor_reference] = {}
        vendors[vendor_reference]['title'] = vendor_title
        vendors[vendor_reference]['results_path'] = os.path.join(
            args.results_dir, "%s-LDBC-results.json" % vendor_reference)
        vendors[vendor_reference]['color'] = COLORS[vendor_reference]
        vendors[vendor_reference]['latencies'] = []
        vendors[vendor_reference]['query_names'] = []
    vendors = []
    for i, results_file, vendor_title in zip(range(len(args.results)),
                                             args.results,
                                             args.vendor_titles):
        vendor = {}
        vendor['title'] = vendor_title
        vendor['results_file'] = results_file
        vendor['color'] = COLORS[i]
        vendor['results'] = []
        vendors.append(vendor)
    assert len(vendors) == 2, "The graph is tailored for only 2 vendors."

    # Collect the benchmark data.
    print("LDBC Latency Data")
    for vendor_reference, vendor_data in vendors.items():
        print("Vendor: %s" % vendor_reference)
        with open(vendor_data['results_path']) as results_file:
    for vendor in vendors:
        with open(vendor['results_file']) as results_file:
            results_data = json.load(results_file)
            for query_data in results_data["all_metrics"]:
                mean_runtime = query_data["run_time"]["mean"]
                mean_runtime = (query_data["run_time"]["mean"] /
                                LDBC_TIME_FACTORS[results_data["unit"]] *
                                TIME_FACTORS[args.time_unit])
                query_name = query_data['name']
                print("%s -> %sms" % (query_name, str(mean_runtime)))
                vendor_data['latencies'].append(mean_runtime)
                vendor_data['query_names'].append(query_name)
                vendor['results'].append((query_name, mean_runtime))

    # Sort results.
    for vendor in vendors:
        vendor['results'].sort(key=lambda item: int("".join(filter(
            lambda x: x in string.digits, item[0]))))

    # Print results.
    for vendor in vendors:
        print("Vendor:", vendor['title'])
        for query_name, latency in vendor['results']:
            print("{} -> {:.3f}{}".format(query_name, latency, args.time_unit))

    # Consistency check.
    all_query_names = [tuple(vd['query_names']) for vd in vendors.values()]
    all_query_names = [tuple(res[0] for res in vd['results']) for vd in vendors]
    assert len(set(all_query_names)) == 1, \
        "Queries between different vendors are different!"
    query_names = all_query_names[0]

@@ -97,7 +120,8 @@ def main():
    ind = np.arange(len(query_names))  # the x locations for the groups
    width = 0.40  # the width of the bars
    fig, ax = plt.subplots()  # figure setup
    ax.set_ylabel('Mean Latency (ms)')  # YAxis title
    fig.set_size_inches(1920 / 96, 1080 / 96)  # set figure size
    ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
    ax.set_facecolor('#dcdcdc')  # plot bg color (light gray)
    ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)

@@ -121,26 +145,33 @@
        line.set_linestyle('--')
    ax.set_axisbelow(True)  # put the grid below all other elements
    plt.grid(True)  # show grid
    # Set plot title
    ax.set_title(args.plot_title)
    # Draw logo or plot title
    if args.logo_path is None:
        ax.set_title(args.plot_title)
    else:
    if args.logo_path != None:
        # TODO: improve the logo positioning
        im = plt.imread(get_sample_data(args.logo_path))
        im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
                                                     args.logo_path)))
        plt.gcf().subplots_adjust(top=0.85)
        newax = fig.add_axes([0.4, 0.75, 0.2, 0.25], anchor='N')
        newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
        newax.imshow(im)
        newax.axis('off')
    # Draw bars
    for index, vendor_data in enumerate(vendors.values()):
        rects = ax.bar(ind + index * width, vendor_data['latencies'], width,
                       color=vendor_data['color'])
        vendor_data['rects'] = rects
    for index, vendor in enumerate(vendors):
        latencies = [res[1] for res in vendor['results']]
        rects = ax.bar(ind + index * width, latencies, width,
                       color=vendor['color'])
        vendor['rects'] = rects
        autolabel(ax, rects)
    rects = [vd['rects'][0] for vd in vendors.values()]
    titles = [vd['title'] for vd in vendors.values()]
    rects = [vd['rects'][0] for vd in vendors]
    titles = [vd['title'] for vd in vendors]
    ax.legend(rects, titles)  # Draw the legend.
    plt.show()

    if args.output == "":
        plt.show()
    else:
        plt.savefig(args.output, dpi=96)


if __name__ == '__main__':
    main()
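The two factor tables make the unit handling explicit: a mean run_time is first normalized to seconds by dividing by the LDBC factor of the unit the driver reported, then scaled into the requested display unit. For example, a mean of 1500 reported in MICROSECONDS and plotted with --time-unit ms comes out as 1500 / 1000000 * 1000 = 1.5 ms.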