LDBC: Integrate with Apollo

Summary:
Run Neo4j and Memgraph directly from the run_benchmark script.
This makes the separate mg and neo scripts obsolete.
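For example, both databases can now be benchmarked through the same entry
point (the flags are documented in the updated README below):

  ./run_benchmark --create-index --run-db memgraph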

Reviewers: buda, teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D806
Matej Ferencevic 2017-09-21 20:01:10 +02:00
parent f2a82f4f58
commit 686dc452ee
21 changed files with 534 additions and 596 deletions

View File

@@ -1,8 +1,8 @@
datasets/
ldbc_snb_datagen/
ldbc_driver/
ldbc-snb-impls/
neo4j_csv_dataset_scale_*/
tmp/
ve3/
ve2/
maven/
hadoop/
*.out

View File

@@ -3,16 +3,15 @@
## How to run the benchmark against Neo4j OR Memgraph?
cd memgraph/tests/public_benchmark/ldbc
./setup_system
./setup_dependencies
./setup_dataset [--scale-factor 1]
./neo [--run] OR ./mg [--run]
./setup
./build_dataset [--scale-factor 1]
# To run short reads by default, just call:
./run_benchmark
# To run update queries use the following.
./run_benchmark --properties-file ldbc-snb-impls-updates.properties
# You may need to increase the time compression when updating:
./run_benchmark --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./run_benchmark --create-index --run-db memgraph # or neo4j
# To run update queries pass the properties file for updates and slow down
# the execution by setting a larger time compression ratio.
./run_benchmark --create-index --run-db memgraph \
    --properties-file ldbc-snb-impls-updates.properties \
    --time-compression-ratio 1.5
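# To benchmark an already running database (e.g. one started by hand),
# omit --run-db and point the driver at its Bolt endpoint; the result
# file is then prefixed with "external". A sketch:
./run_benchmark --host 127.0.0.1 --port 7687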
## How to run a specific test?

View File

@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/bash -e
# Generate SNB dataset.
@@ -7,18 +7,18 @@ function print_help () {
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --neo4j-home Neo4j home directory, overrides NEO4J_HOME"
echo -e " --memgraph-home Memgraph home directory."
echo -e " --skip-generating Only transform generated dataset"
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
base_dir="${script_dir}/../../.."
neo4j_dir="${base_dir}/libs/neo4j"
# Add Maven to PATH
export PATH=$PATH:${script_dir}/maven/bin
# Read the arguments.
scale_factor=1
skip_generating=false
memgraph_dir="${script_dir}/../../.."
while [[ $# -gt 0 ]]
do
case $1 in
@@ -30,17 +30,6 @@ do
scale_factor=$2
shift
;;
--neo4j-home)
NEO4J_HOME=$2
shift
;;
--memgraph-home)
memgraph_dir=$2
shift
;;
--skip-generating)
skip_generating=true
;;
*)
# unknown option
;;
@@ -48,13 +37,15 @@ do
shift # past argument or value
done
echo "Using scale_factor" $scale_factor
# Prepare the folder structure.
dataset_folder_prefix="neo4j_csv_dataset"
dataset_folder="${script_dir}/${dataset_folder_prefix}_scale_${scale_factor}"
mkdir -p ${dataset_folder}
dataset_dir="${script_dir}/datasets/scale_${scale_factor}"
if [ -d ${dataset_dir} ]; then
rm -rf ${dataset_dir}
fi
mkdir -p ${dataset_dir}
# Define scale factor.
echo "Using scale_factor" $scale_factor
cat > ${script_dir}/ldbc_snb_datagen/params.ini <<EOF
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.${scale_factor}
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
@@ -62,83 +53,84 @@ ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
EOF
ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
if [[ ${skip_generating} = false ]]; then
# Generate the dataset.
rm -rf ${dataset_folder}/*
cd ${ldbc_snb_datagen_folder}
export HADOOP_OPTS="$HADOOP_OPTS -Xmx20G"
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
# Find installed Java binary.
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
export JAVA_HOME=/usr/lib/jvm/default-java/jre
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
export JAVA_HOME=/usr/lib/jvm/default-runtime/
else
else
echo "Unable to find JRE under /usr/lib/jvm"
exit 1
fi
echo "Using JAVA_HOME" $JAVA_HOME
HADOOP_HOME=/usr/local/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh || exit 1
# Transform the dataset into Neo4j CSV format.
cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
mvn exec:java \
-Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
-Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_folder}" || exit 1
fi
echo "Using JAVA_HOME" $JAVA_HOME
rm -rf ${dataset_folder}/social_network
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_folder}/social_network
# Remove old generated dataset.
rm -rf ${ldbc_snb_datagen_folder}/social_network ${ldbc_snb_datagen_folder}/substitution_parameters
rm -rf ${dataset_folder}/substitution_parameters
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_folder}/substitution_parameters
# Generate the dataset.
ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
cd ${ldbc_snb_datagen_folder}
# Poorly documented hadoop heapsize flag (unit is 'm'), see: hadoop/libexec/hadoop-config.sh
# https://stackoverflow.com/questions/15609909/error-java-heap-space
export HADOOP_HEAPSIZE=8192
HADOOP_HOME=${script_dir}/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh
# Copy generated dataset.
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_dir}/
# Transform the dataset into Neo4j CSV format.
mkdir -p ${dataset_dir}/csv
cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
mvn exec:java \
-Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
-Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/csv"
csv_dataset="
--nodes ${dataset_folder}/comment_0_0.csv \
--nodes ${dataset_folder}/forum_0_0.csv \
--nodes ${dataset_folder}/organisation_0_0.csv \
--nodes ${dataset_folder}/person_0_0.csv \
--nodes ${dataset_folder}/place_0_0.csv \
--nodes ${dataset_folder}/post_0_0.csv \
--nodes ${dataset_folder}/tag_0_0.csv \
--nodes ${dataset_folder}/tagclass_0_0.csv \
--relationships ${dataset_folder}/comment_hasCreator_person_0_0.csv \
--relationships ${dataset_folder}/comment_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/comment_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/comment_replyOf_comment_0_0.csv \
--relationships ${dataset_folder}/comment_replyOf_post_0_0.csv \
--relationships ${dataset_folder}/forum_containerOf_post_0_0.csv \
--relationships ${dataset_folder}/forum_hasMember_person_0_0.csv \
--relationships ${dataset_folder}/forum_hasModerator_person_0_0.csv \
--relationships ${dataset_folder}/forum_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/organisation_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/person_hasInterest_tag_0_0.csv \
--relationships ${dataset_folder}/person_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/person_knows_person_0_0.csv \
--relationships ${dataset_folder}/person_likes_comment_0_0.csv \
--relationships ${dataset_folder}/person_likes_post_0_0.csv \
--relationships ${dataset_folder}/person_studyAt_organisation_0_0.csv \
--relationships ${dataset_folder}/person_workAt_organisation_0_0.csv \
--relationships ${dataset_folder}/place_isPartOf_place_0_0.csv \
--relationships ${dataset_folder}/post_hasCreator_person_0_0.csv \
--relationships ${dataset_folder}/post_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/post_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/tag_hasType_tagclass_0_0.csv \
--relationships ${dataset_folder}/tagclass_isSubclassOf_tagclass_0_0.csv"
--nodes ${dataset_dir}/csv/comment_0_0.csv \
--nodes ${dataset_dir}/csv/forum_0_0.csv \
--nodes ${dataset_dir}/csv/organisation_0_0.csv \
--nodes ${dataset_dir}/csv/person_0_0.csv \
--nodes ${dataset_dir}/csv/place_0_0.csv \
--nodes ${dataset_dir}/csv/post_0_0.csv \
--nodes ${dataset_dir}/csv/tag_0_0.csv \
--nodes ${dataset_dir}/csv/tagclass_0_0.csv \
--relationships ${dataset_dir}/csv/comment_hasCreator_person_0_0.csv \
--relationships ${dataset_dir}/csv/comment_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/comment_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/comment_replyOf_comment_0_0.csv \
--relationships ${dataset_dir}/csv/comment_replyOf_post_0_0.csv \
--relationships ${dataset_dir}/csv/forum_containerOf_post_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasMember_person_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasModerator_person_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/organisation_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/person_hasInterest_tag_0_0.csv \
--relationships ${dataset_dir}/csv/person_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/person_knows_person_0_0.csv \
--relationships ${dataset_dir}/csv/person_likes_comment_0_0.csv \
--relationships ${dataset_dir}/csv/person_likes_post_0_0.csv \
--relationships ${dataset_dir}/csv/person_studyAt_organisation_0_0.csv \
--relationships ${dataset_dir}/csv/person_workAt_organisation_0_0.csv \
--relationships ${dataset_dir}/csv/place_isPartOf_place_0_0.csv \
--relationships ${dataset_dir}/csv/post_hasCreator_person_0_0.csv \
--relationships ${dataset_dir}/csv/post_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/post_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/tag_hasType_tagclass_0_0.csv \
--relationships ${dataset_dir}/csv/tagclass_isSubclassOf_tagclass_0_0.csv"
# Convert to neo4j internal format.
if [[ ! -d "${NEO4J_HOME}" ]]; then
NEO4J_HOME="/usr/share/neo4j"
fi
echo "Using NEO4J_HOME" ${NEO4J_HOME}
mkdir -p ${dataset_folder}/neo4j
cd ${dataset_folder}/neo4j
echo "Converting CSV dataset to '${dataset_folder}/neo4j/graph.db'"
rm -rf graph.db
${NEO4J_HOME}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"
neo4j_database_dir=${dataset_dir}/neo4j/databases
mkdir -p ${neo4j_database_dir}
cd ${neo4j_database_dir}
echo "Converting CSV dataset to '${neo4j_database_dir}/graph.db'"
${neo4j_dir}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"
# Convert to memgraph internal format.
echo "Using MEMGRAPH_HOME" ${memgraph_dir}
mkdir -p ${dataset_folder}/memgraph
cd ${dataset_folder}/memgraph
echo "Converting CSV dataset to '${dataset_folder}/memgraph/graph.snapshot'"
rm -rf graph.snapshot
${memgraph_dir}/tools/csv_to_snapshot -o graph.snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
memgraph_snapshot_dir=${dataset_dir}/memgraph/default
mkdir -p ${memgraph_snapshot_dir}
cd ${memgraph_snapshot_dir}
echo "Converting CSV dataset to '${memgraph_snapshot_dir}/snapshot'"
${base_dir}/tools/csv_to_snapshot -o snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
echo "Done!"

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# go to script directory
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd ${script_dir}
# remove archives
rm *.tar.gz *.tar 2>/dev/null
# remove logo
rm plots/ldbc-logo.png 2>/dev/null
# remove directories
for folder in maven hadoop ldbc_snb_datagen ldbc_driver ldbc-snb-impls ve3; do
if [ -d "$folder" ]; then
rm -rf $folder
fi
done

View File

@@ -13,13 +13,13 @@
#dbms.directories.plugins=/var/lib/neo4j/plugins
#dbms.directories.certificates=/var/lib/neo4j/certificates
#dbms.directories.logs=/var/log/neo4j
dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.run=/var/run/neo4j
# This setting constrains all `LOAD CSV` import files to be under the `import` directory. Remove or comment it out to
# allow files to be loaded from anywhere in the filesystem; this introduces possible security problems. See the
# `LOAD CSV` section of the manual for details.
dbms.directories.import=/var/lib/neo4j/import
#dbms.directories.import=/var/lib/neo4j/import
# Whether requests to Neo4j are authenticated.
# To disable authentication, uncomment this line
@@ -32,8 +32,8 @@ dbms.security.auth_enabled=false
# calculated based on available system resources.
# Uncomment these lines to set specific initial and maximum
# heap size.
dbms.memory.heap.initial_size=2096m
dbms.memory.heap.max_size=4192m
#dbms.memory.heap.initial_size=2096m
#dbms.memory.heap.max_size=4192m
# The amount of memory to use for mapping the store files, in bytes (or
# kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g').
@@ -44,7 +44,7 @@ dbms.memory.heap.max_size=4192m
# The default page cache memory assumes the machine is dedicated to running
# Neo4j, and is heuristically set to 50% of RAM minus the max Java heap size.
#dbms.memory.pagecache.size=10g
dbms.query_cache_size=0
#dbms.query_cache_size=0
#*****************************************************************
# Network connector configuration
@@ -317,3 +317,6 @@ dbms.windows_service_name=neo4j
# Other Neo4j system properties
#********************************************************************
dbms.jvm.additional=-Dunsupported.dbms.udc.source=debian
# Disable Neo4j usage data collection
dbms.udc.enabled=false

View File

@@ -0,0 +1,17 @@
# script used to run LDBC benchmarks on Apollo
# setup dependencies
TIMEOUT=1200 ./setup
# build dataset
TIMEOUT=3600 ./build_dataset
# run read benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
# run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png

View File

@@ -1,44 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>net.ellitron.ldbcsnbimpls</groupId>
<artifactId>ldbc-snb-impls</artifactId>
<packaging>pom</packaging>
<version>0.1.0</version>
<name>LDBC SNB Workload Implementations</name>
<url>https://github.com/ellitron/ldbc-snb-impls</url>
<description>
A collection of workload implementations for the LDBC SNB benchmark driver
(see https://github.com/ldbc/ldbc_driver).
</description>
<modules>
<module>snb-interactive-core</module>
<!--<module>snb-interactive-tools</module>-->
<module>snb-interactive-neo4j</module>
<!--<module>snb-interactive-titan</module>-->
<!--<module>snb-interactive-torc</module>-->
</modules>
<dependencies>
<dependency>
<groupId>com.ldbc.driver</groupId>
<artifactId>jeeves</artifactId>
<version>0.3-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>

View File

@@ -1,2 +0,0 @@
*
!.gitignore

View File

@@ -1,131 +0,0 @@
#!/bin/bash
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --transform-dataset -> Run just transform dataset (SNB -> Memgraph Snapshot)."
echo -e " --copy-dataset -> Just copy dataset into the Memgraph snapshots path."
echo -e " --run -> Just run Memgraph."
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
memgraph_dir="${script_dir}/../../.."
# TODO: pass as an argument
memgraph_build_dir="${memgraph_dir}/build"
loadable_snapshot_path="${memgraph_build_dir}/snapshots/default/3000_01_01__01_01_01_00000"
# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
copy_dataset=false
run_memgraph=false
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--scale-factor)
scale_factor=$2
shift
;;
--transform-dataset)
run_all=false
transform_dataset=true
;;
--copy-dataset)
run_all=false
copy_dataset=true
;;
--run)
run_all=false
run_memgraph=true
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
snapshot_path="${script_dir}/memgraph_snapshots/snb_scale_${scale_factor}.snapshot"
csv_folder="${script_dir}/neo4j_csv_dataset_scale_${scale_factor}"
# Transform dataset into MemGraph Snapshot.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
rm ${snapshot_path}
${memgraph_dir}/tools/csv_to_snapshot -o ${snapshot_path} \
--nodes ${csv_folder}/comment_0_0.csv \
--nodes ${csv_folder}/forum_0_0.csv \
--nodes ${csv_folder}/organisation_0_0.csv \
--nodes ${csv_folder}/person_0_0.csv \
--nodes ${csv_folder}/place_0_0.csv \
--nodes ${csv_folder}/post_0_0.csv \
--nodes ${csv_folder}/tag_0_0.csv \
--nodes ${csv_folder}/tagclass_0_0.csv \
--relationships ${csv_folder}/comment_hasCreator_person_0_0.csv \
--relationships ${csv_folder}/comment_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/comment_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/comment_replyOf_comment_0_0.csv \
--relationships ${csv_folder}/comment_replyOf_post_0_0.csv \
--relationships ${csv_folder}/forum_containerOf_post_0_0.csv \
--relationships ${csv_folder}/forum_hasMember_person_0_0.csv \
--relationships ${csv_folder}/forum_hasModerator_person_0_0.csv \
--relationships ${csv_folder}/forum_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/organisation_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/person_hasInterest_tag_0_0.csv \
--relationships ${csv_folder}/person_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/person_knows_person_0_0.csv \
--relationships ${csv_folder}/person_likes_comment_0_0.csv \
--relationships ${csv_folder}/person_likes_post_0_0.csv \
--relationships ${csv_folder}/person_studyAt_organisation_0_0.csv \
--relationships ${csv_folder}/person_workAt_organisation_0_0.csv \
--relationships ${csv_folder}/place_isPartOf_place_0_0.csv \
--relationships ${csv_folder}/post_hasCreator_person_0_0.csv \
--relationships ${csv_folder}/post_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/post_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/tag_hasType_tagclass_0_0.csv \
--relationships ${csv_folder}/tagclass_isSubclassOf_tagclass_0_0.csv \
--csv-delimiter "|" --array-delimiter ";"
echo "Dataset transformed."
fi
# Copy the dataset.
if [[ ${run_all} = true ]] || [[ ${copy_dataset} = true ]] ; then
cp ${snapshot_path} ${loadable_snapshot_path}
echo "Dataset copied."
fi
# Run MemGraph.
if [[ ${run_all} = true ]] || [[ ${run_memgraph} = true ]] ; then
${memgraph_build_dir}/memgraph -flagfile ${memgraph_dir}/config/public_benchmark_ldbc.conf 2>&1 &
memgraph_pid=$!
sleep 200 # TODO: replace this with something that is going to work in all cases
# not just in SNB scale 1 case
# Create indexes.
cd ${script_dir}
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
fi
source ve3/bin/activate
python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j
# On Ctrl-C stop Memgraph.
trap ctrl_c INT
function ctrl_c() {
kill -9 ${memgraph_pid}
exit 0
}
while true; do
sleep 1
done
fi

View File

@@ -1,95 +0,0 @@
#!/bin/bash
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --transform-dataset -> Run just transform dataset (SNB -> Neo4j CSV)."
echo -e " --load-dataset -> Just load dataset into Neo4j."
echo -e " --run -> Just run Neo4j."
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
load_dataset=false
run_neo=false
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--scale-factor)
scale_factor=$2
shift
;;
--transform-dataset)
run_all=false
transform_dataset=true
;;
--load-dataset)
run_all=false
load_dataset=true
;;
--run)
run_all=false
run_neo=true
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
dataset_folder=${script_dir}/neo4j_csv_dataset_scale_${scale_factor}
# Transform the dataset into files on disk.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
cd ${dataset_folder}
chmod +x import.sh
NEO4J_HOME=/usr/share/neo4j ./import.sh
fi
# Load the dataset into Neo4j.
if [[ ${run_all} = true ]] || [[ ${load_dataset} = true ]] ; then
neo4j_data=${script_dir}/neo4j_home/data
neo4j_graphdb=${neo4j_data}/databases/graph.db
mkdir -p ${neo4j_graphdb}
rm -rf ${neo4j_graphdb}/*
cp -r ${dataset_folder}/graph.db/* ${neo4j_graphdb}/
fi
# Run Neo4j.
if [[ ${run_all} = true ]] || [[ ${run_neo} = true ]] ; then
NEO4J_HOME=${script_dir}/neo4j_home NEO4J_CONF=${script_dir}/neo4j_config /usr/share/neo4j/bin/neo4j console 2>&1 &
neo_pid=$!
sleep 5
# Create indexes.
cd ${script_dir}
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
fi
source ve3/bin/activate
python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j
# On Ctrl-C stop Neo4j.
trap ctrl_c INT
function ctrl_c() {
kill -9 ${neo_pid}
exit 0
}
while true; do
sleep 1
done
fi

View File

@@ -1,2 +0,0 @@
*
!.gitignore

View File

@@ -0,0 +1,3 @@
neo4j-driver==1.4.0
matplotlib==2.0.2
numpy==1.13.1

View File

@@ -1,10 +0,0 @@
cycler==0.10.0
functools32==3.2.3.post2
matplotlib==2.0.2
numpy==1.13.1
pkg-resources==0.0.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.2
six==1.10.0
subprocess32==3.2.7

View File

@@ -1,2 +0,0 @@
neo4j-driver==1.4.0
pkg-resources==0.0.0

View File

@@ -1,81 +1,230 @@
#!/bin/bash
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Run the LDBC SNB interactive workload / benchmark.
# The benchmark is executed with:
# * ldbc_driver -> workload executor
# * ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''
Run the LDBC SNB interactive workload / benchmark.
The benchmark is executed with:
* ldbc_driver -> workload executor
* ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
import time
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --host -> Database host."
echo -e " --port -> Database port."
echo -e " --time-compression-ratio |"
echo -e " --operation-count | -> https://github.com/ldbc/ldbc_driver/wiki/Driver-Configuration"
echo -e " --thread-count |"
echo -e " --result-file-prefix -> Result file prefix."
echo -e " --properties-file -> Properties file used to select queries"
}
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", ".."))
# Default parameters.
host=127.0.0.1
port=7687
time_compression_ratio=0.01
operation_count=200
thread_count=8
result_file_prefix="undefined"
properties_file="${script_dir}/ldbc-snb-impls-short-reads.properties"
# Read the arguments.
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--host)
host=$2
shift
;;
--port)
port=$2
shift
;;
--result-file-prefix)
result_file_prefix=$2
shift
;;
--properties-file)
properties_file=$2
shift
;;
--time-compression-ratio)
time_compression_ratio=$2
shift
;;
--operation-count)
operation_count=$2
shift
;;
--thread-count)
thread_count=$2
shift
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
def wait_for_server(port, delay=1.0):
    cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
    while subprocess.call(cmd) != 0:
        time.sleep(0.5)
    time.sleep(delay)
cd ${script_dir}/ldbc-snb-impls
mvn clean compile assembly:single
cd ${script_dir}/ldbc_driver
java -cp target/jeeves-0.3-SNAPSHOT.jar:${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/target/snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar com.ldbc.driver.Client -P ${script_dir}/ldbc_driver/configuration/ldbc_driver_default.properties -P ${properties_file} -p ldbc.snb.interactive.updates_dir ${script_dir}/ldbc_snb_datagen/social_network -p host ${host} -p port ${port} -db net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb -p ldbc.snb.interactive.parameters_dir ${script_dir}/ldbc_snb_datagen/substitution_parameters --time_compression_ratio ${time_compression_ratio} --operation_count ${operation_count} --thread_count ${thread_count}
class Memgraph:
    def __init__(self, dataset, port, num_workers):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.num_workers = str(num_workers)
cp ${script_dir}/ldbc_driver/results/LDBC-results.json ${script_dir}/results/${result_file_prefix}-LDBC-results.json
    def start(self):
        # find executable path
        binary = os.path.join(BASE_DIR, "build", "memgraph")
        if not os.path.exists(binary):
            binary = os.path.join(BASE_DIR, "build_release", "memgraph")
        # database args
        database_args = [binary, "--num-workers", self.num_workers,
                         "--snapshot-directory", os.path.join(self.dataset,
                                                              "memgraph"),
                         "--recover-on-startup", "true",
                         "--port", self.port]
        # database env
        env = {"MEMGRAPH_CONFIG": os.path.join(SCRIPT_DIR, "config",
                                               "memgraph.conf")}
        # start memgraph
        self.proc = subprocess.Popen(database_args, env=env)
        wait_for_server(self.port)

    def stop(self):
        self.proc.terminate()
        if self.proc.wait() != 0:
            raise Exception("Database exited with non-zero exit code!")


class Neo:
    def __init__(self, dataset, port):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.http_port = str(int(port) + 7474)
        self.home_dir = None

    def start(self):
        # create home directory
        self.home_dir = tempfile.mkdtemp(dir="/dev/shm")
        neo4j_dir = os.path.join(BASE_DIR, "libs", "neo4j")
        try:
            os.symlink(os.path.join(neo4j_dir, "lib"),
                       os.path.join(self.home_dir, "lib"))
            os.symlink(os.path.join(self.dataset, "neo4j"),
                       os.path.join(self.home_dir, "data"))
            conf_dir = os.path.join(self.home_dir, "conf")
            conf_file = os.path.join(conf_dir, "neo4j.conf")
            os.mkdir(conf_dir)
            shutil.copyfile(os.path.join(SCRIPT_DIR, "config", "neo4j.conf"),
                            conf_file)
            with open(conf_file, "a") as f:
                f.write("\ndbms.connector.bolt.listen_address=:" +
                        self.port + "\n")
                f.write("\ndbms.connector.http.listen_address=:" +
                        self.http_port + "\n")
            # environment
            env = {"NEO4J_HOME": self.home_dir}
            self.proc = subprocess.Popen([os.path.join(neo4j_dir, "bin",
                                                       "neo4j"),
                                          "console"], env=env, cwd=neo4j_dir)
        except:
            shutil.rmtree(self.home_dir)
            raise Exception("Couldn't run Neo4j!")
        wait_for_server(self.http_port, 2.0)

    def stop(self):
        self.proc.terminate()
        ret = self.proc.wait()
        if os.path.exists(self.home_dir):
            shutil.rmtree(self.home_dir)
        if ret != 0:
            raise Exception("Database exited with non-zero exit code!")


def parse_args():
    argp = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    argp.add_argument('--scale', type=int, default=1,
                      help='Dataset scale to use for benchmarking.')
    argp.add_argument('--host', default='127.0.0.1', help='Database host.')
    argp.add_argument('--port', default='7687', help='Database port.')
    argp.add_argument('--time-compression-ratio', type=float, default=0.01,
                      help='Compress/stretch durations between operation '
                           'start times to increase/decrease benchmark load. '
                           'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
                           'benchmark 10x faster. Default is 0.01.')
    argp.add_argument('--operation-count', type=int, default=200,
                      help='Number of operations to generate during benchmark '
                           'execution.')
    argp.add_argument('--thread-count', type=int, default=8,
                      help='Thread pool size to use for executing operation '
                           'handlers.')
    argp.add_argument('--time-unit', default='microseconds',
                      choices=('nanoseconds', 'microseconds', 'milliseconds',
                               'seconds', 'minutes'),
                      help='Time unit to use for measuring performance '
                           'metrics')
    argp.add_argument('--result-file-prefix', default='',
                      help='Result file name prefix')
    argp.add_argument('--properties-file',
                      default=os.path.join(
                          SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
                      help='Properties file used to select queries')
    argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
                      help='Run the database before starting LDBC')
    argp.add_argument('--create-index', action='store_true', default=False,
                      help='Create index in the running database.')
    return argp.parse_args()


LDBC_INTERACTIVE_NEO4J = \
    os.path.join(SCRIPT_DIR,
                 'ldbc-snb-impls', 'snb-interactive-neo4j', 'target',
                 'snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar')
LDBC_DEFAULT_PROPERTIES = \
    os.path.join(SCRIPT_DIR, 'ldbc_driver', 'configuration',
                 'ldbc_driver_default.properties')


def create_index(port):
    index_file = os.path.join(SCRIPT_DIR, 'ldbc-snb-impls',
                              'snb-interactive-neo4j', 'scripts',
                              'indexCreation.neo4j')
    subprocess.check_call(('ve3/bin/python3', 'index_creation.py',
                           index_file, port), cwd=SCRIPT_DIR)
    time.sleep(1.0)


def main():
    args = parse_args()
    dataset = os.path.join(SCRIPT_DIR, "datasets", "scale_" + str(args.scale))
    db = None
    if args.run_db:
        if args.host != "127.0.0.1":
            raise Exception("Host parameter must point to localhost when "
                            "this script starts the database!")
        if args.run_db.lower() == 'memgraph':
            db = Memgraph(dataset, args.port, args.thread_count)
        elif args.run_db.lower() == 'neo4j':
            db = Neo(dataset, args.port)
    try:
        if db:
            db.start()
        if args.create_index:
            create_index(args.port)
        # Run LDBC driver.
        cp = 'target/jeeves-0.3-SNAPSHOT.jar:{}'.format(LDBC_INTERACTIVE_NEO4J)
        updates_dir = os.path.join(dataset, 'social_network')
        parameters_dir = os.path.join(dataset, 'substitution_parameters')
        java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
                    '-P', LDBC_DEFAULT_PROPERTIES,
                    '-P', os.path.join(os.getcwd(), args.properties_file),
                    '-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
                    '-p', 'host', args.host, '-p', 'port', args.port,
                    '-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
                    '-p', 'ldbc.snb.interactive.parameters_dir', parameters_dir,
                    '--time_compression_ratio', str(args.time_compression_ratio),
                    '--operation_count', str(args.operation_count),
                    '--thread_count', str(args.thread_count),
                    '--time_unit', args.time_unit.upper())
        subprocess.check_call(java_cmd,
                              cwd=os.path.join(SCRIPT_DIR, 'ldbc_driver'))
        # Copy the results to results dir.
        ldbc_results = os.path.join(SCRIPT_DIR, 'ldbc_driver', 'results',
                                    'LDBC-results.json')
        results_dir = os.path.join(SCRIPT_DIR, 'results')
        results_name = []
        if args.result_file_prefix:
            results_name.append(args.result_file_prefix)
        if args.run_db:
            results_name.append(args.run_db)
        else:
            results_name.append("external")
        results_name.append("scale_" + str(args.scale))
        results_name = "-".join(results_name + ["LDBC", "results.json"])
        results_copy = os.path.join(results_dir, results_name)
        shutil.copyfile(ldbc_results, results_copy)
        print("Results saved to:", results_copy)
    finally:
        if db:
            db.stop()
    print("Done!")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,82 @@
#!/bin/bash -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Cleanup
cd ${script_dir}
./cleanup
# Find remote endpoints
if [ "$USER" == "apollo" ]; then
deps_http_url="http://89.201.166.70:46744"
deps_git_url="git://89.201.166.70:46745"
deps_pypi_url="http://89.201.166.70:46746/root/pypi"
deps_pypi_trusted="89.201.166.70"
else
deps_http_url="http://deps.memgraph.io"
deps_git_url="git://deps.memgraph.io"
deps_pypi_url="http://deps.memgraph.io:3141/root/pypi"
deps_pypi_trusted="deps.memgraph.io"
fi
# Download Maven
cd ${script_dir}
wget -nv $deps_http_url/ldbc/apache-maven-3.5.0-bin.tar.gz -O maven.tar.gz
tar -xzf maven.tar.gz
mv apache-maven-3.5.0 maven
rm maven.tar.gz
cd maven
sed -r "s@</settings>@<localRepository>${script_dir}/maven/.m2</localRepository>\n</settings>@g" -i conf/settings.xml
wget -nv $deps_http_url/ldbc/maven-cache.tar.gz
tar -xzf maven-cache.tar.gz
rm maven-cache.tar.gz
mvn=${script_dir}/maven/bin/mvn
# Download Hadoop
cd ${script_dir}
wget -nv $deps_http_url/ldbc/hadoop-2.7.4.tar.gz -O hadoop.tar.gz
tar -xzf hadoop.tar.gz
mv hadoop-2.7.4 hadoop
rm hadoop.tar.gz
# Edit Java Xmx settings because Hadoop has stupid default run scripts and they duplicate the -Xmx flag
# https://stackoverflow.com/questions/2740725/duplicated-java-runtime-options-what-is-the-order-of-preference
# "Depends on the JVM, perhaps the version...perhaps even how many paper clips you have on your desk at the time. It might not even work. Don't do that."
sed -r "s@-Xmx512m @@g" -i hadoop/etc/hadoop/hadoop-env.sh
# Download LDBC logo
cd ${script_dir}/plots
wget -nv $deps_http_url/ldbc/ldbc-logo.png
# Setup ldbc_snb_datagen
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_snb_datagen.git
git clone $deps_git_url/ldbc_snb_datagen.git
cd ldbc_snb_datagen
git checkout 46ccf9340c20d8cfde0e7e11c9297a4061117bd3
sed -r "s@#!/bin/bash@#!/bin/bash -e@g" -i run.sh
# Setup ldbc_driver
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_driver.git
git clone $deps_git_url/ldbc_driver.git
cd ldbc_driver
git checkout 1bb441394c3cd3e23d4df5a87689b9d1c5e6f48f
$mvn clean package -DskipTests
$mvn install -DskipTests
# Setup ldbc-snb-impls
cd ${script_dir}
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
git clone $deps_git_url/ldbc-snb-impls.git
cd ldbc-snb-impls
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
$mvn install
$mvn clean compile assembly:single
# Setup Python3 Virtualenv
cd ${script_dir}
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -i $deps_pypi_url \
--trusted-host $deps_pypi_trusted -r requirements.txt
deactivate

View File

@@ -1,50 +0,0 @@
#!/bin/bash
# Setup all dependencies
function command_fail {
echo $1
exit 1
}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Setup ldbc_snb_datagen
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_snb_datagen
# Setup ldbc_driver
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_driver.git
cd ${script_dir}/ldbc_driver
mvn clean package -DskipTests || exit 1
mvn install -DskipTests || exit 1
# Setup ldbc-snb-impls
cd ${script_dir}
git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
cp ${script_dir}/ldbc-snb-impls-pom.xml ${script_dir}/ldbc-snb-impls/pom.xml
cd ${script_dir}/ldbc-snb-impls
mvn install || exit 1
# Use set -e after we have called git clone, to avoid exiting if we already
# cloned something.
set -e
# Setup python virtual environment & Install dependencies
cd ${script_dir}
if ! which virtualenv > /dev/null 2>&1; then
command_fail "Please install virtualenv!"
fi
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
fi
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
deactivate
if [ ! -d "ve2" ]; then
virtualenv -p python2 ve2 || command_fail "Virtualenv setup failed."
fi
source ve2/bin/activate
pip install -r ${script_dir}/requirements_2.txt
deactivate

View File

@@ -1,37 +0,0 @@
#!/bin/bash
# System setup (root access is required)
# Working directories
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
mkdir -p ${script_dir}/tmp
# Install OS packages
cd ${script_dir}/tmp
if which apt-get > /dev/null 2>&1; then
wget -O - http://debian.neo4j.org/neotechnology.gpg.key >> key.pgp || exit 1
sudo apt-key add key.pgp || exit 1
sudo echo 'deb http://debian.neo4j.org/repo stable/' | tee -a /etc/apt/sources.list.d/neo4j.list > /dev/null || exit 1
sudo apt-get update || exit 1
sudo apt-get install -y maven default-jdk neo4j || exit 1
else
echo "Assuming that 'maven', 'jdk' and 'neo4j' are installed"
fi
# Install Hadoop
cd ${script_dir}/tmp
hadoop_version="hadoop-2.7.3"
hadoop_tar="${hadoop_version}.tar.gz"
hadoop_url="http://apache.mirrors.tds.net/hadoop/common/${hadoop_version}/${hadoop_tar}"
wget ${hadoop_url}
tar -xzf ${hadoop_tar}
# TODO: root access is required here -> run hadoop under a current user
echo "Moving hadoop to /usr/local/hadoop"
sudo mv ${hadoop_version} /usr/local/hadoop
# Performance Setup
# echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >/dev/null
# Cleanup
rm -rf ${script_dir}/tmp

View File

@@ -260,6 +260,23 @@ if mode == "release":
    RUNS.append(generate_run("stress_large", commands = cmd, infile = infile,
                             slave_group = "remote_16c56g"))

# public_benchmark/ldbc tests
if mode == "release":
    ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
    neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
    csv_to_snapshot_path = os.path.join(BASE_DIR, "tools", "csv_to_snapshot")
    plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools", "plot_ldbc_latency")
    infile = create_archive("ldbc", [binary_release_path, ldbc_path,
                                     binary_release_link_path, neo4j_path, config_path,
                                     csv_to_snapshot_path, plot_ldbc_latency_path],
                            cwd = WORKSPACE_DIR)
    cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
    outfile_paths = "\./memgraph/tests/public_benchmark/ldbc/results/.+\n" \
                    "\./memgraph/tests/public_benchmark/ldbc/plots/.+\n"
    RUNS.append(generate_run("public_benchmark__ldbc", commands = cmd,
                             infile = infile, outfile_paths = outfile_paths,
                             slave_group = "remote_20c140g", enable_network = True))

# store ARCHIVES and RUNS
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)

View File

@@ -8,38 +8,51 @@ Latency Barchart (Based on LDBC JSON output).
import json
import os
import numpy as np
from argparse import ArgumentParser
import string
import matplotlib
# Must set 'Agg' backend before importing pyplot
# This is so the script works on headless machines (without X11)
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
from argparse import ArgumentParser

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

COLORS = {
    'memgraph': '#ff7300',
    'neo4j': '#008cc2'
COLORS = ['#ff7300', '#008cc2']  # TODO: add more colors!

LDBC_TIME_FACTORS = {
    "SECONDS": 1.0,
    "MILLISECONDS": 1000.0,
    "MICROSECONDS": 1000000.0,
    "NANOSECONDS": 1000000000.0
}

TIME_FACTORS = {
    "s": 1.0,
    "ms": 1000,
    "us": 1000000,
    "ns": 1000000000,
}


def parse_args():
    argp = ArgumentParser(description=__doc__)
    argp.add_argument("--vendor-references", nargs="+",
                      help="Short references that represent all the "
                           "vendors that are going to be "
                           "visualized on the plot.")
    argp.add_argument("--vendor-titles", nargs="+",
                      default=["Memgraph", "Market leader"],
                      help="Vendor titles that are going to appear "
                           "on the plot, e.g. legend titles.")
    argp.add_argument("--plot-title", default="{{Plot title placeholder}}",
    argp.add_argument('--plot-title', default="",
                      help="Plot title.")
    argp.add_argument("--logo-path", default=None,
                      help="Path to the logo that is going to be presented"
                           " instead of title.")
    argp.add_argument("--results-dir",
                      default=os.path.join(SCRIPT_DIR,
                                           "../tests/public_benchmark"
                                           "/ldbc/results"),
    argp.add_argument("--results", nargs="+", required=True,
                      help="Paths to result files in format "
                           "{{vendor-reference}}-LDBC-results.json")
    argp.add_argument("--time-unit", choices=("s", "ms", "us", "ns"),
                      default="ms", help="The time unit that should be used.")
    argp.add_argument("--output", default="",
                      help="Save plot to file (instead of displaying it).")
    argp.add_argument("--max-label-width", default=11, type=int,
                      help="Maximum length of the x-axis labels (-1 is unlimited)")
    return argp.parse_args()
@@ -62,33 +75,43 @@ def main():
    args = parse_args()

    # Prepare the datastructure.
    vendors = {}
    for vendor_reference, vendor_title in zip(args.vendor_references,
                                              args.vendor_titles):
        vendors[vendor_reference] = {}
        vendors[vendor_reference]['title'] = vendor_title
        vendors[vendor_reference]['results_path'] = os.path.join(
            args.results_dir, "%s-LDBC-results.json" % vendor_reference)
        vendors[vendor_reference]['color'] = COLORS[vendor_reference]
        vendors[vendor_reference]['latencies'] = []
        vendors[vendor_reference]['query_names'] = []
    vendors = []
    for i, results_file, vendor_title in zip(range(len(args.results)),
                                             args.results,
                                             args.vendor_titles):
        vendor = {}
        vendor['title'] = vendor_title
        vendor['results_file'] = results_file
        vendor['color'] = COLORS[i]
        vendor['results'] = []
        vendors.append(vendor)
    assert len(vendors) == 2, "The graph is tailored for only 2 vendors."

    # Collect the benchmark data.
    print("LDBC Latency Data")
    for vendor_reference, vendor_data in vendors.items():
        print("Vendor: %s" % vendor_reference)
        with open(vendor_data['results_path']) as results_file:
    for vendor in vendors:
        with open(vendor['results_file']) as results_file:
            results_data = json.load(results_file)
            for query_data in results_data["all_metrics"]:
                mean_runtime = query_data["run_time"]["mean"]
                mean_runtime = (query_data["run_time"]["mean"] /
                                LDBC_TIME_FACTORS[results_data["unit"]] *
                                TIME_FACTORS[args.time_unit])
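                # Worked example of the conversion (hypothetical numbers):
                # a mean of 2500 reported by the driver in MICROSECONDS and
                # displayed in "ms" gives 2500 / 1000000.0 * 1000 = 2.5 ms.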
                query_name = query_data['name']
                print("%s -> %sms" % (query_name, str(mean_runtime)))
                vendor_data['latencies'].append(mean_runtime)
                vendor_data['query_names'].append(query_name)
                vendor['results'].append((query_name, mean_runtime))

    # Sort results.
    for vendor in vendors:
        vendor['results'].sort(key=lambda item: int("".join(filter(
            lambda x: x in string.digits, item[0]))))

    # Print results.
    for vendor in vendors:
        print("Vendor:", vendor['title'])
        for query_name, latency in vendor['results']:
            print("{} -> {:.3f}{}".format(query_name, latency,
                                          args.time_unit))

    # Consistency check.
    all_query_names = [tuple(vd['query_names']) for vd in vendors.values()]
    all_query_names = [tuple(res[0] for res in vd['results']) for vd in vendors]
    assert len(set(all_query_names)) == 1, \
        "Queries between different vendors are different!"
    query_names = all_query_names[0]
@@ -97,7 +120,8 @@ def main():
    ind = np.arange(len(query_names))  # the x locations for the groups
    width = 0.40  # the width of the bars

    fig, ax = plt.subplots()  # figure setup
    ax.set_ylabel('Mean Latency (ms)')  # YAxis title
    fig.set_size_inches(1920 / 96, 1080 / 96)  # set figure size
    ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
    ax.set_facecolor('#dcdcdc')  # plot bg color (light gray)
    ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
@@ -121,26 +145,33 @@ def main():
        line.set_linestyle('--')
    ax.set_axisbelow(True)  # put the grid below all other elements
    plt.grid(True)  # show grid

    # Set plot title
    ax.set_title(args.plot_title)
    # Draw logo or plot title
    if args.logo_path is None:
        ax.set_title(args.plot_title)
    else:
    if args.logo_path != None:
        # TODO: improve the logo positioning
        im = plt.imread(get_sample_data(args.logo_path))
        im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
                                                     args.logo_path)))
        plt.gcf().subplots_adjust(top=0.85)
        newax = fig.add_axes([0.4, 0.75, 0.2, 0.25], anchor='N')
        newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
        newax.imshow(im)
        newax.axis('off')

    # Draw bars
    for index, vendor_data in enumerate(vendors.values()):
        rects = ax.bar(ind + index * width, vendor_data['latencies'], width,
                       color=vendor_data['color'])
        vendor_data['rects'] = rects
    for index, vendor in enumerate(vendors):
        latencies = [res[1] for res in vendor['results']]
        rects = ax.bar(ind + index * width, latencies, width,
                       color=vendor['color'])
        vendor['rects'] = rects
        autolabel(ax, rects)

    rects = [vd['rects'][0] for vd in vendors.values()]
    titles = [vd['title'] for vd in vendors.values()]
    rects = [vd['rects'][0] for vd in vendors]
    titles = [vd['title'] for vd in vendors]
    ax.legend(rects, titles)  # Draw the legend.

    plt.show()
    if args.output == "":
        plt.show()
    else:
        plt.savefig(args.output, dpi=96)


if __name__ == '__main__':
    main()