LDBC: Integrate with Apollo

Summary:
Run Neo4j and Memgraph directly from the run_benchmark script.
This makes the separate mg and neo scripts obsolete.
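For example, both databases can now be benchmarked through the same entry
point (the flags are documented in the updated README below):

  ./run_benchmark --create-index --run-db memgraph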

Reviewers: buda, teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D806
Matej Ferencevic 2017-09-21 20:01:10 +02:00
parent f2a82f4f58
commit 686dc452ee
21 changed files with 534 additions and 596 deletions

View File

@@ -1,8 +1,8 @@
datasets/
ldbc_snb_datagen/
ldbc_driver/
ldbc-snb-impls/
neo4j_csv_dataset_scale_*/
tmp/
ve3/
ve2/
maven/
hadoop/
*.out

View File

@@ -3,16 +3,15 @@
## How to run the benchmark against Neo4j OR Memgraph?
cd memgraph/tests/public_benchmark/ldbc
./setup_system
./setup_dependencies
./setup_dataset [--scale-factor 1]
./neo [--run] OR ./mg [--run]
./setup
./build_dataset [--scale-factor 1]
# To run short reads by default, just call:
./run_benchmark
# To run update queries use the following.
./run_benchmark --properties-file ldbc-snb-impls-updates.properties
# You may need to increase the time compression when updating:
./run_benchmark --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./run_benchmark --create-index --run-db memgraph # or neo4j
# To run update queries pass the properties file for updates and slow down
# the execution by setting a larger time compression ratio.
./run_benchmark --create-index --run-db memgraph \
    --properties-file ldbc-snb-impls-updates.properties \
    --time-compression-ratio 1.5
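# To benchmark an already running database (e.g. one started by hand),
# omit --run-db and point the driver at its Bolt endpoint; the result
# file is then prefixed with "external". A sketch:
./run_benchmark --host 127.0.0.1 --port 7687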
## How to run a specific test?

View File

@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/bash -e
# Generate SNB dataset.
@@ -7,18 +7,18 @@ function print_help () {
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --neo4j-home Neo4j home directory, overrides NEO4J_HOME"
echo -e " --memgraph-home Memgraph home directory."
echo -e " --skip-generating Only transform generated dataset"
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
base_dir="${script_dir}/../../.."
neo4j_dir="${base_dir}/libs/neo4j"
# Add Maven to PATH
export PATH=$PATH:${script_dir}/maven/bin
# Read the arguments.
scale_factor=1
skip_generating=false
memgraph_dir="${script_dir}/../../.."
while [[ $# -gt 0 ]]
do
case $1 in
@@ -30,17 +30,6 @@ do
scale_factor=$2
shift
;;
--neo4j-home)
NEO4J_HOME=$2
shift
;;
--memgraph-home)
memgraph_dir=$2
shift
;;
--skip-generating)
skip_generating=true
;;
*)
# unknown option
;;
@@ -48,13 +37,15 @@ do
shift # past argument or value
done
echo "Using scale_factor" $scale_factor
# Prepare the folder structure.
dataset_folder_prefix="neo4j_csv_dataset"
dataset_folder="${script_dir}/${dataset_folder_prefix}_scale_${scale_factor}"
mkdir -p ${dataset_folder}
dataset_dir="${script_dir}/datasets/scale_${scale_factor}"
if [ -d ${dataset_dir} ]; then
rm -rf ${dataset_dir}
fi
mkdir -p ${dataset_dir}
# Define scale factor.
echo "Using scale_factor" $scale_factor
cat > ${script_dir}/ldbc_snb_datagen/params.ini <<EOF
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.${scale_factor}
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
@@ -62,83 +53,84 @@ ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
EOF
ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
if [[ ${skip_generating} = false ]]; then
# Generate the dataset.
rm -rf ${dataset_folder}/*
cd ${ldbc_snb_datagen_folder}
export HADOOP_OPTS="$HADOOP_OPTS -Xmx20G"
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
# Find installed Java binary.
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
export JAVA_HOME=/usr/lib/jvm/default-java/jre
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
export JAVA_HOME=/usr/lib/jvm/default-runtime/
else
else
echo "Unable to find JRE under /usr/lib/jvm"
exit 1
fi
echo "Using JAVA_HOME" $JAVA_HOME
HADOOP_HOME=/usr/local/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh || exit 1
# Transform the dataset into Neo4j CSV format.
cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
mvn exec:java \
-Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
-Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_folder}" || exit 1
fi
echo "Using JAVA_HOME" $JAVA_HOME
rm -rf ${dataset_folder}/social_network
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_folder}/social_network
# Remove old generated dataset.
rm -rf ${ldbc_snb_datagen_folder}/social_network ${ldbc_snb_datagen_folder}/substitution_parameters
rm -rf ${dataset_folder}/substitution_parameters
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_folder}/substitution_parameters
# Generate the dataset.
ldbc_snb_datagen_folder=${script_dir}/ldbc_snb_datagen
cd ${ldbc_snb_datagen_folder}
# Poorly documented hadoop heapsize flag (unit is 'm'), see: hadoop/libexec/hadoop-config.sh
# https://stackoverflow.com/questions/15609909/error-java-heap-space
export HADOOP_HEAPSIZE=8192
HADOOP_HOME=${script_dir}/hadoop LDBC_SNB_DATAGEN_HOME=${ldbc_snb_datagen_folder} ./run.sh
# Copy generated dataset.
cp -r ${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/
cp -r ${ldbc_snb_datagen_folder}/substitution_parameters ${dataset_dir}/
# Transform the dataset into Neo4j CSV format.
mkdir -p ${dataset_dir}/csv
cd ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j
mvn exec:java \
-Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
-Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_dir}/csv"
csv_dataset="
--nodes ${dataset_folder}/comment_0_0.csv \
--nodes ${dataset_folder}/forum_0_0.csv \
--nodes ${dataset_folder}/organisation_0_0.csv \
--nodes ${dataset_folder}/person_0_0.csv \
--nodes ${dataset_folder}/place_0_0.csv \
--nodes ${dataset_folder}/post_0_0.csv \
--nodes ${dataset_folder}/tag_0_0.csv \
--nodes ${dataset_folder}/tagclass_0_0.csv \
--relationships ${dataset_folder}/comment_hasCreator_person_0_0.csv \
--relationships ${dataset_folder}/comment_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/comment_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/comment_replyOf_comment_0_0.csv \
--relationships ${dataset_folder}/comment_replyOf_post_0_0.csv \
--relationships ${dataset_folder}/forum_containerOf_post_0_0.csv \
--relationships ${dataset_folder}/forum_hasMember_person_0_0.csv \
--relationships ${dataset_folder}/forum_hasModerator_person_0_0.csv \
--relationships ${dataset_folder}/forum_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/organisation_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/person_hasInterest_tag_0_0.csv \
--relationships ${dataset_folder}/person_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/person_knows_person_0_0.csv \
--relationships ${dataset_folder}/person_likes_comment_0_0.csv \
--relationships ${dataset_folder}/person_likes_post_0_0.csv \
--relationships ${dataset_folder}/person_studyAt_organisation_0_0.csv \
--relationships ${dataset_folder}/person_workAt_organisation_0_0.csv \
--relationships ${dataset_folder}/place_isPartOf_place_0_0.csv \
--relationships ${dataset_folder}/post_hasCreator_person_0_0.csv \
--relationships ${dataset_folder}/post_hasTag_tag_0_0.csv \
--relationships ${dataset_folder}/post_isLocatedIn_place_0_0.csv \
--relationships ${dataset_folder}/tag_hasType_tagclass_0_0.csv \
--relationships ${dataset_folder}/tagclass_isSubclassOf_tagclass_0_0.csv"
--nodes ${dataset_dir}/csv/comment_0_0.csv \
--nodes ${dataset_dir}/csv/forum_0_0.csv \
--nodes ${dataset_dir}/csv/organisation_0_0.csv \
--nodes ${dataset_dir}/csv/person_0_0.csv \
--nodes ${dataset_dir}/csv/place_0_0.csv \
--nodes ${dataset_dir}/csv/post_0_0.csv \
--nodes ${dataset_dir}/csv/tag_0_0.csv \
--nodes ${dataset_dir}/csv/tagclass_0_0.csv \
--relationships ${dataset_dir}/csv/comment_hasCreator_person_0_0.csv \
--relationships ${dataset_dir}/csv/comment_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/comment_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/comment_replyOf_comment_0_0.csv \
--relationships ${dataset_dir}/csv/comment_replyOf_post_0_0.csv \
--relationships ${dataset_dir}/csv/forum_containerOf_post_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasMember_person_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasModerator_person_0_0.csv \
--relationships ${dataset_dir}/csv/forum_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/organisation_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/person_hasInterest_tag_0_0.csv \
--relationships ${dataset_dir}/csv/person_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/person_knows_person_0_0.csv \
--relationships ${dataset_dir}/csv/person_likes_comment_0_0.csv \
--relationships ${dataset_dir}/csv/person_likes_post_0_0.csv \
--relationships ${dataset_dir}/csv/person_studyAt_organisation_0_0.csv \
--relationships ${dataset_dir}/csv/person_workAt_organisation_0_0.csv \
--relationships ${dataset_dir}/csv/place_isPartOf_place_0_0.csv \
--relationships ${dataset_dir}/csv/post_hasCreator_person_0_0.csv \
--relationships ${dataset_dir}/csv/post_hasTag_tag_0_0.csv \
--relationships ${dataset_dir}/csv/post_isLocatedIn_place_0_0.csv \
--relationships ${dataset_dir}/csv/tag_hasType_tagclass_0_0.csv \
--relationships ${dataset_dir}/csv/tagclass_isSubclassOf_tagclass_0_0.csv"
# Convert to neo4j internal format.
if [[ ! -d "${NEO4J_HOME}" ]]; then
NEO4J_HOME="/usr/share/neo4j"
fi
echo "Using NEO4J_HOME" ${NEO4J_HOME}
mkdir -p ${dataset_folder}/neo4j
cd ${dataset_folder}/neo4j
echo "Converting CSV dataset to '${dataset_folder}/neo4j/graph.db'"
rm -rf graph.db
${NEO4J_HOME}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"
neo4j_database_dir=${dataset_dir}/neo4j/databases
mkdir -p ${neo4j_database_dir}
cd ${neo4j_database_dir}
echo "Converting CSV dataset to '${neo4j_database_dir}/graph.db'"
${neo4j_dir}/bin/neo4j-import --into graph.db ${csv_dataset} --delimiter "|" --array-delimiter ";"
# Convert to memgraph internal format.
echo "Using MEMGRAPH_HOME" ${memgraph_dir}
mkdir -p ${dataset_folder}/memgraph
cd ${dataset_folder}/memgraph
echo "Converting CSV dataset to '${dataset_folder}/memgraph/graph.snapshot'"
rm -rf graph.snapshot
${memgraph_dir}/tools/csv_to_snapshot -o graph.snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
memgraph_snapshot_dir=${dataset_dir}/memgraph/default
mkdir -p ${memgraph_snapshot_dir}
cd ${memgraph_snapshot_dir}
echo "Converting CSV dataset to '${memgraph_snapshot_dir}/snapshot'"
${base_dir}/tools/csv_to_snapshot -o snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
echo "Done!"

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# go to script directory
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd ${script_dir}
# remove archives
rm *.tar.gz *.tar 2>/dev/null
# remove logo
rm plots/ldbc-logo.png 2>/dev/null
# remove directories
for folder in maven hadoop ldbc_snb_datagen ldbc_driver ldbc-snb-impls ve3; do
if [ -d "$folder" ]; then
rm -rf $folder
fi
done

View File

@@ -13,13 +13,13 @@
#dbms.directories.plugins=/var/lib/neo4j/plugins
#dbms.directories.certificates=/var/lib/neo4j/certificates
#dbms.directories.logs=/var/log/neo4j
dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.lib=/usr/share/neo4j/lib
#dbms.directories.run=/var/run/neo4j
# This setting constrains all `LOAD CSV` import files to be under the `import` directory. Remove or comment it out to
# allow files to be loaded from anywhere in the filesystem; this introduces possible security problems. See the
# `LOAD CSV` section of the manual for details.
dbms.directories.import=/var/lib/neo4j/import
#dbms.directories.import=/var/lib/neo4j/import
# Whether requests to Neo4j are authenticated.
# To disable authentication, uncomment this line
@@ -32,8 +32,8 @@ dbms.security.auth_enabled=false
# calculated based on available system resources.
# Uncomment these lines to set specific initial and maximum
# heap size.
dbms.memory.heap.initial_size=2096m
dbms.memory.heap.max_size=4192m
#dbms.memory.heap.initial_size=2096m
#dbms.memory.heap.max_size=4192m
# The amount of memory to use for mapping the store files, in bytes (or
# kilobytes with the 'k' suffix, megabytes with 'm' and gigabytes with 'g').
@@ -44,7 +44,7 @@ dbms.memory.heap.max_size=4192m
# The default page cache memory assumes the machine is dedicated to running
# Neo4j, and is heuristically set to 50% of RAM minus the max Java heap size.
#dbms.memory.pagecache.size=10g
dbms.query_cache_size=0
#dbms.query_cache_size=0
#*****************************************************************
# Network connector configuration
@@ -317,3 +317,6 @@ dbms.windows_service_name=neo4j
# Other Neo4j system properties
#********************************************************************
dbms.jvm.additional=-Dunsupported.dbms.udc.source=debian
# Disable Neo4j usage data collection
dbms.udc.enabled=false

View File

@@ -0,0 +1,17 @@
# script used to run LDBC benchmarks on Apollo
# setup dependencies
TIMEOUT=1200 ./setup
# build dataset
TIMEOUT=3600 ./build_dataset
# run read benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
# run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png

View File

@@ -1,44 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>net.ellitron.ldbcsnbimpls</groupId>
<artifactId>ldbc-snb-impls</artifactId>
<packaging>pom</packaging>
<version>0.1.0</version>
<name>LDBC SNB Workload Implementations</name>
<url>https://github.com/ellitron/ldbc-snb-impls</url>
<description>
A collection of workload implementations for the LDBC SNB benchmark driver
(see https://github.com/ldbc/ldbc_driver).
</description>
<modules>
<module>snb-interactive-core</module>
<!--<module>snb-interactive-tools</module>-->
<module>snb-interactive-neo4j</module>
<!--<module>snb-interactive-titan</module>-->
<!--<module>snb-interactive-torc</module>-->
</modules>
<dependencies>
<dependency>
<groupId>com.ldbc.driver</groupId>
<artifactId>jeeves</artifactId>
<version>0.3-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
</project>

View File

@@ -1,2 +0,0 @@
*
!.gitignore

View File

@@ -1,131 +0,0 @@
#!/bin/bash
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --transform-dataset -> Run just transform dataset (SNB -> Memgraph Snapshot)."
echo -e " --copy-dataset -> Just copy dataset into the Memgraph snapshots path."
echo -e " --run -> Just run Memgraph."
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
memgraph_dir="${script_dir}/../../.."
# TODO: pass as an argument
memgraph_build_dir="${memgraph_dir}/build"
loadable_snapshot_path="${memgraph_build_dir}/snapshots/default/3000_01_01__01_01_01_00000"
# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
copy_dataset=false
run_memgraph=false
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--scale-factor)
scale_factor=$2
shift
;;
--transform-dataset)
run_all=false
transform_dataset=true
;;
--copy-dataset)
run_all=false
copy_dataset=true
;;
--run)
run_all=false
run_memgraph=true
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
snapshot_path="${script_dir}/memgraph_snapshots/snb_scale_${scale_factor}.snapshot"
csv_folder="${script_dir}/neo4j_csv_dataset_scale_${scale_factor}"
# Transform dataset into MemGraph Snapshot.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
rm ${snapshot_path}
${memgraph_dir}/tools/csv_to_snapshot -o ${snapshot_path} \
--nodes ${csv_folder}/comment_0_0.csv \
--nodes ${csv_folder}/forum_0_0.csv \
--nodes ${csv_folder}/organisation_0_0.csv \
--nodes ${csv_folder}/person_0_0.csv \
--nodes ${csv_folder}/place_0_0.csv \
--nodes ${csv_folder}/post_0_0.csv \
--nodes ${csv_folder}/tag_0_0.csv \
--nodes ${csv_folder}/tagclass_0_0.csv \
--relationships ${csv_folder}/comment_hasCreator_person_0_0.csv \
--relationships ${csv_folder}/comment_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/comment_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/comment_replyOf_comment_0_0.csv \
--relationships ${csv_folder}/comment_replyOf_post_0_0.csv \
--relationships ${csv_folder}/forum_containerOf_post_0_0.csv \
--relationships ${csv_folder}/forum_hasMember_person_0_0.csv \
--relationships ${csv_folder}/forum_hasModerator_person_0_0.csv \
--relationships ${csv_folder}/forum_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/organisation_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/person_hasInterest_tag_0_0.csv \
--relationships ${csv_folder}/person_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/person_knows_person_0_0.csv \
--relationships ${csv_folder}/person_likes_comment_0_0.csv \
--relationships ${csv_folder}/person_likes_post_0_0.csv \
--relationships ${csv_folder}/person_studyAt_organisation_0_0.csv \
--relationships ${csv_folder}/person_workAt_organisation_0_0.csv \
--relationships ${csv_folder}/place_isPartOf_place_0_0.csv \
--relationships ${csv_folder}/post_hasCreator_person_0_0.csv \
--relationships ${csv_folder}/post_hasTag_tag_0_0.csv \
--relationships ${csv_folder}/post_isLocatedIn_place_0_0.csv \
--relationships ${csv_folder}/tag_hasType_tagclass_0_0.csv \
--relationships ${csv_folder}/tagclass_isSubclassOf_tagclass_0_0.csv \
--csv-delimiter "|" --array-delimiter ";"
echo "Dataset transformed."
fi
# Copy the dataset.
if [[ ${run_all} = true ]] || [[ ${copy_dataset} = true ]] ; then
cp ${snapshot_path} ${loadable_snapshot_path}
echo "Dataset copied."
fi
# Run MemGraph.
if [[ ${run_all} = true ]] || [[ ${run_memgraph} = true ]] ; then
${memgraph_build_dir}/memgraph -flagfile ${memgraph_dir}/config/public_benchmark_ldbc.conf 2>&1 &
memgraph_pid=$!
sleep 200 # TODO: replace this with something that is going to work in all cases
# not just in SNB scale 1 case
# Create indexes.
cd ${script_dir}
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
fi
source ve3/bin/activate
python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j
# On Ctrl-C stop Memgraph.
trap ctrl_c INT
function ctrl_c() {
kill -9 ${memgraph_pid}
exit 0
}
while true; do
sleep 1
done
fi

View File

@@ -1,95 +0,0 @@
#!/bin/bash
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --scale-factor Positive_Integer -> Defines the dataset size."
echo -e " --transform-dataset -> Run just transform dataset (SNB -> Neo4j CSV)."
echo -e " --load-dataset -> Just load dataset into Neo4j."
echo -e " --run -> Just run Neo4j."
}
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Read the arguments.
scale_factor=1
run_all=true
transform_dataset=false
load_dataset=false
run_neo=false
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--scale-factor)
scale_factor=$2
shift
;;
--transform-dataset)
run_all=false
transform_dataset=true
;;
--load-dataset)
run_all=false
load_dataset=true
;;
--run)
run_all=false
run_neo=true
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
dataset_folder=${script_dir}/neo4j_csv_dataset_scale_${scale_factor}
# Transform the dataset into files on disk.
if [[ ${run_all} = true ]] || [[ ${transform_dataset} = true ]] ; then
cd ${dataset_folder}
chmod +x import.sh
NEO4J_HOME=/usr/share/neo4j ./import.sh
fi
# Load the dataset into Neo4j.
if [[ ${run_all} = true ]] || [[ ${load_dataset} = true ]] ; then
neo4j_data=${script_dir}/neo4j_home/data
neo4j_graphdb=${neo4j_data}/databases/graph.db
mkdir -p ${neo4j_graphdb}
rm -rf ${neo4j_graphdb}/*
cp -r ${dataset_folder}/graph.db/* ${neo4j_graphdb}/
fi
# Run Neo4j.
if [[ ${run_all} = true ]] || [[ ${run_neo} = true ]] ; then
NEO4J_HOME=${script_dir}/neo4j_home NEO4J_CONF=${script_dir}/neo4j_config /usr/share/neo4j/bin/neo4j console 2>&1 &
neo_pid=$!
sleep 5
# Create indexes.
cd ${script_dir}
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
fi
source ve3/bin/activate
python index_creation.py ${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/scripts/indexCreation.neo4j
# On Ctrl-C stop Neo4j.
trap ctrl_c INT
function ctrl_c() {
kill -9 ${neo_pid}
exit 0
}
while true; do
sleep 1
done
fi

View File

@@ -1,2 +0,0 @@
*
!.gitignore

View File

@@ -0,0 +1,3 @@
neo4j-driver==1.4.0
matplotlib==2.0.2
numpy==1.13.1

View File

@@ -1,10 +0,0 @@
cycler==0.10.0
functools32==3.2.3.post2
matplotlib==2.0.2
numpy==1.13.1
pkg-resources==0.0.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2017.2
six==1.10.0
subprocess32==3.2.7

View File

@@ -1,2 +0,0 @@
neo4j-driver==1.4.0
pkg-resources==0.0.0

View File

@@ -1,81 +1,230 @@
#!/bin/bash
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Run the LDBC SNB interactive workload / benchmark.
# The benchmark is executed with:
# * ldbc_driver -> workload executor
# * ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''
Run the LDBC SNB interactive workload / benchmark.
The benchmark is executed with:
* ldbc_driver -> workload executor
* ldbc-snb-impls/snb-interactive-neo4j -> workload implementation
'''
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
import time
function print_help () {
echo "Usage: $0 [OPTION]"
echo "Optional arguments:"
echo -e " -h|--help -> Prints help."
echo -e " --host -> Database host."
echo -e " --port -> Database port."
echo -e " --time-compression-ratio |"
echo -e " --operation-count | -> https://github.com/ldbc/ldbc_driver/wiki/Driver-Configuration"
echo -e " --thread-count |"
echo -e " --result-file-prefix -> Result file prefix."
echo -e " --properties-file -> Properties file used to select queries"
}
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", ".."))
# Default parameters.
host=127.0.0.1
port=7687
time_compression_ratio=0.01
operation_count=200
thread_count=8
result_file_prefix="undefined"
properties_file="${script_dir}/ldbc-snb-impls-short-reads.properties"
# Read the arguments.
while [[ $# -gt 0 ]]
do
case $1 in
-h|--help)
print_help
exit 1
;;
--host)
host=$2
shift
;;
--port)
port=$2
shift
;;
--result-file-prefix)
result_file_prefix=$2
shift
;;
--properties-file)
properties_file=$2
shift
;;
--time-compression-ratio)
time_compression_ratio=$2
shift
;;
--operation-count)
operation_count=$2
shift
;;
--thread-count)
thread_count=$2
shift
;;
*)
# unknown option
;;
esac
shift # past argument or value
done
def wait_for_server(port, delay=1.0):
    cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
    while subprocess.call(cmd) != 0:
        time.sleep(0.5)
    time.sleep(delay)
cd ${script_dir}/ldbc-snb-impls
mvn clean compile assembly:single
cd ${script_dir}/ldbc_driver
java -cp target/jeeves-0.3-SNAPSHOT.jar:${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/target/snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar com.ldbc.driver.Client -P ${script_dir}/ldbc_driver/configuration/ldbc_driver_default.properties -P ${properties_file} -p ldbc.snb.interactive.updates_dir ${script_dir}/ldbc_snb_datagen/social_network -p host ${host} -p port ${port} -db net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb -p ldbc.snb.interactive.parameters_dir ${script_dir}/ldbc_snb_datagen/substitution_parameters --time_compression_ratio ${time_compression_ratio} --operation_count ${operation_count} --thread_count ${thread_count}
class Memgraph:
    def __init__(self, dataset, port, num_workers):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.num_workers = str(num_workers)
cp ${script_dir}/ldbc_driver/results/LDBC-results.json ${script_dir}/results/${result_file_prefix}-LDBC-results.json
    def start(self):
        # find executable path
        binary = os.path.join(BASE_DIR, "build", "memgraph")
        if not os.path.exists(binary):
            binary = os.path.join(BASE_DIR, "build_release", "memgraph")
        # database args
        database_args = [binary, "--num-workers", self.num_workers,
                         "--snapshot-directory", os.path.join(self.dataset,
                                                              "memgraph"),
                         "--recover-on-startup", "true",
                         "--port", self.port]
        # database env
        env = {"MEMGRAPH_CONFIG": os.path.join(SCRIPT_DIR, "config",
                                               "memgraph.conf")}
        # start memgraph
        self.proc = subprocess.Popen(database_args, env=env)
        wait_for_server(self.port)

    def stop(self):
        self.proc.terminate()
        if self.proc.wait() != 0:
            raise Exception("Database exited with non-zero exit code!")


class Neo:
    def __init__(self, dataset, port):
        self.proc = None
        self.dataset = dataset
        self.port = str(port)
        self.http_port = str(int(port) + 7474)
        self.home_dir = None

    def start(self):
        # create home directory
        self.home_dir = tempfile.mkdtemp(dir="/dev/shm")
        neo4j_dir = os.path.join(BASE_DIR, "libs", "neo4j")
        try:
            os.symlink(os.path.join(neo4j_dir, "lib"),
                       os.path.join(self.home_dir, "lib"))
            os.symlink(os.path.join(self.dataset, "neo4j"),
                       os.path.join(self.home_dir, "data"))
            conf_dir = os.path.join(self.home_dir, "conf")
            conf_file = os.path.join(conf_dir, "neo4j.conf")
            os.mkdir(conf_dir)
            shutil.copyfile(os.path.join(SCRIPT_DIR, "config", "neo4j.conf"),
                            conf_file)
            with open(conf_file, "a") as f:
                f.write("\ndbms.connector.bolt.listen_address=:" +
                        self.port + "\n")
                f.write("\ndbms.connector.http.listen_address=:" +
                        self.http_port + "\n")
            # environment
            env = {"NEO4J_HOME": self.home_dir}
            self.proc = subprocess.Popen([os.path.join(neo4j_dir, "bin",
                                                       "neo4j"),
                                          "console"], env=env, cwd=neo4j_dir)
        except:
            shutil.rmtree(self.home_dir)
            raise Exception("Couldn't run Neo4j!")
        wait_for_server(self.http_port, 2.0)

    def stop(self):
        self.proc.terminate()
        ret = self.proc.wait()
        if os.path.exists(self.home_dir):
            shutil.rmtree(self.home_dir)
        if ret != 0:
            raise Exception("Database exited with non-zero exit code!")


def parse_args():
    argp = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    argp.add_argument('--scale', type=int, default=1,
                      help='Dataset scale to use for benchmarking.')
    argp.add_argument('--host', default='127.0.0.1', help='Database host.')
    argp.add_argument('--port', default='7687', help='Database port.')
    argp.add_argument('--time-compression-ratio', type=float, default=0.01,
                      help='Compress/stretch durations between operation '
                           'start times to increase/decrease benchmark load. '
                           'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
                           'benchmark 10x faster. Default is 0.01.')
    argp.add_argument('--operation-count', type=int, default=200,
                      help='Number of operations to generate during benchmark '
                           'execution.')
    argp.add_argument('--thread-count', type=int, default=8,
                      help='Thread pool size to use for executing operation '
                           'handlers.')
    argp.add_argument('--time-unit', default='microseconds',
                      choices=('nanoseconds', 'microseconds', 'milliseconds',
                               'seconds', 'minutes'),
                      help='Time unit to use for measuring performance '
                           'metrics')
    argp.add_argument('--result-file-prefix', default='',
                      help='Result file name prefix')
    argp.add_argument('--properties-file',
                      default=os.path.join(
                          SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
                      help='Properties file used to select queries')
    argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
                      help='Run the database before starting LDBC')
    argp.add_argument('--create-index', action='store_true', default=False,
                      help='Create index in the running database.')
    return argp.parse_args()


LDBC_INTERACTIVE_NEO4J = \
    os.path.join(SCRIPT_DIR,
                 'ldbc-snb-impls', 'snb-interactive-neo4j', 'target',
                 'snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar')
LDBC_DEFAULT_PROPERTIES = \
    os.path.join(SCRIPT_DIR, 'ldbc_driver', 'configuration',
                 'ldbc_driver_default.properties')


def create_index(port):
    index_file = os.path.join(SCRIPT_DIR, 'ldbc-snb-impls',
                              'snb-interactive-neo4j', 'scripts',
                              'indexCreation.neo4j')
    subprocess.check_call(('ve3/bin/python3', 'index_creation.py',
                           index_file, port), cwd=SCRIPT_DIR)
    time.sleep(1.0)


def main():
    args = parse_args()
    dataset = os.path.join(SCRIPT_DIR, "datasets", "scale_" + str(args.scale))
    db = None
    if args.run_db:
        if args.host != "127.0.0.1":
            raise Exception("Host parameter must point to localhost when "
                            "this script starts the database!")
        if args.run_db.lower() == 'memgraph':
            db = Memgraph(dataset, args.port, args.thread_count)
        elif args.run_db.lower() == 'neo4j':
            db = Neo(dataset, args.port)
    try:
        if db:
            db.start()
        if args.create_index:
            create_index(args.port)
        # Run LDBC driver.
        cp = 'target/jeeves-0.3-SNAPSHOT.jar:{}'.format(LDBC_INTERACTIVE_NEO4J)
        updates_dir = os.path.join(dataset, 'social_network')
        parameters_dir = os.path.join(dataset, 'substitution_parameters')
        java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
                    '-P', LDBC_DEFAULT_PROPERTIES,
                    '-P', os.path.join(os.getcwd(), args.properties_file),
                    '-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
                    '-p', 'host', args.host, '-p', 'port', args.port,
                    '-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
                    '-p', 'ldbc.snb.interactive.parameters_dir', parameters_dir,
                    '--time_compression_ratio', str(args.time_compression_ratio),
                    '--operation_count', str(args.operation_count),
                    '--thread_count', str(args.thread_count),
                    '--time_unit', args.time_unit.upper())
        subprocess.check_call(java_cmd,
                              cwd=os.path.join(SCRIPT_DIR, 'ldbc_driver'))
        # Copy the results to results dir.
        ldbc_results = os.path.join(SCRIPT_DIR, 'ldbc_driver', 'results',
                                    'LDBC-results.json')
        results_dir = os.path.join(SCRIPT_DIR, 'results')
        results_name = []
        if args.result_file_prefix:
            results_name.append(args.result_file_prefix)
        if args.run_db:
            results_name.append(args.run_db)
        else:
            results_name.append("external")
        results_name.append("scale_" + str(args.scale))
        results_name = "-".join(results_name + ["LDBC", "results.json"])
        results_copy = os.path.join(results_dir, results_name)
        shutil.copyfile(ldbc_results, results_copy)
        print("Results saved to:", results_copy)
    finally:
        if db:
            db.stop()
    print("Done!")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,82 @@
#!/bin/bash -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Cleanup
cd ${script_dir}
./cleanup
# Find remote endpoints
if [ "$USER" == "apollo" ]; then
deps_http_url="http://89.201.166.70:46744"
deps_git_url="git://89.201.166.70:46745"
deps_pypi_url="http://89.201.166.70:46746/root/pypi"
deps_pypi_trusted="89.201.166.70"
else
deps_http_url="http://deps.memgraph.io"
deps_git_url="git://deps.memgraph.io"
deps_pypi_url="http://deps.memgraph.io:3141/root/pypi"
deps_pypi_trusted="deps.memgraph.io"
fi
# Download Maven
cd ${script_dir}
wget -nv $deps_http_url/ldbc/apache-maven-3.5.0-bin.tar.gz -O maven.tar.gz
tar -xzf maven.tar.gz
mv apache-maven-3.5.0 maven
rm maven.tar.gz
cd maven
sed -r "s@</settings>@<localRepository>${script_dir}/maven/.m2</localRepository>\n</settings>@g" -i conf/settings.xml
wget -nv $deps_http_url/ldbc/maven-cache.tar.gz
tar -xzf maven-cache.tar.gz
rm maven-cache.tar.gz
mvn=${script_dir}/maven/bin/mvn
# Download Hadoop
cd ${script_dir}
wget -nv $deps_http_url/ldbc/hadoop-2.7.4.tar.gz -O hadoop.tar.gz
tar -xzf hadoop.tar.gz
mv hadoop-2.7.4 hadoop
rm hadoop.tar.gz
# Edit Java Xmx settings because Hadoop has stupid default run scripts and they duplicate the -Xmx flag
# https://stackoverflow.com/questions/2740725/duplicated-java-runtime-options-what-is-the-order-of-preference
# "Depends on the JVM, perhaps the version...perhaps even how many paper clips you have on your desk at the time. It might not even work. Don't do that."
sed -r "s@-Xmx512m @@g" -i hadoop/etc/hadoop/hadoop-env.sh
# Download LDBC logo
cd ${script_dir}/plots
wget -nv $deps_http_url/ldbc/ldbc-logo.png
# Setup ldbc_snb_datagen
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_snb_datagen.git
git clone $deps_git_url/ldbc_snb_datagen.git
cd ldbc_snb_datagen
git checkout 46ccf9340c20d8cfde0e7e11c9297a4061117bd3
sed -r "s@#!/bin/bash@#!/bin/bash -e@g" -i run.sh
# Setup ldbc_driver
cd ${script_dir}
#git clone https://github.com/ldbc/ldbc_driver.git
git clone $deps_git_url/ldbc_driver.git
cd ldbc_driver
git checkout 1bb441394c3cd3e23d4df5a87689b9d1c5e6f48f
$mvn clean package -DskipTests
$mvn install -DskipTests
# Setup ldbc-snb-impls
cd ${script_dir}
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
git clone $deps_git_url/ldbc-snb-impls.git
cd ldbc-snb-impls
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
$mvn install
$mvn clean compile assembly:single
# Setup Python3 Virtualenv
cd ${script_dir}
virtualenv -p python3 ve3
source ve3/bin/activate
pip install -i $deps_pypi_url \
--trusted-host $deps_pypi_trusted -r requirements.txt
deactivate

View File

@@ -1,50 +0,0 @@
#!/bin/bash
# Setup all dependencies
function command_fail {
echo $1
exit 1
}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Setup ldbc_snb_datagen
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_snb_datagen
# Setup ldbc_driver
cd ${script_dir}
git clone https://github.com/ldbc/ldbc_driver.git
cd ${script_dir}/ldbc_driver
mvn clean package -DskipTests || exit 1
mvn install -DskipTests || exit 1
# Setup ldbc-snb-impls
cd ${script_dir}
git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
cp ${script_dir}/ldbc-snb-impls-pom.xml ${script_dir}/ldbc-snb-impls/pom.xml
cd ${script_dir}/ldbc-snb-impls
mvn install || exit 1
# Use set -e after we have called git clone, to avoid exiting if we already
# cloned something.
set -e
# Setup python virtual environment & Install dependencies
cd ${script_dir}
if ! which virtualenv > /dev/null 2>&1; then
command_fail "Please install virtualenv!"
fi
if [ ! -d "ve3" ]; then
virtualenv -p python3 ve3 || command_fail "Virtualenv setup failed."
fi
source ve3/bin/activate
pip install -r ${script_dir}/requirements_3.txt
deactivate
if [ ! -d "ve2" ]; then
virtualenv -p python2 ve2 || command_fail "Virtualenv setup failed."
fi
source ve2/bin/activate
pip install -r ${script_dir}/requirements_2.txt
deactivate

View File

@@ -1,37 +0,0 @@
#!/bin/bash
# System setup (root access is required)
# Working directories
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
mkdir -p ${script_dir}/tmp
# Install OS packages
cd ${script_dir}/tmp
if which apt-get > /dev/null 2>&1; then
wget -O - http://debian.neo4j.org/neotechnology.gpg.key >> key.pgp || exit 1
sudo apt-key add key.pgp || exit 1
sudo echo 'deb http://debian.neo4j.org/repo stable/' | tee -a /etc/apt/sources.list.d/neo4j.list > /dev/null || exit 1
sudo apt-get update || exit 1
sudo apt-get install -y maven default-jdk neo4j || exit 1
else
echo "Assuming that 'maven', 'jdk' and 'neo4j' are installed"
fi
# Install Hadoop
cd ${script_dir}/tmp
hadoop_version="hadoop-2.7.3"
hadoop_tar="${hadoop_version}.tar.gz"
hadoop_url="http://apache.mirrors.tds.net/hadoop/common/${hadoop_version}/${hadoop_tar}"
wget ${hadoop_url}
tar -xzf ${hadoop_tar}
# TODO: root access is required here -> run hadoop under a current user
echo "Moving hadoop to /usr/local/hadoop"
sudo mv ${hadoop_version} /usr/local/hadoop
# Performance Setup
# echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >/dev/null
# Cleanup
rm -rf ${script_dir}/tmp

View File

@@ -260,6 +260,23 @@ if mode == "release":
    RUNS.append(generate_run("stress_large", commands = cmd, infile = infile,
                             slave_group = "remote_16c56g"))

# public_benchmark/ldbc tests
if mode == "release":
    ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
    neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
    csv_to_snapshot_path = os.path.join(BASE_DIR, "tools", "csv_to_snapshot")
    plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools", "plot_ldbc_latency")
    infile = create_archive("ldbc", [binary_release_path, ldbc_path,
                                     binary_release_link_path, neo4j_path, config_path,
                                     csv_to_snapshot_path, plot_ldbc_latency_path],
                            cwd = WORKSPACE_DIR)
    cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
    outfile_paths = "\./memgraph/tests/public_benchmark/ldbc/results/.+\n" \
                    "\./memgraph/tests/public_benchmark/ldbc/plots/.+\n"
    RUNS.append(generate_run("public_benchmark__ldbc", commands = cmd,
                             infile = infile, outfile_paths = outfile_paths,
                             slave_group = "remote_20c140g", enable_network = True))

# store ARCHIVES and RUNS
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)

View File

@@ -8,38 +8,51 @@ Latency Barchart (Based on LDBC JSON output).
import json
import os
import numpy as np
from argparse import ArgumentParser
import string
import matplotlib
# Must set 'Agg' backend before importing pyplot
# This is so the script works on headless machines (without X11)
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
from argparse import ArgumentParser

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

COLORS = {
    'memgraph': '#ff7300',
    'neo4j': '#008cc2'
COLORS = ['#ff7300', '#008cc2']  # TODO: add more colors!

LDBC_TIME_FACTORS = {
    "SECONDS": 1.0,
    "MILLISECONDS": 1000.0,
    "MICROSECONDS": 1000000.0,
    "NANOSECONDS": 1000000000.0
}

TIME_FACTORS = {
    "s": 1.0,
    "ms": 1000,
    "us": 1000000,
    "ns": 1000000000,
}


def parse_args():
    argp = ArgumentParser(description=__doc__)
    argp.add_argument("--vendor-references", nargs="+",
                      help="Short references that represent all the "
                           "vendors that are going to be "
                           "visualized on the plot.")
    argp.add_argument("--vendor-titles", nargs="+",
                      default=["Memgraph", "Market leader"],
                      help="Vendor titles that are going to appear "
                           "on the plot, e.g. legend titles.")
    argp.add_argument("--plot-title", default="{{Plot title placeholder}}",
    argp.add_argument('--plot-title', default="",
                      help="Plot title.")
    argp.add_argument("--logo-path", default=None,
                      help="Path to the logo that is going to be presented"
                           " instead of title.")
    argp.add_argument("--results-dir",
                      default=os.path.join(SCRIPT_DIR,
                                           "../tests/public_benchmark"
                                           "/ldbc/results"),
    argp.add_argument("--results", nargs="+", required=True,
                      help="Paths to result files in format "
                           "{{vendor-reference}}-LDBC-results.json")
    argp.add_argument("--time-unit", choices=("s", "ms", "us", "ns"),
                      default="ms", help="The time unit that should be used.")
    argp.add_argument("--output", default="",
                      help="Save plot to file (instead of displaying it).")
    argp.add_argument("--max-label-width", default=11, type=int,
                      help="Maximum length of the x-axis labels (-1 is unlimited)")
    return argp.parse_args()
@@ -62,33 +75,43 @@ def main():
    args = parse_args()

    # Prepare the datastructure.
    vendors = {}
    for vendor_reference, vendor_title in zip(args.vendor_references,
                                              args.vendor_titles):
        vendors[vendor_reference] = {}
        vendors[vendor_reference]['title'] = vendor_title
        vendors[vendor_reference]['results_path'] = os.path.join(
            args.results_dir, "%s-LDBC-results.json" % vendor_reference)
        vendors[vendor_reference]['color'] = COLORS[vendor_reference]
        vendors[vendor_reference]['latencies'] = []
        vendors[vendor_reference]['query_names'] = []
    vendors = []
    for i, results_file, vendor_title in zip(range(len(args.results)),
                                             args.results,
                                             args.vendor_titles):
        vendor = {}
        vendor['title'] = vendor_title
        vendor['results_file'] = results_file
        vendor['color'] = COLORS[i]
        vendor['results'] = []
        vendors.append(vendor)
    assert len(vendors) == 2, "The graph is tailored for only 2 vendors."

    # Collect the benchmark data.
    print("LDBC Latency Data")
    for vendor_reference, vendor_data in vendors.items():
        print("Vendor: %s" % vendor_reference)
        with open(vendor_data['results_path']) as results_file:
    for vendor in vendors:
        with open(vendor['results_file']) as results_file:
            results_data = json.load(results_file)
            for query_data in results_data["all_metrics"]:
                mean_runtime = query_data["run_time"]["mean"]
                mean_runtime = (query_data["run_time"]["mean"] /
                                LDBC_TIME_FACTORS[results_data["unit"]] *
                                TIME_FACTORS[args.time_unit])
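                # Worked example of the conversion (hypothetical numbers):
                # a mean of 2500 reported by the driver in MICROSECONDS and
                # displayed in "ms" gives 2500 / 1000000.0 * 1000 = 2.5 ms.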
                query_name = query_data['name']
                print("%s -> %sms" % (query_name, str(mean_runtime)))
                vendor_data['latencies'].append(mean_runtime)
                vendor_data['query_names'].append(query_name)
                vendor['results'].append((query_name, mean_runtime))

    # Sort results.
    for vendor in vendors:
        vendor['results'].sort(key=lambda item: int("".join(filter(
            lambda x: x in string.digits, item[0]))))

    # Print results.
    for vendor in vendors:
        print("Vendor:", vendor['title'])
        for query_name, latency in vendor['results']:
            print("{} -> {:.3f}{}".format(query_name, latency,
                                          args.time_unit))

    # Consistency check.
    all_query_names = [tuple(vd['query_names']) for vd in vendors.values()]
    all_query_names = [tuple(res[0] for res in vd['results']) for vd in vendors]
    assert len(set(all_query_names)) == 1, \
        "Queries between different vendors are different!"
    query_names = all_query_names[0]
@@ -97,7 +120,8 @@ def main():
    ind = np.arange(len(query_names))  # the x locations for the groups
    width = 0.40  # the width of the bars

    fig, ax = plt.subplots()  # figure setup
    ax.set_ylabel('Mean Latency (ms)')  # YAxis title
    fig.set_size_inches(1920 / 96, 1080 / 96)  # set figure size
    ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
    ax.set_facecolor('#dcdcdc')  # plot bg color (light gray)
    ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
@@ -121,26 +145,33 @@ def main():
        line.set_linestyle('--')
    ax.set_axisbelow(True)  # put the grid below all other elements
    plt.grid(True)  # show grid

    # Set plot title
    ax.set_title(args.plot_title)
    # Draw logo or plot title
    if args.logo_path is None:
        ax.set_title(args.plot_title)
    else:
    if args.logo_path != None:
        # TODO: improve the logo positioning
        im = plt.imread(get_sample_data(args.logo_path))
        im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
                                                     args.logo_path)))
        plt.gcf().subplots_adjust(top=0.85)
        newax = fig.add_axes([0.4, 0.75, 0.2, 0.25], anchor='N')
        newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
        newax.imshow(im)
        newax.axis('off')

    # Draw bars
    for index, vendor_data in enumerate(vendors.values()):
        rects = ax.bar(ind + index * width, vendor_data['latencies'], width,
                       color=vendor_data['color'])
        vendor_data['rects'] = rects
    for index, vendor in enumerate(vendors):
        latencies = [res[1] for res in vendor['results']]
        rects = ax.bar(ind + index * width, latencies, width,
                       color=vendor['color'])
        vendor['rects'] = rects
        autolabel(ax, rects)

    rects = [vd['rects'][0] for vd in vendors.values()]
    titles = [vd['title'] for vd in vendors.values()]
    rects = [vd['rects'][0] for vd in vendors]
    titles = [vd['title'] for vd in vendors]
    ax.legend(rects, titles)  # Draw the legend.

    plt.show()
    if args.output == "":
        plt.show()
    else:
        plt.savefig(args.output, dpi=96)


if __name__ == '__main__':
    main()