memgraph/tests/public_benchmark/ldbc/setup_dataset
Teon Banek 77e574fcc5 Cleanup some errors in ldbc setup scripts
Summary:
Don't require setup_system to run as root, nor apt-get
Implement command_fail for ldbc/setup_dependencies
ldbc.setup_dataset: Find Java on ArchLinux

Reviewers: buda, mferencevic

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D729
2017-09-01 14:13:34 +02:00

70 lines
2.2 KiB
Bash
Executable File

#!/bin/bash
# Generate SNB dataset.
# Print the usage text of this script to stdout.
# Uses $0 so the message shows the name the script was invoked with.
function print_help () {
  printf '%s\n' \
    "Usage: $0 [OPTION]" \
    "Optional arguments:" \
    " -h|--help -> Prints help." \
    " --scale-factor Positive_Integer -> Defines the dataset size."
}
# Abort the script as soon as any command fails.
set -o errexit
# Absolute path of the directory that contains this script; the dataset and
# tool folders used further down are resolved relative to it.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Read the arguments.

#######################################
# Parse the command-line options of this script.
# Globals:   scale_factor (written when --scale-factor is given)
# Arguments: the script's positional parameters ("$@")
# Outputs:   usage text on -h|--help; diagnostics on stderr
# Returns:   0 on success. Exits the script with status 1 on -h|--help or
#            when --scale-factor has a missing or non-numeric value.
#######################################
function parse_args () {
  # Digits with an optional decimal part. Anything else (empty string,
  # whitespace, slashes, globs) is rejected because the value is later
  # interpolated into file-system paths and into params.ini.
  local -r number_re='^[0-9]+([.][0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        print_help
        exit 1
        ;;
      --scale-factor)
        # Without this check the extra 'shift' below would fail on a missing
        # value and 'set -e' would terminate the script without any message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for --scale-factor" >&2
          exit 1
        fi
        if [[ ! "$2" =~ $number_re ]]; then
          echo "Invalid scale factor: '$2'" >&2
          exit 1
        fi
        scale_factor="$2"
        shift # past the option name; the value is consumed below
        ;;
      *)
        # unknown option: deliberately ignored
        ;;
    esac
    shift # past argument or value
  done
}

# Default dataset size, used when --scale-factor is not given.
scale_factor=1
parse_args "$@"
echo "Using scale_factor" "$scale_factor"
# Prepare the folder structure.
# The output folder lives next to this script and is named after the scale
# factor; it is created if needed and emptied of any previous content.
dataset_folder_prefix="neo4j_csv_dataset"
dataset_folder="${script_dir}/${dataset_folder_prefix}_scale_${scale_factor}"
mkdir -p -- "${dataset_folder}"
# The expansion is quoted so a path containing spaces or glob characters is
# never split into several rm targets; ':?' aborts instead of expanding to
# '/*' should the variable ever be empty. The glob itself stays unquoted.
rm -rf -- "${dataset_folder:?}"/*
# Define scale factor.
# Overwrites the datagen params.ini with the chosen scale factor and the CSV
# serializers. The heredoc delimiter is unquoted on purpose so that
# ${scale_factor} is expanded.
cat > "${script_dir}/ldbc_snb_datagen/params.ini" <<EOF
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.${scale_factor}
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
EOF
# Generate the dataset.
ldbc_snb_datagen_folder="${script_dir}/ldbc_snb_datagen"
# Quoted so a checkout path containing whitespace does not break the cd.
cd "${ldbc_snb_datagen_folder}"
# Append a 10240 MB JVM heap limit to whatever Hadoop options the caller
# already exported (':-' keeps this safe when HADOOP_OPTS is unset).
export HADOOP_OPTS="${HADOOP_OPTS:-} -Xmx10240M"
# Locate a JRE: try the default-java layout first, then the default-runtime
# one (added for ArchLinux). Any JAVA_HOME inherited from the environment is
# overridden.
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
  export JAVA_HOME=/usr/lib/jvm/default-java/jre
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
  export JAVA_HOME=/usr/lib/jvm/default-runtime/
else
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo "Unable to find JRE under /usr/lib/jvm" >&2
  exit 1
fi
echo "Using JAVA_HOME" "$JAVA_HOME"
# Run the generator with HADOOP_HOME and LDBC_SNB_DATAGEN_HOME set only for
# this one command.
HADOOP_HOME=/usr/local/hadoop LDBC_SNB_DATAGEN_HOME="${ldbc_snb_datagen_folder}" ./run.sh
# Transform the dataset into Neo4j CSV format.
# Quoted so a checkout path containing whitespace does not break the cd.
cd "${script_dir}/ldbc-snb-impls/snb-interactive-neo4j"
# Runs DataFormatConverter with two arguments: the generator output folder
# (social_network) and the destination dataset folder.
# NOTE(review): exec.args is a single string that the exec plugin presumably
# splits on whitespace, so paths containing spaces are likely still
# unsupported here -- confirm against the plugin documentation.
mvn exec:java \
  -Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
  -Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_folder}"