#!/bin/bash

# Generate the LDBC SNB dataset and convert it to Neo4j CSV format.

#######################################
# Print the usage text for this script.
# Globals:   none
# Arguments: none
# Outputs:   usage text on stdout
#######################################
function print_help () {
    printf '%s\n' "Usage: $0 [OPTION]"
    printf '%s\n' "Optional arguments:"
    printf '%s\n' " -h|--help -> Prints help."
    printf '%s\n' " --scale-factor Positive_Integer -> Defines the dataset size."
}

# Abort on the first failing command.
set -e

# Absolute path of the directory that contains this script; the other paths
# used by the script are derived from it.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Read the arguments.
scale_factor=1

#######################################
# Parse the command-line options.
# Globals:   scale_factor (written)
# Arguments: the script's positional parameters
# Outputs:   help text on stdout for -h|--help, an error on stderr when
#            --scale-factor has no value
# Returns:   exits with status 1 after printing help or on a missing value
#######################################
function parse_args () {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                print_help
                exit 1
                ;;
            --scale-factor)
                # Without this check the extra shift below would fail under
                # set -e and terminate the script without any explanation.
                if [[ $# -lt 2 ]]; then
                    echo "Error: --scale-factor requires a value." >&2
                    exit 1
                fi
                scale_factor=$2
                shift # past the value
                ;;
            *)
                # unknown option
                ;;
        esac
        shift # past argument or value
    done
}

parse_args "$@"

echo "Using scale_factor" "$scale_factor"

# Prepare the folder structure: one output folder per scale factor, created
# next to this script and emptied before each run.
dataset_folder_prefix="neo4j_csv_dataset"
# ${script_dir:?} aborts if script_dir is unset or empty, so the paths below
# can never resolve relative to the filesystem root.
dataset_folder="${script_dir:?}/${dataset_folder_prefix}_scale_${scale_factor}"
mkdir -p -- "${dataset_folder}"
# The :? guard prevents this from ever expanding to "rm -rf /*".
rm -rf -- "${dataset_folder:?}"/*

# Define scale factor. The heredoc delimiter is unquoted on purpose so that
# ${scale_factor} is expanded into the generated params.ini.
cat > "${script_dir}/ldbc_snb_datagen/params.ini" <<EOF
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.${scale_factor}
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
EOF

# Generate the dataset by running the datagen's run.sh from its own folder.
ldbc_snb_datagen_folder="${script_dir}/ldbc_snb_datagen"
cd "${ldbc_snb_datagen_folder}"

# Append the JVM heap limit to whatever HADOOP_OPTS the caller already set.
export HADOOP_OPTS="$HADOOP_OPTS -Xmx10240M"

# Pick the first JRE location that exists; give up if neither is present.
if [[ -d "/usr/lib/jvm/default-java/jre" ]]; then
    export JAVA_HOME=/usr/lib/jvm/default-java/jre
elif [[ -d "/usr/lib/jvm/default-runtime/" ]]; then
    export JAVA_HOME=/usr/lib/jvm/default-runtime/
else
    echo "Unable to find JRE under /usr/lib/jvm" >&2
    exit 1
fi
echo "Using JAVA_HOME" "$JAVA_HOME"

# HADOOP_HOME and LDBC_SNB_DATAGEN_HOME are set for this one command only.
HADOOP_HOME=/usr/local/hadoop LDBC_SNB_DATAGEN_HOME="${ldbc_snb_datagen_folder}" ./run.sh

# Transform the dataset into Neo4j CSV format. The converter is given two
# arguments via exec.args: the datagen's social_network folder and the
# prepared dataset folder.
cd "${script_dir}/ldbc-snb-impls/snb-interactive-neo4j"
mvn exec:java \
    -Dexec.mainClass="net.ellitron.ldbcsnbimpls.interactive.neo4j.util.DataFormatConverter" \
    -Dexec.args="${ldbc_snb_datagen_folder}/social_network ${dataset_folder}"