From 6f849a14df98ef2ae54613819204004af59553a4 Mon Sep 17 00:00:00 2001
From: Katarina Supe <61758502+katarinasupe@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:04:36 +0100
Subject: [PATCH] Update cypherl transform script (#1701)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update cypherl transform script

* Add new script and fix typo

* Add convert to separate files script

---------

Co-authored-by: Marko Budiselić
---
 import/n2mg_cypherl.sh                 |  8 +++-
 import/n2mg_separate_files_cypherl.sh  | 73 +++++++++++++++++++++++++++++
 import/n2mg_separate_files_cypherls.sh | 78 +++++++++++++++++++++++++++++++
 3 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100755 import/n2mg_separate_files_cypherl.sh
 create mode 100755 import/n2mg_separate_files_cypherls.sh

diff --git a/import/n2mg_cypherl.sh b/import/n2mg_cypherl.sh
index b11f5d3e3..2605fc6c7 100755
--- a/import/n2mg_cypherl.sh
+++ b/import/n2mg_cypherl.sh
@@ -20,14 +20,18 @@ if [ ! -f "$INPUT" ]; then
 fi
 
 echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. row)"
-echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create consraints manually if needed${COLOR_NULL}"
+echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}"
+
+echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT"
 
 sed -e 's/^:begin/BEGIN/g; s/^BEGIN$/BEGIN;/g;' \
     -e 's/^:commit/COMMIT/g; s/^COMMIT$/COMMIT;/g;' \
     -e '/^CALL/d; /^SCHEMA AWAIT/d;' \
     -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \
     -e 's/) ON (n./(/g;' \
-    -e '/^CREATE CONSTRAINT/d; /^DROP CONSTRAINT/d;' "$INPUT" > "$OUTPUT"
+    -e '/^CREATE CONSTRAINT/d; /^DROP CONSTRAINT/d;' "$INPUT" >> "$OUTPUT"
+
+echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT"
 
 echo ""
 echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher file under $OUTPUT"
diff --git a/import/n2mg_separate_files_cypherl.sh b/import/n2mg_separate_files_cypherl.sh
new file mode 100755
index 000000000..98049f193
--- /dev/null
+++ b/import/n2mg_separate_files_cypherl.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Merges four Cypher input files (schema, nodes, relationships, cleanup) into
+# one Memgraph compatible output file:
+#   * schema: RANGE index statements are rewritten to the CREATE INDEX ON
+#     syntax and CREATE CONSTRAINT lines are removed;
+#   * nodes and relationships: appended unchanged;
+#   * cleanup: DROP CONSTRAINT lines are removed.
+# The output starts by creating and ends by dropping the index on
+# :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`).
+set -e
+
+COLOR_ORANGE="\e[38;5;208m"
+COLOR_GREEN="\e[38;5;35m"
+COLOR_RED="\e[0;31m"
+COLOR_NULL="\e[0m"
+
+# Prints the expected command line and exits with status 1.
+print_help() {
+    echo -e "${COLOR_ORANGE}HOW TO RUN:${COLOR_NULL} $0 input_file_schema_path input_file_nodes_path input_file_relationships_path input_file_cleanup_path output_file_path"
+    exit 1
+}
+
+if [ "$#" -ne 5 ]; then
+    print_help
+fi
+INPUT_SCHEMA="$1"
+INPUT_NODES="$2"
+INPUT_RELATIONSHIPS="$3"
+INPUT_CLEANUP="$4"
+OUTPUT="$5"
+
+if [ ! -f "$INPUT_SCHEMA" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_schema_path '$INPUT_SCHEMA' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_NODES" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_nodes_path '$INPUT_NODES' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_RELATIONSHIPS" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_relationships_path '$INPUT_RELATIONSHIPS' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_CLEANUP" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_cleanup_path '$INPUT_CLEANUP' is not a file!"
+    print_help
+fi
+
+echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. row)"
+echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}"
+
+
+echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT"
+
+sed -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \
+    -e 's/) ON (n./(/g;' \
+    -e '/^CREATE CONSTRAINT/d' "$INPUT_SCHEMA" >> "$OUTPUT"
+
+cat "$INPUT_NODES" >> "$OUTPUT"
+cat "$INPUT_RELATIONSHIPS" >> "$OUTPUT"
+
+sed -e '/^DROP CONSTRAINT/d' "$INPUT_CLEANUP" >> "$OUTPUT"
+
+echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT"
+
+echo ""
+echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher file under $OUTPUT"
+echo ""
+echo "Please import data by executing => \`cat $OUTPUT | mgconsole\`"
diff --git a/import/n2mg_separate_files_cypherls.sh b/import/n2mg_separate_files_cypherls.sh
new file mode 100755
index 000000000..5b9057e24
--- /dev/null
+++ b/import/n2mg_separate_files_cypherls.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# Converts four Cypher input files (schema, nodes, relationships, cleanup) into
+# four Memgraph compatible output files, one per input:
+#   * schema: starts with a CREATE INDEX on
+#     :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`); RANGE index statements are
+#     rewritten to the CREATE INDEX ON syntax and CREATE CONSTRAINT lines are
+#     removed;
+#   * nodes and relationships: copied unchanged;
+#   * cleanup: DROP CONSTRAINT lines are removed and the index created in the
+#     schema file is dropped at the end.
+set -e
+
+COLOR_ORANGE="\e[38;5;208m"
+COLOR_GREEN="\e[38;5;35m"
+COLOR_RED="\e[0;31m"
+COLOR_NULL="\e[0m"
+
+# Prints the expected command line and exits with status 1.
+print_help() {
+    echo -e "${COLOR_ORANGE}HOW TO RUN:${COLOR_NULL} $0 input_file_schema_path input_file_nodes_path input_file_relationships_path input_file_cleanup_path output_file_schema_path output_file_nodes_path output_file_relationships_path output_file_cleanup_path"
+    exit 1
+}
+
+if [ "$#" -ne 8 ]; then
+    print_help
+fi
+INPUT_SCHEMA="$1"
+INPUT_NODES="$2"
+INPUT_RELATIONSHIPS="$3"
+INPUT_CLEANUP="$4"
+OUTPUT_SCHEMA="$5"
+OUTPUT_NODES="$6"
+OUTPUT_RELATIONSHIPS="$7"
+OUTPUT_CLEANUP="$8"
+
+if [ ! -f "$INPUT_SCHEMA" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_schema_path '$INPUT_SCHEMA' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_NODES" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_nodes_path '$INPUT_NODES' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_RELATIONSHIPS" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_relationships_path '$INPUT_RELATIONSHIPS' is not a file!"
+    print_help
+fi
+
+if [ ! -f "$INPUT_CLEANUP" ]; then
+    echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_cleanup_path '$INPUT_CLEANUP' is not a file!"
+    print_help
+fi
+
+echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. row)"
+echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}"
+
+
+echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT_SCHEMA"
+
+sed -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \
+    -e 's/) ON (n./(/g;' \
+    -e '/^CREATE CONSTRAINT/d' "$INPUT_SCHEMA" >> "$OUTPUT_SCHEMA"
+
+cat "$INPUT_NODES" > "$OUTPUT_NODES"
+cat "$INPUT_RELATIONSHIPS" > "$OUTPUT_RELATIONSHIPS"
+
+# Truncate (not append) so statements from a pre-existing cleanup file are not kept.
+sed -e '/^DROP CONSTRAINT/d' "$INPUT_CLEANUP" > "$OUTPUT_CLEANUP"
+
+echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT_CLEANUP"
+
+echo ""
+echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher files under $OUTPUT_SCHEMA, $OUTPUT_NODES, $OUTPUT_RELATIONSHIPS and $OUTPUT_CLEANUP"
+echo ""
+echo "Please import data by executing => \`cat $OUTPUT_SCHEMA | mgconsole\`, \`cat $OUTPUT_NODES | mgconsole\`, \`cat $OUTPUT_RELATIONSHIPS | mgconsole\` and \`cat $OUTPUT_CLEANUP | mgconsole\`"