diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index f5814d958..1b8e36810 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -266,12 +266,11 @@ jobs: - name: Run e2e tests run: | - # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . + ./run.sh - name: Run stress test (plain) run: | @@ -293,7 +292,6 @@ jobs: run: | # Activate toolchain. source /opt/toolchain-v4/activate - cd build # create mgconsole @@ -340,10 +338,8 @@ jobs: run: | # Activate toolchain. source /opt/toolchain-v4/activate - # Initialize dependencies. ./init - # Build only memgraph release binarie. cd build cmake -DCMAKE_BUILD_TYPE=release .. @@ -352,7 +348,7 @@ jobs: - name: Run Jepsen tests run: | cd tests/jepsen - ./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs + ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs - name: Save Jepsen report uses: actions/upload-artifact@v3 diff --git a/.github/workflows/release_centos8.yaml b/.github/workflows/release_centos8.yaml index a80d72a0e..7096809e7 100644 --- a/.github/workflows/release_centos8.yaml +++ b/.github/workflows/release_centos8.yaml @@ -265,12 +265,11 @@ jobs: - name: Run e2e tests run: | - # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . 
+ ./run.sh - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index faf868e21..b2c514630 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -264,12 +264,11 @@ jobs: - name: Run e2e tests run: | - # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . + ./run.sh - name: Run stress test (plain) run: | @@ -319,10 +318,8 @@ jobs: run: | # Activate toolchain. source /opt/toolchain-v4/activate - # Initialize dependencies. ./init - # Build only memgraph release binary. cd build cmake -DCMAKE_BUILD_TYPE=release .. @@ -331,7 +328,7 @@ jobs: - name: Run Jepsen tests run: | cd tests/jepsen - ./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs + ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs - name: Save Jepsen report uses: actions/upload-artifact@v3 diff --git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index 18b9a8326..735d4d81c 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -264,12 +264,11 @@ jobs: - name: Run e2e tests run: | - # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . 
+ ./run.sh - name: Run stress test (plain) run: | diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 4d68afa61..d5e4d46f4 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -261,6 +261,8 @@ DEFINE_double(query_execution_timeout_sec, 600, DEFINE_uint64(replication_replica_check_frequency_sec, 1, "The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: " "The MAIN instance allocates a new thread for each REPLICA."); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_bool(replication_restore_state_on_startup, false, "Restore replication state on startup, e.g. recover replica"); // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) DEFINE_uint64( @@ -891,7 +893,7 @@ int main(int argc, char **argv) { .wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib, .wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx, .snapshot_on_exit = FLAGS_storage_snapshot_on_exit, - .restore_replication_state_on_startup = true, + .restore_replication_state_on_startup = FLAGS_replication_restore_state_on_startup, .items_per_batch = FLAGS_storage_items_per_batch, .recovery_thread_count = FLAGS_storage_recovery_thread_count, .allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery}, diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index 159fff10c..d31d78894 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -162,7 +162,7 @@ InMemoryStorage::InMemoryStorage(Config config) } } else { spdlog::warn( - "Replicastion configuration will NOT be stored. When the server restarts, replication state will be " + "Replication configuration will NOT be stored. 
When the server restarts, replication state will be " "forgotten."); } diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index c100e2f39..8cbd57136 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -350,7 +350,7 @@ uint64_t InMemoryStorage::ReplicationClient::ReplicateCurrentWal() { /// transactions while Snapshots contain all the data. For that reason we prefer /// WALs as much as possible. As the WAL file that is currently being updated /// can change during the process we ignore it as much as possible. Also, it -/// uses the transaction lock so lokcing it can be really expensive. After we +/// uses the transaction lock so locking it can be really expensive. After we /// fetch the list of finalized WALs, we try to find the longest chain of /// sequential WALs, starting from the latest one, that will update the recovery /// with the all missed updates. If the WAL chain cannot be created, replica is diff --git a/tests/e2e/README.md b/tests/e2e/README.md new file mode 100644 index 000000000..c401402e1 --- /dev/null +++ b/tests/e2e/README.md @@ -0,0 +1,13 @@ +# tests/e2e + +Framework to run end-to-end tests against Memgraph. + +## Notes + +* If you change something under this directory and below (even a Python + script), `make` has to be run again because all tests are copied to the build + directory and executed from there. 
+* Use/extend `run.sh` if you run any e2e tests: + * if all tests have to be executed, use `run.sh` + * if a suite of tests has to be executed, take a look at `run.sh` to see how to do so + * if only a single test has to be executed, take a look at each individual binary/script; it's possible to manually pick the test diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index c5e767569..d2c55ae1d 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -187,4 +187,9 @@ startup_config_dict = { "Path to cypherl file that is used for configuring users and database schema before server starts.", ), "init_data_file": ("", "", "Path to cypherl file that is used for creating data after server starts."), + "replication_restore_state_on_startup": ( + "false", + "false", + "Restore replication state on startup, e.g. recover replica", + ), } diff --git a/tests/e2e/interactive_mg_runner.py b/tests/e2e/interactive_mg_runner.py index 762397a41..646e8e785 100644 --- a/tests/e2e/interactive_mg_runner.py +++ b/tests/e2e/interactive_mg_runner.py @@ -33,13 +33,11 @@ import atexit import logging import os -import subprocess import sys import tempfile import time from argparse import ArgumentParser from inspect import signature -from pathlib import Path import yaml @@ -77,9 +75,9 @@ ACTIONS = { "info": lambda context: info(context), "stop": lambda context, name: stop(context, name), "start": lambda context, name: start(context, name), - "sleep": lambda context, delta: time.sleep(float(delta)), - "exit": lambda context: sys.exit(1), - "quit": lambda context: sys.exit(1), + "sleep": lambda _, delta: time.sleep(float(delta)), + "exit": lambda _: sys.exit(1), + "quit": lambda _: sys.exit(1), } log = logging.getLogger("memgraph.tests.e2e") diff --git a/tests/e2e/memgraph.py b/tests/e2e/memgraph.py index 8416c629d..2c7546292 100755 --- a/tests/e2e/memgraph.py +++ b/tests/e2e/memgraph.py @@ -13,7 +13,6 @@ 
import copy import os import subprocess import sys -import tempfile import time import mgclient diff --git a/tests/e2e/replication/show_while_creating_invalid_state.py b/tests/e2e/replication/show_while_creating_invalid_state.py index 6bd36dc83..96088375d 100644 --- a/tests/e2e/replication/show_while_creating_invalid_state.py +++ b/tests/e2e/replication/show_while_creating_invalid_state.py @@ -147,27 +147,33 @@ def test_basic_recovery(connection): data_directory = tempfile.TemporaryDirectory() CONFIGURATION = { "replica_1": { - "args": ["--bolt-port", "7688", "--log-level=TRACE"], + "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica1.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], }, "replica_2": { - "args": ["--bolt-port", "7689", "--log-level=TRACE"], + "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica2.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "replica_3": { - "args": ["--bolt-port", "7690", "--log-level=TRACE"], + "args": ["--bolt-port", "7690", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica3.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"], }, "replica_4": { - "args": ["--bolt-port", "7691", "--log-level=TRACE"], + "args": ["--bolt-port", "7691", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica4.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10004;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--storage-recover-on-startup=true", + "--replication-restore-state-on-startup=true", + ], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory.name}", @@ -359,13 +365,19 @@ def 
test_replication_role_recovery(connection): data_directory = tempfile.TemporaryDirectory() CONFIGURATION = { "replica": { - "args": ["--bolt-port", "7688", "--log-level=TRACE"], + "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], "data_directory": f"{data_directory.name}/replica", }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--storage-recover-on-startup=true", + "--replication-restore-state-on-startup=true", + ], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory.name}/main", @@ -381,13 +393,19 @@ def test_replication_role_recovery(connection): # When we restart the replica, it does not need this query anymore since it needs to remember state CONFIGURATION = { "replica": { - "args": ["--bolt-port", "7688", "--log-level=TRACE"], + "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica.log", "setup_queries": [], "data_directory": f"{data_directory.name}/replica", }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--storage-recover-on-startup=true", + "--replication-restore-state-on-startup=true", + ], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory.name}/main", @@ -511,17 +529,23 @@ def test_basic_recovery_when_replica_is_kill_when_main_is_down(): data_directory = tempfile.TemporaryDirectory() CONFIGURATION = { "replica_1": { - "args": ["--bolt-port", "7688", "--log-level=TRACE"], + "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica1.log", "setup_queries": ["SET REPLICATION ROLE TO 
REPLICA WITH PORT 10001;"], }, "replica_2": { - "args": ["--bolt-port", "7689", "--log-level=TRACE"], + "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"], "log_file": "replica2.log", "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--storage-recover-on-startup=true", + "--replication-restore-state-on-startup=true", + ], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory.name}", diff --git a/tests/e2e/run.sh b/tests/e2e/run.sh new file mode 100755 index 000000000..c3611b2bf --- /dev/null +++ b/tests/e2e/run.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# TODO(gitbuda): Setup mgclient and pymgclient properly. +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib + +print_help() { + echo -e "$0 ["workload name string"]" + echo -e "" + echo -e " NOTE: some tests require enterprise licence key," + echo -e " to run those define the folowing env vars:" + echo -e " * MEMGRAPH_ORGANIZATION_NAME" + echo -e " * MEMGRAPH_ENTERPRISE_LICENSE" + exit 1 +} +check_license() { + if [ ! -v MEMGRAPH_ORGANIZATION_NAME ] || [ ! -v MEMGRAPH_ENTERPRISE_LICENSE ]; then + echo "NOTE: MEMGRAPH_ORGANIZATION_NAME or MEMGRAPH_ENTERPRISE_LICENSE NOT defined -> dependent tests will NOT work" + fi +} + +if [ "$#" -eq 0 ]; then + check_license + # NOTE: If you want to run all tests under specific folder/section just + # replace the dot (root directory below) with the folder name, e.g. + # `--workloads-root-directory replication`. + python3 runner.py --workloads-root-directory . +elif [ "$#" -eq 1 ]; then + if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then + print_help + fi + check_license + # NOTE: --workload-name comes from each individual folder/section + # workloads.yaml file. E.g. 
`streams/workloads.yaml` has a list of + # `workloads:` and each workload has it's `-name`. + python3 runner.py --workloads-root-directory . --workload-name "$1" +else + print_help +fi diff --git a/tests/e2e/run_e2e.sh b/tests/e2e/run_e2e.sh deleted file mode 100644 index 55c376941..000000000 --- a/tests/e2e/run_e2e.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -# TODO: andi as a side project -python3 runner.py --workloads-root-directory disk_storage diff --git a/tests/e2e/transaction_queue/workloads.yaml b/tests/e2e/transaction_queue/workloads.yaml index d73c2ad13..b5f15facf 100644 --- a/tests/e2e/transaction_queue/workloads.yaml +++ b/tests/e2e/transaction_queue/workloads.yaml @@ -5,14 +5,6 @@ test_transaction_queue: &test_transaction_queue log_file: "transaction_queue.log" setup_queries: [] validation_queries: [] -disk_test_transaction_queue: &disk_test_transaction_queue - cluster: - main: - args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr"] - log_file: "transaction_queue.log" - setup_queries: ["STORAGE MODE ON_DISK_TRANSACTIONAL"] - validation_queries: [] - workloads: - name: "test-transaction-queue" # should be the same as the python file @@ -20,8 +12,3 @@ workloads: proc: "tests/e2e/transaction_queue/procedures/" args: ["transaction_queue/test_transaction_queue.py"] <<: *test_transaction_queue - - name: "test-transaction-queue on disk" # should be the same as the python file - binary: "tests/e2e/pytest_runner.sh" - proc: "tests/e2e/transaction_queue/procedures/" - args: ["transaction_queue/test_transaction_queue.py"] - <<: *disk_test_transaction_queue diff --git a/tests/jepsen/README.md b/tests/jepsen/README.md index 292e4964a..110397a5c 100644 --- a/tests/jepsen/README.md +++ b/tests/jepsen/README.md @@ -2,3 +2,31 @@ NOTE: Jepsen can only connect to the SSH server on the default 22 port. `--node` flag only takes the actual address (:port doesn't work). 
+ +Jepsen run under CI: +``` +cd tests/jepsen +./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs +``` + +Local run of each test (including setup): +``` +cd tests/jepsen +./run.sh cluster-up +docker exec -it jepsen-control bash +cd memgraph +lein run test --workload bank --node-configs resources/node-config.edn +lein run test --workload large --node-configs resources/node-config.edn +``` + +Logs are located under `jepsen-control:/jepsen/memgraph/store`. + +If you set up the cluster manually, go to the jepsen-control Docker container and ssh to each cluster node to save its host key in known_hosts. +``` +docker exec -it jepsen-control bash +ssh n1 -> yes -> exit +ssh n2 -> yes -> exit +ssh n3 -> yes -> exit +ssh n4 -> yes -> exit +ssh n5 -> yes -> exit +``` diff --git a/tests/jepsen/jepsen_0.3.0.patch b/tests/jepsen/jepsen_0.3.0.patch new file mode 100644 index 000000000..be47cc8b4 --- /dev/null +++ b/tests/jepsen/jepsen_0.3.0.patch @@ -0,0 +1,13 @@ +diff --git a/docker/control/Dockerfile b/docker/control/Dockerfile +index 6b2d3c0e..195a7a60 100644 +--- a/docker/control/Dockerfile ++++ b/docker/control/Dockerfile +@@ -7,7 +7,7 @@ ENV LEIN_ROOT true + # Jepsen dependencies + # + RUN apt-get -y -q update && \ +- apt-get install -qy openjdk-17-jdk-headless \ ++ apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \ + libjna-java \ + vim \ + emacs \ diff --git a/tests/jepsen/project.clj b/tests/jepsen/project.clj index f07bda16f..506bcff52 100644 --- a/tests/jepsen/project.clj +++ b/tests/jepsen/project.clj @@ -5,7 +5,10 @@ :url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"} :main jepsen.memgraph.core :dependencies [[org.clojure/clojure "1.10.0"] - [jepsen "0.2.1-SNAPSHOT"] + ;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions + [jepsen "0.2.4-SNAPSHOT"] [gorillalabs/neo4j-clj "4.1.0"]] :profiles 
{:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}} + ;; required to run 0.3.0 + ; :aot :all :repl-options {:init-ns jepsen.memgraph.core}) diff --git a/tests/jepsen/run.sh b/tests/jepsen/run.sh index f0764c327..acd115499 100755 --- a/tests/jepsen/run.sh +++ b/tests/jepsen/run.sh @@ -1,16 +1,18 @@ #!/bin/bash - set -Eeuo pipefail script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" MEMGRAPH_BINARY_PATH="../../build/memgraph" -# NOTE: On Ubuntu 22.04 0.3.2 uses non-existing docker compose --compatibility flag. -# NOTE: On Ubuntu 22.04 0.3.1 seems to be working. -JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.0}" +# NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ... +# NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag. +# NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seem to be runnable. +# TODO(gitbuda): Make sure Memgraph can be tested with Jepsen >= 0.3.0 +JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}" JEPSEN_ACTIVE_NODES_NO=5 CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn" CONTROL_LEIN_RUN_STDOUT_LOGS=1 CONTROL_LEIN_RUN_STDERR_LOGS=1 +_JEPSEN_RUN_EXIT_STATUS=0 PRINT_CONTEXT() { echo -e "MEMGRAPH_BINARY_PATH:\t\t $MEMGRAPH_BINARY_PATH" echo -e "JEPSEN_VERSION:\t\t\t $JEPSEN_VERSION" @@ -22,7 +24,7 @@ PRINT_CONTEXT() { HELP_EXIT() { echo "" - echo "HELP: $0 help|cluster-up|test [args]" + echo "HELP: $0 help|cluster-up|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]" echo "" echo " test args --binary MEMGRAPH_BINARY_PATH" echo " --ignore-run-stdout-logs Ignore lein run stdout logs." @@ -45,153 +47,235 @@ if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/nu ERROR "docker and docker-compose have to be installed." exit 1 fi -PRINT_CONTEXT if [ ! 
-d "$script_dir/jepsen" ]; then git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen" + if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then + if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then + cd "$script_dir/jepsen" + git apply "$script_dir/jepsen_0.3.0.patch" + cd "$script_dir" + fi + fi fi if [ "$#" -lt 1 ]; then HELP_EXIT fi +PROCESS_ARGS() { + shift + while [[ $# -gt 0 ]]; do + key="$1" + case $key in + --binary) + shift + MEMGRAPH_BINARY_PATH="$1" + shift + ;; + --ignore-run-stdout-logs) + CONTROL_LEIN_RUN_STDOUT_LOGS=0 + shift + ;; + --ignore-run-stderr-logs) + CONTROL_LEIN_RUN_STDERR_LOGS=0 + shift + ;; + --nodes-no) + shift + JEPSEN_ACTIVE_NODES_NO="$1" + shift + ;; + --run-args) + shift + CONTROL_LEIN_RUN_ARGS="$1" + shift + ;; + *) + ERROR "Unknown option $1." + HELP_EXIT + ;; + esac + done +} + +COPY_BINARIES() { + # Copy Memgraph binary, handles both cases, when binary is a sym link + # or a regular file. + binary_path="$MEMGRAPH_BINARY_PATH" + if [ -L "$binary_path" ]; then + binary_path=$(readlink "$binary_path") + fi + binary_name=$(basename -- "$binary_path") + for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do + jepsen_node_name="jepsen-n$iter" + docker_exec="docker exec $jepsen_node_name bash -c" + if [ "$binary_name" == "memgraph" ]; then + _binary_name="memgraph_tmp" + else + _binary_name="$binary_name" + fi + $docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph" + docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name" + $docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph" + $docker_exec "touch /opt/memgraph/memgraph.log" + INFO "Copying $binary_name to $jepsen_node_name DONE." + done + # Copy test files into the control node. + docker exec jepsen-control mkdir -p /jepsen/memgraph/store + docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/ + docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/ + docker cp "$script_dir/resources/." 
jepsen-control:/jepsen/memgraph/resources/ + docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj + INFO "Copying test files to jepsen-control DONE." +} + +RUN_JEPSEN() { + __control_lein_run_args="$1" + # NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY. + # NOTE: ~/.bashrc has to be manually sourced when bash -c is used + # because some Jepsen config is there. + # To be able to archive the run result even if the run fails. + set +e + if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then + redirect_stdout_logs="/dev/null" + else + redirect_stdout_logs="/dev/stdout" + fi + if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then + redirect_stderr_logs="/dev/null" + else + redirect_stderr_logs="/dev/stderr" + fi + docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $__control_lein_run_args" 1> $redirect_stdout_logs 2> $redirect_stderr_logs + _JEPSEN_RUN_EXIT_STATUS=$? + set -e +} + +PROCESS_RESULTS() { + start_time="$1" + end_time="$2" + INFO "Process results..." + # Print and pack all test workload runs between start and end time. + all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-) + all_workload_run_folders="" + for workload in $all_workloads; do + for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do + if [[ "$time_folder" == "latest" ]]; then + continue + fi + # The early continue pattern here is nice because bash doesn't + # have >= for the string comparison (marginal values). + if [[ "$time_folder" < "$start_time" ]]; then + continue + fi + if [[ "$time_folder" > "$end_time" ]]; then + continue + fi + INFO "jepsen.log for $workload/$time_folder" + docker exec jepsen-control bash -c "tail -n 50 /jepsen/memgraph/store/$workload/$time_folder/jepsen.log" + all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder" + done + done + INFO "Packing results..." 
+ docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders" + docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./ + INFO "Result processing (printing and packing) DONE." +} + # Initialize testing context by copying source/binary files. Inside CI, # Memgraph is tested on a single machine cluster based on Docker containers. # Once these tests will be part of the official Jepsen repo, the majority of # functionalities inside this script won't be needed because each node clones # the public repo. case $1 in - help) - HELP_EXIT - ;; # Start Jepsen Docker cluster of 5 nodes. To configure the cluster please # take a look under jepsen/docker/docker-compose.yml. # NOTE: If you delete the jepsen folder where docker config is located, # the current cluster is broken because it relies on the folder. That can # happen easiliy because the jepsen folder is git ignored. cluster-up) + PRINT_CONTEXT "$script_dir/jepsen/docker/bin/up" --daemon ;; - # Run tests against the specified Memgraph binary. - test) - shift - while [[ $# -gt 0 ]]; do - key="$1" - case $key in - --binary) - shift - MEMGRAPH_BINARY_PATH="$1" - shift - ;; - --ignore-run-stdout-logs) - CONTROL_LEIN_RUN_STDOUT_LOGS=0 - shift - ;; - --ignore-run-stderr-logs) - CONTROL_LEIN_RUN_STDERR_LOGS=0 - shift - ;; - --nodes-no) - shift - JEPSEN_ACTIVE_NODES_NO="$1" - shift - ;; - --run-args) - shift - CONTROL_LEIN_RUN_ARGS="$1" - shift - ;; - *) - ERROR "Unknown option $1." - HELP_EXIT - ;; - esac - done - # Copy Memgraph binary, handles both cases, when binary is a sym link - # or a regular file. 
- binary_path="$MEMGRAPH_BINARY_PATH" - if [ -L "$binary_path" ]; then - binary_path=$(readlink "$binary_path") - fi - binary_name=$(basename -- "$binary_path") + cluster-cleanup) + jepsen_control_exec="docker exec jepsen-control bash -c" + INFO "Deleting /jepsen/memgraph/store/* on jepsen-control" + $jepsen_control_exec "rm -rf /jepsen/memgraph/store/*" for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do jepsen_node_name="jepsen-n$iter" - docker_exec="docker exec $jepsen_node_name bash -c" - if [ "$binary_name" == "memgraph" ]; then - _binary_name="memgraph_tmp" - else - _binary_name="$binary_name" - fi - $docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph" - docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name" - $docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph" - $docker_exec "touch /opt/memgraph/memgraph.log" - INFO "Copying $binary_name to $jepsen_node_name DONE." + jepsen_node_exec="docker exec $jepsen_node_name bash -c" + INFO "Deleting /opt/memgraph/* on $jepsen_node_name" + $jepsen_node_exec "rm -rf /opt/memgraph/*" done + ;; - # Copy test files into the control node. - docker exec jepsen-control mkdir -p /jepsen/memgraph - docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/ - docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/ - docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/ - docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj - INFO "Copying test files to jepsen-control DONE." + cluster-dealloc) + ps=$(docker ps --filter name=jepsen* --filter status=running -q) + if [[ ! -z ${ps} ]]; then + echo "Killing ${ps}" + docker rm -f ${ps} + imgs=$(docker images "jepsen*" -q) + if [[ ! -z ${imgs} ]]; then + echo "Removing ${imgs}" + docker images "jepsen*" -q | xargs docker image rmi -f + else + echo "No Jepsen images detected!" + fi + else + echo "No Jepsen containers detected!" 
+ fi + ;; + mgbuild) + PRINT_CONTEXT + echo "" + echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder" + exit 1 + ;; + + test) + PROCESS_ARGS "$@" + PRINT_CONTEXT + COPY_BINARIES start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z" - - # Run the test. - # NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY. - # NOTE: ~/.bashrc has to be manually sourced when bash -c is used - # because some Jepsen config is there. - set +e - if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then - redirect_stdout_logs="/dev/null" - else - redirect_stdout_logs="/dev/stdout" - fi - if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then - redirect_stderr_logs="/dev/null" - else - redirect_stderr_logs="/dev/stderr" - fi INFO "Jepsen run in progress... START_TIME: $start_time" - docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $CONTROL_LEIN_RUN_ARGS" 1> $redirect_stdout_logs 2> $redirect_stderr_logs - # To be able to archive the run result even if the run fails. - jepsen_run_exit_status=$? + RUN_JEPSEN "$CONTROL_LEIN_RUN_ARGS" end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z" INFO "Jepsen run DONE. END_TIME: $end_time" - set -e - - # Pack all test workload runs between start and end time. - all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-) - all_workload_run_folders="" - for workload in $all_workloads; do - for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do - if [[ "$time_folder" == "latest" ]]; then - continue - fi - # The early continue pattern here is nice because bash doesn't - # have >= for the string comparison (marginal values). 
- if [[ "$time_folder" < "$start_time" ]]; then - continue - fi - if [[ "$time_folder" > "$end_time" ]]; then - continue - fi - all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder" - done - done - docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders" - docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./ - INFO "Test and results packing DONE." - - # If the run has failed, this script also has to return non-zero status. - if [ "$jepsen_run_exit_status" -ne 0 ]; then - exit "$jepsen_run_exit_status" + PROCESS_RESULTS "$start_time" "$end_time" + # Exit if the jepsen run status is not 0 + if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then + ERROR "Jepsen FAILED" # important for the coder + exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI fi ;; + + test-all-individually) + PROCESS_ARGS "$@" + PRINT_CONTEXT + INFO "NOTE: CONTROL_LEIN_RUN_ARGS ignored" + COPY_BINARIES + start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z" + INFO "Jepsen run in progress... START_TIME: $start_time" + for workload in "bank" "large"; do + RUN_JEPSEN "test --workload $workload --node-configs resources/node-config.edn" + if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then + break + fi + done + end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z" + INFO "Jepsen run DONE. 
END_TIME: $end_time" + PROCESS_RESULTS "$start_time" "$end_time" + # Exit if the jepsen run status is not 0 + if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then + ERROR "Jepsen FAILED" # important for the coder + exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI + fi + ;; + *) - HELP_EXIT + HELP_EXIT ;; esac diff --git a/tests/jepsen/src/jepsen/memgraph/core.clj b/tests/jepsen/src/jepsen/memgraph/core.clj index 816cb19d6..0209556d9 100644 --- a/tests/jepsen/src/jepsen/memgraph/core.clj +++ b/tests/jepsen/src/jepsen/memgraph/core.clj @@ -146,9 +146,23 @@ ["-w" "--workload NAME" "Test workload to run" :parse-fn keyword :validate [workloads (cli/one-of workloads)]] - [nil "--node-configs PATH" "Path to the node configuration file." + [nil "--node-configs PATH" "Path to a file containing a list of node config." :parse-fn #(-> % e/load-configuration)]]) +(defn single-test + "Takes base CLI options and constructs a single test." + [opts] + (let [workload (if (:workload opts) + (:workload opts) + (throw (Exception. "Workload undefined"))) + node-config (if (:node-configs opts) + (first (merge-node-configurations (:nodes opts) (list (first (:node-configs opts))))) + (throw (Exception. "Node configs undefined"))) + test-opts (assoc opts + :node-config node-config + :workload workload)] + (memgraph-test test-opts))) + (defn all-tests "Takes base CLI options and constructs a sequence of test options." [opts] @@ -169,7 +183,7 @@ [& args] (cli/run! 
(merge (cli/test-all-cmd {:tests-fn all-tests :opt-spec cli-opts}) - (cli/single-test-cmd {:test-fn memgraph-test + (cli/single-test-cmd {:test-fn single-test :opt-spec cli-opts}) (cli/serve-cmd)) args)) diff --git a/tests/jepsen/src/jepsen/memgraph/nemesis.clj b/tests/jepsen/src/jepsen/memgraph/nemesis.clj index 534f4e134..4b77abf15 100644 --- a/tests/jepsen/src/jepsen/memgraph/nemesis.clj +++ b/tests/jepsen/src/jepsen/memgraph/nemesis.clj @@ -1,7 +1,7 @@ (ns jepsen.memgraph.nemesis "Memgraph nemesis" (:require [jepsen [nemesis :as nemesis] - [generator :as gen]] + [generator :as gen]] [jepsen.memgraph.support :as s])) (defn node-killer @@ -16,10 +16,10 @@ "Can kill and restart all processess and initiate network partitions." [opts] (nemesis/compose - {{:kill-node :start - :restart-node :stop} (node-killer) - {:start-partition-halves :start - :stop-partition-halves :stop} (nemesis/partition-random-halves)})) + {{:kill-node :start + :restart-node :stop} (node-killer) + {:start-partition-halves :start + :stop-partition-halves :stop} (nemesis/partition-random-halves)})) (defn op "Construct a nemesis op" @@ -36,7 +36,7 @@ (apply concat) gen/mix (gen/stagger (:interval opts)) - (gen/phases (gen/sleep 10)))) + (gen/phases (gen/sleep 60)))) (defn nemesis "Composite nemesis and generator" diff --git a/tests/jepsen/src/jepsen/memgraph/support.clj b/tests/jepsen/src/jepsen/memgraph/support.clj index 42e5f4ab8..c55e0f878 100644 --- a/tests/jepsen/src/jepsen/memgraph/support.clj +++ b/tests/jepsen/src/jepsen/memgraph/support.clj @@ -2,8 +2,8 @@ (:require [clojure.string :as str] [clojure.tools.logging :refer [info]] [jepsen [db :as db] - [control :as c] - [util :as util :refer [meh]]] + [control :as c] + [util :as util :refer [meh]]] [jepsen.control.util :as cu] [jepsen.os.debian :as debian])) @@ -44,7 +44,7 @@ (throw (Exception. (str local-binary " is not there."))))) (info node "Memgraph binary is there" local-binary) (start-node! 
test node) - (Thread/sleep 2000))) + (Thread/sleep 5000))) ;; TODO(gitbuda): The sleep after Jepsen starting Memgraph is for sure questionable. (teardown! [_ test node] (info node "Tearing down Memgraph") (stop-node! test node)