Improve e2e and replication testing setup (#1061)

* Add `--replication-restore-state-on-startup` with `false` as default Co-authored-by: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io> Co-authored-by: Andi Skrgat <andi8647@gmail.com>
2023-07-19 21:18:43 +02:00 · 2023-07-19 21:18:43 +02:00 · 3b9133fd5a
commit 3b9133fd5a
parent 9d056e7649
22 changed files with 376 additions and 182 deletions
--- a/.github/workflows/diff.yaml
+++ b/.github/workflows/diff.yaml
@ -266,12 +266,11 @@ jobs:
      - name: Run e2e tests
        run: |
          # TODO(gitbuda): Setup mgclient and pymgclient properly.
          cd tests
          ./setup.sh
          source ve3/bin/activate
          cd e2e
-          LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
+          ./run.sh
      - name: Run stress test (plain)
        run: |
@ -293,7 +292,6 @@ jobs:
        run: |
          # Activate toolchain.
          source /opt/toolchain-v4/activate
          cd build
          # create mgconsole
@ -340,10 +338,8 @@ jobs:
        run: |
          # Activate toolchain.
          source /opt/toolchain-v4/activate
          # Initialize dependencies.
          ./init
          # Build only memgraph release binarie.
          cd build
          cmake -DCMAKE_BUILD_TYPE=release ..
@ -352,7 +348,7 @@ jobs:
      - name: Run Jepsen tests
        run: |
          cd tests/jepsen
-          ./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
+          ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
      - name: Save Jepsen report
        uses: actions/upload-artifact@v3
--- a/.github/workflows/release_centos8.yaml
+++ b/.github/workflows/release_centos8.yaml
@ -265,12 +265,11 @@ jobs:
      - name: Run e2e tests
        run: |
          # TODO(gitbuda): Setup mgclient and pymgclient properly.
          cd tests
          ./setup.sh
          source ve3/bin/activate
          cd e2e
-          LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
+          ./run.sh
      - name: Run stress test (plain)
        run: |
--- a/.github/workflows/release_debian10.yaml
+++ b/.github/workflows/release_debian10.yaml
@ -264,12 +264,11 @@ jobs:
      - name: Run e2e tests
        run: |
          # TODO(gitbuda): Setup mgclient and pymgclient properly.
          cd tests
          ./setup.sh
          source ve3/bin/activate
          cd e2e
-          LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
+          ./run.sh
      - name: Run stress test (plain)
        run: |
@ -319,10 +318,8 @@ jobs:
        run: |
          # Activate toolchain.
          source /opt/toolchain-v4/activate
          # Initialize dependencies.
          ./init
          # Build only memgraph release binary.
          cd build
          cmake -DCMAKE_BUILD_TYPE=release ..
@ -331,7 +328,7 @@ jobs:
      - name: Run Jepsen tests
        run: |
          cd tests/jepsen
-          ./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
+          ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
      - name: Save Jepsen report
        uses: actions/upload-artifact@v3
--- a/.github/workflows/release_ubuntu2004.yaml
+++ b/.github/workflows/release_ubuntu2004.yaml
@ -264,12 +264,11 @@ jobs:
      - name: Run e2e tests
        run: |
          # TODO(gitbuda): Setup mgclient and pymgclient properly.
          cd tests
          ./setup.sh
          source ve3/bin/activate
          cd e2e
-          LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
+          ./run.sh
      - name: Run stress test (plain)
        run: |
--- a/src/memgraph.cpp
+++ b/src/memgraph.cpp
@ -261,6 +261,8 @@ DEFINE_double(query_execution_timeout_sec, 600,
 DEFINE_uint64(replication_replica_check_frequency_sec, 1,
              "The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: "
              "The MAIN instance allocates a new thread for each REPLICA.");
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
 DEFINE_bool(replication_restore_state_on_startup, false, "Restore replication state on startup, e.g. recover replica");
 // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
 DEFINE_uint64(
@ -891,7 +893,7 @@ int main(int argc, char **argv) {
                     .wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib,
                     .wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx,
                     .snapshot_on_exit = FLAGS_storage_snapshot_on_exit,
-                     .restore_replication_state_on_startup = true,
+                     .restore_replication_state_on_startup = FLAGS_replication_restore_state_on_startup,
                     .items_per_batch = FLAGS_storage_items_per_batch,
                     .recovery_thread_count = FLAGS_storage_recovery_thread_count,
                     .allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery},
--- a/src/storage/v2/inmemory/storage.cpp
+++ b/src/storage/v2/inmemory/storage.cpp
@ -162,7 +162,7 @@ InMemoryStorage::InMemoryStorage(Config config)
    }
  } else {
    spdlog::warn(
-        "Replicastion configuration will NOT be stored. When the server restarts, replication state will be "
+        "Replication configuration will NOT be stored. When the server restarts, replication state will be "
        "forgotten.");
  }
--- a/src/storage/v2/replication/replication_client.cpp
+++ b/src/storage/v2/replication/replication_client.cpp
@ -350,7 +350,7 @@ uint64_t InMemoryStorage::ReplicationClient::ReplicateCurrentWal() {
 /// transactions while Snapshots contain all the data. For that reason we prefer
 /// WALs as much as possible. As the WAL file that is currently being updated
 /// can change during the process we ignore it as much as possible. Also, it
-/// uses the transaction lock so lokcing it can be really expensive. After we
+/// uses the transaction lock so locking it can be really expensive. After we
 /// fetch the list of finalized WALs, we try to find the longest chain of
 /// sequential WALs, starting from the latest one, that will update the recovery
 /// with the all missed updates. If the WAL chain cannot be created, replica is
--- a/tests/e2e/README.md
+++ b/tests/e2e/README.md
@ -0,0 +1,13 @@
 # tests/e2e
 Framework to run end-to-end tests against Memgraph.
 ## Notes
 * If you change something under this directory and below (even a Python
  script), `make` has to be run again because all tests are copied to the build
  directory and executed from there.
 * Use/extend `run.sh` if you run any e2e tests:
  * if all tests have to be executed, use `run.sh`
  * if a suite of tests has to be executed, take a look inside `run.sh` to see how to do so
  * if only a single test has to be executed, take a look at each individual binary/script, it's possible to manually pick the test
--- a/tests/e2e/configuration/default_config.py
+++ b/tests/e2e/configuration/default_config.py
@ -187,4 +187,9 @@ startup_config_dict = {
        "Path to cypherl file that is used for configuring users and database schema before server starts.",
    ),
    "init_data_file": ("", "", "Path to cypherl file that is used for creating data after server starts."),
    "replication_restore_state_on_startup": (
        "false",
        "false",
        "Restore replication state on startup, e.g. recover replica",
    ),
 }
--- a/tests/e2e/interactive_mg_runner.py
+++ b/tests/e2e/interactive_mg_runner.py
@ -33,13 +33,11 @@
 import atexit
 import logging
 import os
 import subprocess
 import sys
 import tempfile
 import time
 from argparse import ArgumentParser
 from inspect import signature
 from pathlib import Path
 import yaml
@ -77,9 +75,9 @@ ACTIONS = {
    "info": lambda context: info(context),
    "stop": lambda context, name: stop(context, name),
    "start": lambda context, name: start(context, name),
-    "sleep": lambda context, delta: time.sleep(float(delta)),
+    "sleep": lambda _, delta: time.sleep(float(delta)),
-    "exit": lambda context: sys.exit(1),
+    "exit": lambda _: sys.exit(1),
-    "quit": lambda context: sys.exit(1),
+    "quit": lambda _: sys.exit(1),
 }
 log = logging.getLogger("memgraph.tests.e2e")
--- a/tests/e2e/memgraph.py
+++ b/tests/e2e/memgraph.py
@ -13,7 +13,6 @@ import copy
 import os
 import subprocess
 import sys
 import tempfile
 import time
 import mgclient
--- a/tests/e2e/replication/show_while_creating_invalid_state.py
+++ b/tests/e2e/replication/show_while_creating_invalid_state.py
@ -147,27 +147,33 @@ def test_basic_recovery(connection):
    data_directory = tempfile.TemporaryDirectory()
    CONFIGURATION = {
        "replica_1": {
-            "args": ["--bolt-port", "7688", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica1.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
        },
        "replica_2": {
-            "args": ["--bolt-port", "7689", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica2.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
        },
        "replica_3": {
-            "args": ["--bolt-port", "7690", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7690", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica3.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"],
        },
        "replica_4": {
-            "args": ["--bolt-port", "7691", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7691", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica4.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10004;"],
        },
        "main": {
-            "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
+            "args": [
                "--bolt-port",
                "7687",
                "--log-level=TRACE",
                "--storage-recover-on-startup=true",
                "--replication-restore-state-on-startup=true",
            ],
            "log_file": "main.log",
            "setup_queries": [],
            "data_directory": f"{data_directory.name}",
@ -359,13 +365,19 @@ def test_replication_role_recovery(connection):
    data_directory = tempfile.TemporaryDirectory()
    CONFIGURATION = {
        "replica": {
-            "args": ["--bolt-port", "7688", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
            "data_directory": f"{data_directory.name}/replica",
        },
        "main": {
-            "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
+            "args": [
                "--bolt-port",
                "7687",
                "--log-level=TRACE",
                "--storage-recover-on-startup=true",
                "--replication-restore-state-on-startup=true",
            ],
            "log_file": "main.log",
            "setup_queries": [],
            "data_directory": f"{data_directory.name}/main",
@ -381,13 +393,19 @@ def test_replication_role_recovery(connection):
    # When we restart the replica, it does not need this query anymore since it needs to remember state
    CONFIGURATION = {
        "replica": {
-            "args": ["--bolt-port", "7688", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica.log",
            "setup_queries": [],
            "data_directory": f"{data_directory.name}/replica",
        },
        "main": {
-            "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
+            "args": [
                "--bolt-port",
                "7687",
                "--log-level=TRACE",
                "--storage-recover-on-startup=true",
                "--replication-restore-state-on-startup=true",
            ],
            "log_file": "main.log",
            "setup_queries": [],
            "data_directory": f"{data_directory.name}/main",
@ -511,17 +529,23 @@ def test_basic_recovery_when_replica_is_kill_when_main_is_down():
    data_directory = tempfile.TemporaryDirectory()
    CONFIGURATION = {
        "replica_1": {
-            "args": ["--bolt-port", "7688", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica1.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
        },
        "replica_2": {
-            "args": ["--bolt-port", "7689", "--log-level=TRACE"],
+            "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
            "log_file": "replica2.log",
            "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
        },
        "main": {
-            "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
+            "args": [
                "--bolt-port",
                "7687",
                "--log-level=TRACE",
                "--storage-recover-on-startup=true",
                "--replication-restore-state-on-startup=true",
            ],
            "log_file": "main.log",
            "setup_queries": [],
            "data_directory": f"{data_directory.name}",
--- a/tests/e2e/run.sh
+++ b/tests/e2e/run.sh
@ -0,0 +1,37 @@
 #!/bin/bash
 # TODO(gitbuda): Setup mgclient and pymgclient properly.
 # NOTE: The appended library path is relative, so it is resolved against the
 # directory this script is started from.
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib
 # Print the usage text (including the note about the enterprise license
 # environment variables) and exit with status 1.
 print_help() {
  # The inner quotes are escaped so that they are printed literally; unescaped
  # they would terminate the string and silently disappear from the output.
  echo -e "$0 [\"workload name string\"]"
  echo -e ""
  echo -e "  NOTE: some tests require enterprise licence key,"
  echo -e "        to run those define the following env vars:"
  echo -e "          * MEMGRAPH_ORGANIZATION_NAME"
  echo -e "          * MEMGRAPH_ENTERPRISE_LICENSE"
  exit 1
 }
 # Print a note (without failing) when either of the enterprise license
 # environment variables is not defined.
 check_license() {
  # Both variables set -> nothing to report.
  if [ -v MEMGRAPH_ORGANIZATION_NAME ] && [ -v MEMGRAPH_ENTERPRISE_LICENSE ]; then
    return 0
  fi
  echo "NOTE: MEMGRAPH_ORGANIZATION_NAME or MEMGRAPH_ENTERPRISE_LICENSE NOT defined -> dependent tests will NOT work"
 }
 # Entry point: dispatch on the number of command line arguments.
 #   * no arguments  -> run every workload found under the current directory
 #   * one argument  -> print help for -h/--help, otherwise run only the
 #                      workload with the given name
 #   * anything else -> print help (print_help exits with status 1)
 if [ "$#" -eq 0 ]; then
  check_license
  # NOTE: If you want to run all tests under a specific folder/section just
  # replace the dot (root directory below) with the folder name, e.g.
  # `--workloads-root-directory replication`.
  python3 runner.py --workloads-root-directory .
 elif [ "$#" -eq 1 ]; then
  if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
    print_help
  fi
  check_license
  # NOTE: --workload-name comes from each individual folder/section
  # workloads.yaml file. E.g. `streams/workloads.yaml` has a list of
  # `workloads:` and each workload has its `-name`.
  python3 runner.py --workloads-root-directory . --workload-name "$1"
 else
  print_help
 fi
--- a/tests/e2e/run_e2e.sh
+++ b/tests/e2e/run_e2e.sh
@ -1,4 +0,0 @@
 #!/bin/bash
 # TODO: andi as a side project
 python3 runner.py --workloads-root-directory disk_storage
--- a/tests/e2e/transaction_queue/workloads.yaml
+++ b/tests/e2e/transaction_queue/workloads.yaml
@ -5,14 +5,6 @@ test_transaction_queue: &test_transaction_queue
      log_file: "transaction_queue.log"
      setup_queries: []
      validation_queries: []
 disk_test_transaction_queue: &disk_test_transaction_queue
  cluster:
    main:
      args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr"]
      log_file: "transaction_queue.log"
      setup_queries: ["STORAGE MODE ON_DISK_TRANSACTIONAL"]
      validation_queries: []
 workloads:
  - name: "test-transaction-queue" # should be the same as the python file
@ -20,8 +12,3 @@ workloads:
    proc: "tests/e2e/transaction_queue/procedures/"
    args: ["transaction_queue/test_transaction_queue.py"]
    <<: *test_transaction_queue
  - name: "test-transaction-queue on disk" # should be the same as the python file
    binary: "tests/e2e/pytest_runner.sh"
    proc: "tests/e2e/transaction_queue/procedures/"
    args: ["transaction_queue/test_transaction_queue.py"]
    <<: *disk_test_transaction_queue
--- a/tests/jepsen/README.md
+++ b/tests/jepsen/README.md
@ -2,3 +2,31 @@
 NOTE: Jepsen can only connect to the SSH server on the default 22 port.
 `--node` flag only takes the actual address (:port doesn't work).
 Jepsen run under CI:
 ```
 cd tests/jepsen
 ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
 ```
 Local run of each test (including setup):
 ```
 cd tests/jepsen
 ./run.sh cluster-up
 docker exec -it jepsen-control bash
 cd memgraph
 lein run test --workload bank --node-configs resources/node-config.edn
 lein run test --workload large --node-configs resources/node-config.edn
 ```
 Logs are located under `jepsen-control:/jepsen/memgraph/store`.
 If you setup cluster manually go to jepsen-control Docker container and ssh to all cluster nodes to save their host keys in known_hosts.
 ```
 docker exec -it jepsen-control bash
 ssh n1 -> yes -> exit
 ssh n2 -> yes -> exit
 ssh n3 -> yes -> exit
 ssh n4 -> yes -> exit
 ssh n5 -> yes -> exit
 ```
--- a/tests/jepsen/jepsen_0.3.0.patch
+++ b/tests/jepsen/jepsen_0.3.0.patch
@ -0,0 +1,13 @@
 diff --git a/docker/control/Dockerfile b/docker/control/Dockerfile
 index 6b2d3c0e..195a7a60 100644
 --- a/docker/control/Dockerfile
 +++ b/docker/control/Dockerfile
@@ -7,7 +7,7 @@ ENV LEIN_ROOT true
 # Jepsen dependencies
 #
 RUN apt-get -y -q update && \
 -    apt-get install -qy openjdk-17-jdk-headless \
 +    apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \
     libjna-java \
     vim \
     emacs \
--- a/tests/jepsen/project.clj
+++ b/tests/jepsen/project.clj
@ -5,7 +5,10 @@
            :url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"}
  :main jepsen.memgraph.core
  :dependencies [[org.clojure/clojure "1.10.0"]
-                 [jepsen "0.2.1-SNAPSHOT"]
+                 ;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions
                 [jepsen "0.2.4-SNAPSHOT"]
                 [gorillalabs/neo4j-clj "4.1.0"]]
  :profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}}
  ;; required to run 0.3.0
  ; :aot :all
  :repl-options {:init-ns jepsen.memgraph.core})
--- a/tests/jepsen/run.sh
+++ b/tests/jepsen/run.sh
@ -1,16 +1,18 @@
 #!/bin/bash
 set -Eeuo pipefail
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 MEMGRAPH_BINARY_PATH="../../build/memgraph"
-# NOTE: On Ubuntu 22.04 0.3.2 uses non-existing docker compose --compatibility flag.
+# NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ...
-# NOTE: On Ubuntu 22.04 0.3.1 seems to be working.
+# NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag.
-JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.0}"
+# NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seems to be runnable.
 # TODO(gitbuda): Make sure Memgraph can be tested with Jepsen >= 0.3.0
 JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}"
 JEPSEN_ACTIVE_NODES_NO=5
 CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn"
 CONTROL_LEIN_RUN_STDOUT_LOGS=1
 CONTROL_LEIN_RUN_STDERR_LOGS=1
 _JEPSEN_RUN_EXIT_STATUS=0
 PRINT_CONTEXT() {
    echo -e "MEMGRAPH_BINARY_PATH:\t\t $MEMGRAPH_BINARY_PATH"
    echo -e "JEPSEN_VERSION:\t\t\t $JEPSEN_VERSION"
@ -22,7 +24,7 @@ PRINT_CONTEXT() {
 HELP_EXIT() {
    echo ""
-    echo "HELP: $0 help|cluster-up|test [args]"
+    echo "HELP: $0 help|cluster-up|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]"
    echo ""
    echo "    test args --binary                 MEMGRAPH_BINARY_PATH"
    echo "              --ignore-run-stdout-logs Ignore lein run stdout logs."
@ -45,153 +47,235 @@ if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/nu
  ERROR "docker and docker-compose have to be installed."
  exit 1
 fi
 PRINT_CONTEXT
 if [ ! -d "$script_dir/jepsen" ]; then
    git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen"
    if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then
        if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then
            cd "$script_dir/jepsen"
            git apply "$script_dir/jepsen_0.3.0.patch"
            cd "$script_dir"
        fi
    fi
 fi
 if [ "$#" -lt 1 ]; then
    HELP_EXIT
 fi
 # Parse the command line options of the test commands into the global
 # configuration variables (MEMGRAPH_BINARY_PATH, CONTROL_LEIN_RUN_STDOUT_LOGS,
 # CONTROL_LEIN_RUN_STDERR_LOGS, JEPSEN_ACTIVE_NODES_NO, CONTROL_LEIN_RUN_ARGS).
 # Called as `PROCESS_ARGS "$@"`; an unknown option is reported via ERROR and
 # followed by HELP_EXIT.
 PROCESS_ARGS() {
    # Drop the first positional argument (the command name when called with "$@").
    shift
    while [[ $# -gt 0 ]]; do
        key="$1"
        case $key in
            # Options taking a value: skip the flag, read the value, skip the value.
            --binary)
                shift
                MEMGRAPH_BINARY_PATH="$1"
                shift
            ;;
            # Value-less switches: only the flag itself is consumed.
            --ignore-run-stdout-logs)
                CONTROL_LEIN_RUN_STDOUT_LOGS=0
                shift
            ;;
            --ignore-run-stderr-logs)
                CONTROL_LEIN_RUN_STDERR_LOGS=0
                shift
            ;;
            --nodes-no)
                shift
                JEPSEN_ACTIVE_NODES_NO="$1"
                shift
            ;;
            --run-args)
                shift
                CONTROL_LEIN_RUN_ARGS="$1"
                shift
            ;;
            *)
                ERROR "Unknown option $1."
                HELP_EXIT
            ;;
        esac
    done
 }
 # Copy the Memgraph binary (MEMGRAPH_BINARY_PATH) into /opt/memgraph on each of
 # the jepsen-n1..jepsen-n$JEPSEN_ACTIVE_NODES_NO containers and copy the test
 # sources (src, test, resources, project.clj) into the jepsen-control container.
 COPY_BINARIES() {
   # Copy Memgraph binary, handles both cases, when binary is a sym link
   # or a regular file.
   binary_path="$MEMGRAPH_BINARY_PATH"
   if [ -L "$binary_path" ]; then
       binary_path=$(readlink "$binary_path")
   fi
   binary_name=$(basename -- "$binary_path")
   for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do
       jepsen_node_name="jepsen-n$iter"
       docker_exec="docker exec $jepsen_node_name bash -c"
       # A binary literally named "memgraph" is stored as "memgraph_tmp",
       # presumably so that it does not clash with the /opt/memgraph/memgraph
       # symlink created below.
       if [ "$binary_name" == "memgraph" ]; then
         _binary_name="memgraph_tmp"
       else
         _binary_name="$binary_name"
       fi
       # Start from an empty /opt/memgraph on every node.
       $docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph"
       docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name"
       $docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph"
       $docker_exec "touch /opt/memgraph/memgraph.log"
       INFO "Copying $binary_name to $jepsen_node_name DONE."
   done
   # Copy test files into the control node.
   docker exec jepsen-control mkdir -p /jepsen/memgraph/store
   docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/
   docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/
   docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/
   docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj
   INFO "Copying test files to jepsen-control DONE."
 }
 # Run `lein run <args>` inside the jepsen-control container and store its exit
 # status in _JEPSEN_RUN_EXIT_STATUS instead of failing the script.
 #   $1 - arguments passed to `lein run`
 RUN_JEPSEN() {
    __control_lein_run_args="$1"
    # errexit is switched off for the duration of the run so that the results
    # can still be archived when the run fails.
    set +e
    # Logs go to the regular streams unless the matching flag is set to 0.
    redirect_stdout_logs="/dev/stdout"
    if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then
        redirect_stdout_logs="/dev/null"
    fi
    redirect_stderr_logs="/dev/stderr"
    if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then
        redirect_stderr_logs="/dev/null"
    fi
    # NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY.
    # NOTE: ~/.bashrc has to be manually sourced when bash -c is used
    #       because some Jepsen config is there.
    docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $__control_lein_run_args" 1> $redirect_stdout_logs 2> $redirect_stderr_logs
    _JEPSEN_RUN_EXIT_STATUS=$?
    set -e
 }
 # Print the tail of jepsen.log for every workload run stored on jepsen-control
 # between the given timestamps, pack those run folders into Jepsen.tar.gz and
 # copy the archive into the current directory.
 #   $1 - start time
 #   $2 - end time
 # Both are compared as strings against the run folder names under
 # /jepsen/memgraph/store/<workload>/.
 PROCESS_RESULTS() {
    start_time="$1"
    end_time="$2"
    INFO "Process results..."
    # Print and pack all test workload runs between start and end time.
    # NOTE: grep exits with 1 when nothing matches. With errexit and pipefail
    # enabled that would abort the script right here, before the caller gets
    # the chance to report the Jepsen exit status, so only the grep result is
    # tolerated (a failing docker exec still aborts).
    store_listing=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/')
    all_workloads=$(grep test- <<< "$store_listing" || true)
    all_workload_run_folders=""
    for workload in $all_workloads; do
        for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do
            if [[ "$time_folder" == "latest" ]]; then
                continue
            fi
            # The early continue pattern here is nice because bash doesn't
            # have >= for the string comparison (marginal values).
            if [[ "$time_folder" < "$start_time" ]]; then
                continue
            fi
            if [[ "$time_folder" > "$end_time" ]]; then
                continue
            fi
            INFO "jepsen.log for $workload/$time_folder"
            docker exec jepsen-control bash -c "tail -n 50 /jepsen/memgraph/store/$workload/$time_folder/jepsen.log"
            all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder"
        done
    done
    # tar refuses to create an archive from an empty file list, which would
    # abort the script as well; skip the packing and let the caller decide
    # based on the Jepsen exit status.
    if [ -z "$all_workload_run_folders" ]; then
        ERROR "No workload runs found between $start_time and $end_time, nothing to pack."
        return 0
    fi
    INFO "Packing results..."
    docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders"
    docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./
    INFO "Result processing (printing and packing) DONE."
 }
 # Initialize testing context by copying source/binary files. Inside CI,
 # Memgraph is tested on a single machine cluster based on Docker containers.
 # Once these tests will be part of the official Jepsen repo, the majority of
 # functionalities inside this script won't be needed because each node clones
 # the public repo.
 case $1 in
    help)
        HELP_EXIT
    ;;
    # Start Jepsen Docker cluster of 5 nodes. To configure the cluster please
    # take a look under jepsen/docker/docker-compose.yml.
    # NOTE: If you delete the jepsen folder where docker config is located,
    # the current cluster is broken because it relies on the folder. That can
    # happen easily because the jepsen folder is git ignored.
    cluster-up)
        PRINT_CONTEXT
        "$script_dir/jepsen/docker/bin/up" --daemon
    ;;
    # Run tests against the specified Memgraph binary.
    test)
        shift
        while [[ $# -gt 0 ]]; do
            key="$1"
            case $key in
                --binary)
                    shift
                    MEMGRAPH_BINARY_PATH="$1"
                    shift
                ;;
                --ignore-run-stdout-logs)
                    CONTROL_LEIN_RUN_STDOUT_LOGS=0
                    shift
                ;;
                --ignore-run-stderr-logs)
                    CONTROL_LEIN_RUN_STDERR_LOGS=0
                    shift
                ;;
                --nodes-no)
                    shift
                    JEPSEN_ACTIVE_NODES_NO="$1"
                    shift
                ;;
                --run-args)
                    shift
                    CONTROL_LEIN_RUN_ARGS="$1"
                    shift
                ;;
                *)
                    ERROR "Unknown option $1."
                    HELP_EXIT
                ;;
            esac
        done
-        # Copy Memgraph binary, handles both cases, when binary is a sym link
+    cluster-cleanup)
-        # or a regular file.
+        jepsen_control_exec="docker exec jepsen-control bash -c"
-        binary_path="$MEMGRAPH_BINARY_PATH"
+        INFO "Deleting /jepsen/memgraph/store/* on jepsen-control"
-        if [ -L "$binary_path" ]; then
+        $jepsen_control_exec "rm -rf /jepsen/memgraph/store/*"
            binary_path=$(readlink "$binary_path")
        fi
        binary_name=$(basename -- "$binary_path")
        for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do
            jepsen_node_name="jepsen-n$iter"
-            docker_exec="docker exec $jepsen_node_name bash -c"
+            jepsen_node_exec="docker exec $jepsen_node_name bash -c"
-            if [ "$binary_name" == "memgraph" ]; then
+            INFO "Deleting /opt/memgraph/* on $jepsen_node_name"
-              _binary_name="memgraph_tmp"
+            $jepsen_node_exec "rm -rf /opt/memgraph/*"
            else
              _binary_name="$binary_name"
            fi
            $docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph"
            docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name"
            $docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph"
            $docker_exec "touch /opt/memgraph/memgraph.log"
            INFO "Copying $binary_name to $jepsen_node_name DONE."
        done
    ;;
-        # Copy test files into the control node.
+    cluster-dealloc)
-        docker exec jepsen-control mkdir -p /jepsen/memgraph
+        ps=$(docker ps --filter name=jepsen* --filter status=running -q)
-        docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/
+        if [[ ! -z ${ps} ]]; then
-        docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/
+            echo "Killing ${ps}"
-        docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/
+            docker rm -f ${ps}
-        docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj
+            imgs=$(docker images "jepsen*" -q)
-        INFO "Copying test files to jepsen-control DONE."
+            if [[ ! -z ${imgs} ]]; then
                echo "Removing ${imgs}"
                docker images "jepsen*" -q | xargs docker image rmi -f
            else
                echo "No Jepsen images detected!"
            fi
        else
            echo "No Jepsen containers detected!"
        fi
    ;;
    mgbuild)
        PRINT_CONTEXT
        echo ""
        echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder"
        exit 1
    ;;
    test)
        PROCESS_ARGS "$@"
        PRINT_CONTEXT
        COPY_BINARIES
        start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
        # Run the test.
        # NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY.
        # NOTE: ~/.bashrc has to be manually sourced when bash -c is used
        #       because some Jepsen config is there.
        set +e
        if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then
            redirect_stdout_logs="/dev/null"
        else
            redirect_stdout_logs="/dev/stdout"
        fi
        if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then
            redirect_stderr_logs="/dev/null"
        else
            redirect_stderr_logs="/dev/stderr"
        fi
        INFO "Jepsen run in progress... START_TIME: $start_time"
-        docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $CONTROL_LEIN_RUN_ARGS" 1> $redirect_stdout_logs 2> $redirect_stderr_logs
+        RUN_JEPSEN "$CONTROL_LEIN_RUN_ARGS"
        # To be able to archive the run result even if the run fails.
        jepsen_run_exit_status=$?
        end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
        INFO "Jepsen run DONE. END_TIME: $end_time"
-        set -e
+        PROCESS_RESULTS "$start_time" "$end_time"
-
+        # Exit if the jepsen run status is not 0
-        # Pack all test workload runs between start and end time.
+        if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
-        all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-)
+            ERROR "Jepsen FAILED" # important for the coder
-        all_workload_run_folders=""
+            exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
        for workload in $all_workloads; do
            for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do
                if [[ "$time_folder" == "latest" ]]; then
                    continue
                fi
                # The early continue pattern here is nice because bash doesn't
                # have >= for the string comparison (marginal values).
                if [[ "$time_folder" < "$start_time" ]]; then
                    continue
                fi
                if [[ "$time_folder" > "$end_time" ]]; then
                    continue
                fi
                all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder"
            done
        done
        docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders"
        docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./
        INFO "Test and results packing DONE."
        # If the run has failed, this script also has to return non-zero status.
        if [ "$jepsen_run_exit_status" -ne 0 ]; then
            exit "$jepsen_run_exit_status"
        fi
    ;;
    test-all-individually)
        PROCESS_ARGS "$@"
        PRINT_CONTEXT
        INFO "NOTE: CONTROL_LEIN_RUN_ARGS ignored"
        COPY_BINARIES
        start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
        INFO "Jepsen run in progress... START_TIME: $start_time"
        for workload in "bank" "large"; do
          RUN_JEPSEN "test --workload $workload --node-configs resources/node-config.edn"
          if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
            break
          fi
        done
        end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
        INFO "Jepsen run DONE. END_TIME: $end_time"
        PROCESS_RESULTS "$start_time" "$end_time"
        # Exit if the jepsen run status is not 0
        if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
            ERROR "Jepsen FAILED" # important for the coder
            exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
        fi
    ;;
    *)
-    HELP_EXIT
+        HELP_EXIT
    ;;
 esac
--- a/tests/jepsen/src/jepsen/memgraph/core.clj
+++ b/tests/jepsen/src/jepsen/memgraph/core.clj
@ -146,9 +146,23 @@
   ["-w" "--workload NAME" "Test workload to run"
    :parse-fn keyword
    :validate [workloads (cli/one-of workloads)]]
-   [nil "--node-configs PATH" "Path to the node configuration file."
+   [nil "--node-configs PATH" "Path to a file containing a list of node config."
    :parse-fn #(-> % e/load-configuration)]])
 (defn single-test
  "Builds a single test from the base CLI options.
  Throws when :workload or :node-configs is missing from opts. Only the
  first entry of :node-configs is merged with :nodes and used."
  [opts]
  (let [workload (or (:workload opts)
                     (throw (Exception. "Workload undefined")))
        configs (or (:node-configs opts)
                    (throw (Exception. "Node configs undefined")))
        ;; Wrap the first config in a list so the merge sees a one-element collection.
        node-config (first (merge-node-configurations (:nodes opts)
                                                      (list (first configs))))]
    (memgraph-test (assoc opts
                          :node-config node-config
                          :workload workload))))
 (defn all-tests
  "Takes base CLI options and constructs a sequence of test options."
  [opts]
@ -169,7 +183,7 @@
  [& args]
  (cli/run! (merge (cli/test-all-cmd {:tests-fn all-tests
                                      :opt-spec cli-opts})
-                   (cli/single-test-cmd {:test-fn memgraph-test
+                   (cli/single-test-cmd {:test-fn single-test
                                         :opt-spec cli-opts})
                   (cli/serve-cmd))
            args))
--- a/tests/jepsen/src/jepsen/memgraph/nemesis.clj
+++ b/tests/jepsen/src/jepsen/memgraph/nemesis.clj
@ -1,7 +1,7 @@
 (ns jepsen.memgraph.nemesis
  "Memgraph nemesis"
  (:require [jepsen [nemesis :as nemesis]
-                    [generator :as gen]]
+             [generator :as gen]]
            [jepsen.memgraph.support :as s]))
 (defn node-killer
@ -16,10 +16,10 @@
  "Can kill and restart all processes and initiate network partitions."
  [opts]
  (nemesis/compose
-    {{:kill-node    :start
+   {{:kill-node    :start
-      :restart-node :stop} (node-killer)
+     :restart-node :stop} (node-killer)
-     {:start-partition-halves :start
+    {:start-partition-halves :start
-      :stop-partition-halves  :stop} (nemesis/partition-random-halves)}))
+     :stop-partition-halves  :stop} (nemesis/partition-random-halves)}))
 (defn op
  "Construct a nemesis op"
@ -36,7 +36,7 @@
       (apply concat)
       gen/mix
       (gen/stagger (:interval opts))
-       (gen/phases (gen/sleep 10))))
+       (gen/phases (gen/sleep 60))))
 (defn nemesis
  "Composite nemesis and generator"
--- a/tests/jepsen/src/jepsen/memgraph/support.clj
+++ b/tests/jepsen/src/jepsen/memgraph/support.clj
@ -2,8 +2,8 @@
  (:require [clojure.string :as str]
            [clojure.tools.logging :refer [info]]
            [jepsen [db :as db]
-                    [control :as c]
+             [control :as c]
-                    [util :as util :refer [meh]]]
+             [util :as util :refer [meh]]]
            [jepsen.control.util :as cu]
            [jepsen.os.debian :as debian]))
@ -44,7 +44,7 @@
               (throw (Exception. (str local-binary " is not there.")))))
        (info node "Memgraph binary is there" local-binary)
        (start-node! test node)
-        (Thread/sleep 2000)))
+        (Thread/sleep 5000))) ;; TODO(gitbuda): The sleep after Jepsen starting Memgraph is for sure questionable.
    (teardown! [_ test node]
      (info node "Tearing down Memgraph")
      (stop-node! test node)