Improve e2e and replication testing setup ()

* Add `--replication-restore-state-on-startup` with `false` as default

Co-authored-by: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io>
Co-authored-by: Andi Skrgat <andi8647@gmail.com>
This commit is contained in:
Marko Budiselić 2023-07-19 21:18:43 +02:00 committed by GitHub
parent 9d056e7649
commit 3b9133fd5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 376 additions and 182 deletions

View File

@ -266,12 +266,11 @@ jobs:
- name: Run e2e tests - name: Run e2e tests
run: | run: |
# TODO(gitbuda): Setup mgclient and pymgclient properly.
cd tests cd tests
./setup.sh ./setup.sh
source ve3/bin/activate source ve3/bin/activate
cd e2e cd e2e
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . ./run.sh
- name: Run stress test (plain) - name: Run stress test (plain)
run: | run: |
@ -293,7 +292,6 @@ jobs:
run: | run: |
# Activate toolchain. # Activate toolchain.
source /opt/toolchain-v4/activate source /opt/toolchain-v4/activate
cd build cd build
# create mgconsole # create mgconsole
@ -340,10 +338,8 @@ jobs:
run: | run: |
# Activate toolchain. # Activate toolchain.
source /opt/toolchain-v4/activate source /opt/toolchain-v4/activate
# Initialize dependencies. # Initialize dependencies.
./init ./init
# Build only memgraph release binarie. # Build only memgraph release binarie.
cd build cd build
cmake -DCMAKE_BUILD_TYPE=release .. cmake -DCMAKE_BUILD_TYPE=release ..
@ -352,7 +348,7 @@ jobs:
- name: Run Jepsen tests - name: Run Jepsen tests
run: | run: |
cd tests/jepsen cd tests/jepsen
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
- name: Save Jepsen report - name: Save Jepsen report
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3

View File

@ -265,12 +265,11 @@ jobs:
- name: Run e2e tests - name: Run e2e tests
run: | run: |
# TODO(gitbuda): Setup mgclient and pymgclient properly.
cd tests cd tests
./setup.sh ./setup.sh
source ve3/bin/activate source ve3/bin/activate
cd e2e cd e2e
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . ./run.sh
- name: Run stress test (plain) - name: Run stress test (plain)
run: | run: |

View File

@ -264,12 +264,11 @@ jobs:
- name: Run e2e tests - name: Run e2e tests
run: | run: |
# TODO(gitbuda): Setup mgclient and pymgclient properly.
cd tests cd tests
./setup.sh ./setup.sh
source ve3/bin/activate source ve3/bin/activate
cd e2e cd e2e
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . ./run.sh
- name: Run stress test (plain) - name: Run stress test (plain)
run: | run: |
@ -319,10 +318,8 @@ jobs:
run: | run: |
# Activate toolchain. # Activate toolchain.
source /opt/toolchain-v4/activate source /opt/toolchain-v4/activate
# Initialize dependencies. # Initialize dependencies.
./init ./init
# Build only memgraph release binary. # Build only memgraph release binary.
cd build cd build
cmake -DCMAKE_BUILD_TYPE=release .. cmake -DCMAKE_BUILD_TYPE=release ..
@ -331,7 +328,7 @@ jobs:
- name: Run Jepsen tests - name: Run Jepsen tests
run: | run: |
cd tests/jepsen cd tests/jepsen
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs ./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
- name: Save Jepsen report - name: Save Jepsen report
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3

View File

@ -264,12 +264,11 @@ jobs:
- name: Run e2e tests - name: Run e2e tests
run: | run: |
# TODO(gitbuda): Setup mgclient and pymgclient properly.
cd tests cd tests
./setup.sh ./setup.sh
source ve3/bin/activate source ve3/bin/activate
cd e2e cd e2e
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . ./run.sh
- name: Run stress test (plain) - name: Run stress test (plain)
run: | run: |

View File

@ -261,6 +261,8 @@ DEFINE_double(query_execution_timeout_sec, 600,
DEFINE_uint64(replication_replica_check_frequency_sec, 1, DEFINE_uint64(replication_replica_check_frequency_sec, 1,
"The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: " "The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: "
"The MAIN instance allocates a new thread for each REPLICA."); "The MAIN instance allocates a new thread for each REPLICA.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(replication_restore_state_on_startup, false, "Restore replication state on startup, e.g. recover replica");
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_uint64( DEFINE_uint64(
@ -891,7 +893,7 @@ int main(int argc, char **argv) {
.wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib, .wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib,
.wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx, .wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx,
.snapshot_on_exit = FLAGS_storage_snapshot_on_exit, .snapshot_on_exit = FLAGS_storage_snapshot_on_exit,
.restore_replication_state_on_startup = true, .restore_replication_state_on_startup = FLAGS_replication_restore_state_on_startup,
.items_per_batch = FLAGS_storage_items_per_batch, .items_per_batch = FLAGS_storage_items_per_batch,
.recovery_thread_count = FLAGS_storage_recovery_thread_count, .recovery_thread_count = FLAGS_storage_recovery_thread_count,
.allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery}, .allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery},

View File

@ -162,7 +162,7 @@ InMemoryStorage::InMemoryStorage(Config config)
} }
} else { } else {
spdlog::warn( spdlog::warn(
"Replicastion configuration will NOT be stored. When the server restarts, replication state will be " "Replication configuration will NOT be stored. When the server restarts, replication state will be "
"forgotten."); "forgotten.");
} }

View File

@ -350,7 +350,7 @@ uint64_t InMemoryStorage::ReplicationClient::ReplicateCurrentWal() {
/// transactions while Snapshots contain all the data. For that reason we prefer /// transactions while Snapshots contain all the data. For that reason we prefer
/// WALs as much as possible. As the WAL file that is currently being updated /// WALs as much as possible. As the WAL file that is currently being updated
/// can change during the process we ignore it as much as possible. Also, it /// can change during the process we ignore it as much as possible. Also, it
/// uses the transaction lock so lokcing it can be really expensive. After we /// uses the transaction lock so locking it can be really expensive. After we
/// fetch the list of finalized WALs, we try to find the longest chain of /// fetch the list of finalized WALs, we try to find the longest chain of
/// sequential WALs, starting from the latest one, that will update the recovery /// sequential WALs, starting from the latest one, that will update the recovery
/// with the all missed updates. If the WAL chain cannot be created, replica is /// with the all missed updates. If the WAL chain cannot be created, replica is

13
tests/e2e/README.md Normal file
View File

@ -0,0 +1,13 @@
# tests/e2e
Framework to run end-to-end tests against Memgraph.
## Notes
* If you change something under this directory and below (even a Python
script), `make` has to be run again because all tests are copied to the build
directory and executed from there.
* Use/extend `run.sh` if you run any e2e tests:
* if all tests have to be executed, use `run.sh`
* if a suite of tests has to be executed, take a look inside `run.sh` to see how to do so
* if only a single test has to be executed, take a look at each individual binary/script; it's possible to manually pick the test

View File

@ -187,4 +187,9 @@ startup_config_dict = {
"Path to cypherl file that is used for configuring users and database schema before server starts.", "Path to cypherl file that is used for configuring users and database schema before server starts.",
), ),
"init_data_file": ("", "", "Path to cypherl file that is used for creating data after server starts."), "init_data_file": ("", "", "Path to cypherl file that is used for creating data after server starts."),
"replication_restore_state_on_startup": (
"false",
"false",
"Restore replication state on startup, e.g. recover replica",
),
} }

View File

@ -33,13 +33,11 @@
import atexit import atexit
import logging import logging
import os import os
import subprocess
import sys import sys
import tempfile import tempfile
import time import time
from argparse import ArgumentParser from argparse import ArgumentParser
from inspect import signature from inspect import signature
from pathlib import Path
import yaml import yaml
@ -77,9 +75,9 @@ ACTIONS = {
"info": lambda context: info(context), "info": lambda context: info(context),
"stop": lambda context, name: stop(context, name), "stop": lambda context, name: stop(context, name),
"start": lambda context, name: start(context, name), "start": lambda context, name: start(context, name),
"sleep": lambda context, delta: time.sleep(float(delta)), "sleep": lambda _, delta: time.sleep(float(delta)),
"exit": lambda context: sys.exit(1), "exit": lambda _: sys.exit(1),
"quit": lambda context: sys.exit(1), "quit": lambda _: sys.exit(1),
} }
log = logging.getLogger("memgraph.tests.e2e") log = logging.getLogger("memgraph.tests.e2e")

View File

@ -13,7 +13,6 @@ import copy
import os import os
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
import mgclient import mgclient

View File

@ -147,27 +147,33 @@ def test_basic_recovery(connection):
data_directory = tempfile.TemporaryDirectory() data_directory = tempfile.TemporaryDirectory()
CONFIGURATION = { CONFIGURATION = {
"replica_1": { "replica_1": {
"args": ["--bolt-port", "7688", "--log-level=TRACE"], "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica1.log", "log_file": "replica1.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
}, },
"replica_2": { "replica_2": {
"args": ["--bolt-port", "7689", "--log-level=TRACE"], "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica2.log", "log_file": "replica2.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
}, },
"replica_3": { "replica_3": {
"args": ["--bolt-port", "7690", "--log-level=TRACE"], "args": ["--bolt-port", "7690", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica3.log", "log_file": "replica3.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"],
}, },
"replica_4": { "replica_4": {
"args": ["--bolt-port", "7691", "--log-level=TRACE"], "args": ["--bolt-port", "7691", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica4.log", "log_file": "replica4.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10004;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10004;"],
}, },
"main": { "main": {
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], "args": [
"--bolt-port",
"7687",
"--log-level=TRACE",
"--storage-recover-on-startup=true",
"--replication-restore-state-on-startup=true",
],
"log_file": "main.log", "log_file": "main.log",
"setup_queries": [], "setup_queries": [],
"data_directory": f"{data_directory.name}", "data_directory": f"{data_directory.name}",
@ -359,13 +365,19 @@ def test_replication_role_recovery(connection):
data_directory = tempfile.TemporaryDirectory() data_directory = tempfile.TemporaryDirectory()
CONFIGURATION = { CONFIGURATION = {
"replica": { "replica": {
"args": ["--bolt-port", "7688", "--log-level=TRACE"], "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica.log", "log_file": "replica.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
"data_directory": f"{data_directory.name}/replica", "data_directory": f"{data_directory.name}/replica",
}, },
"main": { "main": {
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], "args": [
"--bolt-port",
"7687",
"--log-level=TRACE",
"--storage-recover-on-startup=true",
"--replication-restore-state-on-startup=true",
],
"log_file": "main.log", "log_file": "main.log",
"setup_queries": [], "setup_queries": [],
"data_directory": f"{data_directory.name}/main", "data_directory": f"{data_directory.name}/main",
@ -381,13 +393,19 @@ def test_replication_role_recovery(connection):
# When we restart the replica, it does not need this query anymore since it needs to remember state # When we restart the replica, it does not need this query anymore since it needs to remember state
CONFIGURATION = { CONFIGURATION = {
"replica": { "replica": {
"args": ["--bolt-port", "7688", "--log-level=TRACE"], "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica.log", "log_file": "replica.log",
"setup_queries": [], "setup_queries": [],
"data_directory": f"{data_directory.name}/replica", "data_directory": f"{data_directory.name}/replica",
}, },
"main": { "main": {
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], "args": [
"--bolt-port",
"7687",
"--log-level=TRACE",
"--storage-recover-on-startup=true",
"--replication-restore-state-on-startup=true",
],
"log_file": "main.log", "log_file": "main.log",
"setup_queries": [], "setup_queries": [],
"data_directory": f"{data_directory.name}/main", "data_directory": f"{data_directory.name}/main",
@ -511,17 +529,23 @@ def test_basic_recovery_when_replica_is_kill_when_main_is_down():
data_directory = tempfile.TemporaryDirectory() data_directory = tempfile.TemporaryDirectory()
CONFIGURATION = { CONFIGURATION = {
"replica_1": { "replica_1": {
"args": ["--bolt-port", "7688", "--log-level=TRACE"], "args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica1.log", "log_file": "replica1.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
}, },
"replica_2": { "replica_2": {
"args": ["--bolt-port", "7689", "--log-level=TRACE"], "args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
"log_file": "replica2.log", "log_file": "replica2.log",
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
}, },
"main": { "main": {
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], "args": [
"--bolt-port",
"7687",
"--log-level=TRACE",
"--storage-recover-on-startup=true",
"--replication-restore-state-on-startup=true",
],
"log_file": "main.log", "log_file": "main.log",
"setup_queries": [], "setup_queries": [],
"data_directory": f"{data_directory.name}", "data_directory": f"{data_directory.name}",

37
tests/e2e/run.sh Executable file
View File

@ -0,0 +1,37 @@
#!/bin/bash
# Entry point for running the end-to-end tests through runner.py.
#
# Usage:
#   ./run.sh                    run every workload found below the current directory
#   ./run.sh "workload name"    run only the workload with the given name
#   ./run.sh -h | --help        print usage
#
# NOTE: Both the mgclient library path and runner.py are given as relative
# paths, so the script has to be invoked from the directory containing
# runner.py.

# TODO(gitbuda): Setup mgclient and pymgclient properly.
# Append the mgclient library directory. The ${VAR:+...} form only emits the
# "<old value>:" prefix when LD_LIBRARY_PATH is already non-empty; a plain
# "$LD_LIBRARY_PATH:..." would otherwise start with an empty entry, which the
# dynamic loader interprets as the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}../../libs/mgclient/lib"

# Print usage and terminate the script.
#   $1 - exit status to terminate with (optional, defaults to 1).
print_help() {
  # The inner quotes are escaped so they are printed literally instead of
  # terminating the string.
  echo "$0 [\"workload name string\"]"
  echo ""
  echo " NOTE: some tests require enterprise licence key,"
  echo " to run those define the following env vars:"
  echo " * MEMGRAPH_ORGANIZATION_NAME"
  echo " * MEMGRAPH_ENTERPRISE_LICENSE"
  exit "${1:-1}"
}

# Print a note when either enterprise license variable is not defined. This is
# informational only; the run continues.
check_license() {
  # ${VAR+x} expands to "x" only when VAR is set (even if set to an empty
  # value), i.e. the same condition as `[ -v VAR ]`, but it also works on bash
  # versions older than 4.2.
  if [ -z "${MEMGRAPH_ORGANIZATION_NAME+x}" ] || [ -z "${MEMGRAPH_ENTERPRISE_LICENSE+x}" ]; then
    echo "NOTE: MEMGRAPH_ORGANIZATION_NAME or MEMGRAPH_ENTERPRISE_LICENSE NOT defined -> dependent tests will NOT work"
  fi
}

case "$#" in
  0)
    check_license
    # NOTE: If you want to run all tests under specific folder/section just
    # replace the dot (root directory below) with the folder name, e.g.
    # `--workloads-root-directory replication`.
    python3 runner.py --workloads-root-directory .
    ;;
  1)
    if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
      # Help was requested explicitly, so this is not an error.
      print_help 0
    fi
    check_license
    # NOTE: --workload-name comes from each individual folder/section
    # workloads.yaml file. E.g. `streams/workloads.yaml` has a list of
    # `workloads:` and each workload has its `-name`.
    python3 runner.py --workloads-root-directory . --workload-name "$1"
    ;;
  *)
    # Wrong number of arguments: print usage and fail.
    print_help
    ;;
esac

View File

@ -1,4 +0,0 @@
#!/bin/bash
# TODO: andi as a side project
python3 runner.py --workloads-root-directory disk_storage

View File

@ -5,14 +5,6 @@ test_transaction_queue: &test_transaction_queue
log_file: "transaction_queue.log" log_file: "transaction_queue.log"
setup_queries: [] setup_queries: []
validation_queries: [] validation_queries: []
disk_test_transaction_queue: &disk_test_transaction_queue
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr"]
log_file: "transaction_queue.log"
setup_queries: ["STORAGE MODE ON_DISK_TRANSACTIONAL"]
validation_queries: []
workloads: workloads:
- name: "test-transaction-queue" # should be the same as the python file - name: "test-transaction-queue" # should be the same as the python file
@ -20,8 +12,3 @@ workloads:
proc: "tests/e2e/transaction_queue/procedures/" proc: "tests/e2e/transaction_queue/procedures/"
args: ["transaction_queue/test_transaction_queue.py"] args: ["transaction_queue/test_transaction_queue.py"]
<<: *test_transaction_queue <<: *test_transaction_queue
- name: "test-transaction-queue on disk" # should be the same as the python file
binary: "tests/e2e/pytest_runner.sh"
proc: "tests/e2e/transaction_queue/procedures/"
args: ["transaction_queue/test_transaction_queue.py"]
<<: *disk_test_transaction_queue

View File

@ -2,3 +2,31 @@
NOTE: Jepsen can only connect to the SSH server on the default 22 port. NOTE: Jepsen can only connect to the SSH server on the default 22 port.
`--node` flag only takes the actual address (:port doesn't work). `--node` flag only takes the actual address (:port doesn't work).
Jepsen run under CI:
```
cd tests/jepsen
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
```
Local run of each test (including setup):
```
cd tests/jepsen
./run.sh cluster-up
docker exec -it jepsen-control bash
cd memgraph
lein run test --workload bank --node-configs resources/node-config.edn
lein run test --workload large --node-configs resources/node-config.edn
```
Logs are located under `jepsen-control:/jepsen/memgraph/store`.
If you set up the cluster manually, go to the jepsen-control Docker container and ssh into each cluster node to save its host key in known_hosts.
```
docker exec -it jepsen-control bash
ssh n1 -> yes -> exit
ssh n2 -> yes -> exit
ssh n3 -> yes -> exit
ssh n4 -> yes -> exit
ssh n5 -> yes -> exit
```

View File

@ -0,0 +1,13 @@
diff --git a/docker/control/Dockerfile b/docker/control/Dockerfile
index 6b2d3c0e..195a7a60 100644
--- a/docker/control/Dockerfile
+++ b/docker/control/Dockerfile
@@ -7,7 +7,7 @@ ENV LEIN_ROOT true
# Jepsen dependencies
#
RUN apt-get -y -q update && \
- apt-get install -qy openjdk-17-jdk-headless \
+ apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \
libjna-java \
vim \
emacs \

View File

@ -5,7 +5,10 @@
:url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"} :url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"}
:main jepsen.memgraph.core :main jepsen.memgraph.core
:dependencies [[org.clojure/clojure "1.10.0"] :dependencies [[org.clojure/clojure "1.10.0"]
[jepsen "0.2.1-SNAPSHOT"] ;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions
[jepsen "0.2.4-SNAPSHOT"]
[gorillalabs/neo4j-clj "4.1.0"]] [gorillalabs/neo4j-clj "4.1.0"]]
:profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}} :profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}}
;; required to run 0.3.0
; :aot :all
:repl-options {:init-ns jepsen.memgraph.core}) :repl-options {:init-ns jepsen.memgraph.core})

View File

@ -1,16 +1,18 @@
#!/bin/bash #!/bin/bash
set -Eeuo pipefail set -Eeuo pipefail
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
MEMGRAPH_BINARY_PATH="../../build/memgraph" MEMGRAPH_BINARY_PATH="../../build/memgraph"
# NOTE: On Ubuntu 22.04 0.3.2 uses non-existing docker compose --compatibility flag. # NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ...
# NOTE: On Ubuntu 22.04 0.3.1 seems to be working. # NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag.
JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.0}" # NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seems to be runnable.
# TODO(gitbuda): Make sure Memgraph can be tested with Jepsen >= 0.3.0
JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}"
JEPSEN_ACTIVE_NODES_NO=5 JEPSEN_ACTIVE_NODES_NO=5
CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn" CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn"
CONTROL_LEIN_RUN_STDOUT_LOGS=1 CONTROL_LEIN_RUN_STDOUT_LOGS=1
CONTROL_LEIN_RUN_STDERR_LOGS=1 CONTROL_LEIN_RUN_STDERR_LOGS=1
_JEPSEN_RUN_EXIT_STATUS=0
PRINT_CONTEXT() { PRINT_CONTEXT() {
echo -e "MEMGRAPH_BINARY_PATH:\t\t $MEMGRAPH_BINARY_PATH" echo -e "MEMGRAPH_BINARY_PATH:\t\t $MEMGRAPH_BINARY_PATH"
echo -e "JEPSEN_VERSION:\t\t\t $JEPSEN_VERSION" echo -e "JEPSEN_VERSION:\t\t\t $JEPSEN_VERSION"
@ -22,7 +24,7 @@ PRINT_CONTEXT() {
HELP_EXIT() { HELP_EXIT() {
echo "" echo ""
echo "HELP: $0 help|cluster-up|test [args]" echo "HELP: $0 help|cluster-up|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]"
echo "" echo ""
echo " test args --binary MEMGRAPH_BINARY_PATH" echo " test args --binary MEMGRAPH_BINARY_PATH"
echo " --ignore-run-stdout-logs Ignore lein run stdout logs." echo " --ignore-run-stdout-logs Ignore lein run stdout logs."
@ -45,35 +47,23 @@ if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/nu
ERROR "docker and docker-compose have to be installed." ERROR "docker and docker-compose have to be installed."
exit 1 exit 1
fi fi
PRINT_CONTEXT
if [ ! -d "$script_dir/jepsen" ]; then if [ ! -d "$script_dir/jepsen" ]; then
git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen" git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen"
if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then
if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then
cd "$script_dir/jepsen"
git apply "$script_dir/jepsen_0.3.0.patch"
cd "$script_dir"
fi
fi
fi fi
if [ "$#" -lt 1 ]; then if [ "$#" -lt 1 ]; then
HELP_EXIT HELP_EXIT
fi fi
# Initialize testing context by copying source/binary files. Inside CI, PROCESS_ARGS() {
# Memgraph is tested on a single machine cluster based on Docker containers.
# Once these tests will be part of the official Jepsen repo, the majority of
# functionalities inside this script won't be needed because each node clones
# the public repo.
case $1 in
help)
HELP_EXIT
;;
# Start Jepsen Docker cluster of 5 nodes. To configure the cluster please
# take a look under jepsen/docker/docker-compose.yml.
# NOTE: If you delete the jepsen folder where docker config is located,
# the current cluster is broken because it relies on the folder. That can
# happen easiliy because the jepsen folder is git ignored.
cluster-up)
"$script_dir/jepsen/docker/bin/up" --daemon
;;
# Run tests against the specified Memgraph binary.
test)
shift shift
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
key="$1" key="$1"
@ -107,7 +97,9 @@ case $1 in
;; ;;
esac esac
done done
}
COPY_BINARIES() {
# Copy Memgraph binary, handles both cases, when binary is a sym link # Copy Memgraph binary, handles both cases, when binary is a sym link
# or a regular file. # or a regular file.
binary_path="$MEMGRAPH_BINARY_PATH" binary_path="$MEMGRAPH_BINARY_PATH"
@ -129,21 +121,21 @@ case $1 in
$docker_exec "touch /opt/memgraph/memgraph.log" $docker_exec "touch /opt/memgraph/memgraph.log"
INFO "Copying $binary_name to $jepsen_node_name DONE." INFO "Copying $binary_name to $jepsen_node_name DONE."
done done
# Copy test files into the control node. # Copy test files into the control node.
docker exec jepsen-control mkdir -p /jepsen/memgraph docker exec jepsen-control mkdir -p /jepsen/memgraph/store
docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/ docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/
docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/ docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/
docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/ docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/
docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj
INFO "Copying test files to jepsen-control DONE." INFO "Copying test files to jepsen-control DONE."
}
start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z" RUN_JEPSEN() {
__control_lein_run_args="$1"
# Run the test.
# NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY. # NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY.
# NOTE: ~/.bashrc has to be manually sourced when bash -c is used # NOTE: ~/.bashrc has to be manually sourced when bash -c is used
# because some Jepsen config is there. # because some Jepsen config is there.
# To be able to archive the run result even if the run fails.
set +e set +e
if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then
redirect_stdout_logs="/dev/null" redirect_stdout_logs="/dev/null"
@ -155,15 +147,16 @@ case $1 in
else else
redirect_stderr_logs="/dev/stderr" redirect_stderr_logs="/dev/stderr"
fi fi
INFO "Jepsen run in progress... START_TIME: $start_time" docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $__control_lein_run_args" 1> $redirect_stdout_logs 2> $redirect_stderr_logs
docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $CONTROL_LEIN_RUN_ARGS" 1> $redirect_stdout_logs 2> $redirect_stderr_logs _JEPSEN_RUN_EXIT_STATUS=$?
# To be able to archive the run result even if the run fails.
jepsen_run_exit_status=$?
end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
INFO "Jepsen run DONE. END_TIME: $end_time"
set -e set -e
}
# Pack all test workload runs between start and end time. PROCESS_RESULTS() {
start_time="$1"
end_time="$2"
INFO "Process results..."
# Print and pack all test workload runs between start and end time.
all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-) all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-)
all_workload_run_folders="" all_workload_run_folders=""
for workload in $all_workloads; do for workload in $all_workloads; do
@ -179,18 +172,109 @@ case $1 in
if [[ "$time_folder" > "$end_time" ]]; then if [[ "$time_folder" > "$end_time" ]]; then
continue continue
fi fi
INFO "jepsen.log for $workload/$time_folder"
docker exec jepsen-control bash -c "tail -n 50 /jepsen/memgraph/store/$workload/$time_folder/jepsen.log"
all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder" all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder"
done done
done done
INFO "Packing results..."
docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders" docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders"
docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./ docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./
INFO "Test and results packing DONE." INFO "Result processing (printing and packing) DONE."
}
# If the run has failed, this script also has to return non-zero status. # Initialize testing context by copying source/binary files. Inside CI,
if [ "$jepsen_run_exit_status" -ne 0 ]; then # Memgraph is tested on a single machine cluster based on Docker containers.
exit "$jepsen_run_exit_status" # Once these tests will be part of the official Jepsen repo, the majority of
# functionalities inside this script won't be needed because each node clones
# the public repo.
case $1 in
# Start Jepsen Docker cluster of 5 nodes. To configure the cluster please
# take a look under jepsen/docker/docker-compose.yml.
# NOTE: If you delete the jepsen folder where docker config is located,
# the current cluster is broken because it relies on the folder. That can
# happen easily because the jepsen folder is git ignored.
cluster-up)
PRINT_CONTEXT
"$script_dir/jepsen/docker/bin/up" --daemon
;;
cluster-cleanup)
jepsen_control_exec="docker exec jepsen-control bash -c"
INFO "Deleting /jepsen/memgraph/store/* on jepsen-control"
$jepsen_control_exec "rm -rf /jepsen/memgraph/store/*"
for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do
jepsen_node_name="jepsen-n$iter"
jepsen_node_exec="docker exec $jepsen_node_name bash -c"
INFO "Deleting /opt/memgraph/* on $jepsen_node_name"
$jepsen_node_exec "rm -rf /opt/memgraph/*"
done
;;
cluster-dealloc)
ps=$(docker ps --filter name=jepsen* --filter status=running -q)
if [[ ! -z ${ps} ]]; then
echo "Killing ${ps}"
docker rm -f ${ps}
imgs=$(docker images "jepsen*" -q)
if [[ ! -z ${imgs} ]]; then
echo "Removing ${imgs}"
docker images "jepsen*" -q | xargs docker image rmi -f
else
echo "No Jepsen images detected!"
fi
else
echo "No Jepsen containers detected!"
fi fi
;; ;;
mgbuild)
PRINT_CONTEXT
echo ""
echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder"
exit 1
;;
test)
PROCESS_ARGS "$@"
PRINT_CONTEXT
COPY_BINARIES
start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
INFO "Jepsen run in progress... START_TIME: $start_time"
RUN_JEPSEN "$CONTROL_LEIN_RUN_ARGS"
end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
INFO "Jepsen run DONE. END_TIME: $end_time"
PROCESS_RESULTS "$start_time" "$end_time"
# Exit if the jepsen run status is not 0
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
ERROR "Jepsen FAILED" # important for the coder
exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
fi
;;
test-all-individually)
PROCESS_ARGS "$@"
PRINT_CONTEXT
INFO "NOTE: CONTROL_LEIN_RUN_ARGS ignored"
COPY_BINARIES
start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
INFO "Jepsen run in progress... START_TIME: $start_time"
for workload in "bank" "large"; do
RUN_JEPSEN "test --workload $workload --node-configs resources/node-config.edn"
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
break
fi
done
end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
INFO "Jepsen run DONE. END_TIME: $end_time"
PROCESS_RESULTS "$start_time" "$end_time"
# Exit if the jepsen run status is not 0
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
ERROR "Jepsen FAILED" # important for the coder
exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
fi
;;
*) *)
HELP_EXIT HELP_EXIT
;; ;;

View File

@ -146,9 +146,23 @@
["-w" "--workload NAME" "Test workload to run" ["-w" "--workload NAME" "Test workload to run"
:parse-fn keyword :parse-fn keyword
:validate [workloads (cli/one-of workloads)]] :validate [workloads (cli/one-of workloads)]]
[nil "--node-configs PATH" "Path to the node configuration file." [nil "--node-configs PATH" "Path to a file containing a list of node config."
:parse-fn #(-> % e/load-configuration)]]) :parse-fn #(-> % e/load-configuration)]])
(defn single-test
  "Builds a single test from the base CLI options.

  `opts` must contain `:workload` and `:node-configs`; an Exception is thrown
  when either is missing. Only the first entry of `:node-configs` is used: it
  is merged with `:nodes` through `merge-node-configurations`, and the first
  merged result is handed to `memgraph-test` under `:node-config`."
  [opts]
  (let [workload    (or (:workload opts)
                        (throw (Exception. "Workload undefined")))
        node-config (if-let [configs (:node-configs opts)]
                      (-> (:nodes opts)
                          (merge-node-configurations (list (first configs)))
                          first)
                      (throw (Exception. "Node configs undefined")))]
    (memgraph-test (assoc opts
                          :node-config node-config
                          :workload workload))))
(defn all-tests (defn all-tests
"Takes base CLI options and constructs a sequence of test options." "Takes base CLI options and constructs a sequence of test options."
[opts] [opts]
@ -169,7 +183,7 @@
[& args] [& args]
(cli/run! (merge (cli/test-all-cmd {:tests-fn all-tests (cli/run! (merge (cli/test-all-cmd {:tests-fn all-tests
:opt-spec cli-opts}) :opt-spec cli-opts})
(cli/single-test-cmd {:test-fn memgraph-test (cli/single-test-cmd {:test-fn single-test
:opt-spec cli-opts}) :opt-spec cli-opts})
(cli/serve-cmd)) (cli/serve-cmd))
args)) args))

View File

@ -36,7 +36,7 @@
(apply concat) (apply concat)
gen/mix gen/mix
(gen/stagger (:interval opts)) (gen/stagger (:interval opts))
(gen/phases (gen/sleep 10)))) (gen/phases (gen/sleep 60))))
(defn nemesis (defn nemesis
"Composite nemesis and generator" "Composite nemesis and generator"

View File

@ -44,7 +44,7 @@
(throw (Exception. (str local-binary " is not there."))))) (throw (Exception. (str local-binary " is not there.")))))
(info node "Memgraph binary is there" local-binary) (info node "Memgraph binary is there" local-binary)
(start-node! test node) (start-node! test node)
(Thread/sleep 2000))) (Thread/sleep 5000))) ;; TODO(gitbuda): The sleep after Jepsen starting Memgraph is for sure questionable.
(teardown! [_ test node] (teardown! [_ test node]
(info node "Tearing down Memgraph") (info node "Tearing down Memgraph")
(stop-node! test node) (stop-node! test node)