Improve e2e and replication testing setup (#1061)
* Add `--replication-restore-state-on-startup` with `false` as default Co-authored-by: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io> Co-authored-by: Andi Skrgat <andi8647@gmail.com>
This commit is contained in:
parent
9d056e7649
commit
3b9133fd5a
8
.github/workflows/diff.yaml
vendored
8
.github/workflows/diff.yaml
vendored
@ -266,12 +266,11 @@ jobs:
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
# TODO(gitbuda): Setup mgclient and pymgclient properly.
|
||||
cd tests
|
||||
./setup.sh
|
||||
source ve3/bin/activate
|
||||
cd e2e
|
||||
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
|
||||
./run.sh
|
||||
|
||||
- name: Run stress test (plain)
|
||||
run: |
|
||||
@ -293,7 +292,6 @@ jobs:
|
||||
run: |
|
||||
# Activate toolchain.
|
||||
source /opt/toolchain-v4/activate
|
||||
|
||||
cd build
|
||||
|
||||
# create mgconsole
|
||||
@ -340,10 +338,8 @@ jobs:
|
||||
run: |
|
||||
# Activate toolchain.
|
||||
source /opt/toolchain-v4/activate
|
||||
|
||||
# Initialize dependencies.
|
||||
./init
|
||||
|
||||
# Build only memgraph release binary.
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=release ..
|
||||
@ -352,7 +348,7 @@ jobs:
|
||||
- name: Run Jepsen tests
|
||||
run: |
|
||||
cd tests/jepsen
|
||||
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
|
||||
./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
|
||||
|
||||
- name: Save Jepsen report
|
||||
uses: actions/upload-artifact@v3
|
||||
|
3
.github/workflows/release_centos8.yaml
vendored
3
.github/workflows/release_centos8.yaml
vendored
@ -265,12 +265,11 @@ jobs:
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
# TODO(gitbuda): Setup mgclient and pymgclient properly.
|
||||
cd tests
|
||||
./setup.sh
|
||||
source ve3/bin/activate
|
||||
cd e2e
|
||||
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
|
||||
./run.sh
|
||||
|
||||
- name: Run stress test (plain)
|
||||
run: |
|
||||
|
7
.github/workflows/release_debian10.yaml
vendored
7
.github/workflows/release_debian10.yaml
vendored
@ -264,12 +264,11 @@ jobs:
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
# TODO(gitbuda): Setup mgclient and pymgclient properly.
|
||||
cd tests
|
||||
./setup.sh
|
||||
source ve3/bin/activate
|
||||
cd e2e
|
||||
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
|
||||
./run.sh
|
||||
|
||||
- name: Run stress test (plain)
|
||||
run: |
|
||||
@ -319,10 +318,8 @@ jobs:
|
||||
run: |
|
||||
# Activate toolchain.
|
||||
source /opt/toolchain-v4/activate
|
||||
|
||||
# Initialize dependencies.
|
||||
./init
|
||||
|
||||
# Build only memgraph release binary.
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=release ..
|
||||
@ -331,7 +328,7 @@ jobs:
|
||||
- name: Run Jepsen tests
|
||||
run: |
|
||||
cd tests/jepsen
|
||||
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
|
||||
./run.sh test-all-individually --binary ../../build/memgraph --ignore-run-stdout-logs --ignore-run-stderr-logs
|
||||
|
||||
- name: Save Jepsen report
|
||||
uses: actions/upload-artifact@v3
|
||||
|
3
.github/workflows/release_ubuntu2004.yaml
vendored
3
.github/workflows/release_ubuntu2004.yaml
vendored
@ -264,12 +264,11 @@ jobs:
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
# TODO(gitbuda): Setup mgclient and pymgclient properly.
|
||||
cd tests
|
||||
./setup.sh
|
||||
source ve3/bin/activate
|
||||
cd e2e
|
||||
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory .
|
||||
./run.sh
|
||||
|
||||
- name: Run stress test (plain)
|
||||
run: |
|
||||
|
@ -261,6 +261,8 @@ DEFINE_double(query_execution_timeout_sec, 600,
|
||||
DEFINE_uint64(replication_replica_check_frequency_sec, 1,
|
||||
"The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. NOTE: "
|
||||
"The MAIN instance allocates a new thread for each REPLICA.");
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
DEFINE_bool(replication_restore_state_on_startup, false, "Restore replication state on startup, e.g. recover replica");
|
||||
|
||||
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
|
||||
DEFINE_uint64(
|
||||
@ -891,7 +893,7 @@ int main(int argc, char **argv) {
|
||||
.wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib,
|
||||
.wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx,
|
||||
.snapshot_on_exit = FLAGS_storage_snapshot_on_exit,
|
||||
.restore_replication_state_on_startup = true,
|
||||
.restore_replication_state_on_startup = FLAGS_replication_restore_state_on_startup,
|
||||
.items_per_batch = FLAGS_storage_items_per_batch,
|
||||
.recovery_thread_count = FLAGS_storage_recovery_thread_count,
|
||||
.allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery},
|
||||
|
@ -162,7 +162,7 @@ InMemoryStorage::InMemoryStorage(Config config)
|
||||
}
|
||||
} else {
|
||||
spdlog::warn(
|
||||
"Replicastion configuration will NOT be stored. When the server restarts, replication state will be "
|
||||
"Replication configuration will NOT be stored. When the server restarts, replication state will be "
|
||||
"forgotten.");
|
||||
}
|
||||
|
||||
|
@ -350,7 +350,7 @@ uint64_t InMemoryStorage::ReplicationClient::ReplicateCurrentWal() {
|
||||
/// transactions while Snapshots contain all the data. For that reason we prefer
|
||||
/// WALs as much as possible. As the WAL file that is currently being updated
|
||||
/// can change during the process we ignore it as much as possible. Also, it
|
||||
/// uses the transaction lock so lokcing it can be really expensive. After we
|
||||
/// uses the transaction lock so locking it can be really expensive. After we
|
||||
/// fetch the list of finalized WALs, we try to find the longest chain of
|
||||
/// sequential WALs, starting from the latest one, that will update the recovery
|
||||
/// with the all missed updates. If the WAL chain cannot be created, replica is
|
||||
|
13
tests/e2e/README.md
Normal file
13
tests/e2e/README.md
Normal file
@ -0,0 +1,13 @@
|
||||
# tests/e2e
|
||||
|
||||
Framework to run end-to-end tests against Memgraph.
|
||||
|
||||
## Notes
|
||||
|
||||
* If you change something under this directory and below (even a Python
|
||||
script), `make` has to be run again because all tests are copied to the build
|
||||
directory and executed from there.
|
||||
* Use/extend `run.sh` if you run any e2e tests:
|
||||
* if all tests have to be executed, use `run.sh`
|
||||
* if a suite of tests has to be executed, take a look at `run.sh` to see how to do so
|
||||
* if only a single test has to be executed, take a look at each individual binary/script; it's possible to manually pick the test
|
@ -187,4 +187,9 @@ startup_config_dict = {
|
||||
"Path to cypherl file that is used for configuring users and database schema before server starts.",
|
||||
),
|
||||
"init_data_file": ("", "", "Path to cypherl file that is used for creating data after server starts."),
|
||||
"replication_restore_state_on_startup": (
|
||||
"false",
|
||||
"false",
|
||||
"Restore replication state on startup, e.g. recover replica",
|
||||
),
|
||||
}
|
||||
|
@ -33,13 +33,11 @@
|
||||
import atexit
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from argparse import ArgumentParser
|
||||
from inspect import signature
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
@ -77,9 +75,9 @@ ACTIONS = {
|
||||
"info": lambda context: info(context),
|
||||
"stop": lambda context, name: stop(context, name),
|
||||
"start": lambda context, name: start(context, name),
|
||||
"sleep": lambda context, delta: time.sleep(float(delta)),
|
||||
"exit": lambda context: sys.exit(1),
|
||||
"quit": lambda context: sys.exit(1),
|
||||
"sleep": lambda _, delta: time.sleep(float(delta)),
|
||||
"exit": lambda _: sys.exit(1),
|
||||
"quit": lambda _: sys.exit(1),
|
||||
}
|
||||
|
||||
log = logging.getLogger("memgraph.tests.e2e")
|
||||
|
@ -13,7 +13,6 @@ import copy
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
import mgclient
|
||||
|
@ -147,27 +147,33 @@ def test_basic_recovery(connection):
|
||||
data_directory = tempfile.TemporaryDirectory()
|
||||
CONFIGURATION = {
|
||||
"replica_1": {
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica1.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
|
||||
},
|
||||
"replica_2": {
|
||||
"args": ["--bolt-port", "7689", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica2.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
|
||||
},
|
||||
"replica_3": {
|
||||
"args": ["--bolt-port", "7690", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7690", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica3.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"],
|
||||
},
|
||||
"replica_4": {
|
||||
"args": ["--bolt-port", "7691", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7691", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica4.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10004;"],
|
||||
},
|
||||
"main": {
|
||||
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
|
||||
"args": [
|
||||
"--bolt-port",
|
||||
"7687",
|
||||
"--log-level=TRACE",
|
||||
"--storage-recover-on-startup=true",
|
||||
"--replication-restore-state-on-startup=true",
|
||||
],
|
||||
"log_file": "main.log",
|
||||
"setup_queries": [],
|
||||
"data_directory": f"{data_directory.name}",
|
||||
@ -359,13 +365,19 @@ def test_replication_role_recovery(connection):
|
||||
data_directory = tempfile.TemporaryDirectory()
|
||||
CONFIGURATION = {
|
||||
"replica": {
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
|
||||
"data_directory": f"{data_directory.name}/replica",
|
||||
},
|
||||
"main": {
|
||||
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
|
||||
"args": [
|
||||
"--bolt-port",
|
||||
"7687",
|
||||
"--log-level=TRACE",
|
||||
"--storage-recover-on-startup=true",
|
||||
"--replication-restore-state-on-startup=true",
|
||||
],
|
||||
"log_file": "main.log",
|
||||
"setup_queries": [],
|
||||
"data_directory": f"{data_directory.name}/main",
|
||||
@ -381,13 +393,19 @@ def test_replication_role_recovery(connection):
|
||||
# When we restart the replica, it does not need this query anymore since it needs to remember state
|
||||
CONFIGURATION = {
|
||||
"replica": {
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica.log",
|
||||
"setup_queries": [],
|
||||
"data_directory": f"{data_directory.name}/replica",
|
||||
},
|
||||
"main": {
|
||||
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
|
||||
"args": [
|
||||
"--bolt-port",
|
||||
"7687",
|
||||
"--log-level=TRACE",
|
||||
"--storage-recover-on-startup=true",
|
||||
"--replication-restore-state-on-startup=true",
|
||||
],
|
||||
"log_file": "main.log",
|
||||
"setup_queries": [],
|
||||
"data_directory": f"{data_directory.name}/main",
|
||||
@ -511,17 +529,23 @@ def test_basic_recovery_when_replica_is_kill_when_main_is_down():
|
||||
data_directory = tempfile.TemporaryDirectory()
|
||||
CONFIGURATION = {
|
||||
"replica_1": {
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7688", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica1.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"],
|
||||
},
|
||||
"replica_2": {
|
||||
"args": ["--bolt-port", "7689", "--log-level=TRACE"],
|
||||
"args": ["--bolt-port", "7689", "--log-level=TRACE", "--replication-restore-state-on-startup=true"],
|
||||
"log_file": "replica2.log",
|
||||
"setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"],
|
||||
},
|
||||
"main": {
|
||||
"args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"],
|
||||
"args": [
|
||||
"--bolt-port",
|
||||
"7687",
|
||||
"--log-level=TRACE",
|
||||
"--storage-recover-on-startup=true",
|
||||
"--replication-restore-state-on-startup=true",
|
||||
],
|
||||
"log_file": "main.log",
|
||||
"setup_queries": [],
|
||||
"data_directory": f"{data_directory.name}",
|
||||
|
37
tests/e2e/run.sh
Executable file
37
tests/e2e/run.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
# TODO(gitbuda): Setup mgclient and pymgclient properly.
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib
|
||||
|
||||
print_help() {
|
||||
echo -e "$0 ["workload name string"]"
|
||||
echo -e ""
|
||||
echo -e " NOTE: some tests require enterprise licence key,"
|
||||
echo -e " to run those define the folowing env vars:"
|
||||
echo -e " * MEMGRAPH_ORGANIZATION_NAME"
|
||||
echo -e " * MEMGRAPH_ENTERPRISE_LICENSE"
|
||||
exit 1
|
||||
}
|
||||
check_license() {
|
||||
if [ ! -v MEMGRAPH_ORGANIZATION_NAME ] || [ ! -v MEMGRAPH_ENTERPRISE_LICENSE ]; then
|
||||
echo "NOTE: MEMGRAPH_ORGANIZATION_NAME or MEMGRAPH_ENTERPRISE_LICENSE NOT defined -> dependent tests will NOT work"
|
||||
fi
|
||||
}
|
||||
|
||||
if [ "$#" -eq 0 ]; then
|
||||
check_license
|
||||
# NOTE: If you want to run all tests under specific folder/section just
|
||||
# replace the dot (root directory below) with the folder name, e.g.
|
||||
# `--workloads-root-directory replication`.
|
||||
python3 runner.py --workloads-root-directory .
|
||||
elif [ "$#" -eq 1 ]; then
|
||||
if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
|
||||
print_help
|
||||
fi
|
||||
check_license
|
||||
# NOTE: --workload-name comes from each individual folder/section
|
||||
# workloads.yaml file. E.g. `streams/workloads.yaml` has a list of
|
||||
# `workloads:` and each workload has its `-name`.
|
||||
python3 runner.py --workloads-root-directory . --workload-name "$1"
|
||||
else
|
||||
print_help
|
||||
fi
|
@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# TODO: andi as a side project
|
||||
python3 runner.py --workloads-root-directory disk_storage
|
@ -5,14 +5,6 @@ test_transaction_queue: &test_transaction_queue
|
||||
log_file: "transaction_queue.log"
|
||||
setup_queries: []
|
||||
validation_queries: []
|
||||
disk_test_transaction_queue: &disk_test_transaction_queue
|
||||
cluster:
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr"]
|
||||
log_file: "transaction_queue.log"
|
||||
setup_queries: ["STORAGE MODE ON_DISK_TRANSACTIONAL"]
|
||||
validation_queries: []
|
||||
|
||||
|
||||
workloads:
|
||||
- name: "test-transaction-queue" # should be the same as the python file
|
||||
@ -20,8 +12,3 @@ workloads:
|
||||
proc: "tests/e2e/transaction_queue/procedures/"
|
||||
args: ["transaction_queue/test_transaction_queue.py"]
|
||||
<<: *test_transaction_queue
|
||||
- name: "test-transaction-queue on disk" # should be the same as the python file
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
proc: "tests/e2e/transaction_queue/procedures/"
|
||||
args: ["transaction_queue/test_transaction_queue.py"]
|
||||
<<: *disk_test_transaction_queue
|
||||
|
@ -2,3 +2,31 @@
|
||||
|
||||
NOTE: Jepsen can only connect to the SSH server on the default 22 port.
|
||||
`--node` flag only takes the actual address (:port doesn't work).
|
||||
|
||||
Jepsen run under CI:
|
||||
```
|
||||
cd tests/jepsen
|
||||
./run.sh test --binary ../../build/memgraph --run-args "test-all --node-configs resources/node-config.edn" --ignore-run-stdout-logs --ignore-run-stderr-logs
|
||||
```
|
||||
|
||||
Local run of each test (including setup):
|
||||
```
|
||||
cd tests/jepsen
|
||||
./run.sh cluster-up
|
||||
docker exec -it jepsen-control bash
|
||||
cd memgraph
|
||||
lein run test --workload bank --node-configs resources/node-config.edn
|
||||
lein run test --workload large --node-configs resources/node-config.edn
|
||||
```
|
||||
|
||||
Logs are located under `jepsen-control:/jepsen/memgraph/store`.
|
||||
|
||||
If you set up the cluster manually, go to the jepsen-control Docker container and ssh to all cluster nodes to save their host keys in known_hosts.
|
||||
```
|
||||
docker exec -it jepsen-control bash
|
||||
ssh n1 -> yes -> exit
|
||||
ssh n2 -> yes -> exit
|
||||
ssh n3 -> yes -> exit
|
||||
ssh n4 -> yes -> exit
|
||||
ssh n5 -> yes -> exit
|
||||
```
|
||||
|
13
tests/jepsen/jepsen_0.3.0.patch
Normal file
13
tests/jepsen/jepsen_0.3.0.patch
Normal file
@ -0,0 +1,13 @@
|
||||
diff --git a/docker/control/Dockerfile b/docker/control/Dockerfile
|
||||
index 6b2d3c0e..195a7a60 100644
|
||||
--- a/docker/control/Dockerfile
|
||||
+++ b/docker/control/Dockerfile
|
||||
@@ -7,7 +7,7 @@ ENV LEIN_ROOT true
|
||||
# Jepsen dependencies
|
||||
#
|
||||
RUN apt-get -y -q update && \
|
||||
- apt-get install -qy openjdk-17-jdk-headless \
|
||||
+ apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \
|
||||
libjna-java \
|
||||
vim \
|
||||
emacs \
|
@ -5,7 +5,10 @@
|
||||
:url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"}
|
||||
:main jepsen.memgraph.core
|
||||
:dependencies [[org.clojure/clojure "1.10.0"]
|
||||
[jepsen "0.2.1-SNAPSHOT"]
|
||||
;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions
|
||||
[jepsen "0.2.4-SNAPSHOT"]
|
||||
[gorillalabs/neo4j-clj "4.1.0"]]
|
||||
:profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}}
|
||||
;; required to run 0.3.0
|
||||
; :aot :all
|
||||
:repl-options {:init-ns jepsen.memgraph.core})
|
||||
|
@ -1,16 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -Eeuo pipefail
|
||||
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
|
||||
MEMGRAPH_BINARY_PATH="../../build/memgraph"
|
||||
# NOTE: On Ubuntu 22.04 0.3.2 uses non-existing docker compose --compatibility flag.
|
||||
# NOTE: On Ubuntu 22.04 0.3.1 seems to be working.
|
||||
JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.0}"
|
||||
# NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ...
|
||||
# NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag.
|
||||
# NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seem to be runnable.
|
||||
# TODO(gitbuda): Make sure Memgraph can be tested with Jepsen >= 0.3.0
|
||||
JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}"
|
||||
JEPSEN_ACTIVE_NODES_NO=5
|
||||
CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn"
|
||||
CONTROL_LEIN_RUN_STDOUT_LOGS=1
|
||||
CONTROL_LEIN_RUN_STDERR_LOGS=1
|
||||
_JEPSEN_RUN_EXIT_STATUS=0
|
||||
PRINT_CONTEXT() {
|
||||
echo -e "MEMGRAPH_BINARY_PATH:\t\t $MEMGRAPH_BINARY_PATH"
|
||||
echo -e "JEPSEN_VERSION:\t\t\t $JEPSEN_VERSION"
|
||||
@ -22,7 +24,7 @@ PRINT_CONTEXT() {
|
||||
|
||||
HELP_EXIT() {
|
||||
echo ""
|
||||
echo "HELP: $0 help|cluster-up|test [args]"
|
||||
echo "HELP: $0 help|cluster-up|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]"
|
||||
echo ""
|
||||
echo " test args --binary MEMGRAPH_BINARY_PATH"
|
||||
echo " --ignore-run-stdout-logs Ignore lein run stdout logs."
|
||||
@ -45,153 +47,235 @@ if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/nu
|
||||
ERROR "docker and docker-compose have to be installed."
|
||||
exit 1
|
||||
fi
|
||||
PRINT_CONTEXT
|
||||
|
||||
if [ ! -d "$script_dir/jepsen" ]; then
|
||||
git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen"
|
||||
if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then
|
||||
if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then
|
||||
cd "$script_dir/jepsen"
|
||||
git apply "$script_dir/jepsen_0.3.0.patch"
|
||||
cd "$script_dir"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$#" -lt 1 ]; then
|
||||
HELP_EXIT
|
||||
fi
|
||||
|
||||
PROCESS_ARGS() {
|
||||
shift
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case $key in
|
||||
--binary)
|
||||
shift
|
||||
MEMGRAPH_BINARY_PATH="$1"
|
||||
shift
|
||||
;;
|
||||
--ignore-run-stdout-logs)
|
||||
CONTROL_LEIN_RUN_STDOUT_LOGS=0
|
||||
shift
|
||||
;;
|
||||
--ignore-run-stderr-logs)
|
||||
CONTROL_LEIN_RUN_STDERR_LOGS=0
|
||||
shift
|
||||
;;
|
||||
--nodes-no)
|
||||
shift
|
||||
JEPSEN_ACTIVE_NODES_NO="$1"
|
||||
shift
|
||||
;;
|
||||
--run-args)
|
||||
shift
|
||||
CONTROL_LEIN_RUN_ARGS="$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
ERROR "Unknown option $1."
|
||||
HELP_EXIT
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
COPY_BINARIES() {
|
||||
# Copy Memgraph binary, handles both cases, when binary is a sym link
|
||||
# or a regular file.
|
||||
binary_path="$MEMGRAPH_BINARY_PATH"
|
||||
if [ -L "$binary_path" ]; then
|
||||
binary_path=$(readlink "$binary_path")
|
||||
fi
|
||||
binary_name=$(basename -- "$binary_path")
|
||||
for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do
|
||||
jepsen_node_name="jepsen-n$iter"
|
||||
docker_exec="docker exec $jepsen_node_name bash -c"
|
||||
if [ "$binary_name" == "memgraph" ]; then
|
||||
_binary_name="memgraph_tmp"
|
||||
else
|
||||
_binary_name="$binary_name"
|
||||
fi
|
||||
$docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph"
|
||||
docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name"
|
||||
$docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph"
|
||||
$docker_exec "touch /opt/memgraph/memgraph.log"
|
||||
INFO "Copying $binary_name to $jepsen_node_name DONE."
|
||||
done
|
||||
# Copy test files into the control node.
|
||||
docker exec jepsen-control mkdir -p /jepsen/memgraph/store
|
||||
docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/
|
||||
docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/
|
||||
docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/
|
||||
docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj
|
||||
INFO "Copying test files to jepsen-control DONE."
|
||||
}
|
||||
|
||||
RUN_JEPSEN() {
|
||||
__control_lein_run_args="$1"
|
||||
# NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY.
|
||||
# NOTE: ~/.bashrc has to be manually sourced when bash -c is used
|
||||
# because some Jepsen config is there.
|
||||
# To be able to archive the run result even if the run fails.
|
||||
set +e
|
||||
if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then
|
||||
redirect_stdout_logs="/dev/null"
|
||||
else
|
||||
redirect_stdout_logs="/dev/stdout"
|
||||
fi
|
||||
if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then
|
||||
redirect_stderr_logs="/dev/null"
|
||||
else
|
||||
redirect_stderr_logs="/dev/stderr"
|
||||
fi
|
||||
docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $__control_lein_run_args" 1> $redirect_stdout_logs 2> $redirect_stderr_logs
|
||||
_JEPSEN_RUN_EXIT_STATUS=$?
|
||||
set -e
|
||||
}
|
||||
|
||||
PROCESS_RESULTS() {
|
||||
start_time="$1"
|
||||
end_time="$2"
|
||||
INFO "Process results..."
|
||||
# Print and pack all test workload runs between start and end time.
|
||||
all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-)
|
||||
all_workload_run_folders=""
|
||||
for workload in $all_workloads; do
|
||||
for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do
|
||||
if [[ "$time_folder" == "latest" ]]; then
|
||||
continue
|
||||
fi
|
||||
# The early continue pattern here is nice because bash doesn't
|
||||
# have >= for the string comparison (marginal values).
|
||||
if [[ "$time_folder" < "$start_time" ]]; then
|
||||
continue
|
||||
fi
|
||||
if [[ "$time_folder" > "$end_time" ]]; then
|
||||
continue
|
||||
fi
|
||||
INFO "jepsen.log for $workload/$time_folder"
|
||||
docker exec jepsen-control bash -c "tail -n 50 /jepsen/memgraph/store/$workload/$time_folder/jepsen.log"
|
||||
all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder"
|
||||
done
|
||||
done
|
||||
INFO "Packing results..."
|
||||
docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders"
|
||||
docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./
|
||||
INFO "Result processing (printing and packing) DONE."
|
||||
}
|
||||
|
||||
# Initialize testing context by copying source/binary files. Inside CI,
|
||||
# Memgraph is tested on a single machine cluster based on Docker containers.
|
||||
# Once these tests will be part of the official Jepsen repo, the majority of
|
||||
# functionalities inside this script won't be needed because each node clones
|
||||
# the public repo.
|
||||
case $1 in
|
||||
help)
|
||||
HELP_EXIT
|
||||
;;
|
||||
# Start Jepsen Docker cluster of 5 nodes. To configure the cluster please
|
||||
# take a look under jepsen/docker/docker-compose.yml.
|
||||
# NOTE: If you delete the jepsen folder where docker config is located,
|
||||
# the current cluster is broken because it relies on the folder. That can
|
||||
# happen easily because the jepsen folder is git ignored.
|
||||
cluster-up)
|
||||
PRINT_CONTEXT
|
||||
"$script_dir/jepsen/docker/bin/up" --daemon
|
||||
;;
|
||||
# Run tests against the specified Memgraph binary.
|
||||
test)
|
||||
shift
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case $key in
|
||||
--binary)
|
||||
shift
|
||||
MEMGRAPH_BINARY_PATH="$1"
|
||||
shift
|
||||
;;
|
||||
--ignore-run-stdout-logs)
|
||||
CONTROL_LEIN_RUN_STDOUT_LOGS=0
|
||||
shift
|
||||
;;
|
||||
--ignore-run-stderr-logs)
|
||||
CONTROL_LEIN_RUN_STDERR_LOGS=0
|
||||
shift
|
||||
;;
|
||||
--nodes-no)
|
||||
shift
|
||||
JEPSEN_ACTIVE_NODES_NO="$1"
|
||||
shift
|
||||
;;
|
||||
--run-args)
|
||||
shift
|
||||
CONTROL_LEIN_RUN_ARGS="$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
ERROR "Unknown option $1."
|
||||
HELP_EXIT
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Copy Memgraph binary, handles both cases, when binary is a sym link
|
||||
# or a regular file.
|
||||
binary_path="$MEMGRAPH_BINARY_PATH"
|
||||
if [ -L "$binary_path" ]; then
|
||||
binary_path=$(readlink "$binary_path")
|
||||
fi
|
||||
binary_name=$(basename -- "$binary_path")
|
||||
cluster-cleanup)
|
||||
jepsen_control_exec="docker exec jepsen-control bash -c"
|
||||
INFO "Deleting /jepsen/memgraph/store/* on jepsen-control"
|
||||
$jepsen_control_exec "rm -rf /jepsen/memgraph/store/*"
|
||||
for iter in $(seq 1 "$JEPSEN_ACTIVE_NODES_NO"); do
|
||||
jepsen_node_name="jepsen-n$iter"
|
||||
docker_exec="docker exec $jepsen_node_name bash -c"
|
||||
if [ "$binary_name" == "memgraph" ]; then
|
||||
_binary_name="memgraph_tmp"
|
||||
else
|
||||
_binary_name="$binary_name"
|
||||
fi
|
||||
$docker_exec "rm -rf /opt/memgraph/ && mkdir -p /opt/memgraph"
|
||||
docker cp "$binary_path" "$jepsen_node_name":/opt/memgraph/"$_binary_name"
|
||||
$docker_exec "ln -s /opt/memgraph/$_binary_name /opt/memgraph/memgraph"
|
||||
$docker_exec "touch /opt/memgraph/memgraph.log"
|
||||
INFO "Copying $binary_name to $jepsen_node_name DONE."
|
||||
jepsen_node_exec="docker exec $jepsen_node_name bash -c"
|
||||
INFO "Deleting /opt/memgraph/* on $jepsen_node_name"
|
||||
$jepsen_node_exec "rm -rf /opt/memgraph/*"
|
||||
done
|
||||
;;
|
||||
|
||||
# Copy test files into the control node.
|
||||
docker exec jepsen-control mkdir -p /jepsen/memgraph
|
||||
docker cp "$script_dir/src/." jepsen-control:/jepsen/memgraph/src/
|
||||
docker cp "$script_dir/test/." jepsen-control:/jepsen/memgraph/test/
|
||||
docker cp "$script_dir/resources/." jepsen-control:/jepsen/memgraph/resources/
|
||||
docker cp "$script_dir/project.clj" jepsen-control:/jepsen/memgraph/project.clj
|
||||
INFO "Copying test files to jepsen-control DONE."
|
||||
cluster-dealloc)
|
||||
ps=$(docker ps --filter name=jepsen* --filter status=running -q)
|
||||
if [[ ! -z ${ps} ]]; then
|
||||
echo "Killing ${ps}"
|
||||
docker rm -f ${ps}
|
||||
imgs=$(docker images "jepsen*" -q)
|
||||
if [[ ! -z ${imgs} ]]; then
|
||||
echo "Removing ${imgs}"
|
||||
docker images "jepsen*" -q | xargs docker image rmi -f
|
||||
else
|
||||
echo "No Jepsen images detected!"
|
||||
fi
|
||||
else
|
||||
echo "No Jepsen containers detected!"
|
||||
fi
|
||||
;;
|
||||
|
||||
mgbuild)
|
||||
PRINT_CONTEXT
|
||||
echo ""
|
||||
echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder"
|
||||
exit 1
|
||||
;;
|
||||
|
||||
test)
|
||||
PROCESS_ARGS "$@"
|
||||
PRINT_CONTEXT
|
||||
COPY_BINARIES
|
||||
start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
|
||||
|
||||
# Run the test.
|
||||
# NOTE: docker exec -t is NOT ok because gh CI user does NOT have TTY.
|
||||
# NOTE: ~/.bashrc has to be manually sourced when bash -c is used
|
||||
# because some Jepsen config is there.
|
||||
set +e
|
||||
if [ "$CONTROL_LEIN_RUN_STDOUT_LOGS" -eq 0 ]; then
|
||||
redirect_stdout_logs="/dev/null"
|
||||
else
|
||||
redirect_stdout_logs="/dev/stdout"
|
||||
fi
|
||||
if [ "$CONTROL_LEIN_RUN_STDERR_LOGS" -eq 0 ]; then
|
||||
redirect_stderr_logs="/dev/null"
|
||||
else
|
||||
redirect_stderr_logs="/dev/stderr"
|
||||
fi
|
||||
INFO "Jepsen run in progress... START_TIME: $start_time"
|
||||
docker exec jepsen-control bash -c "source ~/.bashrc && cd memgraph && lein run $CONTROL_LEIN_RUN_ARGS" 1> $redirect_stdout_logs 2> $redirect_stderr_logs
|
||||
# To be able to archive the run result even if the run fails.
|
||||
jepsen_run_exit_status=$?
|
||||
RUN_JEPSEN "$CONTROL_LEIN_RUN_ARGS"
|
||||
end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
|
||||
INFO "Jepsen run DONE. END_TIME: $end_time"
|
||||
set -e
|
||||
|
||||
# Pack all test workload runs between start and end time.
|
||||
all_workloads=$(docker exec jepsen-control bash -c 'ls /jepsen/memgraph/store/' | grep test-)
|
||||
all_workload_run_folders=""
|
||||
for workload in $all_workloads; do
|
||||
for time_folder in $(docker exec jepsen-control bash -c "ls /jepsen/memgraph/store/$workload"); do
|
||||
if [[ "$time_folder" == "latest" ]]; then
|
||||
continue
|
||||
fi
|
||||
# The early continue pattern here is nice because bash doesn't
|
||||
# have >= for the string comparison (marginal values).
|
||||
if [[ "$time_folder" < "$start_time" ]]; then
|
||||
continue
|
||||
fi
|
||||
if [[ "$time_folder" > "$end_time" ]]; then
|
||||
continue
|
||||
fi
|
||||
all_workload_run_folders="$all_workload_run_folders /jepsen/memgraph/store/$workload/$time_folder"
|
||||
done
|
||||
done
|
||||
docker exec jepsen-control bash -c "tar -czvf /jepsen/memgraph/Jepsen.tar.gz $all_workload_run_folders"
|
||||
docker cp jepsen-control:/jepsen/memgraph/Jepsen.tar.gz ./
|
||||
INFO "Test and results packing DONE."
|
||||
|
||||
# If the run has failed, this script also has to return non-zero status.
|
||||
if [ "$jepsen_run_exit_status" -ne 0 ]; then
|
||||
exit "$jepsen_run_exit_status"
|
||||
PROCESS_RESULTS "$start_time" "$end_time"
|
||||
# Exit if the jepsen run status is not 0
|
||||
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
|
||||
ERROR "Jepsen FAILED" # important for the coder
|
||||
exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
|
||||
fi
|
||||
;;
|
||||
|
||||
test-all-individually)
|
||||
PROCESS_ARGS "$@"
|
||||
PRINT_CONTEXT
|
||||
INFO "NOTE: CONTROL_LEIN_RUN_ARGS ignored"
|
||||
COPY_BINARIES
|
||||
start_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
|
||||
INFO "Jepsen run in progress... START_TIME: $start_time"
|
||||
for workload in "bank" "large"; do
|
||||
RUN_JEPSEN "test --workload $workload --node-configs resources/node-config.edn"
|
||||
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
end_time="$(docker exec jepsen-control bash -c 'date -u +"%Y%m%dT%H%M%S"').000Z"
|
||||
INFO "Jepsen run DONE. END_TIME: $end_time"
|
||||
PROCESS_RESULTS "$start_time" "$end_time"
|
||||
# Exit if the jepsen run status is not 0
|
||||
if [ "$_JEPSEN_RUN_EXIT_STATUS" -ne 0 ]; then
|
||||
ERROR "Jepsen FAILED" # important for the coder
|
||||
exit "$_JEPSEN_RUN_EXIT_STATUS" # important for CI
|
||||
fi
|
||||
;;
|
||||
|
||||
*)
|
||||
HELP_EXIT
|
||||
HELP_EXIT
|
||||
;;
|
||||
esac
|
||||
|
@ -146,9 +146,23 @@
|
||||
["-w" "--workload NAME" "Test workload to run"
|
||||
:parse-fn keyword
|
||||
:validate [workloads (cli/one-of workloads)]]
|
||||
[nil "--node-configs PATH" "Path to the node configuration file."
|
||||
[nil "--node-configs PATH" "Path to a file containing a list of node config."
|
||||
:parse-fn #(-> % e/load-configuration)]])
|
||||
|
||||
(defn single-test
|
||||
"Takes base CLI options and constructs a single test."
|
||||
[opts]
|
||||
(let [workload (if (:workload opts)
|
||||
(:workload opts)
|
||||
(throw (Exception. "Workload undefined")))
|
||||
node-config (if (:node-configs opts)
|
||||
(first (merge-node-configurations (:nodes opts) (list (first (:node-configs opts)))))
|
||||
(throw (Exception. "Node configs undefined")))
|
||||
test-opts (assoc opts
|
||||
:node-config node-config
|
||||
:workload workload)]
|
||||
(memgraph-test test-opts)))
|
||||
|
||||
(defn all-tests
|
||||
"Takes base CLI options and constructs a sequence of test options."
|
||||
[opts]
|
||||
@ -169,7 +183,7 @@
|
||||
[& args]
|
||||
(cli/run! (merge (cli/test-all-cmd {:tests-fn all-tests
|
||||
:opt-spec cli-opts})
|
||||
(cli/single-test-cmd {:test-fn memgraph-test
|
||||
(cli/single-test-cmd {:test-fn single-test
|
||||
:opt-spec cli-opts})
|
||||
(cli/serve-cmd))
|
||||
args))
|
||||
|
@ -1,7 +1,7 @@
|
||||
(ns jepsen.memgraph.nemesis
|
||||
"Memgraph nemesis"
|
||||
(:require [jepsen [nemesis :as nemesis]
|
||||
[generator :as gen]]
|
||||
[generator :as gen]]
|
||||
[jepsen.memgraph.support :as s]))
|
||||
|
||||
(defn node-killer
|
||||
@ -16,10 +16,10 @@
|
||||
"Can kill and restart all processess and initiate network partitions."
|
||||
[opts]
|
||||
(nemesis/compose
|
||||
{{:kill-node :start
|
||||
:restart-node :stop} (node-killer)
|
||||
{:start-partition-halves :start
|
||||
:stop-partition-halves :stop} (nemesis/partition-random-halves)}))
|
||||
{{:kill-node :start
|
||||
:restart-node :stop} (node-killer)
|
||||
{:start-partition-halves :start
|
||||
:stop-partition-halves :stop} (nemesis/partition-random-halves)}))
|
||||
|
||||
(defn op
|
||||
"Construct a nemesis op"
|
||||
@ -36,7 +36,7 @@
|
||||
(apply concat)
|
||||
gen/mix
|
||||
(gen/stagger (:interval opts))
|
||||
(gen/phases (gen/sleep 10))))
|
||||
(gen/phases (gen/sleep 60))))
|
||||
|
||||
(defn nemesis
|
||||
"Composite nemesis and generator"
|
||||
|
@ -2,8 +2,8 @@
|
||||
(:require [clojure.string :as str]
|
||||
[clojure.tools.logging :refer [info]]
|
||||
[jepsen [db :as db]
|
||||
[control :as c]
|
||||
[util :as util :refer [meh]]]
|
||||
[control :as c]
|
||||
[util :as util :refer [meh]]]
|
||||
[jepsen.control.util :as cu]
|
||||
[jepsen.os.debian :as debian]))
|
||||
|
||||
@ -44,7 +44,7 @@
|
||||
(throw (Exception. (str local-binary " is not there.")))))
|
||||
(info node "Memgraph binary is there" local-binary)
|
||||
(start-node! test node)
|
||||
(Thread/sleep 2000)))
|
||||
(Thread/sleep 5000))) ;; TODO(gitbuda): The sleep after Jepsen starting Memgraph is for sure questionable.
|
||||
(teardown! [_ test node]
|
||||
(info node "Tearing down Memgraph")
|
||||
(stop-node! test node)
|
||||
|
Loading…
Reference in New Issue
Block a user