f85095c203
Summary: During the following scenario: - start an HA cluster with 3 machines - find the leader and start sending queries - SIGTERM the leader but leave the other 2 machines untouched The leader would get stuck in the shutdown phase. This happened because, during the shutdown phase of the Bolt server, a `graph_db_accessor` would try to commit a transaction after the Raft server had already been shut down. Raft, although no longer running, still thinks it is in Leader mode. The Tx Engine calls the `SafeToCommit` method to commit transactions and ends up in an infinite loop. Since Raft was shut down, it won't handle any of the incoming RPCs and won't change its mode. The fix here is to shut down the Bolt server before Raft, so we don't have any pending commits once Raft is shut down. Reviewers: ipaljak Reviewed By: ipaljak Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1853
84 lines
1.7 KiB
Bash
Executable File
84 lines
1.7 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
## Helper functions
|
|
|
|
# Block until something accepts TCP connections on 127.0.0.1:<port>.
#
# Arguments:
#   $1 - TCP port to probe.
#
# Probes with `nc -z` (1 second connect timeout) and retries every 0.1 s until
# the probe succeeds, then sleeps one more second before returning.
# NOTE: there is no upper bound on the wait; if nothing ever listens on the
# port, this function never returns.
function wait_for_server {
  # Keep the port local so the helper does not leak a global `port` variable
  # into the calling script.
  local port=$1
  while ! nc -z -w 1 127.0.0.1 "$port"; do
    sleep 0.1
  done
  # Extra pause after the port opens (presumably to let the server finish
  # initializing -- confirm before shortening).
  sleep 1
}
|
|
|
|
# Print an informational banner ("~~ <message> ~~") in bold cyan, followed by
# a newline.
#
# Arguments:
#   $1 - message text; printed verbatim.
function echo_info {
  # The message is passed as a %s argument instead of being embedded in the
  # format string, so '%' or backslashes in it are not interpreted by printf.
  printf '\033[1;36m~~ %s ~~\033[0m\n' "$1"
}
|
|
# Print a success banner ("~~ <message> ~~") in bold green, followed by a
# blank line.
#
# Arguments:
#   $1 - message text; printed verbatim.
function echo_success {
  # The message is passed as a %s argument instead of being embedded in the
  # format string, so '%' or backslashes in it are not interpreted by printf.
  printf '\033[1;32m~~ %s ~~\033[0m\n\n' "$1"
}
|
|
# Print a failure banner ("~~ <message> ~~") in bold red, followed by a blank
# line.
#
# Arguments:
#   $1 - message text; printed verbatim.
function echo_failure {
  # The message is passed as a %s argument instead of being embedded in the
  # format string, so '%' or backslashes in it are not interpreted by printf.
  printf '\033[1;31m~~ %s ~~\033[0m\n\n' "$1"
}
|
|
|
|
## Environment setup
|
|
|
|
# Resolve the directory containing this script and run from there, so the
# relative paths used later in the script (config files, durability
# directories) resolve next to the script. Abort if the directory cannot be
# determined or entered instead of continuing in the wrong place.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" || exit 1
cd "$DIR" || exit 1

# Find memgraph binaries: prefer the `build` directory three levels up and
# fall back to `build_release` when it does not exist. The path is quoted so
# the test also works when the checkout path contains whitespace.
binary_dir="$DIR/../../../build"
if [ ! -d "$binary_dir" ]; then
  binary_dir="$DIR/../../../build_release"
fi

# Results for apollo: file the benchmark is told to write its output to.
RESULTS="$DIR/.apollo_measurements"

# Benchmark parameters: value passed to the benchmark's --duration flag
# (presumably seconds -- confirm against the benchmark binary).
DURATION=10
|
|
|
|
# Startup
|
|
# PIDs of the background HA server processes, indexed by server id.
declare -a HA_PIDS

# Launch the three HA servers one at a time. Each server is started in the
# background, its PID is recorded, and the script waits until its port
# accepts connections before starting the next one.
for server_id in 1 2 3; do
  # Server N listens on port 7686 + N and uses its own durability directory
  # (dur<N>), relative to the current working directory.
  "$binary_dir/memgraph_ha" --server_id "$server_id" \
    --coordination_config_file="coordination.json" \
    --raft_config_file="raft.json" \
    --port "$((7686 + server_id))" \
    --db-recover-on-startup=false \
    --durability_directory="dur$server_id" &
  HA_PIDS[$server_id]=$!
  wait_for_server "$((7686 + server_id))"
done

# Allow some time for leader election.
sleep 10
|
|
|
|
# Start the HA benchmark in the background and wait for it to finish.
echo_info "Starting HA benchmark"
"$binary_dir/tests/feature_benchmark/ha/benchmark" \
  --duration="$DURATION" \
  --output-file="$RESULTS" &
pid=$!

# Wait for the benchmark process specifically and capture its exit status.
# Plain `wait <pid>` is used instead of `wait -n <pid>`: on Bash versions
# where `-n` does not honor pid arguments, `wait -n` returns as soon as ANY
# background job exits (e.g. one of the HA servers), which would record the
# wrong status in `code`.
wait "$pid"
code=$?
|
|
|
|
# Shutdown: ask every HA server to terminate by sending it SIGTERM
# (signal 15), without waiting for any of them here.
for server_id in {1..3}; do
  kill -s TERM "${HA_PIDS[server_id]}"
done
|
|
|
|
# Cleanup: reap each HA server, then delete its durability directory.
for server_id in 1 2 3; do
  # Plain `wait <pid>` blocks until exactly this server has exited. With
  # `wait -n <pid>`, Bash versions that do not honor pid arguments for `-n`
  # return as soon as any background job exits, so a directory could be
  # removed while its server is still shutting down.
  wait "${HA_PIDS[$server_id]}"
  rm -r -- "dur$server_id"
done
|
|
|
|
# Report the benchmark outcome. On failure, print the failure banner and
# propagate the benchmark's exit status; otherwise print the success banner
# and exit with status 0.
if ! [ "$code" -eq 0 ]; then
  echo_failure "Benchmark didn't finish successfully"
  exit "$code"
fi

echo_success "Benchmark finished successfully"
exit 0
|