Add -R option to restart the cluster incrementally
This commit is contained in:
@ -3,6 +3,7 @@ set -e
|
|||||||
|
|
||||||
iterations=1
|
iterations=1
|
||||||
restartInterval=never
|
restartInterval=never
|
||||||
|
rollingRestart=false
|
||||||
maybeNoLeaderRotation=
|
maybeNoLeaderRotation=
|
||||||
extraNodes=0
|
extraNodes=0
|
||||||
walletRpcEndpoint=
|
walletRpcEndpoint=
|
||||||
@ -21,6 +22,9 @@ Start a local cluster and run sanity on it
|
|||||||
options:
|
options:
|
||||||
-i [number] - Number of times to run sanity (default: $iterations)
|
-i [number] - Number of times to run sanity (default: $iterations)
|
||||||
-k [number] - Restart the cluster after this number of sanity iterations (default: $restartInterval)
|
-k [number] - Restart the cluster after this number of sanity iterations (default: $restartInterval)
|
||||||
|
-R - Restart the cluster by incrementally stopping and restarting
|
||||||
|
nodes (at the cadence specified by -k). When disabled all
|
||||||
|
nodes will be first killed then restarted (default: $rollingRestart)
|
||||||
-b - Disable leader rotation
|
-b - Disable leader rotation
|
||||||
-x - Add an extra fullnode (may be supplied multiple times)
|
-x - Add an extra fullnode (may be supplied multiple times)
|
||||||
-r - Select the RPC endpoint hosted by a node that starts as
|
-r - Select the RPC endpoint hosted by a node that starts as
|
||||||
@ -33,7 +37,7 @@ EOF
|
|||||||
|
|
||||||
cd "$(dirname "$0")"/..
|
cd "$(dirname "$0")"/..
|
||||||
|
|
||||||
while getopts "h?i:k:brx" opt; do
|
while getopts "h?i:k:brxR" opt; do
|
||||||
case $opt in
|
case $opt in
|
||||||
h | \?)
|
h | \?)
|
||||||
usage
|
usage
|
||||||
@ -53,6 +57,9 @@ while getopts "h?i:k:brx" opt; do
|
|||||||
r)
|
r)
|
||||||
walletRpcEndpoint="--rpc-port 18899"
|
walletRpcEndpoint="--rpc-port 18899"
|
||||||
;;
|
;;
|
||||||
|
R)
|
||||||
|
rollingRestart=true
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
usage "Error: unhandled option: $opt"
|
usage "Error: unhandled option: $opt"
|
||||||
;;
|
;;
|
||||||
@ -76,40 +83,100 @@ numNodes=$((2 + extraNodes))
|
|||||||
pids=()
|
pids=()
|
||||||
logs=()
|
logs=()
|
||||||
|
|
||||||
|
# Print the log file name used for a node command line.
#
# $1 - the node command, optionally followed by space-separated arguments
#
# The arguments (everything from the first space on) are dropped, then the
# directory part and a trailing ".sh" are removed from what is left, so that
# "dir/name.sh --flag" maps to "log-name.txt".
getNodeLogFile() {
  declare nodeCmd=$1
  declare scriptName
  # ${nodeCmd%% *} keeps only the text before the first space
  scriptName=$(basename "${nodeCmd%% *}" .sh)
  printf '%s\n' "log-$scriptName.txt"
}
|
||||||
|
|
||||||
|
# Launch one node command in the background.
#
# $1 - the node command line (word-split on whitespace when executed)
#
# Output (stdout and stderr) of the command goes to the file named by
# getNodeLogFile, which is removed first so each start begins with a fresh
# log.  The pid of the background process is appended to the global pids
# array and echoed.
startNode() {
  declare nodeCmd=$1
  echo "--- Start $nodeCmd"

  declare logFile
  logFile=$(getNodeLogFile "$nodeCmd")
  rm -f "$logFile"

  # Intentionally unquoted: the command string carries its own arguments.
  $nodeCmd &> "$logFile" &

  declare childPid=$!
  pids+=("$childPid")
  echo "pid: $childPid"
}
|
||||||
|
|
||||||
startNodes() {
|
startNodes() {
|
||||||
declare addLogs=false
|
declare addLogs=false
|
||||||
if [[ ${#logs[@]} -eq 0 ]]; then
|
if [[ ${#logs[@]} -eq 0 ]]; then
|
||||||
addLogs=true
|
addLogs=true
|
||||||
fi
|
fi
|
||||||
for cmd in "${nodes[@]}"; do
|
for cmd in "${nodes[@]}"; do
|
||||||
echo "--- Start $cmd"
|
startNode "$cmd"
|
||||||
baseCmd=$(basename "${cmd// */}" .sh)
|
|
||||||
declare log=log-$baseCmd.txt
|
|
||||||
rm -f "$log"
|
|
||||||
$cmd > "$log" 2>&1 &
|
|
||||||
if $addLogs; then
|
if $addLogs; then
|
||||||
logs+=("$log")
|
logs+=("$(getNodeLogFile "$cmd")")
|
||||||
fi
|
fi
|
||||||
declare pid=$!
|
|
||||||
pids+=("$pid")
|
|
||||||
echo "pid: $pid"
|
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
killNodes() {
|
killNode() {
|
||||||
echo "--- Killing nodes"
|
declare pid=$1
|
||||||
|
echo "kill $pid"
|
||||||
set +e
|
set +e
|
||||||
for pid in "${pids[@]}"; do
|
|
||||||
if kill "$pid"; then
|
if kill "$pid"; then
|
||||||
wait "$pid"
|
wait "$pid"
|
||||||
else
|
else
|
||||||
echo -e "^^^ +++\\nWarning: unable to kill $pid"
|
echo -e "^^^ +++\\nWarning: unable to kill $pid"
|
||||||
fi
|
fi
|
||||||
done
|
|
||||||
set -e
|
set -e
|
||||||
|
}
|
||||||
|
|
||||||
|
# Kill every process recorded in the global pids array (via killNode), then
# empty the array.
killNodes() {
  echo "--- Killing nodes"
  # Keep the loop variable local so it cannot clobber a caller's "pid".
  declare pid
  for pid in "${pids[@]}"; do
    killNode "$pid"
  done
  pids=()
}
|
||||||
|
|
||||||
|
# Restart the cluster one node at a time instead of all at once.
#
# Reads the global nodes, logs and pids arrays, which must all have the same
# length and be index-aligned (pids[i] is the running process for nodes[i]).
# nodes[0] must be the drone; it is left running.  Every other node is
# killed, given time to be noticed as gone, and started again.
#
# On return pids holds the drone pid followed by the pids of the restarted
# nodes, in node order.  Exits the script with status 1 when the arrays are
# inconsistent or the first node is not the drone.
rollingNodeRestart() {
  if [[ ${#logs[@]} -ne ${#nodes[@]} ]]; then
    echo "Error: log/nodes array length mismatch"
    exit 1
  fi
  if [[ ${#pids[@]} -ne ${#nodes[@]} ]]; then
    echo "Error: pids/nodes array length mismatch"
    exit 1
  fi

  # First cmd should be the drone, don't restart it.  Fail loudly rather
  # than relying on errexit to abort without any diagnostic.
  if [[ ${nodes[0]} != "multinode-demo/drone.sh" ]]; then
    echo "Error: first node is not multinode-demo/drone.sh: ${nodes[0]}"
    exit 1
  fi

  declare oldPids=("${pids[@]}")
  declare newPids=("${oldPids[0]}") # 0 = drone pid
  declare i pid cmd
  for ((i = 1; i < ${#nodes[@]}; i++)); do
    pid=${oldPids[i]}
    cmd=${nodes[i]}

    echo "--- Restarting $pid: $cmd"
    killNode "$pid"

    # Delay 20 seconds to ensure the remaining cluster nodes will
    # hit CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS (currently 15 seconds) for the
    # node that was just stopped
    echo "(sleeping for 20 seconds)"
    sleep 20

    startNode "$cmd"
    # startNode appends the pid it just launched to the end of pids.  Pick
    # it up by position rather than by value, so a new process that happens
    # to reuse one of the old pid numbers is still tracked.
    newPids+=("${pids[${#pids[@]} - 1]}")
  done

  # 'Atomically' swap in the new list: until this point pids still contains
  # every old pid as well as every newly started one.
  pids=("${newPids[@]}")
}
|
||||||
|
|
||||||
verifyLedger() {
|
verifyLedger() {
|
||||||
for ledger in bootstrap-leader fullnode; do
|
for ledger in bootstrap-leader fullnode; do
|
||||||
echo "--- $ledger ledger verification"
|
echo "--- $ledger ledger verification"
|
||||||
@ -200,10 +267,14 @@ while [[ $iteration -le $iterations ]]; do
|
|||||||
iteration=$((iteration + 1))
|
iteration=$((iteration + 1))
|
||||||
|
|
||||||
if [[ $restartInterval != never && $((iteration % restartInterval)) -eq 0 ]]; then
|
if [[ $restartInterval != never && $((iteration % restartInterval)) -eq 0 ]]; then
|
||||||
|
if $rollingRestart; then
|
||||||
|
rollingNodeRestart
|
||||||
|
else
|
||||||
killNodes
|
killNodes
|
||||||
verifyLedger
|
verifyLedger
|
||||||
startNodes
|
startNodes
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
killNodes
|
killNodes
|
||||||
|
Reference in New Issue
Block a user