GCE-based nodes now reboot on maintenance events instead of terminating (#5861)

This commit is contained in:
Michael Vines
2019-09-10 12:30:06 -07:00
committed by GitHub
parent 0d7efe5176
commit fc4aa71193
4 changed files with 96 additions and 44 deletions

View File

@ -3,8 +3,6 @@ set -e
cd "$(dirname "$0")"/../..
echo "$(date) | $0 $*" > client.log
deployMethod="$1"
entrypointIp="$2"
clientToRun="$3"
@ -49,17 +47,6 @@ skip)
exit 1
esac
(
sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh
) > oom-monitor.log 2>&1 &
echo $! > oom-monitor.pid
scripts/fd-monitor.sh > fd-monitor.log 2>&1 &
echo $! > fd-monitor.pid
scripts/net-stats.sh > net-stats.log 2>&1 &
echo $! > net-stats.pid
! tmux list-sessions || tmux kill-session
case $clientToRun in
solana-bench-tps)
net/scripts/rsync-retry.sh -vPrc \
@ -97,6 +84,26 @@ solana-bench-exchange)
exit 1
esac
cat > ~/solana/on-reboot <<EOF
#!/usr/bin/env bash
cd ~/solana
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
echo "$(date) | $0 $*" >> client.log
(
sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh
) > oom-monitor.log 2>&1 &
echo $! > oom-monitor.pid
scripts/fd-monitor.sh > fd-monitor.log 2>&1 &
echo $! > fd-monitor.pid
scripts/net-stats.sh > net-stats.log 2>&1 &
echo $! > net-stats.pid
! tmux list-sessions || tmux kill-session
tmux new -s "$clientToRun" -d "
while true; do
echo === Client start: \$(date) | tee -a client.log
@ -106,5 +113,11 @@ tmux new -s "$clientToRun" -d "
$metricsWriteDatapoint 'testnet-deploy client-complete=1'
done
"
EOF
chmod +x ~/solana/on-reboot
echo "@reboot ~/solana/on-reboot" | crontab -
~/solana/on-reboot
sleep 1
tmux capture-pane -t "$clientToRun" -p -S -100