Add more error checking and better logging; avoid hitting the GCP login quota

This commit is contained in:
Michael Vines
2018-07-15 09:23:35 -07:00
parent 60848b9d95
commit 4e177877c9

View File

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash -e
# #
# Refreshes the Solana software running on the Testnet full nodes # Refreshes the Solana software running on the Testnet full nodes
# #
@ -30,15 +30,17 @@ while read -r vmName vmZone status; do
done < <(gcloud compute instances list --filter="labels.testnet-mode=validator" --format 'value(name,zone,status)') done < <(gcloud compute instances list --filter="labels.testnet-mode=validator" --format 'value(name,zone,status)')
echo "--- Refreshing" echo "--- Refreshing leader"
leader=true leader=true
logfiles=()
for info in "${vmlist[@]}"; do for info in "${vmlist[@]}"; do
vmName=${info%:*} vmName=${info%:*}
vmZone=${info#*:} vmZone=${info#*:}
echo "Starting refresh for $vmName" echo "Starting refresh for $vmName"
( (
echo "--- Processing $vmName in zone $vmZone" SECONDS=0
echo "--- $vmName in zone $vmZone"
if $leader; then if $leader; then
nodeConfig="mode=leader+drone enable-cuda=1 metrics-config=$SOLANA_METRICS_CONFIG" nodeConfig="mode=leader+drone enable-cuda=1 metrics-config=$SOLANA_METRICS_CONFIG"
else else
@ -52,11 +54,13 @@ for info in "${vmlist[@]}"; do
snap info solana snap info solana
sudo snap logs solana -n200 sudo snap logs solana -n200
EOF EOF
set -x set -x
gcloud compute scp --zone "$vmZone" "autogen-refresh-$vmName.sh" "$vmName": gcloud compute scp --zone "$vmZone" "autogen-refresh-$vmName.sh" "$vmName":
gcloud compute ssh "$vmName" --zone "$vmZone" \ gcloud compute ssh "$vmName" --zone "$vmZone" \
--ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -t" \ --ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -t" \
--command="bash ./autogen-refresh-$vmName.sh" --command="bash ./autogen-refresh-$vmName.sh"
echo "Succeeded in ${SECONDS} seconds"
) > "log-$vmName.txt" 2>&1 & ) > "log-$vmName.txt" 2>&1 &
if $leader; then if $leader; then
@ -64,20 +68,27 @@ EOF
# Wait for the leader to initialize before starting the validators # Wait for the leader to initialize before starting the validators
# TODO: Remove this limitation eventually. # TODO: Remove this limitation eventually.
wait wait
cat "log-$vmName.txt"
echo "--- Refreshing validators"
else
# Slow down deployment to ~30 machines a minute to avoid triggering GCP login
# quota limits (the previous |scp| and |ssh| each count as a login)
sleep 2
logfiles+=("log-$vmName.txt")
fi fi
leader=false leader=false
done done
echo Waiting for validators... echo --- Waiting for validators
wait wait
for info in "${vmlist[@]}"; do for log in "${logfiles[@]}"; do
vmName=${info%:*} cat "$log"
cat "log-$vmName.txt"
done done
echo "--- Testnet sanity test" echo "--- Testnet sanity test"
set -e
USE_SNAP=1 ./multinode-demo/test/wallet-sanity.sh testnet.solana.com USE_SNAP=1 ./multinode-demo/test/wallet-sanity.sh testnet.solana.com
exit 0 exit 0