Add provision in testnet scripts to ignore validator nodes that failed to boot up (#3972)

* Skip writing to config file if the node didn't boot up
* Detect dead nodes more quickly
This commit is contained in:
Pankaj Garg
2019-04-24 16:23:26 -07:00
committed by GitHub
parent 9a40ad76bd
commit 29a25990d3

View File

@ -55,6 +55,7 @@ blockstreamer=false
fullNodeBootDiskSizeInGb=1000 fullNodeBootDiskSizeInGb=1000
clientBootDiskSizeInGb=75 clientBootDiskSizeInGb=75
externalNodes=false externalNodes=false
failOnValidatorBootupFailure=true
publicNetwork=false publicNetwork=false
enableGpu=false enableGpu=false
@ -95,6 +96,7 @@ Manage testnet instances
zone zone
-x - append to the existing configuration instead of creating a -x - append to the existing configuration instead of creating a
new configuration new configuration
-f - Discard validator nodes that didn't bootup successfully
create-specific options: create-specific options:
-n [number] - Number of additional fullnodes (default: $additionalFullNodeCount) -n [number] - Number of additional fullnodes (default: $additionalFullNodeCount)
@ -133,7 +135,7 @@ shift
[[ $command = create || $command = config || $command = info || $command = delete ]] || [[ $command = create || $command = config || $command = info || $command = delete ]] ||
usage "Invalid command: $command" usage "Invalid command: $command"
while getopts "h?p:Pn:c:z:gG:a:d:bux" opt; do while getopts "h?p:Pn:c:z:gG:a:d:buxf" opt; do
case $opt in case $opt in
h | \?) h | \?)
usage usage
@ -179,6 +181,9 @@ while getopts "h?p:Pn:c:z:gG:a:d:bux" opt; do
x) x)
externalNodes=true externalNodes=true
;; ;;
f)
failOnValidatorBootupFailure=false
;;
*) *)
usage "unhandled option: $opt" usage "unhandled option: $opt"
;; ;;
@ -263,23 +268,27 @@ EOF
declare publicIp="$2" declare publicIp="$2"
declare privateIp="$3" declare privateIp="$3"
declare arrayName="$5" declare failOnFailure="$5"
declare arrayName="$6"
echo "$arrayName+=($publicIp) # $name" >> "$configFile" # This check should eventually be moved to cloud provider specific script
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile" if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
} if $failOnFailure; then
exit 1
waitForStartupComplete() { else
declare name="$1" return 0
declare publicIp="$2" fi
fi
ok=true
echo "Waiting for $name to finish booting..." echo "Waiting for $name to finish booting..."
( (
set -x +e set -x +e
for i in $(seq 1 60); do for i in $(seq 1 20); do
timeout 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
ret=$? ret=$?
if [[ $ret -eq 0 ]]; then if [[ $ret -eq 0 ]]; then
echo "$name has booted."
exit 0 exit 0
fi fi
sleep 2 sleep 2
@ -287,8 +296,16 @@ EOF
done done
echo "$name failed to boot." echo "$name failed to boot."
exit 1 exit 1
) ) || ok=false
echo "$name has booted."
if ! $ok; then
if $failOnFailure; then
exit 1
fi
else
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile"
fi
} }
if $externalNodes; then if $externalNodes; then
@ -333,8 +350,7 @@ EOF
echo "fullnodeIpList=()" >> "$configFile" echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile" echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp fullnodeIpList cloud_ForEachInstance recordInstanceIp true fullnodeIpList
cloud_ForEachInstance waitForStartupComplete
fi fi
if [[ $additionalFullNodeCount -gt 0 ]]; then if [[ $additionalFullNodeCount -gt 0 ]]; then
@ -345,8 +361,7 @@ EOF
echo "Unable to find additional fullnodes" echo "Unable to find additional fullnodes"
exit 1 exit 1
} }
cloud_ForEachInstance recordInstanceIp fullnodeIpList cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
cloud_ForEachInstance waitForStartupComplete
done done
fi fi
@ -359,8 +374,7 @@ EOF
echo "Looking for client bencher instances..." echo "Looking for client bencher instances..."
cloud_FindInstances "$prefix-client" cloud_FindInstances "$prefix-client"
[[ ${#instances[@]} -eq 0 ]] || { [[ ${#instances[@]} -eq 0 ]] || {
cloud_ForEachInstance recordInstanceIp clientIpList cloud_ForEachInstance recordInstanceIp true clientIpList
cloud_ForEachInstance waitForStartupComplete
} }
if $externalNodes; then if $externalNodes; then
@ -372,8 +386,7 @@ EOF
echo "Looking for blockstreamer instances..." echo "Looking for blockstreamer instances..."
cloud_FindInstances "$prefix-blockstreamer" cloud_FindInstances "$prefix-blockstreamer"
[[ ${#instances[@]} -eq 0 ]] || { [[ ${#instances[@]} -eq 0 ]] || {
cloud_ForEachInstance recordInstanceIp blockstreamerIpList cloud_ForEachInstance recordInstanceIp true blockstreamerIpList
cloud_ForEachInstance waitForStartupComplete
} }
echo "Wrote $configFile" echo "Wrote $configFile"