Add provision in testnet scripts to ignore validator nodes that failed to boot up (#3972)
* Skip writing to the config file if the node didn't boot up
* Detect dead nodes more quickly
This commit is contained in:
55
net/gce.sh
55
net/gce.sh
@ -55,6 +55,7 @@ blockstreamer=false
|
|||||||
fullNodeBootDiskSizeInGb=1000
|
fullNodeBootDiskSizeInGb=1000
|
||||||
clientBootDiskSizeInGb=75
|
clientBootDiskSizeInGb=75
|
||||||
externalNodes=false
|
externalNodes=false
|
||||||
|
failOnValidatorBootupFailure=true
|
||||||
|
|
||||||
publicNetwork=false
|
publicNetwork=false
|
||||||
enableGpu=false
|
enableGpu=false
|
||||||
@ -95,6 +96,7 @@ Manage testnet instances
|
|||||||
zone
|
zone
|
||||||
-x - append to the existing configuration instead of creating a
|
-x - append to the existing configuration instead of creating a
|
||||||
new configuration
|
new configuration
|
||||||
|
-f - Discard validator nodes that didn't bootup successfully
|
||||||
|
|
||||||
create-specific options:
|
create-specific options:
|
||||||
-n [number] - Number of additional fullnodes (default: $additionalFullNodeCount)
|
-n [number] - Number of additional fullnodes (default: $additionalFullNodeCount)
|
||||||
@ -133,7 +135,7 @@ shift
|
|||||||
[[ $command = create || $command = config || $command = info || $command = delete ]] ||
|
[[ $command = create || $command = config || $command = info || $command = delete ]] ||
|
||||||
usage "Invalid command: $command"
|
usage "Invalid command: $command"
|
||||||
|
|
||||||
while getopts "h?p:Pn:c:z:gG:a:d:bux" opt; do
|
while getopts "h?p:Pn:c:z:gG:a:d:buxf" opt; do
|
||||||
case $opt in
|
case $opt in
|
||||||
h | \?)
|
h | \?)
|
||||||
usage
|
usage
|
||||||
@ -179,6 +181,9 @@ while getopts "h?p:Pn:c:z:gG:a:d:bux" opt; do
|
|||||||
x)
|
x)
|
||||||
externalNodes=true
|
externalNodes=true
|
||||||
;;
|
;;
|
||||||
|
f)
|
||||||
|
failOnValidatorBootupFailure=false
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
usage "unhandled option: $opt"
|
usage "unhandled option: $opt"
|
||||||
;;
|
;;
|
||||||
@ -263,23 +268,27 @@ EOF
|
|||||||
declare publicIp="$2"
|
declare publicIp="$2"
|
||||||
declare privateIp="$3"
|
declare privateIp="$3"
|
||||||
|
|
||||||
declare arrayName="$5"
|
declare failOnFailure="$5"
|
||||||
|
declare arrayName="$6"
|
||||||
|
|
||||||
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
# This check should eventually be moved to cloud provider specific script
|
||||||
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile"
|
if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
|
||||||
}
|
if $failOnFailure; then
|
||||||
|
exit 1
|
||||||
waitForStartupComplete() {
|
else
|
||||||
declare name="$1"
|
return 0
|
||||||
declare publicIp="$2"
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
ok=true
|
||||||
echo "Waiting for $name to finish booting..."
|
echo "Waiting for $name to finish booting..."
|
||||||
(
|
(
|
||||||
set -x +e
|
set -x +e
|
||||||
for i in $(seq 1 60); do
|
for i in $(seq 1 20); do
|
||||||
timeout 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
|
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
|
||||||
ret=$?
|
ret=$?
|
||||||
if [[ $ret -eq 0 ]]; then
|
if [[ $ret -eq 0 ]]; then
|
||||||
|
echo "$name has booted."
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
sleep 2
|
sleep 2
|
||||||
@ -287,8 +296,16 @@ EOF
|
|||||||
done
|
done
|
||||||
echo "$name failed to boot."
|
echo "$name failed to boot."
|
||||||
exit 1
|
exit 1
|
||||||
)
|
) || ok=false
|
||||||
echo "$name has booted."
|
|
||||||
|
if ! $ok; then
|
||||||
|
if $failOnFailure; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
||||||
|
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
if $externalNodes; then
|
if $externalNodes; then
|
||||||
@ -333,8 +350,7 @@ EOF
|
|||||||
|
|
||||||
echo "fullnodeIpList=()" >> "$configFile"
|
echo "fullnodeIpList=()" >> "$configFile"
|
||||||
echo "fullnodeIpListPrivate=()" >> "$configFile"
|
echo "fullnodeIpListPrivate=()" >> "$configFile"
|
||||||
cloud_ForEachInstance recordInstanceIp fullnodeIpList
|
cloud_ForEachInstance recordInstanceIp true fullnodeIpList
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
||||||
@ -345,8 +361,7 @@ EOF
|
|||||||
echo "Unable to find additional fullnodes"
|
echo "Unable to find additional fullnodes"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
cloud_ForEachInstance recordInstanceIp fullnodeIpList
|
cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -359,8 +374,7 @@ EOF
|
|||||||
echo "Looking for client bencher instances..."
|
echo "Looking for client bencher instances..."
|
||||||
cloud_FindInstances "$prefix-client"
|
cloud_FindInstances "$prefix-client"
|
||||||
[[ ${#instances[@]} -eq 0 ]] || {
|
[[ ${#instances[@]} -eq 0 ]] || {
|
||||||
cloud_ForEachInstance recordInstanceIp clientIpList
|
cloud_ForEachInstance recordInstanceIp true clientIpList
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if $externalNodes; then
|
if $externalNodes; then
|
||||||
@ -372,8 +386,7 @@ EOF
|
|||||||
echo "Looking for blockstreamer instances..."
|
echo "Looking for blockstreamer instances..."
|
||||||
cloud_FindInstances "$prefix-blockstreamer"
|
cloud_FindInstances "$prefix-blockstreamer"
|
||||||
[[ ${#instances[@]} -eq 0 ]] || {
|
[[ ${#instances[@]} -eq 0 ]] || {
|
||||||
cloud_ForEachInstance recordInstanceIp blockstreamerIpList
|
cloud_ForEachInstance recordInstanceIp true blockstreamerIpList
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Wrote $configFile"
|
echo "Wrote $configFile"
|
||||||
|
Reference in New Issue
Block a user