From 0d77d5a076665b396d046a07f2ccd003fb882e90 Mon Sep 17 00:00:00 2001 From: Michael Vines Date: Sat, 11 May 2019 22:47:49 -0700 Subject: [PATCH] v0.14: net/ improvements (#4256) automerge --- net/gce.sh | 86 ++++++++++++++++++------------------- net/scripts/ec2-provider.sh | 25 +++++------ 2 files changed, 53 insertions(+), 58 deletions(-) diff --git a/net/gce.sh b/net/gce.sh index b0e9465741..47a59fa71d 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -327,6 +327,40 @@ EOF buildSshOptions + fetchPrivateKey() { + declare nodeName + declare nodeIp + declare nodeZone + IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}") + + # Make sure the machine is alive or pingable + timeout_sec=90 + cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec" + + if [[ ! -r $sshPrivateKey ]]; then + echo "Fetching $sshPrivateKey from $nodeName" + + # Try to scp in a couple times, sshd may not yet be up even though the + # machine can be pinged... + ( + set -o pipefail + for i in $(seq 1 30); do + set -x + cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" && + cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" && + break + set +x + + sleep 1 + echo "Retry $i..." + done + ) + + chmod 400 "$sshPrivateKey" + ls -l "$sshPrivateKey" + fi + } + recordInstanceIp() { declare name="$1" declare publicIp="$2" @@ -349,15 +383,19 @@ EOF ok=true echo "Waiting for $name to finish booting..." ( - set -x +e - for i in $(seq 1 60); do - timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" + set +e + fetchPrivateKey || exit 1 + for i in $(seq 1 30); do + ( + set -x + timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" + ) ret=$? if [[ $ret -eq 0 ]]; then echo "$name has booted." exit 0 fi - sleep 2 + sleep 5 echo "Retry $i..." done echo "$name failed to boot." @@ -383,41 +421,6 @@ EOF fi } - fetchPrivateKey() { - ( - declare nodeName - declare nodeIp - declare nodeZone - IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}") - - # Make sure the machine is alive or pingable - timeout_sec=90 - cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec" - - if [[ ! -r $sshPrivateKey ]]; then - echo "Fetching $sshPrivateKey from $nodeName" - - # Try to scp in a couple times, sshd may not yet be up even though the - # machine can be pinged... - set -x -o pipefail - for i in $(seq 1 30); do - if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then - if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then - break - fi - fi - - sleep 1 - echo "Retry $i..." - done - - chmod 400 "$sshPrivateKey" - ls -l "$sshPrivateKey" - fi - ) - - } - if $externalNodes; then echo "Bootstrap leader is already configured" else @@ -428,8 +431,6 @@ EOF exit 1 } - fetchPrivateKey - echo "fullnodeIpList=()" >> "$configFile" echo "fullnodeIpListPrivate=()" >> "$configFile" cloud_ForEachInstance recordInstanceIp true fullnodeIpList @@ -440,7 +441,6 @@ EOF echo "Looking for additional fullnode instances in $zone ..." cloud_FindInstances "$prefix-$zone-fullnode" if [[ ${#instances[@]} -gt 0 ]]; then - fetchPrivateKey cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList else echo "Unable to find additional fullnodes" @@ -697,7 +697,7 @@ info) ipAddress=${clientIpList[$i]} ipAddressPrivate=${clientIpListPrivate[$i]} zone=${clientIpListZone[$i]} - printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone" + printNode client "$ipAddress" "$ipAddressPrivate" "$zone" done for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do diff --git a/net/scripts/ec2-provider.sh b/net/scripts/ec2-provider.sh index f1e421ee4e..ca546a3de4 100755 --- a/net/scripts/ec2-provider.sh +++ b/net/scripts/ec2-provider.sh @@ -121,20 +121,15 @@ cloud_Initialize() { region=$(__cloud_GetRegion "$zone") __cloud_SshPrivateKeyCheck - ( - set -x - aws ec2 delete-key-pair --region "$region" --key-name "$networkName" - aws ec2 import-key-pair --region "$region" --key-name "$networkName" \ - --public-key-material file://"${sshPrivateKey}".pub - ) + aws ec2 delete-key-pair --region "$region" --key-name "$networkName" + aws ec2 import-key-pair --region "$region" --key-name "$networkName" \ + --public-key-material file://"${sshPrivateKey}".pub - ( - set -x - aws ec2 delete-security-group --region "$region" --group-name "$networkName" || true - aws ec2 create-security-group --region "$region" --group-name "$networkName" --description "Created automatically by $0" - rules=$(cat "$(dirname "${BASH_SOURCE[0]}")"/ec2-security-group-config.json) - aws ec2 authorize-security-group-ingress --region "$region" --group-name "$networkName" --cli-input-json "$rules" - ) + declare rules + rules=$(cat "$(dirname "${BASH_SOURCE[0]}")"/ec2-security-group-config.json) + aws ec2 delete-security-group --region "$region" --group-name "$networkName" || true + aws ec2 create-security-group --region "$region" --group-name "$networkName" --description "Created automatically by $0" + aws ec2 authorize-security-group-ingress --output table --region "$region" --group-name "$networkName" --cli-input-json "$rules" } # @@ -273,7 +268,7 @@ cloud_CreateInstances() { ( set -x - aws ec2 run-instances "${args[@]}" + aws ec2 run-instances --output table "${args[@]}" ) if [[ -n $optionalAddress ]]; then @@ -318,7 +313,7 @@ cloud_DeleteInstances() { region=$(__cloud_GetRegion "$zone") ( set -x - aws ec2 terminate-instances --region "$region" --instance-ids "$name" + aws ec2 terminate-instances --output table --region "$region" --instance-ids "$name" ) done