Added support for multi-region cloud testnet

This commit is contained in:
Pankaj Garg 2019-03-27 22:19:55 +00:00 committed by Grimes
parent 7d0ff8e713
commit f0abd06a46
3 changed files with 154 additions and 108 deletions

View File

@ -47,6 +47,14 @@ publicNetwork=false
enableGpu=false enableGpu=false
customAddress= customAddress=
leaderRotation=true leaderRotation=true
zones=()
# containsZone NEEDLE [CANDIDATE...]
#
# Succeeds (status 0) when NEEDLE is exactly equal to one of the CANDIDATE
# arguments, and fails (status 1) otherwise, including when no candidates
# are supplied.
containsZone() {
  local wanted="$1" candidate
  shift
  for candidate in "$@"; do
    # The quoted right-hand side makes this a literal string comparison,
    # not a glob match.
    if [[ "$candidate" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}
usage() { usage() {
exitcode=0 exitcode=0
@ -125,7 +133,7 @@ while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
clientNodeCount=$OPTARG clientNodeCount=$OPTARG
;; ;;
z) z)
cloud_SetZone "$OPTARG" containsZone "$OPTARG" "${zones[@]}" || zones+=("$OPTARG")
;; ;;
b) b)
leaderRotation=false leaderRotation=false
@ -156,6 +164,8 @@ while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
done done
shift $((OPTIND - 1)) shift $((OPTIND - 1))
[[ ${#zones[@]} -gt 0 ]] || zones+=($(cloud_DefaultZone))
[[ -z $1 ]] || usage "Unexpected argument: $1" [[ -z $1 ]] || usage "Unexpected argument: $1"
if [[ $cloudProvider = ec2 ]]; then if [[ $cloudProvider = ec2 ]]; then
# EC2 keys can't be retrieved from running instances like GCE keys can so save # EC2 keys can't be retrieved from running instances like GCE keys can so save
@ -168,59 +178,8 @@ fi
case $cloudProvider in case $cloudProvider in
gce) gce)
if $enableGpu; then
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
#
# TODO: Unfortunately this image is not public. When this becomes an issue,
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
# instance boots
#
imageName="ubuntu-1804-bionic-v20181029-with-cuda-10-and-cuda-9-2"
else
# Upstream Ubuntu 18.04 LTS image
imageName="ubuntu-1804-bionic-v20181029 --image-project ubuntu-os-cloud"
fi
;; ;;
ec2) ec2)
if $enableGpu; then
#
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
#
# TODO: Unfortunately these AMIs are not public. When this becomes an issue,
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
# instance boots
#
case $region in
us-east-1)
imageName="ami-0a8bd6fb204473f78"
;;
us-west-1)
imageName="ami-07011f0795513c59d"
;;
us-west-2)
imageName="ami-0a11ef42b62b82b68"
;;
*)
usage "Unsupported region: $region"
;;
esac
else
# Select an upstream Ubuntu 18.04 AMI from https://cloud-images.ubuntu.com/locator/ec2/
case $region in
us-east-1)
imageName="ami-0a313d6098716f372"
;;
us-west-1)
imageName="ami-06397100adf427136"
;;
us-west-2)
imageName="ami-0dc34f4b016c9ce49"
;;
*)
usage "Unsupported region: $region"
;;
esac
fi
;; ;;
*) *)
echo "Error: Unknown cloud provider: $cloudProvider" echo "Error: Unknown cloud provider: $cloudProvider"
@ -313,7 +272,8 @@ EOF
( (
declare nodeName declare nodeName
declare nodeIp declare nodeIp
IFS=: read -r nodeName nodeIp _ < <(echo "${instances[0]}") declare nodeZone
IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")
# Try to ping the machine first. # Try to ping the machine first.
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done" timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
@ -325,7 +285,7 @@ EOF
# machine can be pinged... # machine can be pinged...
set -x -o pipefail set -x -o pipefail
for i in $(seq 1 30); do for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey"; then if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break break
fi fi
@ -344,13 +304,15 @@ EOF
cloud_ForEachInstance waitForStartupComplete cloud_ForEachInstance waitForStartupComplete
echo "Looking for additional fullnode instances..." echo "Looking for additional fullnode instances..."
cloud_FindInstances "$prefix-fullnode" for zone in "${zones[@]}"; do
[[ ${#instances[@]} -gt 0 ]] || { cloud_FindInstances "$prefix-$zone-fullnode"
echo "Unable to find additional fullnodes" [[ ${#instances[@]} -gt 0 ]] || {
exit 1 echo "Unable to find additional fullnodes"
} exit 1
cloud_ForEachInstance recordInstanceIp fullnodeIpList }
cloud_ForEachInstance waitForStartupComplete cloud_ForEachInstance recordInstanceIp fullnodeIpList
cloud_ForEachInstance waitForStartupComplete
done
echo "clientIpList=()" >> "$configFile" echo "clientIpList=()" >> "$configFile"
echo "clientIpListPrivate=()" >> "$configFile" echo "clientIpListPrivate=()" >> "$configFile"
@ -381,7 +343,14 @@ delete() {
# during shutdown (only applicable when leader rotation is disabled). # during shutdown (only applicable when leader rotation is disabled).
# TODO: It would be better to fully cut-off metrics reporting before any # TODO: It would be better to fully cut-off metrics reporting before any
# instances are deleted. # instances are deleted.
for filter in "$prefix-bootstrap-leader" "$prefix-"; do filters=("$prefix-bootstrap-leader")
for zone in "${zones[@]}"; do
filters+=("$prefix-$zone")
done
# Filter for all other nodes (client, blockstreamer)
filters+=("$prefix-")
for filter in "${filters[@]}"; do
echo "Searching for instances: $filter" echo "Searching for instances: $filter"
cloud_FindInstances "$filter" cloud_FindInstances "$filter"
@ -501,25 +470,37 @@ EOF
bootstrapLeaderAddress=$customAddress bootstrapLeaderAddress=$customAddress
fi fi
cloud_Initialize "$prefix" for zone in "${zones[@]}"; do
cloud_Initialize "$prefix" "$zone"
done
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \ cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
"$imageName" "$bootstrapLeaderMachineType" "$fullNodeBootDiskSizeInGb" \ "$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$startupScript" "$bootstrapLeaderAddress" "$bootDiskType"
cloud_CreateInstances "$prefix" "$prefix-fullnode" "$additionalFullNodeCount" \ num_zones=${#zones[@]}
"$imageName" "$fullNodeMachineType" "$fullNodeBootDiskSizeInGb" \ numNodesPerZone=$((additionalFullNodeCount / num_zones))
"$startupScript" "" "$bootDiskType" numLeftOverNodes=$((additionalFullNodeCount % num_zones))
count=0
for zone in "${zones[@]}"; do
count=$((count + 1))
if [[ $count -eq $num_zones ]]; then
numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
fi
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
"$startupScript" "" "$bootDiskType"
done
if [[ $clientNodeCount -gt 0 ]]; then if [[ $clientNodeCount -gt 0 ]]; then
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \ cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
"$imageName" "$clientMachineType" "$clientBootDiskSizeInGb" \ "$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
"$startupScript" "" "$bootDiskType" "$startupScript" "" "$bootDiskType"
fi fi
if $blockstreamer; then if $blockstreamer; then
cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \ cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
"$imageName" "$blockstreamerMachineType" "$fullNodeBootDiskSizeInGb" \ "$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
"$startupScript" "$blockstreamerAddress" "$bootDiskType" "$startupScript" "$blockstreamerAddress" "$bootDiskType"
fi fi

View File

@ -3,17 +3,17 @@
# Utilities for working with EC2 instances # Utilities for working with EC2 instances
# #
zone= cloud_DefaultZone() {
region= echo "us-east-1b"
cloud_SetZone() {
zone="$1"
# AWS region is zone with the last character removed
region="${zone:0:$((${#zone} - 1))}"
} }
# Set the default zone # AWS region is zone with the last character removed
cloud_SetZone "us-east-1b" __cloud_GetRegion() {
declare zone="$1"
# AWS region is zone with the last character removed
declare region="${zone:0:$((${#zone} - 1))}"
echo "$region"
}
# sshPrivateKey should be globally defined whenever this function is called. # sshPrivateKey should be globally defined whenever this function is called.
# #
@ -49,18 +49,22 @@ __cloud_FindInstances() {
declare filter="$1" declare filter="$1"
instances=() instances=()
declare name publicIp privateIp declare -a regions=("us-east-1" "us-west-1" "us-west-2")
while read -r name publicIp privateIp; do for region in "${regions[@]}"
printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp" do
instances+=("$name:$publicIp:$privateIp") declare name publicIp privateIp
done < <(aws ec2 describe-instances \ while read -r name publicIp privateIp zone; do
--region "$region" \ printf "%-30s | publicIp=%-16s privateIp=%s zone=%s\n" "$name" "$publicIp" "$privateIp" "$zone"
--filters \ instances+=("$name:$publicIp:$privateIp:$zone")
"Name=tag:name,Values=$filter" \ done < <(aws ec2 describe-instances \
"Name=instance-state-name,Values=pending,running" \ --region "$region" \
--query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress]" \ --filters \
--output text \ "Name=tag:name,Values=$filter" \
) "Name=instance-state-name,Values=pending,running" \
--query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress,Placement.AvailabilityZone]" \
--output text \
)
done
} }
# #
@ -111,6 +115,8 @@ cloud_FindInstance() {
# This function will be called before |cloud_CreateInstances| # This function will be called before |cloud_CreateInstances|
cloud_Initialize() { cloud_Initialize() {
declare networkName="$1" declare networkName="$1"
declare zone="$2"
declare region=$(__cloud_GetRegion "$zone")
__cloud_SshPrivateKeyCheck __cloud_SshPrivateKeyCheck
( (
@ -152,11 +158,53 @@ cloud_CreateInstances() {
declare networkName="$1" declare networkName="$1"
declare namePrefix="$2" declare namePrefix="$2"
declare numNodes="$3" declare numNodes="$3"
declare imageName="$4" declare enableGpu="$4"
declare machineType="$5" declare machineType="$5"
declare optionalBootDiskSize="$6" declare zone="$6"
declare optionalStartupScript="$7" declare optionalBootDiskSize="$7"
declare optionalAddress="$8" declare optionalStartupScript="$8"
declare optionalAddress="$9"
declare region=$(__cloud_GetRegion "$zone")
if $enableGpu; then
#
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
#
# TODO: Unfortunately these AMIs are not public. When this becomes an issue,
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
# instance boots
#
case $region in
us-east-1)
imageName="ami-0a8bd6fb204473f78"
;;
us-west-1)
imageName="ami-07011f0795513c59d"
;;
us-west-2)
imageName="ami-0a11ef42b62b82b68"
;;
*)
usage "Unsupported region: $region"
;;
esac
else
# Select an upstream Ubuntu 18.04 AMI from https://cloud-images.ubuntu.com/locator/ec2/
case $region in
us-east-1)
imageName="ami-0a313d6098716f372"
;;
us-west-1)
imageName="ami-06397100adf427136"
;;
us-west-2)
imageName="ami-0dc34f4b016c9ce49"
;;
*)
usage "Unsupported region: $region"
;;
esac
fi
declare -a args declare -a args
args=( args=(
@ -225,6 +273,8 @@ cloud_DeleteInstances() {
fi fi
declare names=("${instances[@]/:*/}") declare names=("${instances[@]/:*/}")
declare zones=("${instances[@]/*:/}")
declare region=$(__cloud_GetRegion "${zones[0]}")
( (
set -x set -x

View File

@ -4,12 +4,10 @@
# #
# Default zone # Default zone
zone="us-west1-b" cloud_DefaultZone() {
cloud_SetZone() { echo "us-west1-b"
zone="$1"
} }
# #
# __cloud_FindInstances # __cloud_FindInstances
# #
@ -30,13 +28,13 @@ __cloud_FindInstances() {
instances=() instances=()
declare name zone publicIp privateIp status declare name zone publicIp privateIp status
while read -r name publicIp privateIp status; do while read -r name publicIp privateIp status zone; do
printf "%-30s | publicIp=%-16s privateIp=%s status=%s\n" "$name" "$publicIp" "$privateIp" "$status" printf "%-30s | publicIp=%-16s privateIp=%s status=%s zone=%s\n" "$name" "$publicIp" "$privateIp" "$status" "$zone"
instances+=("$name:$publicIp:$privateIp") instances+=("$name:$publicIp:$privateIp:$zone")
done < <(gcloud compute instances list \ done < <(gcloud compute instances list \
--filter "$filter" \ --filter "$filter" \
--format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)') --format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status,zone)')
} }
# #
# cloud_FindInstances [namePrefix] # cloud_FindInstances [namePrefix]
@ -119,12 +117,26 @@ cloud_CreateInstances() {
declare networkName="$1" declare networkName="$1"
declare namePrefix="$2" declare namePrefix="$2"
declare numNodes="$3" declare numNodes="$3"
declare imageName="$4" declare enableGpu="$4"
declare machineType="$5" declare machineType="$5"
declare optionalBootDiskSize="$6" declare zone="$6"
declare optionalStartupScript="$7" declare optionalBootDiskSize="$7"
declare optionalAddress="$8" declare optionalStartupScript="$8"
declare optionalBootDiskType="$9" declare optionalAddress="$9"
declare optionalBootDiskType="${10}"
if $enableGpu; then
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
#
# TODO: Unfortunately this image is not public. When this becomes an issue,
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
# instance boots
#
imageName="ubuntu-1804-bionic-v20181029-with-cuda-10-and-cuda-9-2"
else
# Upstream Ubuntu 18.04 LTS image
imageName="ubuntu-1804-bionic-v20181029 --image-project ubuntu-os-cloud"
fi
declare -a nodes declare -a nodes
if [[ $numNodes = 1 ]]; then if [[ $numNodes = 1 ]]; then
@ -192,11 +204,13 @@ cloud_DeleteInstances() {
echo No instances to delete echo No instances to delete
return return
fi fi
declare names=("${instances[@]/:*/}") declare names=("${instances[@]/:*/}")
declare zones=("${instances[@]/*:/}")
( (
set -x set -x
gcloud beta compute instances delete --zone "$zone" --quiet "${names[@]}" gcloud beta compute instances delete --zone "${zones[0]}" --quiet "${names[@]}"
) )
} }
@ -213,6 +227,7 @@ cloud_FetchFile() {
declare publicIp="$2" declare publicIp="$2"
declare remoteFile="$3" declare remoteFile="$3"
declare localFile="$4" declare localFile="$4"
declare zone="$5"
( (
set -x set -x