Added support for multi-region cloud testnet
This commit is contained in:
parent
7d0ff8e713
commit
f0abd06a46
119
net/gce.sh
119
net/gce.sh
@ -47,6 +47,14 @@ publicNetwork=false
|
|||||||
enableGpu=false
|
enableGpu=false
|
||||||
customAddress=
|
customAddress=
|
||||||
leaderRotation=true
|
leaderRotation=true
|
||||||
|
zones=()
|
||||||
|
|
||||||
|
# containsZone needle [candidate ...]
#
# Returns 0 when the first argument is exactly equal to one of the remaining
# arguments, and 1 otherwise (including when no arguments or no candidates
# are supplied).  The -z option handler uses it to avoid adding the same zone
# to the zones array twice.
containsZone() {
  # Nothing to look for: report "not found" instead of letting shift fail.
  [[ $# -gt 0 ]] || return 1

  local match="$1"
  shift

  local e
  for e in "$@"; do
    # Quoted right-hand side: literal comparison, not a glob pattern match.
    [[ "$e" == "$match" ]] && return 0
  done
  return 1
}
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
exitcode=0
|
exitcode=0
|
||||||
@ -125,7 +133,7 @@ while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
|
|||||||
clientNodeCount=$OPTARG
|
clientNodeCount=$OPTARG
|
||||||
;;
|
;;
|
||||||
z)
|
z)
|
||||||
cloud_SetZone "$OPTARG"
|
containsZone "$OPTARG" "${zones[@]}" || zones+=("$OPTARG")
|
||||||
;;
|
;;
|
||||||
b)
|
b)
|
||||||
leaderRotation=false
|
leaderRotation=false
|
||||||
@ -156,6 +164,8 @@ while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
|
|||||||
done
|
done
|
||||||
shift $((OPTIND - 1))
|
shift $((OPTIND - 1))
|
||||||
|
|
||||||
|
# No -z option was given: fall back to the cloud provider's default zone.
# The command substitution is quoted so the zone is appended as exactly one
# array element, with no word splitting or pathname expansion.
[[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)")
|
||||||
|
|
||||||
[[ -z $1 ]] || usage "Unexpected argument: $1"
|
[[ -z $1 ]] || usage "Unexpected argument: $1"
|
||||||
if [[ $cloudProvider = ec2 ]]; then
|
if [[ $cloudProvider = ec2 ]]; then
|
||||||
# EC2 keys can't be retrieved from running instances like GCE keys can so save
|
# EC2 keys can't be retrieved from running instances like GCE keys can so save
|
||||||
@ -168,59 +178,8 @@ fi
|
|||||||
|
|
||||||
case $cloudProvider in
|
case $cloudProvider in
|
||||||
gce)
|
gce)
|
||||||
if $enableGpu; then
|
|
||||||
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
|
||||||
#
|
|
||||||
# TODO: Unfortunately this image is not public. When this becomes an issue,
|
|
||||||
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
|
|
||||||
# instance boots
|
|
||||||
#
|
|
||||||
imageName="ubuntu-1804-bionic-v20181029-with-cuda-10-and-cuda-9-2"
|
|
||||||
else
|
|
||||||
# Upstream Ubuntu 18.04 LTS image
|
|
||||||
imageName="ubuntu-1804-bionic-v20181029 --image-project ubuntu-os-cloud"
|
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
ec2)
|
ec2)
|
||||||
if $enableGpu; then
|
|
||||||
#
|
|
||||||
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
|
||||||
#
|
|
||||||
# TODO: Unfortunately these AMIs are not public. When this becomes an issue,
|
|
||||||
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
|
|
||||||
# instance boots
|
|
||||||
#
|
|
||||||
case $region in
|
|
||||||
us-east-1)
|
|
||||||
imageName="ami-0a8bd6fb204473f78"
|
|
||||||
;;
|
|
||||||
us-west-1)
|
|
||||||
imageName="ami-07011f0795513c59d"
|
|
||||||
;;
|
|
||||||
us-west-2)
|
|
||||||
imageName="ami-0a11ef42b62b82b68"
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
usage "Unsupported region: $region"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
else
|
|
||||||
# Select an upstream Ubuntu 18.04 AMI from https://cloud-images.ubuntu.com/locator/ec2/
|
|
||||||
case $region in
|
|
||||||
us-east-1)
|
|
||||||
imageName="ami-0a313d6098716f372"
|
|
||||||
;;
|
|
||||||
us-west-1)
|
|
||||||
imageName="ami-06397100adf427136"
|
|
||||||
;;
|
|
||||||
us-west-2)
|
|
||||||
imageName="ami-0dc34f4b016c9ce49"
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
usage "Unsupported region: $region"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Error: Unknown cloud provider: $cloudProvider"
|
echo "Error: Unknown cloud provider: $cloudProvider"
|
||||||
@ -313,7 +272,8 @@ EOF
|
|||||||
(
|
(
|
||||||
declare nodeName
|
declare nodeName
|
||||||
declare nodeIp
|
declare nodeIp
|
||||||
IFS=: read -r nodeName nodeIp _ < <(echo "${instances[0]}")
|
declare nodeZone
|
||||||
|
IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")
|
||||||
|
|
||||||
# Try to ping the machine first.
|
# Try to ping the machine first.
|
||||||
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
|
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
|
||||||
@ -325,7 +285,7 @@ EOF
|
|||||||
# machine can be pinged...
|
# machine can be pinged...
|
||||||
set -x -o pipefail
|
set -x -o pipefail
|
||||||
for i in $(seq 1 30); do
|
for i in $(seq 1 30); do
|
||||||
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -344,13 +304,15 @@ EOF
|
|||||||
cloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
|
||||||
echo "Looking for additional fullnode instances..."
|
echo "Looking for additional fullnode instances..."
|
||||||
cloud_FindInstances "$prefix-fullnode"
|
for zone in "${zones[@]}"; do
|
||||||
[[ ${#instances[@]} -gt 0 ]] || {
|
cloud_FindInstances "$prefix-$zone-fullnode"
|
||||||
echo "Unable to find additional fullnodes"
|
[[ ${#instances[@]} -gt 0 ]] || {
|
||||||
exit 1
|
echo "Unable to find additional fullnodes"
|
||||||
}
|
exit 1
|
||||||
cloud_ForEachInstance recordInstanceIp fullnodeIpList
|
}
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance recordInstanceIp fullnodeIpList
|
||||||
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
done
|
||||||
|
|
||||||
echo "clientIpList=()" >> "$configFile"
|
echo "clientIpList=()" >> "$configFile"
|
||||||
echo "clientIpListPrivate=()" >> "$configFile"
|
echo "clientIpListPrivate=()" >> "$configFile"
|
||||||
@ -381,7 +343,14 @@ delete() {
|
|||||||
# during shutdown (only applicable when leader rotation is disabled).
|
# during shutdown (only applicable when leader rotation is disabled).
|
||||||
# TODO: It would be better to fully cut-off metrics reporting before any
|
# TODO: It would be better to fully cut-off metrics reporting before any
|
||||||
# instances are deleted.
|
# instances are deleted.
|
||||||
for filter in "$prefix-bootstrap-leader" "$prefix-"; do
|
filters=("$prefix-bootstrap-leader")
|
||||||
|
for zone in "${zones[@]}"; do
|
||||||
|
filters+=("$prefix-$zone")
|
||||||
|
done
|
||||||
|
# Filter for all other nodes (client, blockstreamer)
|
||||||
|
filters+=("$prefix-")
|
||||||
|
|
||||||
|
for filter in "${filters[@]}"; do
|
||||||
echo "Searching for instances: $filter"
|
echo "Searching for instances: $filter"
|
||||||
cloud_FindInstances "$filter"
|
cloud_FindInstances "$filter"
|
||||||
|
|
||||||
@ -501,25 +470,37 @@ EOF
|
|||||||
bootstrapLeaderAddress=$customAddress
|
bootstrapLeaderAddress=$customAddress
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cloud_Initialize "$prefix"
|
for zone in "${zones[@]}"; do
|
||||||
|
cloud_Initialize "$prefix" "$zone"
|
||||||
|
done
|
||||||
|
|
||||||
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
||||||
"$imageName" "$bootstrapLeaderMachineType" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType"
|
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType"
|
||||||
|
|
||||||
cloud_CreateInstances "$prefix" "$prefix-fullnode" "$additionalFullNodeCount" \
|
num_zones=${#zones[@]}
|
||||||
"$imageName" "$fullNodeMachineType" "$fullNodeBootDiskSizeInGb" \
|
numNodesPerZone=$((additionalFullNodeCount / num_zones))
|
||||||
"$startupScript" "" "$bootDiskType"
|
numLeftOverNodes=$((additionalFullNodeCount % num_zones))
|
||||||
|
count=0
|
||||||
|
for zone in "${zones[@]}"; do
|
||||||
|
count=$((count + 1))
|
||||||
|
if [[ $count -eq $num_zones ]]; then
|
||||||
|
numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
|
||||||
|
fi
|
||||||
|
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
||||||
|
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
||||||
|
"$startupScript" "" "$bootDiskType"
|
||||||
|
done
|
||||||
|
|
||||||
if [[ $clientNodeCount -gt 0 ]]; then
|
if [[ $clientNodeCount -gt 0 ]]; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
||||||
"$imageName" "$clientMachineType" "$clientBootDiskSizeInGb" \
|
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType"
|
"$startupScript" "" "$bootDiskType"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if $blockstreamer; then
|
if $blockstreamer; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
|
cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
|
||||||
"$imageName" "$blockstreamerMachineType" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$blockstreamerAddress" "$bootDiskType"
|
"$startupScript" "$blockstreamerAddress" "$bootDiskType"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -3,17 +3,17 @@
|
|||||||
# Utilities for working with EC2 instances
|
# Utilities for working with EC2 instances
|
||||||
#
|
#
|
||||||
|
|
||||||
zone=
|
cloud_DefaultZone() {
|
||||||
region=
|
echo "us-east-1b"
|
||||||
|
|
||||||
cloud_SetZone() {
|
|
||||||
zone="$1"
|
|
||||||
# AWS region is zone with the last character removed
|
|
||||||
region="${zone:0:$((${#zone} - 1))}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Set the default zone
|
# AWS region is zone with the last character removed
|
||||||
cloud_SetZone "us-east-1b"
|
# __cloud_GetRegion zone
#
# Prints the AWS region for the given availability zone on stdout.  The region
# is the zone name with its last character removed (us-east-1b -> us-east-1).
# An empty zone prints an empty line.
__cloud_GetRegion() {
  declare zone="$1"
  # ${zone%?} strips exactly one trailing character and is a no-op on an
  # empty string, whereas a computed substring length of -1 is an error there.
  echo "${zone%?}"
}
|
||||||
|
|
||||||
# sshPrivateKey should be globally defined whenever this function is called.
|
# sshPrivateKey should be globally defined whenever this function is called.
|
||||||
#
|
#
|
||||||
@ -49,18 +49,22 @@ __cloud_FindInstances() {
|
|||||||
declare filter="$1"
|
declare filter="$1"
|
||||||
|
|
||||||
instances=()
|
instances=()
|
||||||
declare name publicIp privateIp
|
declare -a regions=("us-east-1" "us-west-1" "us-west-2")
|
||||||
while read -r name publicIp privateIp; do
|
for region in "${regions[@]}"
|
||||||
printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp"
|
do
|
||||||
instances+=("$name:$publicIp:$privateIp")
|
declare name publicIp privateIp
|
||||||
done < <(aws ec2 describe-instances \
|
while read -r name publicIp privateIp zone; do
|
||||||
--region "$region" \
|
printf "%-30s | publicIp=%-16s privateIp=%s zone=%s\n" "$name" "$publicIp" "$privateIp" "$zone"
|
||||||
--filters \
|
instances+=("$name:$publicIp:$privateIp:$zone")
|
||||||
"Name=tag:name,Values=$filter" \
|
done < <(aws ec2 describe-instances \
|
||||||
"Name=instance-state-name,Values=pending,running" \
|
--region "$region" \
|
||||||
--query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress]" \
|
--filters \
|
||||||
--output text \
|
"Name=tag:name,Values=$filter" \
|
||||||
)
|
"Name=instance-state-name,Values=pending,running" \
|
||||||
|
--query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress,Placement.AvailabilityZone]" \
|
||||||
|
--output text \
|
||||||
|
)
|
||||||
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -111,6 +115,8 @@ cloud_FindInstance() {
|
|||||||
# This function will be called before |cloud_CreateInstances|
|
# This function will be called before |cloud_CreateInstances|
|
||||||
cloud_Initialize() {
|
cloud_Initialize() {
|
||||||
declare networkName="$1"
|
declare networkName="$1"
|
||||||
|
declare zone="$2"
|
||||||
|
declare region=$(__cloud_GetRegion "$zone")
|
||||||
|
|
||||||
__cloud_SshPrivateKeyCheck
|
__cloud_SshPrivateKeyCheck
|
||||||
(
|
(
|
||||||
@ -152,11 +158,53 @@ cloud_CreateInstances() {
|
|||||||
declare networkName="$1"
|
declare networkName="$1"
|
||||||
declare namePrefix="$2"
|
declare namePrefix="$2"
|
||||||
declare numNodes="$3"
|
declare numNodes="$3"
|
||||||
declare imageName="$4"
|
declare enableGpu="$4"
|
||||||
declare machineType="$5"
|
declare machineType="$5"
|
||||||
declare optionalBootDiskSize="$6"
|
declare zone="$6"
|
||||||
declare optionalStartupScript="$7"
|
declare optionalBootDiskSize="$7"
|
||||||
declare optionalAddress="$8"
|
declare optionalStartupScript="$8"
|
||||||
|
declare optionalAddress="$9"
|
||||||
|
declare region=$(__cloud_GetRegion "$zone")
|
||||||
|
|
||||||
|
if $enableGpu; then
|
||||||
|
#
|
||||||
|
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
||||||
|
#
|
||||||
|
# TODO: Unfortunately these AMIs are not public. When this becomes an issue,
|
||||||
|
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
|
||||||
|
# instance boots
|
||||||
|
#
|
||||||
|
case $region in
|
||||||
|
us-east-1)
|
||||||
|
imageName="ami-0a8bd6fb204473f78"
|
||||||
|
;;
|
||||||
|
us-west-1)
|
||||||
|
imageName="ami-07011f0795513c59d"
|
||||||
|
;;
|
||||||
|
us-west-2)
|
||||||
|
imageName="ami-0a11ef42b62b82b68"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
usage "Unsupported region: $region"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
else
|
||||||
|
# Select an upstream Ubuntu 18.04 AMI from https://cloud-images.ubuntu.com/locator/ec2/
|
||||||
|
case $region in
|
||||||
|
us-east-1)
|
||||||
|
imageName="ami-0a313d6098716f372"
|
||||||
|
;;
|
||||||
|
us-west-1)
|
||||||
|
imageName="ami-06397100adf427136"
|
||||||
|
;;
|
||||||
|
us-west-2)
|
||||||
|
imageName="ami-0dc34f4b016c9ce49"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
usage "Unsupported region: $region"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
declare -a args
|
declare -a args
|
||||||
args=(
|
args=(
|
||||||
@ -225,6 +273,8 @@ cloud_DeleteInstances() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
declare names=("${instances[@]/:*/}")
|
declare names=("${instances[@]/:*/}")
|
||||||
|
declare zones=("${instances[@]/*:/}")
|
||||||
|
declare region=$(__cloud_GetRegion "${zones[0]}")
|
||||||
|
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
|
@ -4,12 +4,10 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
# Default zone
|
# Default zone
|
||||||
zone="us-west1-b"
|
cloud_DefaultZone() {
|
||||||
cloud_SetZone() {
|
echo "us-west1-b"
|
||||||
zone="$1"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# __cloud_FindInstances
|
# __cloud_FindInstances
|
||||||
#
|
#
|
||||||
@ -30,13 +28,13 @@ __cloud_FindInstances() {
|
|||||||
instances=()
|
instances=()
|
||||||
|
|
||||||
declare name zone publicIp privateIp status
|
declare name zone publicIp privateIp status
|
||||||
while read -r name publicIp privateIp status; do
|
while read -r name publicIp privateIp status zone; do
|
||||||
printf "%-30s | publicIp=%-16s privateIp=%s status=%s\n" "$name" "$publicIp" "$privateIp" "$status"
|
printf "%-30s | publicIp=%-16s privateIp=%s status=%s zone=%s\n" "$name" "$publicIp" "$privateIp" "$status" "$zone"
|
||||||
|
|
||||||
instances+=("$name:$publicIp:$privateIp")
|
instances+=("$name:$publicIp:$privateIp:$zone")
|
||||||
done < <(gcloud compute instances list \
|
done < <(gcloud compute instances list \
|
||||||
--filter "$filter" \
|
--filter "$filter" \
|
||||||
--format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
|
--format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status,zone)')
|
||||||
}
|
}
|
||||||
#
|
#
|
||||||
# cloud_FindInstances [namePrefix]
|
# cloud_FindInstances [namePrefix]
|
||||||
@ -119,12 +117,26 @@ cloud_CreateInstances() {
|
|||||||
declare networkName="$1"
|
declare networkName="$1"
|
||||||
declare namePrefix="$2"
|
declare namePrefix="$2"
|
||||||
declare numNodes="$3"
|
declare numNodes="$3"
|
||||||
declare imageName="$4"
|
declare enableGpu="$4"
|
||||||
declare machineType="$5"
|
declare machineType="$5"
|
||||||
declare optionalBootDiskSize="$6"
|
declare zone="$6"
|
||||||
declare optionalStartupScript="$7"
|
declare optionalBootDiskSize="$7"
|
||||||
declare optionalAddress="$8"
|
declare optionalStartupScript="$8"
|
||||||
declare optionalBootDiskType="$9"
|
declare optionalAddress="$9"
|
||||||
|
declare optionalBootDiskType="${10}"
|
||||||
|
|
||||||
|
if $enableGpu; then
|
||||||
|
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
||||||
|
#
|
||||||
|
# TODO: Unfortunately this image is not public. When this becomes an issue,
|
||||||
|
# use the stock Ubuntu 18.04 image and programmatically install CUDA after the
|
||||||
|
# instance boots
|
||||||
|
#
|
||||||
|
imageName="ubuntu-1804-bionic-v20181029-with-cuda-10-and-cuda-9-2"
|
||||||
|
else
|
||||||
|
# Upstream Ubuntu 18.04 LTS image
|
||||||
|
imageName="ubuntu-1804-bionic-v20181029 --image-project ubuntu-os-cloud"
|
||||||
|
fi
|
||||||
|
|
||||||
declare -a nodes
|
declare -a nodes
|
||||||
if [[ $numNodes = 1 ]]; then
|
if [[ $numNodes = 1 ]]; then
|
||||||
@ -192,11 +204,13 @@ cloud_DeleteInstances() {
|
|||||||
echo No instances to delete
|
echo No instances to delete
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
declare names=("${instances[@]/:*/}")
|
declare names=("${instances[@]/:*/}")
|
||||||
|
declare zones=("${instances[@]/*:/}")
|
||||||
|
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
gcloud beta compute instances delete --zone "$zone" --quiet "${names[@]}"
|
gcloud beta compute instances delete --zone "${zones[0]}" --quiet "${names[@]}"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -213,6 +227,7 @@ cloud_FetchFile() {
|
|||||||
declare publicIp="$2"
|
declare publicIp="$2"
|
||||||
declare remoteFile="$3"
|
declare remoteFile="$3"
|
||||||
declare localFile="$4"
|
declare localFile="$4"
|
||||||
|
declare zone="$5"
|
||||||
|
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
|
Loading…
x
Reference in New Issue
Block a user