diff --git a/net/README.md b/net/README.md index 22fd68d620..ecaf88494a 100644 --- a/net/README.md +++ b/net/README.md @@ -5,15 +5,30 @@ intended to be both dev and CD friendly. ### User Account Prerequisites -Log in to GCP with: +GCP and AWS are supported. + +#### GCP +First authenticate with ```bash $ gcloud auth login ``` -Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough -access to create a new database. +#### AWS +Obtain your credentials from the AWS IAM Console and configure the AWS CLI with +```bash +$ aws configure +``` +More information on AWS CLI configuration can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-quick-configuration) + +### Metrics configuration +Ensure that `$(whoami)` is the name of an InfluxDB user account with enough +access to create a new InfluxDB database. Ask mvines@ for help if needed. ## Quick Start + +NOTE: This example uses GCP. If you are using AWS, replace `./gce.sh` with +`./ec2.sh` in the commands. + ```bash $ cd net/ $ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here) @@ -32,6 +47,10 @@ network over public IP addresses: ```bash $ ./gce.sh create -P ... ``` +or +```bash +$ ./ec2.sh create -P ... +``` ### Deploying a Snap-based network To deploy the latest pre-built `edge` channel Snap (ie, latest from the `master` @@ -46,6 +65,10 @@ First ensure the network instances are created with GPU enabled: ```bash $ ./gce.sh create -g ... ``` +or +```bash +$ ./ec2.sh create -g ... +``` If deploying a Snap-based network nothing further is required, as GPU presence is detected at runtime and the CUDA build is auto selected. @@ -58,9 +81,20 @@ $ ./net.sh start -f "cuda,erasure" ### How to interact with a CD testnet deployed by ci/testnet-deploy.sh +**AWS-Specific Extra Setup**: Follow the steps in `scripts/add-solana-user-authorized_keys.sh`, +then redeploy the testnet before continuing in this section. 
+ Taking **master-testnet-solana-com** as an example, configure your workspace for the testnet using: -``` +```bash $ ./gce.sh config -p master-testnet-solana-com -$ ./ssh.sh # <-- Details on how to ssh into any testnet node +``` +or +```bash +$ ./ec2.sh config -p master-testnet-solana-com +``` + +Then run the following for details on how to ssh into any testnet node +```bash +$ ./ssh.sh ``` diff --git a/net/ec2.sh b/net/ec2.sh new file mode 120000 index 0000000000..91afe231f0 --- /dev/null +++ b/net/ec2.sh @@ -0,0 +1 @@ +gce.sh \ No newline at end of file diff --git a/net/gce.sh b/net/gce.sh index 04eb20bcd7..37b1d72a40 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -1,27 +1,44 @@ #!/bin/bash -e here=$(dirname "$0") -# shellcheck source=net/scripts/gcloud.sh -source "$here"/scripts/gcloud.sh # shellcheck source=net/common.sh source "$here"/common.sh +cloudProvider=$(basename "$0" .sh) +case $cloudProvider in +gce) + # shellcheck source=net/scripts/gce-provider.sh + source "$here"/scripts/gce-provider.sh + + imageName="ubuntu-16-04-cuda-9-2-new" + leaderMachineType=n1-standard-16 + validatorMachineType=n1-standard-4 + clientMachineType=n1-standard-16 + ;; +ec2) + # shellcheck source=net/scripts/ec2-provider.sh + source "$here"/scripts/ec2-provider.sh + + imageName="ami-04169656fea786776" + leaderMachineType=m4.4xlarge + validatorMachineType=m4.xlarge + clientMachineType=m4.4xlarge + ;; +*) + echo "Error: Unknown cloud provider: $cloudProvider" + ;; +esac + + prefix=testnet-dev-${USER//[^A-Za-z0-9]/} validatorNodeCount=5 clientNodeCount=1 -leaderBootDiskSize=1TB -leaderMachineType=n1-standard-16 -leaderAccelerator= -validatorMachineType=n1-standard-4 -validatorBootDiskSize=$leaderBootDiskSize -validatorAccelerator= -clientMachineType=n1-standard-16 -clientBootDiskSize=40GB -clientAccelerator= +leaderBootDiskSizeInGb=1000 +validatorBootDiskSizeInGb=$leaderBootDiskSizeInGb +clientBootDiskSizeInGb=40 -imageName="ubuntu-16-04-cuda-9-2-new" publicNetwork=false 
-zone="us-west1-b" +enableGpu=false leaderAddress= usage() { @@ -33,7 +50,7 @@ usage() { cat <> "$configFile" if [[ $arrayName = "leaderIp" ]]; then @@ -139,121 +187,133 @@ EOF waitForStartupComplete() { declare name="$1" - declare publicIp="$3" + declare publicIp="$2" echo "Waiting for $name to finish booting..." ( for i in $(seq 1 30); do - if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then + if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"); then break fi sleep 2 echo "Retry $i..." done ) + echo "$name has booted." } echo "Looking for leader instance..." - gcloud_FindInstances "name=$prefix-leader" show + cloud_FindInstance "$prefix-leader" [[ ${#instances[@]} -eq 1 ]] || { echo "Unable to find leader" exit 1 } - echo "Fetching $sshPrivateKey from $leaderName" ( - rm -rf "$sshPrivateKey"{,pub} - declare leaderName - declare leaderZone declare leaderIp - IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}") + IFS=: read -r leaderName leaderIp _ < <(echo "${instances[0]}") - set -x + # Try to ping the machine first. + timeout 60s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done" - # Try to ping the machine first. There can be a delay between when the - # instance is reported as RUNNING and when it's reachable over the network - timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done" + if [[ ! -r $sshPrivateKey ]]; then + echo "Fetching $sshPrivateKey from $leaderName" - # Try to scp in a couple times, sshd may not yet be up even though the - # machine can be pinged... - set -o pipefail - for i in $(seq 1 10); do - if gcloud compute scp --zone "$leaderZone" \ - "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then - break - fi - sleep 1 - echo "Retry $i..." - done + # Try to scp in a couple times, sshd may not yet be up even though the + # machine can be pinged... 
+ set -x -o pipefail + for i in $(seq 1 30); do + if cloud_FetchFile "$leaderName" "$leaderIp" /solana-id_ecdsa "$sshPrivateKey"; then + break + fi - chmod 400 "$sshPrivateKey" + sleep 1 + echo "Retry $i..." + done + + chmod 400 "$sshPrivateKey" + ls -l "$sshPrivateKey" + fi ) echo "leaderIp=()" >> "$configFile" - gcloud_ForEachInstance recordInstanceIp leaderIp - gcloud_ForEachInstance waitForStartupComplete + cloud_ForEachInstance recordInstanceIp leaderIp + cloud_ForEachInstance waitForStartupComplete echo "Looking for validator instances..." - gcloud_FindInstances "name~^$prefix-validator" show + cloud_FindInstances "$prefix-validator" [[ ${#instances[@]} -gt 0 ]] || { echo "Unable to find validators" exit 1 } echo "validatorIpList=()" >> "$configFile" - gcloud_ForEachInstance recordInstanceIp validatorIpList - gcloud_ForEachInstance waitForStartupComplete + cloud_ForEachInstance recordInstanceIp validatorIpList + cloud_ForEachInstance waitForStartupComplete echo "clientIpList=()" >> "$configFile" echo "Looking for client instances..." - gcloud_FindInstances "name~^$prefix-client" show + cloud_FindInstances "$prefix-client" [[ ${#instances[@]} -eq 0 ]] || { - gcloud_ForEachInstance recordInstanceIp clientIpList - gcloud_ForEachInstance waitForStartupComplete + cloud_ForEachInstance recordInstanceIp clientIpList + cloud_ForEachInstance waitForStartupComplete } echo "Wrote $configFile" $metricsWriteDatapoint "testnet-deploy net-config-complete=1" } -case $command in -delete) +delete() { $metricsWriteDatapoint "testnet-deploy net-delete-begin=1" # Delete the leader node first to prevent unusual metrics on the dashboard # during shutdown. # TODO: It would be better to fully cut-off metrics reporting before any # instances are deleted. 
- for filter in "^$prefix-leader" "^$prefix-"; do - gcloud_FindInstances "name~$filter" + for filter in "$prefix-leader" "$prefix-"; do + echo "Searching for instances: $filter" + cloud_FindInstances "$filter" if [[ ${#instances[@]} -eq 0 ]]; then echo "No instances found matching '$filter'" else - gcloud_DeleteInstances true + cloud_DeleteInstances true fi done rm -f "$configFile" $metricsWriteDatapoint "testnet-deploy net-delete-complete=1" + +} + +case $command in +delete) + delete ;; create) [[ -n $validatorNodeCount ]] || usage "Need number of nodes" + if [[ $validatorNodeCount -le 0 ]]; then + usage "One or more validator nodes is required" + fi + + delete $metricsWriteDatapoint "testnet-deploy net-create-begin=1" rm -rf "$sshPrivateKey"{,.pub} - ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey" + + # Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa + ssh-keygen -t rsa -N '' -f "$sshPrivateKey" printNetworkInfo() { cat < "$startupScript" < /etc/motd < /etc/motd < /solana-authorized_keys <> /home/solana/.ssh/authorized_keys +" diff --git a/net/scripts/ec2-provider.sh b/net/scripts/ec2-provider.sh new file mode 100644 index 0000000000..57700f3581 --- /dev/null +++ b/net/scripts/ec2-provider.sh @@ -0,0 +1,242 @@ +# |source| this file +# +# Utilities for working with EC2 instances +# + +zone= +region= + +cloud_SetZone() { + zone="$1" + # AWS region is zone with the last character removed + region="${zone:0:$((${#zone} - 1))}" +} + +# Set the default zone +cloud_SetZone "us-east-1b" + +# sshPrivateKey should be globally defined whenever this function is called. +# +# TODO: Remove usage of the sshPrivateKey global +__cloud_SshPrivateKeyCheck() { + # shellcheck disable=SC2154 + if [[ -z $sshPrivateKey ]]; then + echo Error: sshPrivateKey not defined + exit 1 + fi + if [[ ! 
-r $sshPrivateKey ]]; then + echo "Error: file is not readable: $sshPrivateKey" + exit 1 + fi +} + +# +# __cloud_FindInstances +# +# Find instances with name matching the specified pattern. +# +# For each matching instance, an entry in the `instances` array will be added with the +# following information about the instance: +# "name:public IP:private IP" +# +# filter - The instances to filter on +# +# examples: +# $ __cloud_FindInstances "exact-machine-name" +# $ __cloud_FindInstances "all-machines-with-a-common-machine-prefix*" +# +__cloud_FindInstances() { + declare filter="$1" + + instances=() + declare name publicIp privateIp + while read -r name publicIp privateIp; do + printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp" + instances+=("$name:$publicIp:$privateIp") + done < <(aws ec2 describe-instances \ + --region "$region" \ + --filters \ + "Name=tag:name,Values=$filter" \ + "Name=instance-state-name,Values=pending,running" \ + --query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress]" \ + --output text + ) +} + +# +# cloud_FindInstances [namePrefix] +# +# Find instances with names matching the specified prefix +# +# For each matching instance, an entry in the `instances` array will be added with the +# following information about the instance: +# "name:public IP:private IP" +# +# namePrefix - The instance name prefix to look for +# +# examples: +# $ cloud_FindInstances all-machines-with-a-common-machine-prefix +# +cloud_FindInstances() { + declare namePrefix="$1" + __cloud_FindInstances "$namePrefix*" +} + +# +# cloud_FindInstance [name] +# +# Find an instance with a name matching the exact pattern. 
+# +# For each matching instance, an entry in the `instances` array will be added with the +# following information about the instance: +# "name:public IP:private IP" +# +# name - The instance name to look for +# +# examples: +# $ cloud_FindInstance exact-machine-name +# +cloud_FindInstance() { + declare name="$1" + __cloud_FindInstances "$name" +} + + +# +# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName] +# [machineType] [bootDiskSize] [enableGpu] +# [startupScript] [address] +# +# Creates one more identical instances. +# +# networkName - unique name of this testnet +# namePrefix - unique string to prefix all the instance names with +# numNodes - number of instances to create +# imageName - Disk image for the instances +# machineType - GCE machine type +# bootDiskSize - Optional size of the boot disk in GB +# enableGpu - Optionally enable GPU, use the value "true" to enable +# eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80" +# startupScript - Optional startup script to execute when the instance boots +# address - Optional name of the GCE static IP address to attach to the +# instance. 
Requires that |numNodes| = 1.  The value is passed to
+#                 |aws ec2 associate-address| as the --allocation-id in |region|
+#
+# Tip: use cloud_FindInstances to locate the instances once this function
+# returns
+cloud_CreateInstances() {
+  declare networkName="$1"
+  declare namePrefix="$2"
+  declare numNodes="$3"
+  declare imageName="$4"
+  declare machineType="$5"
+  declare optionalBootDiskSize="$6"
+  declare optionalGpu="$7"
+  declare optionalStartupScript="$8"
+  declare optionalAddress="$9"
+
+  __cloud_SshPrivateKeyCheck
+  (
+    set -x
+    aws ec2 delete-key-pair --region "$region" --key-name "$networkName"
+    aws ec2 import-key-pair --region "$region" --key-name "$networkName" \
+      --public-key-material file://"${sshPrivateKey}".pub
+  )
+
+  declare -a args
+  args=(
+    --key-name "$networkName"
+    --count "$numNodes"
+    --region "$region"
+    --placement "AvailabilityZone=$zone"
+    --security-groups testnet
+    --image-id "$imageName"
+    --instance-type "$machineType"
+    --tag-specifications "ResourceType=instance,Tags=[{Key=name,Value=$namePrefix}]"
+  )
+  if [[ -n $optionalBootDiskSize ]]; then
+    args+=(
+      --block-device-mappings "[{\"DeviceName\": \"/dev/sda1\", \"Ebs\": { \"VolumeSize\": $optionalBootDiskSize }}]"
+    )
+  fi
+  if [[ $optionalGpu = true ]]; then
+    echo TODO: GPU support not implemented yet
+    exit 1
+  fi
+  if [[ -n $optionalStartupScript ]]; then
+    args+=(
+      --user-data "file://$optionalStartupScript"
+    )
+  fi
+
+  if [[ -n $optionalAddress ]]; then
+    [[ $numNodes = 1 ]] || {
+      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
+      exit 1
+    }
+  fi
+
+  (
+    set -x
+    aws ec2 run-instances "${args[@]}"
+  )
+
+  if [[ -n $optionalAddress ]]; then
+    cloud_FindInstance "$namePrefix"
+    if [[ ${#instances[@]} -ne 1 ]]; then
+      echo "Failed to find newly created instance: $namePrefix"
+      exit 1
+    fi
+    # The first field of an `instances` entry holds the EC2 instance id
+    declare instanceId
+    IFS=: read -r instanceId _ < <(echo "${instances[0]}")
+    aws ec2 associate-address \
+      --instance-id "$instanceId" \
+      --region "$region" \
+      --allocation-id "$optionalAddress"
+  fi
+}
+
+#
+# cloud_DeleteInstances
+#
+# Terminates all the instances listed in the `instances` array
+#
+cloud_DeleteInstances() {
+  if [[ ${#instances[@]} -eq 0 ]]; then
+    echo No instances to delete
+    return
+  fi
+  declare names=("${instances[@]/:*/}")
+  (
+    set -x
+    aws ec2 terminate-instances --region "$region" --instance-ids "${names[@]}"
+  )
+}
+
+#
+# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
+#
+# Fetch a file from the given instance.  This function uses a cloud-specific
+# mechanism to fetch the file (here: scp to |publicIp| using |sshPrivateKey|)
+#
+cloud_FetchFile() {
+  # shellcheck disable=SC2034 # instanceName is unused
+  declare instanceName="$1"
+  declare publicIp="$2"
+  declare remoteFile="$3"
+  declare localFile="$4"
+
+  __cloud_SshPrivateKeyCheck
+  (
+    set -x
+    scp \
+      -o "StrictHostKeyChecking=no" \
+      -o "UserKnownHostsFile=/dev/null" \
+      -o "User=solana" \
+      -o "IdentityFile=$sshPrivateKey" \
+      -o "LogLevel=ERROR" \
+      -F /dev/null \
+      "solana@$publicIp:$remoteFile" "$localFile"
+  )
+}
diff --git a/net/scripts/gce-provider.sh b/net/scripts/gce-provider.sh
new file mode 100644
index 0000000000..b52ea81e90
--- /dev/null
+++ b/net/scripts/gce-provider.sh
@@ -0,0 +1,201 @@
+# |source| this file
+#
+# Utilities for working with GCE instances
+#
+
+# Default zone
+zone="us-west1-b"
+cloud_SetZone() {
+  zone="$1"
+}
+
+
+#
+# __cloud_FindInstances
+#
+# Find instances matching the specified pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# filter   - The instances to filter on
+#
+# examples:
+#   $ __cloud_FindInstances "name=exact-machine-name"
+#   $ __cloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
+#
+__cloud_FindInstances() {
+  declare filter="$1"
+  instances=()
+
+  declare name publicIp privateIp status
+  while read -r name publicIp privateIp status; do
+    if [[ $status != RUNNING ]]; then
+      echo "Warning: $name is not RUNNING, ignoring it."
+      continue
+    fi
+    printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp"
+
+    instances+=("$name:$publicIp:$privateIp")
+  done < <(gcloud compute instances list \
+             --filter="$filter" \
+             --format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
+}
+#
+# cloud_FindInstances [namePrefix]
+#
+# Find instances with names matching the specified prefix
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# namePrefix - The instance name prefix to look for
+#
+# examples:
+#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
+#
+cloud_FindInstances() {
+  declare namePrefix="$1"
+  __cloud_FindInstances "name~^$namePrefix"
+}
+
+#
+# cloud_FindInstance [name]
+#
+# Find an instance with a name matching the exact pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# name - The instance name to look for
+#
+# examples:
+#   $ cloud_FindInstance exact-machine-name
+#
+cloud_FindInstance() {
+  declare name="$1"
+  __cloud_FindInstances "name=$name"
+}
+
+#
+# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName]
+#                       [machineType] [bootDiskSize] [enableGpu]
+#                       [startupScript] [address]
+#
+# Creates one or more identical instances.
+#
+# networkName   - unique name of this testnet
+# namePrefix    - unique string to prefix all the instance names with
+# numNodes      - number of instances to create
+# imageName     - Disk image for the instances
+# machineType   - GCE machine type
+# bootDiskSize  - Optional size of the boot disk in GB
+# enableGpu     - Optionally enable GPU, use the value "true" to enable
+#                 (attaches 4 K80 GPUs: "count=4,type=nvidia-tesla-k80")
+# startupScript - Optional startup script to execute when the instance boots
+# address       - Optional name of the GCE static IP address to attach to the
+#                 instance.
Requires that |numNodes| = 1 and that addressName
+#                 has been provisioned in the GCE region that is hosting `$zone`
+#
+# Tip: use cloud_FindInstances to locate the instances once this function
+# returns
+cloud_CreateInstances() {
+  declare networkName="$1"
+  declare namePrefix="$2"
+  declare numNodes="$3"
+  declare imageName="$4"
+  declare machineType="$5"
+  declare optionalBootDiskSize="$6"
+  declare optionalGpu="$7"
+  declare optionalStartupScript="$8"
+  declare optionalAddress="$9"
+  # Node names: |namePrefix| alone for one node, else |namePrefix| + zero-padded index
+  declare -a nodes
+  if [[ $numNodes = 1 ]]; then
+    nodes=("$namePrefix")
+  else
+    read -ra nodes <<<"$(seq -s ' ' -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes")"
+  fi
+
+  declare -a args
+  args=(
+    "--zone=$zone"
+    "--tags=testnet"
+    "--metadata=testnet=$networkName"
+    "--image=$imageName"
+    "--machine-type=$machineType"
+  )
+  if [[ -n $optionalBootDiskSize ]]; then
+    args+=(
+      "--boot-disk-size=${optionalBootDiskSize}GB"
+    )
+  fi
+  if [[ $optionalGpu = true ]]; then
+    args+=(
+      "--accelerator=count=4,type=nvidia-tesla-k80"
+      --maintenance-policy TERMINATE
+      --restart-on-failure
+    )
+  fi
+  if [[ -n $optionalStartupScript ]]; then
+    args+=(
+      --metadata-from-file "startup-script=$optionalStartupScript"
+    )
+  fi
+
+  if [[ -n $optionalAddress ]]; then
+    [[ $numNodes = 1 ]] || {
+      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
+      exit 1
+    }
+    args+=(
+      "--address=$optionalAddress"
+    )
+  fi
+
+  (
+    set -x
+    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
+  )
+}
+
+#
+# cloud_DeleteInstances
+#
+# Deletes all the instances listed in the `instances` array
+#
+cloud_DeleteInstances() {
+  if [[ ${#instances[@]} -eq 0 ]]; then
+    echo No instances to delete
+    return
+  fi
+  declare names=("${instances[@]/:*/}")
+
+  (
+    set -x
+    gcloud beta compute instances delete --zone "$zone" --quiet "${names[@]}"
+  )
+}
+
+
+#
+# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
+#
+# Fetch a file from the given instance.
This function uses a cloud-specific +# mechanism to fetch the file +# +cloud_FetchFile() { + declare instanceName="$1" + # shellcheck disable=SC2034 # publicIp is unused + declare publicIp="$2" + declare remoteFile="$3" + declare localFile="$4" + + ( + set -x + gcloud compute scp --zone "$zone" "$instanceName:$remoteFile" "$localFile" + ) +} diff --git a/net/scripts/gcloud.sh b/net/scripts/gcloud.sh deleted file mode 100644 index d758b76d15..0000000000 --- a/net/scripts/gcloud.sh +++ /dev/null @@ -1,187 +0,0 @@ -# |source| this file -# -# Utilities for working with gcloud -# - - -# -# gcloud_FindInstances [filter] [options] -# -# Find instances matching the specified pattern. -# -# For each matching instance, an entry in the `instances` array will be added with the -# following information about the instance: -# "name:zone:public IP:private IP" -# -# filter - The instances to filter on -# options - If set to the string "show", the list of instances will be echoed -# to stdout -# -# examples: -# $ gcloud_FindInstances "name=exact-machine-name" -# $ gcloud_FindInstances "name~^all-machines-with-a-common-machine-prefix" -# -gcloud_FindInstances() { - declare filter="$1" - declare options="$2" - instances=() - - declare name zone publicIp privateIp status - while read -r name zone publicIp privateIp status; do - if [[ $status != RUNNING ]]; then - echo "Warning: $name is not RUNNING, ignoring it." 
- continue - fi - if [[ $options = show ]]; then - printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp" - fi - - instances+=("$name:$zone:$publicIp:$privateIp") - done < <(gcloud compute instances list \ - --filter="$filter" \ - --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)') -} - -# -# gcloud_ForEachInstance [cmd] [extra args to cmd] -# -# Execute a command for each element in the `instances` array -# -# cmd - The command to execute on each instance -# The command will receive arguments followed by any -# additionl arguments supplied to gcloud_ForEachInstance: -# name - name of the instance -# zone - zone the instance is located in -# publicIp - The public IP address of this instance -# privateIp - The priate IP address of this instance -# count - Monotonically increasing count for each -# invocation of cmd, starting at 1 -# ... - Extra args to cmd.. -# -# -gcloud_ForEachInstance() { - declare cmd="$1" - shift - [[ -n $cmd ]] || { echo gcloud_ForEachInstance: cmd not specified; exit 1; } - - declare count=1 - for info in "${instances[@]}"; do - declare name zone publicIp privateIp - IFS=: read -r name zone publicIp privateIp < <(echo "$info") - - eval "$cmd" "$name" "$zone" "$publicIp" "$privateIp" "$count" "$@" - count=$((count + 1)) - done -} - -# -# gcloud_CreateInstances [namePrefix] [numNodes] [zone] [imageName] -# [machineType] [bootDiskSize] [accelerator] -# [startupScript] [address] -# -# Creates one more identical instances. 
-# -# namePrefix - unique string to prefix all the instance names with -# numNodes - number of instances to create -# zone - zone to create the instances in -# imageName - Disk image for the instances -# machineType - GCE machine type -# bootDiskSize - Optional disk of the boot disk -# accelerator - Optional accelerator to attach to the instance(s), see -# eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80" -# startupScript - Optional startup script to execute when the instance boots -# address - Optional name of the GCE static IP address to attach to the -# instance. Requires that |numNodes| = 1 and that addressName -# has been provisioned in the GCE region that is hosting |zone| -# -# Tip: use gcloud_FindInstances to locate the instances once this function -# returns -gcloud_CreateInstances() { - declare namePrefix="$1" - declare numNodes="$2" - declare zone="$3" - declare imageName="$4" - declare machineType="$5" - declare optionalBootDiskSize="$6" - declare optionalAccelerator="$7" - declare optionalStartupScript="$8" - declare optionalAddress="$9" - - declare nodes - if [[ $numNodes = 1 ]]; then - nodes=("$namePrefix") - else - read -ra nodes <<<$(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes") - fi - - declare -a args - args=( - "--zone=$zone" - "--tags=testnet" - "--image=$imageName" - "--machine-type=$machineType" - ) - if [[ -n $optionalBootDiskSize ]]; then - args+=( - "--boot-disk-size=$optionalBootDiskSize" - ) - fi - if [[ -n $optionalAccelerator ]]; then - args+=( - "--accelerator=$optionalAccelerator" - --maintenance-policy TERMINATE - --restart-on-failure - ) - fi - if [[ -n $optionalStartupScript ]]; then - args+=( - --metadata-from-file "startup-script=$optionalStartupScript" - ) - fi - - if [[ -n $optionalAddress ]]; then - [[ $numNodes = 1 ]] || { - echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress" - exit 1 - } - args+=( - "--address=$optionalAddress" - ) - fi - - ( - set -x - gcloud beta 
compute instances create "${nodes[@]}" "${args[@]}" - ) -} - -# -# gcloud_DeleteInstances [yes] -# -# Deletes all the instances listed in the `instances` array -# -# If yes = "true", skip the delete confirmation -# -gcloud_DeleteInstances() { - declare maybeQuiet= - if [[ $1 = true ]]; then - maybeQuiet=--quiet - fi - - if [[ ${#instances[0]} -eq 0 ]]; then - echo No instances to delete - return - fi - declare names=("${instances[@]/:*/}") - - # Assume all instances are in the same zone - # TODO: One day this assumption will be invalid - declare zone - IFS=: read -r _ zone _ < <(echo "${instances[0]}") - - ( - set -x - gcloud beta compute instances delete --zone "$zone" $maybeQuiet "${names[@]}" - ) -} -