Add AWS EC2 support
This commit is contained in:
@ -5,15 +5,30 @@ intended to be both dev and CD friendly.
|
|||||||
|
|
||||||
### User Account Prerequisites
|
### User Account Prerequisites
|
||||||
|
|
||||||
Log in to GCP with:
|
GCP and AWS are supported.
|
||||||
|
|
||||||
|
#### GCP
|
||||||
|
First authenticate with
|
||||||
```bash
|
```bash
|
||||||
$ gcloud auth login
|
$ gcloud auth login
|
||||||
```
|
```
|
||||||
|
|
||||||
Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough
|
#### AWS
|
||||||
access to create a new database.
|
Obtain your credentials from the AWS IAM Console and configure the AWS CLI with
|
||||||
|
```bash
|
||||||
|
$ aws configure
|
||||||
|
```
|
||||||
|
More information on AWS CLI configuration can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-quick-configuration).
|
||||||
|
|
||||||
|
### Metrics configuration
|
||||||
|
Ensure that `$(whoami)` is the name of an InfluxDB user account with enough
|
||||||
|
access to create a new InfluxDB database. Ask mvines@ for help if needed.
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
|
NOTE: This example uses GCP. If you are using AWS, replace `./gce.sh` with
|
||||||
|
`./ec2.sh` in the commands.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ cd net/
|
$ cd net/
|
||||||
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
|
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
|
||||||
@ -32,6 +47,10 @@ network over public IP addresses:
|
|||||||
```bash
|
```bash
|
||||||
$ ./gce.sh create -P ...
|
$ ./gce.sh create -P ...
|
||||||
```
|
```
|
||||||
|
or
|
||||||
|
```bash
|
||||||
|
$ ./ec2.sh create -P ...
|
||||||
|
```
|
||||||
|
|
||||||
### Deploying a Snap-based network
|
### Deploying a Snap-based network
|
||||||
To deploy the latest pre-built `edge` channel Snap (ie, latest from the `master`
|
To deploy the latest pre-built `edge` channel Snap (ie, latest from the `master`
|
||||||
@ -46,6 +65,10 @@ First ensure the network instances are created with GPU enabled:
|
|||||||
```bash
|
```bash
|
||||||
$ ./gce.sh create -g ...
|
$ ./gce.sh create -g ...
|
||||||
```
|
```
|
||||||
|
or
|
||||||
|
```bash
|
||||||
|
$ ./ec2.sh create -g ...
|
||||||
|
```
|
||||||
|
|
||||||
If deploying a Snap-based network nothing further is required, as GPU presence
|
If deploying a Snap-based network nothing further is required, as GPU presence
|
||||||
is detected at runtime and the CUDA build is auto selected.
|
is detected at runtime and the CUDA build is auto selected.
|
||||||
@ -58,9 +81,20 @@ $ ./net.sh start -f "cuda,erasure"
|
|||||||
|
|
||||||
### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
|
### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
|
||||||
|
|
||||||
|
**AWS-Specific Extra Setup**: Follow the steps in `scripts/add-solana-user-authorized_keys.sh`,
|
||||||
|
then redeploy the testnet before continuing in this section.
|
||||||
|
|
||||||
Taking **master-testnet-solana-com** as an example, configure your workspace for
|
Taking **master-testnet-solana-com** as an example, configure your workspace for
|
||||||
the testnet using:
|
the testnet using:
|
||||||
```
|
```bash
|
||||||
$ ./gce.sh config -p master-testnet-solana-com
|
$ ./gce.sh config -p master-testnet-solana-com
|
||||||
$ ./ssh.sh # <-- Details on how to ssh into any testnet node
|
```
|
||||||
|
or
|
||||||
|
```bash
|
||||||
|
$ ./ec2.sh config -p master-testnet-solana-com
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the following for details on how to ssh into any testnet node
|
||||||
|
```bash
|
||||||
|
$ ./ssh.sh
|
||||||
```
|
```
|
||||||
|
1
net/ec2.sh
Symbolic link
1
net/ec2.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
gce.sh
|
220
net/gce.sh
220
net/gce.sh
@ -1,27 +1,44 @@
|
|||||||
#!/bin/bash -e
|
#!/bin/bash -e
|
||||||
|
|
||||||
here=$(dirname "$0")
|
here=$(dirname "$0")
|
||||||
# shellcheck source=net/scripts/gcloud.sh
|
|
||||||
source "$here"/scripts/gcloud.sh
|
|
||||||
# shellcheck source=net/common.sh
|
# shellcheck source=net/common.sh
|
||||||
source "$here"/common.sh
|
source "$here"/common.sh
|
||||||
|
|
||||||
|
# The cloud provider is derived from the name this script was invoked as:
# `gce.sh` selects GCE, `ec2.sh` (a symlink to gce.sh) selects EC2.
# Each arm sources the provider implementation and sets the provider-specific
# image and machine type defaults.
cloudProvider=$(basename "$0" .sh)
case $cloudProvider in
gce)
  # shellcheck source=net/scripts/gce-provider.sh
  source "$here"/scripts/gce-provider.sh

  imageName="ubuntu-16-04-cuda-9-2-new"
  leaderMachineType=n1-standard-16
  validatorMachineType=n1-standard-4
  clientMachineType=n1-standard-16
  ;;
ec2)
  # shellcheck source=net/scripts/ec2-provider.sh
  source "$here"/scripts/ec2-provider.sh

  imageName="ami-04169656fea786776"
  leaderMachineType=m4.4xlarge
  validatorMachineType=m4.xlarge
  clientMachineType=m4.4xlarge
  ;;
*)
  # Without a provider none of the cloud_* functions exist, so continuing
  # would only fail later with a less helpful error.
  echo "Error: Unknown cloud provider: $cloudProvider" >&2
  exit 1
  ;;
esac
|
||||||
|
|
||||||
|
|
||||||
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
|
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
|
||||||
validatorNodeCount=5
|
validatorNodeCount=5
|
||||||
clientNodeCount=1
|
clientNodeCount=1
|
||||||
leaderBootDiskSize=1TB
|
leaderBootDiskSizeInGb=1000
|
||||||
leaderMachineType=n1-standard-16
|
validatorBootDiskSizeInGb=$leaderBootDiskSizeInGb
|
||||||
leaderAccelerator=
|
clientBootDiskSizeInGb=40
|
||||||
validatorMachineType=n1-standard-4
|
|
||||||
validatorBootDiskSize=$leaderBootDiskSize
|
|
||||||
validatorAccelerator=
|
|
||||||
clientMachineType=n1-standard-16
|
|
||||||
clientBootDiskSize=40GB
|
|
||||||
clientAccelerator=
|
|
||||||
|
|
||||||
imageName="ubuntu-16-04-cuda-9-2-new"
|
|
||||||
publicNetwork=false
|
publicNetwork=false
|
||||||
zone="us-west1-b"
|
enableGpu=false
|
||||||
leaderAddress=
|
leaderAddress=
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
@ -33,7 +50,7 @@ usage() {
|
|||||||
cat <<EOF
|
cat <<EOF
|
||||||
usage: $0 [create|config|delete] [common options] [command-specific options]
|
usage: $0 [create|config|delete] [common options] [command-specific options]
|
||||||
|
|
||||||
Configure a GCE-based testnet
|
Manage testnet instances
|
||||||
|
|
||||||
create - create a new testnet (implies 'config')
|
create - create a new testnet (implies 'config')
|
||||||
config - configure the testnet and write a config file describing it
|
config - configure the testnet and write a config file describing it
|
||||||
@ -47,10 +64,13 @@ Configure a GCE-based testnet
|
|||||||
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
||||||
-c [number] - Number of client nodes (default: $clientNodeCount)
|
-c [number] - Number of client nodes (default: $clientNodeCount)
|
||||||
-P - Use public network IP addresses (default: $publicNetwork)
|
-P - Use public network IP addresses (default: $publicNetwork)
|
||||||
-z [zone] - GCP Zone for the nodes (default: $zone)
|
-z [zone] - Zone for the nodes (default: $zone)
|
||||||
-i [imageName] - Existing image on GCE (default: $imageName)
|
-g - Enable GPU (default: $enableGpu)
|
||||||
-g - Enable GPU
|
-a [address] - Set the leader node's external IP address to this value.
|
||||||
-a [address] - Set the leader node's external IP address to this GCE address
|
For GCE, [address] is the "name" of the desired External
|
||||||
|
IP Address.
|
||||||
|
For EC2, [address] is the "allocation ID" of the desired
|
||||||
|
Elastic IP.
|
||||||
|
|
||||||
config-specific options:
|
config-specific options:
|
||||||
none
|
none
|
||||||
@ -68,7 +88,7 @@ command=$1
|
|||||||
shift
|
shift
|
||||||
[[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
|
[[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
|
||||||
|
|
||||||
while getopts "h?p:Pi:n:c:z:ga:" opt; do
|
while getopts "h?p:Pn:c:z:ga:" opt; do
|
||||||
case $opt in
|
case $opt in
|
||||||
h | \?)
|
h | \?)
|
||||||
usage
|
usage
|
||||||
@ -80,9 +100,6 @@ while getopts "h?p:Pi:n:c:z:ga:" opt; do
|
|||||||
P)
|
P)
|
||||||
publicNetwork=true
|
publicNetwork=true
|
||||||
;;
|
;;
|
||||||
i)
|
|
||||||
imageName=$OPTARG
|
|
||||||
;;
|
|
||||||
n)
|
n)
|
||||||
validatorNodeCount=$OPTARG
|
validatorNodeCount=$OPTARG
|
||||||
;;
|
;;
|
||||||
@ -90,10 +107,10 @@ while getopts "h?p:Pi:n:c:z:ga:" opt; do
|
|||||||
clientNodeCount=$OPTARG
|
clientNodeCount=$OPTARG
|
||||||
;;
|
;;
|
||||||
z)
|
z)
|
||||||
zone=$OPTARG
|
cloud_SetZone "$OPTARG"
|
||||||
;;
|
;;
|
||||||
g)
|
g)
|
||||||
leaderAccelerator="count=4,type=nvidia-tesla-k80"
|
enableGpu=true
|
||||||
;;
|
;;
|
||||||
a)
|
a)
|
||||||
leaderAddress=$OPTARG
|
leaderAddress=$OPTARG
|
||||||
@ -108,6 +125,37 @@ shift $((OPTIND - 1))
|
|||||||
[[ -z $1 ]] || usage "Unexpected argument: $1"
|
[[ -z $1 ]] || usage "Unexpected argument: $1"
|
||||||
sshPrivateKey="$netConfigDir/id_$prefix"
|
sshPrivateKey="$netConfigDir/id_$prefix"
|
||||||
|
|
||||||
|
|
||||||
|
# cloud_ForEachInstance [cmd] [extra args to cmd]
#
# Execute a command for each element in the `instances` array
#
#   cmd   - The command to execute on each instance
#           The command will receive arguments followed by any
#           additional arguments supplied to cloud_ForEachInstance:
#               name - name of the instance
#               publicIp - The public IP address of this instance
#               privateIp - The private IP address of this instance
#               count - Monotonically increasing count for each
#                       invocation of cmd, starting at 1
#               ... - Extra args to cmd..
#
# Each `instances` entry is expected to be "name:publicIp:privateIp".
# Exits with status 1 if cmd is empty.
#
cloud_ForEachInstance() {
  declare cmd="$1"
  # Validate before shifting so the message is shown even when no argument
  # at all was supplied (a failing `shift` would abort first under `-e`).
  [[ -n $cmd ]] || { echo cloud_ForEachInstance: cmd not specified >&2; exit 1; }
  shift

  declare count=1
  declare info name publicIp privateIp
  for info in "${instances[@]}"; do
    IFS=: read -r name publicIp privateIp <<<"$info"

    # `cmd` is still evaluated as shell text, but its arguments are passed by
    # reference inside the eval'd string.  Expanding them before eval (as a
    # plain `eval "$cmd" "$name" ...` does) re-splits the values, which drops
    # an empty field such as a missing public IP and shifts every later
    # argument one position to the left.
    eval "$cmd" '"$name" "$publicIp" "$privateIp" "$count" "$@"'
    count=$((count + 1))
  done
}
|
||||||
|
|
||||||
prepareInstancesAndWriteConfigFile() {
|
prepareInstancesAndWriteConfigFile() {
|
||||||
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
|
||||||
|
|
||||||
@ -122,10 +170,10 @@ EOF
|
|||||||
|
|
||||||
recordInstanceIp() {
|
recordInstanceIp() {
|
||||||
declare name="$1"
|
declare name="$1"
|
||||||
declare publicIp="$3"
|
declare publicIp="$2"
|
||||||
declare privateIp="$4"
|
declare privateIp="$3"
|
||||||
|
|
||||||
declare arrayName="$6"
|
declare arrayName="$5"
|
||||||
|
|
||||||
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
||||||
if [[ $arrayName = "leaderIp" ]]; then
|
if [[ $arrayName = "leaderIp" ]]; then
|
||||||
@ -139,121 +187,133 @@ EOF
|
|||||||
|
|
||||||
waitForStartupComplete() {
|
waitForStartupComplete() {
|
||||||
declare name="$1"
|
declare name="$1"
|
||||||
declare publicIp="$3"
|
declare publicIp="$2"
|
||||||
|
|
||||||
echo "Waiting for $name to finish booting..."
|
echo "Waiting for $name to finish booting..."
|
||||||
(
|
(
|
||||||
for i in $(seq 1 30); do
|
for i in $(seq 1 30); do
|
||||||
if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
|
if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"); then
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
sleep 2
|
sleep 2
|
||||||
echo "Retry $i..."
|
echo "Retry $i..."
|
||||||
done
|
done
|
||||||
)
|
)
|
||||||
|
echo "$name has booted."
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Looking for leader instance..."
|
echo "Looking for leader instance..."
|
||||||
gcloud_FindInstances "name=$prefix-leader" show
|
cloud_FindInstance "$prefix-leader"
|
||||||
[[ ${#instances[@]} -eq 1 ]] || {
|
[[ ${#instances[@]} -eq 1 ]] || {
|
||||||
echo "Unable to find leader"
|
echo "Unable to find leader"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Fetching $sshPrivateKey from $leaderName"
|
|
||||||
(
|
(
|
||||||
rm -rf "$sshPrivateKey"{,pub}
|
|
||||||
|
|
||||||
declare leaderName
|
declare leaderName
|
||||||
declare leaderZone
|
|
||||||
declare leaderIp
|
declare leaderIp
|
||||||
IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")
|
IFS=: read -r leaderName leaderIp _ < <(echo "${instances[0]}")
|
||||||
|
|
||||||
set -x
|
# Try to ping the machine first.
|
||||||
|
timeout 60s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
|
||||||
|
|
||||||
# Try to ping the machine first. There can be a delay between when the
|
if [[ ! -r $sshPrivateKey ]]; then
|
||||||
# instance is reported as RUNNING and when it's reachable over the network
|
echo "Fetching $sshPrivateKey from $leaderName"
|
||||||
timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
|
|
||||||
|
|
||||||
# Try to scp in a couple times, sshd may not yet be up even though the
|
# Try to scp in a couple times, sshd may not yet be up even though the
|
||||||
# machine can be pinged...
|
# machine can be pinged...
|
||||||
set -o pipefail
|
set -x -o pipefail
|
||||||
for i in $(seq 1 10); do
|
for i in $(seq 1 30); do
|
||||||
if gcloud compute scp --zone "$leaderZone" \
|
if cloud_FetchFile "$leaderName" "$leaderIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
||||||
"$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
|
break
|
||||||
break
|
fi
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
echo "Retry $i..."
|
|
||||||
done
|
|
||||||
|
|
||||||
chmod 400 "$sshPrivateKey"
|
sleep 1
|
||||||
|
echo "Retry $i..."
|
||||||
|
done
|
||||||
|
|
||||||
|
chmod 400 "$sshPrivateKey"
|
||||||
|
ls -l "$sshPrivateKey"
|
||||||
|
fi
|
||||||
)
|
)
|
||||||
|
|
||||||
echo "leaderIp=()" >> "$configFile"
|
echo "leaderIp=()" >> "$configFile"
|
||||||
gcloud_ForEachInstance recordInstanceIp leaderIp
|
cloud_ForEachInstance recordInstanceIp leaderIp
|
||||||
gcloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
|
||||||
echo "Looking for validator instances..."
|
echo "Looking for validator instances..."
|
||||||
gcloud_FindInstances "name~^$prefix-validator" show
|
cloud_FindInstances "$prefix-validator"
|
||||||
[[ ${#instances[@]} -gt 0 ]] || {
|
[[ ${#instances[@]} -gt 0 ]] || {
|
||||||
echo "Unable to find validators"
|
echo "Unable to find validators"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
echo "validatorIpList=()" >> "$configFile"
|
echo "validatorIpList=()" >> "$configFile"
|
||||||
gcloud_ForEachInstance recordInstanceIp validatorIpList
|
cloud_ForEachInstance recordInstanceIp validatorIpList
|
||||||
gcloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
|
||||||
echo "clientIpList=()" >> "$configFile"
|
echo "clientIpList=()" >> "$configFile"
|
||||||
echo "Looking for client instances..."
|
echo "Looking for client instances..."
|
||||||
gcloud_FindInstances "name~^$prefix-client" show
|
cloud_FindInstances "$prefix-client"
|
||||||
[[ ${#instances[@]} -eq 0 ]] || {
|
[[ ${#instances[@]} -eq 0 ]] || {
|
||||||
gcloud_ForEachInstance recordInstanceIp clientIpList
|
cloud_ForEachInstance recordInstanceIp clientIpList
|
||||||
gcloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Wrote $configFile"
|
echo "Wrote $configFile"
|
||||||
$metricsWriteDatapoint "testnet-deploy net-config-complete=1"
|
$metricsWriteDatapoint "testnet-deploy net-config-complete=1"
|
||||||
}
|
}
|
||||||
|
|
||||||
case $command in
|
delete() {
|
||||||
delete)
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
|
||||||
|
|
||||||
# Delete the leader node first to prevent unusual metrics on the dashboard
|
# Delete the leader node first to prevent unusual metrics on the dashboard
|
||||||
# during shutdown.
|
# during shutdown.
|
||||||
# TODO: It would be better to fully cut-off metrics reporting before any
|
# TODO: It would be better to fully cut-off metrics reporting before any
|
||||||
# instances are deleted.
|
# instances are deleted.
|
||||||
for filter in "^$prefix-leader" "^$prefix-"; do
|
for filter in "$prefix-leader" "$prefix-"; do
|
||||||
gcloud_FindInstances "name~$filter"
|
echo "Searching for instances: $filter"
|
||||||
|
cloud_FindInstances "$filter"
|
||||||
|
|
||||||
if [[ ${#instances[@]} -eq 0 ]]; then
|
if [[ ${#instances[@]} -eq 0 ]]; then
|
||||||
echo "No instances found matching '$filter'"
|
echo "No instances found matching '$filter'"
|
||||||
else
|
else
|
||||||
gcloud_DeleteInstances true
|
cloud_DeleteInstances true
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
rm -f "$configFile"
|
rm -f "$configFile"
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
|
$metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
case $command in
|
||||||
|
delete)
|
||||||
|
delete
|
||||||
;;
|
;;
|
||||||
|
|
||||||
create)
|
create)
|
||||||
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
|
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
|
||||||
|
if [[ $validatorNodeCount -le 0 ]]; then
|
||||||
|
usage "One or more validator nodes is required"
|
||||||
|
fi
|
||||||
|
|
||||||
|
delete
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-create-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-create-begin=1"
|
||||||
|
|
||||||
rm -rf "$sshPrivateKey"{,.pub}
|
rm -rf "$sshPrivateKey"{,.pub}
|
||||||
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
|
|
||||||
|
# Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa
|
||||||
|
ssh-keygen -t rsa -N '' -f "$sshPrivateKey"
|
||||||
|
|
||||||
printNetworkInfo() {
|
printNetworkInfo() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
========================================================================================
|
========================================================================================
|
||||||
|
|
||||||
Network composition:
|
Network composition:
|
||||||
Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})
|
Leader = $leaderMachineType (GPU=$enableGpu)
|
||||||
Validators = $validatorNodeCount x $validatorMachineType (GPU=${validatorAccelerator:-none})
|
Validators = $validatorNodeCount x $validatorMachineType
|
||||||
Client(s) = $clientNodeCount x $clientMachineType (GPU=${clientAccelerator:-none})
|
Client(s) = $clientNodeCount x $clientMachineType
|
||||||
|
|
||||||
========================================================================================
|
========================================================================================
|
||||||
|
|
||||||
@ -261,7 +321,7 @@ EOF
|
|||||||
}
|
}
|
||||||
printNetworkInfo
|
printNetworkInfo
|
||||||
|
|
||||||
declare startupScript="$netConfigDir"/gce-startup-script.sh
|
declare startupScript="$netConfigDir"/instance-startup-script.sh
|
||||||
cat > "$startupScript" <<EOF
|
cat > "$startupScript" <<EOF
|
||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
# autogenerated at $(date)
|
# autogenerated at $(date)
|
||||||
@ -270,11 +330,12 @@ cat > /etc/motd <<EOM
|
|||||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||||
|
|
||||||
This instance has not been fully configured.
|
This instance has not been fully configured.
|
||||||
See "startup-script" log messages in /var/log/syslog for status:
|
|
||||||
$ sudo cat /var/log/syslog | grep startup-script
|
See startup script log messages in /var/log/syslog for status:
|
||||||
|
$ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
|
||||||
|
|
||||||
To block until setup is complete, run:
|
To block until setup is complete, run:
|
||||||
$ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
|
$ until [[ -f /.instance-startup-complete ]]; do sleep 1; done
|
||||||
|
|
||||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||||
EOM
|
EOM
|
||||||
@ -296,6 +357,7 @@ $(
|
|||||||
cat \
|
cat \
|
||||||
disable-background-upgrades.sh \
|
disable-background-upgrades.sh \
|
||||||
create-solana-user.sh \
|
create-solana-user.sh \
|
||||||
|
add-solana-user-authorized_keys.sh \
|
||||||
install-earlyoom.sh \
|
install-earlyoom.sh \
|
||||||
install-libssl-compatability.sh \
|
install-libssl-compatability.sh \
|
||||||
install-rsync.sh \
|
install-rsync.sh \
|
||||||
@ -305,21 +367,21 @@ cat > /etc/motd <<EOM
|
|||||||
$(printNetworkInfo)
|
$(printNetworkInfo)
|
||||||
EOM
|
EOM
|
||||||
|
|
||||||
touch /.gce-startup-complete
|
touch /.instance-startup-complete
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
gcloud_CreateInstances "$prefix-leader" 1 "$zone" \
|
cloud_CreateInstances "$prefix" "$prefix-leader" 1 \
|
||||||
"$imageName" "$leaderMachineType" "$leaderBootDiskSize" "$leaderAccelerator" \
|
"$imageName" "$leaderMachineType" "$leaderBootDiskSizeInGb" "$enableGpu" \
|
||||||
"$startupScript" "$leaderAddress"
|
"$startupScript" "$leaderAddress"
|
||||||
|
|
||||||
gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" \
|
cloud_CreateInstances "$prefix" "$prefix-validator" "$validatorNodeCount" \
|
||||||
"$imageName" "$validatorMachineType" "$validatorBootDiskSize" "$validatorAccelerator" \
|
"$imageName" "$validatorMachineType" "$validatorBootDiskSizeInGb" false \
|
||||||
"$startupScript" ""
|
"$startupScript" ""
|
||||||
|
|
||||||
if [[ $clientNodeCount -gt 0 ]]; then
|
if [[ $clientNodeCount -gt 0 ]]; then
|
||||||
gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" \
|
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
||||||
"$imageName" "$clientMachineType" "$clientBootDiskSize" "$clientAccelerator" \
|
"$imageName" "$clientMachineType" "$clientBootDiskSizeInGb" false \
|
||||||
"$startupScript" ""
|
"$startupScript" ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
20
net/scripts/add-solana-user-authorized_keys.sh
Executable file
20
net/scripts/add-solana-user-authorized_keys.sh
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash -ex
# Installs the shared testnet public keys for the `solana` user.
#
# Writes the key list below to /solana-authorized_keys (overwriting it), then
# appends that file to /home/solana/.ssh/authorized_keys as the solana user.
# Exits with status 1 unless it is run on Linux, with $USER equal to root, and
# with /home/solana/.ssh already present.
|
||||||
|
|
||||||
|
# Guard: only supported on Linux
[[ $(uname) = Linux ]] || exit 1
|
||||||
|
# Guard: /solana-authorized_keys lives in / and `sudo -u` is used below
[[ $USER = root ]] || exit 1
|
||||||
|
|
||||||
|
# Guard: the solana user's .ssh directory must already exist
[[ -d /home/solana/.ssh ]] || exit 1
|
||||||
|
|
||||||
|
# /solana-authorized_keys contains the public keys for users that should
|
||||||
|
# automatically be granted access to ALL testnets.
|
||||||
|
#
|
||||||
|
# To add an entry into this list:
|
||||||
|
# 1. Run: ssh-keygen -t ecdsa -N '' -f ~/.ssh/id-solana-testnet
|
||||||
|
# 2. Inline ~/.ssh/id-solana-testnet.pub below
|
||||||
|
cat > /solana-authorized_keys <<EOF
|
||||||
|
ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFBNwLw0i+rI312gWshojFlNw9NV7WfaKeeUsYADqOvM2o4yrO2pPw+sgW8W+/rPpVyH7zU9WVRgTME8NgFV1Vc=
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo -u solana bash -c "
|
||||||
|
cat /solana-authorized_keys >> /home/solana/.ssh/authorized_keys
|
||||||
|
"
|
242
net/scripts/ec2-provider.sh
Normal file
242
net/scripts/ec2-provider.sh
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
# |source| this file
|
||||||
|
#
|
||||||
|
# Utilities for working with EC2 instances
|
||||||
|
#
|
||||||
|
|
||||||
|
# Availability zone and region used by every `aws` call in this file.
# Both are maintained by cloud_SetZone.
zone=
region=

#
# cloud_SetZone [zone]
#
# Select the availability zone for subsequent operations.
#
# zone - availability zone name, eg "us-east-1b"
#
# Sets the globals `zone` and `region`.
#
cloud_SetZone() {
  zone="$1"
  # AWS region is zone with the last character removed.  `%?` strips exactly
  # one trailing character and yields an empty region for an empty zone,
  # whereas computing a substring length of -1 is an expansion error.
  region="${zone%?}"
}

# Set the default zone
cloud_SetZone "us-east-1b"
|
||||||
|
|
||||||
|
# sshPrivateKey should be globally defined whenever this function is called.
#
# Exits with status 1, after printing a diagnostic on stderr, when the
# `sshPrivateKey` global is empty/unset or names a file that is not readable.
# Returns normally otherwise.
#
# TODO: Remove usage of the sshPrivateKey global
__cloud_SshPrivateKeyCheck() {
  # shellcheck disable=SC2154
  if [[ -z $sshPrivateKey ]]; then
    echo Error: sshPrivateKey not defined >&2
    exit 1
  fi
  if [[ ! -r $sshPrivateKey ]]; then
    echo "Error: file is not readable: $sshPrivateKey" >&2
    exit 1
  fi
}
|
||||||
|
|
||||||
|
#
# __cloud_FindInstances [filter]
#
# Query EC2 (in the current `region`) for pending or running instances whose
# `name` tag matches the filter.
#
# The global `instances` array is reset, then one entry is appended per
# matching instance in the form:
#   "instance ID:public IP:private IP"
# A summary line for each instance is also printed to stdout.
#
# filter   - value matched against the `name` tag; `*` acts as a wildcard
#
# examples:
#   $ __cloud_FindInstances "exact-machine-name"
#   $ __cloud_FindInstances "all-machines-with-a-common-machine-prefix*"
#
__cloud_FindInstances() {
  local tagFilter="$1"
  local -a describeArgs=(
    --region "$region"
    --filters
    "Name=tag:name,Values=$tagFilter"
    "Name=instance-state-name,Values=pending,running"
    --query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress]"
    --output text
  )

  instances=()
  local id pubIp privIp
  while read -r id pubIp privIp; do
    printf "%-30s | publicIp=%-16s privateIp=%s\n" "$id" "$pubIp" "$privIp"
    instances+=("$id:$pubIp:$privIp")
  done < <(aws ec2 describe-instances "${describeArgs[@]}")
}
|
||||||
|
|
||||||
|
#
# cloud_FindInstances [namePrefix]
#
# Locate every instance whose name starts with the given prefix.
#
# Delegates to __cloud_FindInstances with a trailing `*` wildcard, so the
# `instances` array is populated exactly as described there.
#
# namePrefix - The instance name prefix to look for
#
# examples:
#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
#
cloud_FindInstances() {
  __cloud_FindInstances "${1}*"
}
|
||||||
|
|
||||||
|
#
# cloud_FindInstance [name]
#
# Locate the instance(s) whose name is exactly the given value.
#
# Delegates to __cloud_FindInstances with the name passed through unchanged,
# so the `instances` array is populated exactly as described there.
#
# name - The instance name to look for
#
# examples:
#   $ cloud_FindInstance exact-machine-name
#
cloud_FindInstance() {
  __cloud_FindInstances "${1}"
}
|
||||||
|
|
||||||
|
|
||||||
|
#
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName]
#                       [machineType] [bootDiskSize] [enableGpu]
#                       [startupScript] [address]
#
# Creates one or more identical instances.
#
# networkName   - unique name of this testnet.  Also used as the name of the
#                 EC2 key pair, which is deleted and re-imported from
#                 "$sshPrivateKey.pub" on every call
# namePrefix    - value of the `name` tag applied to every instance created
# numNodes      - number of instances to create
# imageName     - image (AMI) ID for the instances
# machineType   - EC2 instance type
# bootDiskSize  - Optional size of the boot disk in GB
# enableGpu     - Optionally enable GPU, use the value "true" to enable.
#                 Not implemented for EC2 yet: "true" aborts with status 1
# startupScript - Optional startup script to execute when the instance boots
# address       - Optional allocation ID of the Elastic IP to associate with
#                 the instance.  Requires that |numNodes| = 1
#
# Reads the globals `region`, `zone` and `sshPrivateKey`.
#
# Tip: use cloud_FindInstances to locate the instances once this function
#      returns
cloud_CreateInstances() {
  declare networkName="$1"
  declare namePrefix="$2"
  declare numNodes="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalGpu="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  __cloud_SshPrivateKeyCheck
  (
    set -x
    aws ec2 delete-key-pair --region "$region" --key-name "$networkName"
    aws ec2 import-key-pair --region "$region" --key-name "$networkName" \
      --public-key-material file://"${sshPrivateKey}".pub
  )

  declare -a args
  args=(
    --key-name "$networkName"
    --count "$numNodes"
    --region "$region"
    --placement "AvailabilityZone=$zone"
    --security-groups testnet
    --image-id "$imageName"
    --instance-type "$machineType"
    --tag-specifications "ResourceType=instance,Tags=[{Key=name,Value=$namePrefix}]"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      --block-device-mapping "[{\"DeviceName\": \"/dev/sda1\", \"Ebs\": { \"VolumeSize\": $optionalBootDiskSize }}]"
    )
  fi
  if [[ $optionalGpu = true ]]; then
    echo TODO: GPU support not implemented yet >&2
    exit 1
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --user-data "file://$optionalStartupScript"
    )
  fi

  if [[ -n $optionalAddress ]]; then
    [[ $numNodes = 1 ]] || {
      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress" >&2
      exit 1
    }
  fi

  (
    set -x
    aws ec2 run-instances "${args[@]}"
  )

  if [[ -n $optionalAddress ]]; then
    cloud_FindInstance "$namePrefix"
    if [[ ${#instances[@]} -ne 1 ]]; then
      # Without exactly one match there is no instance ID to attach the
      # address to, so stop here rather than call associate-address blindly.
      echo "Failed to find newly created instance: $namePrefix" >&2
      exit 1
    fi

    # The first field of an `instances` entry is the EC2 instance ID.
    declare instanceId
    IFS=: read -r instanceId _ <<<"${instances[0]}"
    aws ec2 associate-address \
      --instance-id "$instanceId" \
      --region "$region" \
      --allocation-id "$optionalAddress"
  fi
}
|
||||||
|
|
||||||
|
#
# cloud_DeleteInstances
#
# Deletes all the instances listed in the `instances` array
#
# The instance ID is taken from the text before the first `:` of each entry.
# Any arguments are ignored.  Prints a notice and returns without calling
# `aws` when the array is empty.
#
cloud_DeleteInstances() {
  # Count the elements; `${#instances[0]}` would be the string length of the
  # first element only.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi
  declare -a names=("${instances[@]/:*/}")
  (
    set -x
    aws ec2 terminate-instances --region "$region" --instance-ids "${names[@]}"
  )
}
|
||||||
|
|
||||||
|
|
||||||
|
#
# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
#
# Copy a file from an instance to the local machine.  On EC2 this is done
# with scp as the `solana` user, authenticating with the key file named by
# the `sshPrivateKey` global.
#
# instanceName - accepted but not used by this implementation
# publicIp     - address of the instance to copy from
# remoteFile   - path of the file on the instance
# localFile    - destination path on the local machine
#
cloud_FetchFile() {
  local ip="$2"
  local src="$3"
  local dest="$4"

  __cloud_SshPrivateKeyCheck

  local -a scpOptions=(
    -o "StrictHostKeyChecking=no"
    -o "UserKnownHostsFile=/dev/null"
    -o "User=solana"
    -o "IdentityFile=$sshPrivateKey"
    -o "LogLevel=ERROR"
    -F /dev/null
  )
  (
    set -x
    scp "${scpOptions[@]}" "solana@$ip:$src" "$dest"
  )
}
|
201
net/scripts/gce-provider.sh
Normal file
201
net/scripts/gce-provider.sh
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
# |source| this file
|
||||||
|
#
|
||||||
|
# Utilities for working with GCE instances
|
||||||
|
#
|
||||||
|
|
||||||
|
# Default zone
|
||||||
|
# Zone used by the functions in this file until cloud_SetZone overrides it
zone="us-west1-b"

# cloud_SetZone [zone]
#
# Replaces the global `zone` with the given value.
cloud_SetZone() {
  zone="$1"
}
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# __cloud_FindInstances
|
||||||
|
#
|
||||||
|
# Find instances matching the specified pattern.
|
||||||
|
#
|
||||||
|
# For each matching instance, an entry in the `instances` array will be added with the
|
||||||
|
# following information about the instance:
|
||||||
|
# "name:public IP:private IP"
|
||||||
|
#
|
||||||
|
# filter - The instances to filter on
|
||||||
|
#
|
||||||
|
# examples:
|
||||||
|
# $ __cloud_FindInstances "name=exact-machine-name"
|
||||||
|
# $ __cloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
|
||||||
|
#
|
||||||
|
__cloud_FindInstances() {
  # Rebuilds the global `instances` array from `gcloud compute instances list`.
  # $1 (filter) is handed to gcloud's --filter option unchanged. Every RUNNING
  # instance is printed to stdout and recorded as "name:publicIp:privateIp";
  # instances in any other state are skipped with a warning.
  declare filter="$1"
  instances=()

  # `zone` is intentionally not declared here: the listing has no zone column
  # and a local would only shadow the file-level `zone` setting.
  # NOTE(review): the columns are split on whitespace, so a row whose public IP
  # is empty would presumably shift the remaining fields - confirm against
  # gcloud's output for such instances.
  declare name publicIp privateIp status
  while read -r name publicIp privateIp status; do
    if [[ $status != RUNNING ]]; then
      echo "Warning: $name is not RUNNING, ignoring it."
      continue
    fi
    printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp"

    instances+=("$name:$publicIp:$privateIp")
  done < <(gcloud compute instances list \
    --filter="$filter" \
    --format 'value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
}
|
||||||
|
#
|
||||||
|
# cloud_FindInstances [namePrefix]
|
||||||
|
#
|
||||||
|
# Find instances with names matching the specified prefix
|
||||||
|
#
|
||||||
|
# For each matching instance, an entry in the `instances` array will be added with the
|
||||||
|
# following information about the instance:
|
||||||
|
# "name:public IP:private IP"
|
||||||
|
#
|
||||||
|
# namePrefix - The instance name prefix to look for
|
||||||
|
#
|
||||||
|
# examples:
|
||||||
|
# $ cloud_FindInstances all-machines-with-a-common-machine-prefix
|
||||||
|
#
|
||||||
|
cloud_FindInstances() {
  # Looks up all instances whose name starts with namePrefix by delegating to
  # __cloud_FindInstances with a "name~^<prefix>" filter.
  declare namePrefix="$1"
  __cloud_FindInstances "name~^$namePrefix"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# cloud_FindInstance [name]
|
||||||
|
#
|
||||||
|
# Find an instance with a name matching the exact pattern.
|
||||||
|
#
|
||||||
|
# For each matching instance, an entry in the `instances` array will be added with the
|
||||||
|
# following information about the instance:
|
||||||
|
# "name:public IP:private IP"
|
||||||
|
#
|
||||||
|
# name - The instance name to look for
|
||||||
|
#
|
||||||
|
# examples:
|
||||||
|
# $ cloud_FindInstance exact-machine-name
|
||||||
|
#
|
||||||
|
cloud_FindInstance() {
  # Looks up the instance with exactly this name by delegating to
  # __cloud_FindInstances with a "name=<name>" filter.
  declare name="$1"
  __cloud_FindInstances "name=$name"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName]
|
||||||
|
# [machineType] [bootDiskSize] [enableGpu]
|
||||||
|
# [startupScript] [address]
|
||||||
|
#
|
||||||
|
# Creates one or more identical instances.
|
||||||
|
#
|
||||||
|
# networkName - unique name of this testnet
|
||||||
|
# namePrefix - unique string to prefix all the instance names with
|
||||||
|
# numNodes - number of instances to create
|
||||||
|
# imageName - Disk image for the instances
|
||||||
|
# machineType - GCE machine type
|
||||||
|
# bootDiskSize - Optional size of the boot disk in GB
|
||||||
|
# enableGpu - Optionally enable GPU, use the value "true" to enable
|
||||||
|
# (this attaches 4 K80 GPUs: "count=4,type=nvidia-tesla-k80")
|
||||||
|
# startupScript - Optional startup script to execute when the instance boots
|
||||||
|
# address - Optional name of the GCE static IP address to attach to the
|
||||||
|
# instance. Requires that |numNodes| = 1 and that addressName
|
||||||
|
# has been provisioned in the GCE region that is hosting `$zone`
|
||||||
|
#
|
||||||
|
# Tip: use cloud_FindInstances to locate the instances once this function
|
||||||
|
# returns
|
||||||
|
cloud_CreateInstances() {
  # Creates numNodes identical GCE instances in the current `$zone` with one
  # `gcloud beta compute instances create` call.
  #
  # Positional arguments:
  #   networkName namePrefix numNodes imageName machineType
  #   [bootDiskSize] [enableGpu] [startupScript] [address]
  #
  # Exits with status 1 if an address is supplied while numNodes is not 1.
  declare networkName="$1"
  declare namePrefix="$2"
  declare numNodes="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalGpu="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  # A single node is named exactly namePrefix. Otherwise each name is
  # namePrefix followed by a counter zero-padded to the number of digits in
  # numNodes (eg, node01..node10). The names are built in a loop instead of
  # `read -ra nodes <<<$(seq ...)`: that form keeps only the first name
  # whenever the here-string preserves the newlines emitted by seq.
  declare -a nodes=()
  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    declare i node
    for ((i = 1; i <= numNodes; i++)); do
      printf -v node '%s%0*d' "$namePrefix" "${#numNodes}" "$i"
      nodes+=("$node")
    done
  fi

  declare -a args
  args=(
    "--zone=$zone"
    "--tags=testnet"
    "--metadata=testnet=$networkName"
    "--image=$imageName"
    "--machine-type=$machineType"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      "--boot-disk-size=${optionalBootDiskSize}GB"
    )
  fi
  # Only the literal value "true" enables the (fixed) GPU configuration
  if [[ $optionalGpu = true ]]; then
    args+=(
      "--accelerator=count=4,type=nvidia-tesla-k80"
      --maintenance-policy TERMINATE
      --restart-on-failure
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --metadata-from-file "startup-script=$optionalStartupScript"
    )
  fi

  if [[ -n $optionalAddress ]]; then
    # One address cannot be attached to several instances
    [[ $numNodes = 1 ]] || {
      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
      exit 1
    }
    args+=(
      "--address=$optionalAddress"
    )
  fi

  (
    # Subshell so that command tracing does not leak into the caller
    set -x
    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
  )
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# cloud_DeleteInstances
|
||||||
|
#
|
||||||
|
# Deletes all the instances listed in the `instances` array
|
||||||
|
#
|
||||||
|
cloud_DeleteInstances() {
  # Deletes every instance listed in the global `instances` array from `$zone`
  # with a single, non-interactive (--quiet) gcloud call.
  # Prints a notice and returns without calling gcloud when the array is empty.

  # Count the array elements. `${#instances[0]}` would instead measure the
  # string length of the first entry.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Keep only the instance name (everything before the first ':')
  declare names=("${instances[@]/:*/}")

  (
    # Subshell so that command tracing does not leak into the caller
    set -x
    gcloud beta compute instances delete --zone "$zone" --quiet "${names[@]}"
  )
}
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
|
||||||
|
#
|
||||||
|
# Fetch a file from the given instance. This function uses a cloud-specific
|
||||||
|
# mechanism to fetch the file
|
||||||
|
#
|
||||||
|
cloud_FetchFile() {
  # Copies remoteFile from the named instance in `$zone` to localFile using
  # `gcloud compute scp`.
  #
  # $1 instanceName - instance to copy from
  # $2 publicIp     - accepted but not used by this implementation
  # $3 remoteFile   - path on the instance
  # $4 localFile    - destination path on this machine
  declare instanceName="$1"
  # shellcheck disable=SC2034 # publicIp is unused
  declare publicIp="$2"
  declare remoteFile="$3"
  declare localFile="$4"

  (
    # Subshell so that command tracing does not leak into the caller
    set -x
    gcloud compute scp --zone "$zone" "$instanceName:$remoteFile" "$localFile"
  )
}
|
@ -1,187 +0,0 @@
|
|||||||
# |source| this file
|
|
||||||
#
|
|
||||||
# Utilities for working with gcloud
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# gcloud_FindInstances [filter] [options]
|
|
||||||
#
|
|
||||||
# Find instances matching the specified pattern.
|
|
||||||
#
|
|
||||||
# For each matching instance, an entry in the `instances` array will be added with the
|
|
||||||
# following information about the instance:
|
|
||||||
# "name:zone:public IP:private IP"
|
|
||||||
#
|
|
||||||
# filter - The instances to filter on
|
|
||||||
# options - If set to the string "show", the list of instances will be echoed
|
|
||||||
# to stdout
|
|
||||||
#
|
|
||||||
# examples:
|
|
||||||
# $ gcloud_FindInstances "name=exact-machine-name"
|
|
||||||
# $ gcloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
|
|
||||||
#
|
|
||||||
gcloud_FindInstances() {
  # Rebuilds the global `instances` array from `gcloud compute instances list`.
  # $1 (filter) is handed to gcloud's --filter option unchanged. Every RUNNING
  # instance is recorded as "name:zone:publicIp:privateIp"; instances in any
  # other state are skipped with a warning. When $2 (options) is the string
  # "show", each recorded instance is also printed to stdout.
  declare filter="$1"
  declare options="$2"
  instances=()

  # NOTE(review): the columns are split on whitespace, so a row whose public IP
  # is empty would presumably shift the remaining fields - confirm against
  # gcloud's output for such instances.
  declare name zone publicIp privateIp status
  while read -r name zone publicIp privateIp status; do
    if [[ $status != RUNNING ]]; then
      echo "Warning: $name is not RUNNING, ignoring it."
      continue
    fi
    if [[ $options = show ]]; then
      printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp"
    fi

    instances+=("$name:$zone:$publicIp:$privateIp")
  done < <(gcloud compute instances list \
    --filter="$filter" \
    --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# gcloud_ForEachInstance [cmd] [extra args to cmd]
|
|
||||||
#
|
|
||||||
# Execute a command for each element in the `instances` array
|
|
||||||
#
|
|
||||||
# cmd - The command to execute on each instance
|
|
||||||
# The command will receive arguments followed by any
|
|
||||||
# additional arguments supplied to gcloud_ForEachInstance:
|
|
||||||
# name - name of the instance
|
|
||||||
# zone - zone the instance is located in
|
|
||||||
# publicIp - The public IP address of this instance
|
|
||||||
# privateIp - The private IP address of this instance
|
|
||||||
# count - Monotonically increasing count for each
|
|
||||||
# invocation of cmd, starting at 1
|
|
||||||
# ... - Extra args to cmd..
|
|
||||||
#
|
|
||||||
#
|
|
||||||
gcloud_ForEachInstance() {
  # Runs cmd once per entry of the global `instances` array, passing
  #   name zone publicIp privateIp count [extra args...]
  # where count starts at 1 and increases by one per entry.
  # Exits with status 1 when no cmd is given.
  declare cmd="$1"
  shift
  [[ -n $cmd ]] || { echo gcloud_ForEachInstance: cmd not specified; exit 1; }

  declare count=1
  # `info` is declared local so the loop variable does not leak to the caller
  declare info name zone publicIp privateIp
  for info in "${instances[@]}"; do
    # Entries have the form "name:zone:publicIp:privateIp"
    IFS=: read -r name zone publicIp privateIp <<<"$info"

    # NOTE(review): eval re-parses cmd and every argument as shell source, so
    # values containing whitespace or shell metacharacters are re-split and
    # re-expanded. Kept for compatibility with callers that may pass a cmd
    # string containing its own arguments; only use with trusted input.
    eval "$cmd" "$name" "$zone" "$publicIp" "$privateIp" "$count" "$@"
    count=$((count + 1))
  done
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# gcloud_CreateInstances [namePrefix] [numNodes] [zone] [imageName]
|
|
||||||
# [machineType] [bootDiskSize] [accelerator]
|
|
||||||
# [startupScript] [address]
|
|
||||||
#
|
|
||||||
# Creates one or more identical instances.
|
|
||||||
#
|
|
||||||
# namePrefix - unique string to prefix all the instance names with
|
|
||||||
# numNodes - number of instances to create
|
|
||||||
# zone - zone to create the instances in
|
|
||||||
# imageName - Disk image for the instances
|
|
||||||
# machineType - GCE machine type
|
|
||||||
# bootDiskSize - Optional size of the boot disk
|
|
||||||
# accelerator - Optional accelerator to attach to the instance(s),
|
|
||||||
# eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80"
|
|
||||||
# startupScript - Optional startup script to execute when the instance boots
|
|
||||||
# address - Optional name of the GCE static IP address to attach to the
|
|
||||||
# instance. Requires that |numNodes| = 1 and that addressName
|
|
||||||
# has been provisioned in the GCE region that is hosting |zone|
|
|
||||||
#
|
|
||||||
# Tip: use gcloud_FindInstances to locate the instances once this function
|
|
||||||
# returns
|
|
||||||
gcloud_CreateInstances() {
  # Creates numNodes identical GCE instances in the given zone with one
  # `gcloud beta compute instances create` call.
  #
  # Positional arguments:
  #   namePrefix numNodes zone imageName machineType
  #   [bootDiskSize] [accelerator] [startupScript] [address]
  #
  # Exits with status 1 if an address is supplied while numNodes is not 1.
  declare namePrefix="$1"
  declare numNodes="$2"
  declare zone="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalAccelerator="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  # A single node is named exactly namePrefix. Otherwise each name is
  # namePrefix followed by a counter zero-padded to the number of digits in
  # numNodes (eg, node01..node10). The names are built in a loop instead of
  # `read -ra nodes <<<$(seq ...)`: that form keeps only the first name
  # whenever the here-string preserves the newlines emitted by seq.
  declare -a nodes=()
  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    declare i node
    for ((i = 1; i <= numNodes; i++)); do
      printf -v node '%s%0*d' "$namePrefix" "${#numNodes}" "$i"
      nodes+=("$node")
    done
  fi

  declare -a args
  args=(
    "--zone=$zone"
    "--tags=testnet"
    "--image=$imageName"
    "--machine-type=$machineType"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      "--boot-disk-size=$optionalBootDiskSize"
    )
  fi
  if [[ -n $optionalAccelerator ]]; then
    args+=(
      "--accelerator=$optionalAccelerator"
      --maintenance-policy TERMINATE
      --restart-on-failure
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --metadata-from-file "startup-script=$optionalStartupScript"
    )
  fi

  if [[ -n $optionalAddress ]]; then
    # One address cannot be attached to several instances
    [[ $numNodes = 1 ]] || {
      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
      exit 1
    }
    args+=(
      "--address=$optionalAddress"
    )
  fi

  (
    # Subshell so that command tracing does not leak into the caller
    set -x
    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
  )
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# gcloud_DeleteInstances [yes]
|
|
||||||
#
|
|
||||||
# Deletes all the instances listed in the `instances` array
|
|
||||||
#
|
|
||||||
# If yes = "true", skip the delete confirmation
|
|
||||||
#
|
|
||||||
gcloud_DeleteInstances() {
  # Deletes every instance listed in the global `instances` array with a
  # single gcloud call. When $1 is the string "true", --quiet is passed so
  # gcloud does not ask for confirmation.
  # Prints a notice and returns without calling gcloud when the array is empty.
  declare maybeQuiet=
  if [[ ${1:-} = true ]]; then
    maybeQuiet=--quiet
  fi

  # Count the array elements. `${#instances[0]}` would instead measure the
  # string length of the first entry.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Keep only the instance name (everything before the first ':')
  declare names=("${instances[@]/:*/}")

  # Assume all instances are in the same zone
  # TODO: One day this assumption will be invalid
  declare zone
  IFS=: read -r _ zone _ < <(echo "${instances[0]}")

  (
    # Subshell so that command tracing does not leak into the caller
    set -x
    # ${var:+...} contributes no argument at all when maybeQuiet is empty
    gcloud beta compute instances delete --zone "$zone" ${maybeQuiet:+"$maybeQuiet"} "${names[@]}"
  )
}
|
|
||||||
|
|
Reference in New Issue
Block a user