diff --git a/ci/testnet-deploy.sh b/ci/testnet-deploy.sh index 60de0d72f5..5b8caac7f7 100755 --- a/ci/testnet-deploy.sh +++ b/ci/testnet-deploy.sh @@ -49,7 +49,7 @@ Deploys a CD testnet -c [number] - Number of client bencher nodes (default: $clientNodeCount) -u - Include a Blockstreamer (default: $blockstreamer) -P - Use public network IP addresses (default: $publicNetwork) - -G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80) + -G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100) -g - Enable GPU (default: $enableGpu) -a [address] - Set the bootstrap fullnode's external IP address to this GCE address -d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE. diff --git a/net/gce.sh b/net/gce.sh index 01fe137d13..e819067d63 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -12,8 +12,8 @@ gce) # shellcheck source=net/scripts/gce-provider.sh source "$here"/scripts/gce-provider.sh - cpuBootstrapLeaderMachineType="--machine-type n1-standard-16" - gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80" + cpuBootstrapLeaderMachineType="--machine-type n1-standard-16 --min-cpu-platform Intel%20Skylake" + gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100" bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType fullNodeMachineType=$cpuBootstrapLeaderMachineType clientMachineType="--custom-cpu 16 --custom-memory 20GB" @@ -23,12 +23,16 @@ ec2) # shellcheck source=net/scripts/ec2-provider.sh source "$here"/scripts/ec2-provider.sh - cpuBootstrapLeaderMachineType=m4.2xlarge + cpuBootstrapLeaderMachineType=c5.2xlarge + + # NOTE: At this time only the p3dn.24xlarge EC2 instance type has GPU and + # AVX-512 support. The default, p2.xlarge, does not support + # AVX-512 gpuBootstrapLeaderMachineType=p2.xlarge bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType fullNodeMachineType=$cpuBootstrapLeaderMachineType - clientMachineType=m4.2xlarge - blockstreamerMachineType=m4.2xlarge + clientMachineType=c5.2xlarge + blockstreamerMachineType=c5.2xlarge ;; azure) # shellcheck source=net/scripts/azure-provider.sh @@ -338,7 +342,7 @@ EOF # machine can be pinged... ( set -o pipefail - for i in $(seq 1 30); do + for i in $(seq 1 60); do set -x cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" && cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" && @@ -379,7 +383,7 @@ EOF ( set +e fetchPrivateKey || exit 1 - for i in $(seq 1 30); do + for i in $(seq 1 60); do ( set -x timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" diff --git a/net/scripts/gce-provider.sh b/net/scripts/gce-provider.sh index 69714bcf89..395194ddaa 100755 --- a/net/scripts/gce-provider.sh +++ b/net/scripts/gce-provider.sh @@ -163,7 +163,11 @@ cloud_CreateInstances() { args+=(--image $imageName) # shellcheck disable=SC2206 # Do not want to quote $machineType as it may contain extra args - args+=($machineType) + for word in $machineType; do + # Special handling for the "--min-cpu-platform" argument which may contain a + # space (escaped as '%20')... + args+=("${word//%20/ }") + done if [[ -n $optionalBootDiskSize ]]; then args+=( --boot-disk-size "${optionalBootDiskSize}GB"