Switch to instances with AVX-512 if possible for better interop with dev machines (#4328)

automerge
This commit is contained in:
Michael Vines 2019-05-17 20:06:07 -07:00 committed by Grimes
parent 431cc82032
commit 458ae3fdac
3 changed files with 17 additions and 9 deletions

View File

@ -49,7 +49,7 @@ Deploys a CD testnet
-c [number] - Number of client bencher nodes (default: $clientNodeCount)
-u - Include a Blockstreamer (default: $blockstreamer)
-P - Use public network IP addresses (default: $publicNetwork)
-G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80)
-G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100)
-g - Enable GPU (default: $enableGpu)
-a [address] - Set the bootstrap fullnode's external IP address to this GCE address
-d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.

View File

@ -12,8 +12,8 @@ gce)
# shellcheck source=net/scripts/gce-provider.sh
source "$here"/scripts/gce-provider.sh
cpuBootstrapLeaderMachineType="--machine-type n1-standard-16"
gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80"
cpuBootstrapLeaderMachineType="--machine-type n1-standard-16 --min-cpu-platform Intel%20Skylake"
gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100"
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
fullNodeMachineType=$cpuBootstrapLeaderMachineType
clientMachineType="--custom-cpu 16 --custom-memory 20GB"
@ -23,12 +23,16 @@ ec2)
# shellcheck source=net/scripts/ec2-provider.sh
source "$here"/scripts/ec2-provider.sh
cpuBootstrapLeaderMachineType=m4.2xlarge
cpuBootstrapLeaderMachineType=c5.2xlarge
# NOTE: At this time, p3dn.24xlarge is the only EC2 instance type that has
# both GPU and AVX-512 support. The default, p2.xlarge, does not support
# AVX-512.
gpuBootstrapLeaderMachineType=p2.xlarge
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
fullNodeMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=m4.2xlarge
blockstreamerMachineType=m4.2xlarge
clientMachineType=c5.2xlarge
blockstreamerMachineType=c5.2xlarge
;;
azure)
# shellcheck source=net/scripts/azure-provider.sh
@ -338,7 +342,7 @@ EOF
# machine can be pinged...
(
set -o pipefail
for i in $(seq 1 30); do
for i in $(seq 1 60); do
set -x
cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" &&
cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" &&
@ -379,7 +383,7 @@ EOF
(
set +e
fetchPrivateKey || exit 1
for i in $(seq 1 30); do
for i in $(seq 1 60); do
(
set -x
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"

View File

@ -163,7 +163,11 @@ cloud_CreateInstances() {
args+=(--image $imageName)
# shellcheck disable=SC2206 # Do not want to quote $machineType as it may contain extra args
args+=($machineType)
for word in $machineType; do
# Special handling for the "--min-cpu-platform" argument, whose value may
# contain a space (escaped as '%20')...
args+=("${word//%20/ }")
done
if [[ -n $optionalBootDiskSize ]]; then
args+=(
--boot-disk-size "${optionalBootDiskSize}GB"