From a1a70060e9264240a998dcd534bb160e363bd3ce Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Sat, 16 Jan 2021 01:35:44 +0000 Subject: [PATCH] Support account on tmpfs via net/ scripts (bp #14459) (#14620) * multinode-demo: Pass --accounts through bootstrap leader wrapper (cherry picked from commit 327be55accc0ebb39613a56244e78adc1a9add3c) * gce.sh: Factor out default custom memory (cherry picked from commit ddf1d2dbf5660238b46584d3fadadf212448e7c6) * net/: Support accounts on swap-backed tmpfs (cherry picked from commit ff599ace4d201701e570f847faec5516d737e043) * net/gce.sh: Add custom RAM arg instead of doubling default with tmpfs (cherry picked from commit 3175cf1debfc508dd45460cc0e7503211a69da62) Co-authored-by: Trent Nelson --- multinode-demo/bootstrap-validator.sh | 3 ++ net/gce.sh | 75 ++++++++++++++++++++++----- net/net.sh | 2 + net/remote/remote-node.sh | 9 ++++ net/scripts/azure-provider.sh | 4 ++ net/scripts/colo-provider.sh | 4 ++ net/scripts/ec2-provider.sh | 4 ++ net/scripts/gce-provider.sh | 4 ++ 8 files changed, 93 insertions(+), 12 deletions(-) diff --git a/multinode-demo/bootstrap-validator.sh b/multinode-demo/bootstrap-validator.sh index 87ce93eae7..d938211890 100755 --- a/multinode-demo/bootstrap-validator.sh +++ b/multinode-demo/bootstrap-validator.sh @@ -66,6 +66,9 @@ while [[ -n $1 ]]; do elif [[ $1 == --expected-bank-hash ]]; then args+=("$1" "$2") shift 2 + elif [[ $1 == --accounts ]]; then + args+=("$1" "$2") + shift 2 else echo "Unknown argument: $1" $program --help diff --git a/net/gce.sh b/net/gce.sh index 5e941fda95..f37b0eb549 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -12,7 +12,7 @@ gce) # shellcheck source=net/scripts/gce-provider.sh source "$here"/scripts/gce-provider.sh - cpuBootstrapLeaderMachineType="--custom-cpu 24 --custom-memory 64GB --min-cpu-platform Intel%20Skylake" + cpuBootstrapLeaderMachineType="--custom-cpu 24 --min-cpu-platform Intel%20Skylake" 
gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100" clientMachineType="--custom-cpu 16 --custom-memory 20GB" blockstreamerMachineType="--machine-type n1-standard-8" @@ -68,6 +68,9 @@ externalNodes=false failOnValidatorBootupFailure=true preemptible=true evalInfo=false +tmpfsAccounts=false +defaultCustomMemoryGB="$(cloud_DefaultCustomMemoryGB)" +customMemoryGB="$defaultCustomMemoryGB" publicNetwork=false letsEncryptDomainName= @@ -137,6 +140,12 @@ Manage testnet instances --custom-machine-type - Set a custom machine type without assuming whether or not GPU is enabled. Set this explicitly with --enable-gpu/-g to call out the presence of GPUs. +$( + if [[ -n "$defaultCustomMemoryGB" ]]; then + echo " --custom-memory-gb" + echo " - Set memory size for custom machine type in GB (default: $defaultCustomMemoryGB)" + fi +) --enable-gpu - Use with --custom-machine-type to specify whether or not GPUs should be used/enabled --validator-additional-disk-size-gb [number] - Add an additional [number] GB SSD to all validators to store the config directory. @@ -150,6 +159,7 @@ Manage testnet instances --self-destruct-hours [number] - Specify lifetime of the allocated instances in hours. 0 to disable. Only supported on GCE. 
(default: $selfDestructHours) + --tmpfs-accounts - Put accounts directory on a swap-backed tmpfs volume config-specific options: -P - Use public network IP addresses (default: $publicNetwork) @@ -218,6 +228,12 @@ while [[ -n $1 ]]; do elif [[ $1 == --reclaim-all-reservations ]]; then reclaimAllReservations=true shift + elif [[ $1 == --tmpfs-accounts ]]; then + tmpfsAccounts=true + shift + elif [[ $1 == --custom-memory-gb ]]; then + customMemoryGB=$2 + shift 2 else usage "Unknown long option: $1" fi @@ -273,16 +289,6 @@ while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do esac done -if [[ -n "$customMachineType" ]] ; then - bootstrapLeaderMachineType="$customMachineType" -elif [[ "$enableGpu" = "true" ]] ; then - bootstrapLeaderMachineType="$gpuBootstrapLeaderMachineType" -else - bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType" -fi -validatorMachineType=$bootstrapLeaderMachineType -blockstreamerMachineType=$bootstrapLeaderMachineType - [[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)") [[ -z $1 ]] || usage "Unexpected argument: $1" @@ -297,10 +303,26 @@ fi case $cloudProvider in gce) + if [[ "$tmpfsAccounts" = "true" ]]; then + cpuBootstrapLeaderMachineType+=" --local-ssd interface=nvme" + gpuBootstrapLeaderMachineType+=" --local-ssd interface=nvme" + if [[ $customMemoryGB -lt 100 ]]; then + # shellcheck disable=SC2016 # We don't want expression expansion on these backticks + echo -e '\nWarning: At least 100GB of system RAM is recommending with `--tmpfs-accounts` (see `--custom-memory-gb`)\n' + fi + fi + cpuBootstrapLeaderMachineType+=" --custom-memory ${customMemoryGB}GB" + gpuBootstrapLeaderMachineType+=" --custom-memory ${customMemoryGB}GB" ;; ec2|azure|colo) if [[ -n $validatorAdditionalDiskSizeInGb ]] ; then - usage "Error: --validator-additional-disk-size-gb currently only supported with cloud provider: gce" + usage "--validator-additional-disk-size-gb currently only supported with cloud provider: gce" + fi + if [[ 
"$tmpfsAccounts" = "true" ]]; then + usage "--tmpfs-accounts only supported on cloud provider: gce" + fi + if [[ "$customMemoryGB" != "$defaultCustomMemoryGB" ]]; then + usage "--custom-memory-gb only supported on cloud provider: gce" fi ;; *) @@ -328,6 +350,16 @@ if [[ -n $reclaimAllReservations || -n $reclaimOnlyPreemptibleReservations ]]; t forceDelete="true" fi +if [[ -n "$customMachineType" ]] ; then + bootstrapLeaderMachineType="$customMachineType" +elif [[ "$enableGpu" = "true" ]] ; then + bootstrapLeaderMachineType="$gpuBootstrapLeaderMachineType" +else + bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType" +fi +validatorMachineType=$bootstrapLeaderMachineType +blockstreamerMachineType=$bootstrapLeaderMachineType + # cloud_ForEachInstance [cmd] [extra args to cmd] # # Execute a command for each element in the `instances` array @@ -432,6 +464,7 @@ netBasename=$prefix publicNetwork=$publicNetwork sshPrivateKey=$sshPrivateKey letsEncryptDomainName=$letsEncryptDomainName +export TMPFS_ACCOUNTS=$tmpfsAccounts EOF fi touch "$geoipConfigFile" @@ -820,6 +853,24 @@ $(printNetworkInfo) $(creationInfo) EOM +$( + if [[ "$tmpfsAccounts" = "true" ]]; then + cat <<'EOSWAP' + +# Setup swap/tmpfs for accounts +tmpfsMountPoint=/mnt/solana-accounts +swapDevice="/dev/nvme0n1" +swapUUID="43076c54-7840-4e59-a368-2d164f8984fb" +mkswap --uuid "$swapUUID" "$swapDevice" +echo "UUID=$swapUUID swap swap defaults 0 0" >> /etc/fstab +swapon "UUID=$swapUUID" +mkdir -p -m 0777 "$tmpfsMountPoint" +echo "tmpfs $tmpfsMountPoint tmpfs defaults,size=300G 0 0" >> /etc/fstab +mount "$tmpfsMountPoint" +EOSWAP + fi +) + touch /solana-scratch/.instance-startup-complete EOF diff --git a/net/net.sh b/net/net.sh index 1140925b0f..7211947bbf 100755 --- a/net/net.sh +++ b/net/net.sh @@ -289,6 +289,7 @@ startBootstrapLeader() { \"$maybeWarpSlot\" \ \"$waitForNodeInit\" \ \"$extraPrimordialStakes\" \ + \"$TMPFS_ACCOUNTS\" \ " ) >> "$logFile" 2>&1 || { @@ -360,6 +361,7 @@ startNode() { 
\"$maybeWarpSlot\" \ \"$waitForNodeInit\" \ \"$extraPrimordialStakes\" \ + \"$TMPFS_ACCOUNTS\" \ " ) >> "$logFile" 2>&1 & declare pid=$! diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index e2cc5594cb..f6f8c1dbe3 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -28,6 +28,7 @@ gpuMode="${19:-auto}" maybeWarpSlot="${20}" waitForNodeInit="${21}" extraPrimordialStakes="${22:=0}" +tmpfsAccounts="${23:false}" set +x missing() { @@ -274,6 +275,10 @@ EOF --init-complete-file "$initCompleteFile" ) + if [[ "$tmpfsAccounts" = "true" ]]; then + args+=(--accounts /mnt/solana-accounts) + fi + if [[ $airdropsEnabled = true ]]; then cat >> ~/solana/on-reboot < faucet.log 2>&1 & @@ -391,6 +396,10 @@ EOF maybeSkipAccountsCreation="export SKIP_ACCOUNTS_CREATION=1" fi + if [[ "$tmpfsAccounts" = "true" ]]; then + args+=(--accounts /mnt/solana-accounts) + fi + cat >> ~/solana/on-reboot < validator.log.\$now 2>&1 & diff --git a/net/scripts/azure-provider.sh b/net/scripts/azure-provider.sh index b6597c120e..e4d2c2bad2 100755 --- a/net/scripts/azure-provider.sh +++ b/net/scripts/azure-provider.sh @@ -8,6 +8,10 @@ cloud_DefaultZone() { echo "westus" } +cloud_DefaultCustomMemoryGB() { + : # Not implemented +} + cloud_RestartPreemptedInstances() { : # Not implemented } diff --git a/net/scripts/colo-provider.sh b/net/scripts/colo-provider.sh index b16a6754fd..c0a58821c8 100755 --- a/net/scripts/colo-provider.sh +++ b/net/scripts/colo-provider.sh @@ -17,6 +17,10 @@ cloud_DefaultZone() { echo "Denver" } +cloud_DefaultCustomMemoryGB() { + : # Not implemented +} + cloud_RestartPreemptedInstances() { : # Not implemented } diff --git a/net/scripts/ec2-provider.sh b/net/scripts/ec2-provider.sh index f7acf33499..365b8119b9 100755 --- a/net/scripts/ec2-provider.sh +++ b/net/scripts/ec2-provider.sh @@ -7,6 +7,10 @@ cloud_DefaultZone() { echo "us-east-1b" } +cloud_DefaultCustomMemoryGB() { + : # Not implemented +} + cloud_RestartPreemptedInstances() { : # Not 
implemented } diff --git a/net/scripts/gce-provider.sh b/net/scripts/gce-provider.sh index 9f76cf020b..dbedcea864 100755 --- a/net/scripts/gce-provider.sh +++ b/net/scripts/gce-provider.sh @@ -8,6 +8,10 @@ cloud_DefaultZone() { echo "us-west1-b" } +cloud_DefaultCustomMemoryGB() { + echo 64 +} + # # cloud_RestartPreemptedInstances [namePrefix] #