Use a common solana user on all testnet instances
This commit is contained in:
		| @@ -7,8 +7,12 @@ | ||||
| # shellcheck disable=2034 | ||||
| # | ||||
|  | ||||
| netConfigDir="$(dirname "${BASH_SOURCE[0]}")"/config | ||||
| netLogDir="$(dirname "${BASH_SOURCE[0]}")"/log | ||||
| netDir=$( | ||||
|   cd "$(dirname "${BASH_SOURCE[0]}")" || exit | ||||
|   echo "$PWD" | ||||
| ) | ||||
| netConfigDir="$netDir"/config | ||||
| netLogDir="$netDir"/log | ||||
| mkdir -p "$netConfigDir" "$netLogDir" | ||||
|  | ||||
| # shellcheck source=scripts/configure-metrics.sh | ||||
| @@ -21,7 +25,6 @@ publicNetwork= | ||||
| leaderIp= | ||||
| netBasename= | ||||
| sshPrivateKey= | ||||
| sshUsername= | ||||
| clientIpList=() | ||||
| sshOptions=() | ||||
| validatorIpList=() | ||||
| @@ -31,9 +34,10 @@ buildSshOptions() { | ||||
|     -o "BatchMode=yes" | ||||
|     -o "StrictHostKeyChecking=no" | ||||
|     -o "UserKnownHostsFile=/dev/null" | ||||
|     -o "User=$sshUsername" | ||||
|     -o "User=solana" | ||||
|     -o "IdentityFile=$sshPrivateKey" | ||||
|     -o "LogLevel=ERROR" | ||||
|     -F /dev/null | ||||
|   ) | ||||
| } | ||||
|  | ||||
| @@ -47,7 +51,6 @@ loadConfigFile() { | ||||
|   [[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile" | ||||
|   [[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile" | ||||
|   [[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile" | ||||
|   [[ -n $sshUsername ]] || usage "Config file invalid, sshUsername unspecified: $configFile" | ||||
|   [[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile" | ||||
|  | ||||
|   buildSshOptions | ||||
|   | ||||
							
								
								
									
										88
									
								
								net/gce.sh
									
									
									
									
									
								
							
							
						
						
									
										88
									
								
								net/gce.sh
									
									
									
									
									
								
							| @@ -106,6 +106,7 @@ done | ||||
| shift $((OPTIND - 1)) | ||||
|  | ||||
| [[ -z $1 ]] || usage "Unexpected argument: $1" | ||||
| sshPrivateKey="$netConfigDir/id_$prefix" | ||||
|  | ||||
| prepareInstancesAndWriteConfigFile() { | ||||
|   $metricsWriteDatapoint "testnet-deploy net-config-begin=1" | ||||
| @@ -114,15 +115,10 @@ prepareInstancesAndWriteConfigFile() { | ||||
| # autogenerated at $(date) | ||||
| netBasename=$prefix | ||||
| publicNetwork=$publicNetwork | ||||
| sshPrivateKey=$sshPrivateKey | ||||
| EOF | ||||
|  | ||||
|   declare sshPrivateKey="$netConfigDir/id_$prefix" | ||||
|   rm -rf "$sshPrivateKey"{,.pub} | ||||
|   ( | ||||
|     set -x | ||||
|     ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey" | ||||
|   ) | ||||
|   echo "sshPrivateKey=$sshPrivateKey" >> "$configFile" | ||||
|   buildSshOptions | ||||
|  | ||||
|   recordInstanceIp() { | ||||
|     declare name="$1" | ||||
| @@ -141,38 +137,79 @@ EOF | ||||
|     fi | ||||
|   } | ||||
|  | ||||
#
# waitForStartupComplete [name] [unused] [publicIp]
#
# Blocks until the instance's startup script has created the
# /.gce-startup-complete marker file, probing over ssh every 2 seconds for at
# most 30 attempts.
#
# name      - instance name, only used in progress messages
# (2nd arg) - ignored (presumably the zone passed by gcloud_ForEachInstance;
#             verify against that helper)
# publicIp  - address to ssh into
#
# Reads the global `sshOptions` array.
#
# Returns 0 once the marker is found, 1 if the instance never reported
# completion (previously the timeout was silently treated as success).
#
waitForStartupComplete() {
  declare name="$1"
  declare publicIp="$3"

  echo "Waiting for $name to finish booting..."
  (
    declare attempt
    for ((attempt = 1; attempt <= 30; attempt++)); do
      # Trace only the ssh invocation itself, not the whole retry loop
      if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
        exit 0
      fi
      sleep 2
      echo "Retry $attempt..."
    done

    echo "Error: $name did not finish booting" >&2
    exit 1
  )
}
|  | ||||
|   echo "Looking for leader instance..." | ||||
|   gcloud_FindInstances "name=$prefix-leader" show | ||||
|   [[ ${#instances[@]} -eq 1 ]] || { | ||||
|     echo "Unable to start leader" | ||||
|     echo "Unable to find leader" | ||||
|     exit 1 | ||||
|   } | ||||
|   gcloud_FigureRemoteUsername "${instances[0]}" | ||||
|   sshUsername=$gcloud_username | ||||
|   echo "sshUsername=$sshUsername" >> "$configFile" | ||||
|   buildSshOptions | ||||
|  | ||||
|   gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey" | ||||
# Fetch the shared ssh private key from the leader instance (the instance
# startup script places it at /solana-id_ecdsa).  Runs in a subshell so that
# |set -x|, |set -o pipefail| and the leader* variables (leaderIp in
# particular, which is also a config variable) do not leak into the caller.
(
  declare leaderName
  declare leaderZone
  declare leaderIp
  IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")

  # Announce only once leaderName is actually known
  echo "Fetching $sshPrivateKey from $leaderName"

  # Remove any stale keypair; the public half carries a ".pub" suffix
  rm -rf "$sshPrivateKey"{,.pub}

  set -x

  # Try to ping the machine first.  There can be a delay between when the
  # instance is reported as RUNNING and when it's reachable over the network
  timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"

  # Try to scp in a couple times, sshd may not yet be up even though the
  # machine can be pinged...
  set -o pipefail
  declare fetched=false
  for i in $(seq 1 10); do
    if gcloud compute scp --zone "$leaderZone" \
        "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
      fetched=true
      break
    fi
    sleep 1
    echo "Retry $i..."
  done

  # Fail with a clear message instead of a confusing chmod error on a
  # file that was never downloaded
  $fetched || {
    echo "Unable to fetch $sshPrivateKey from $leaderName"
    exit 1
  }

  chmod 400 "$sshPrivateKey"
)
|  | ||||
|   echo "leaderIp=()" >> "$configFile" | ||||
|   gcloud_ForEachInstance recordInstanceIp leaderIp | ||||
|   gcloud_ForEachInstance waitForStartupComplete | ||||
|  | ||||
|   echo "Looking for validator instances..." | ||||
|   gcloud_FindInstances "name~^$prefix-validator" show | ||||
|   [[ ${#instances[@]} -gt 0 ]] || { | ||||
|     echo "Unable to start validators" | ||||
|     echo "Unable to find validators" | ||||
|     exit 1 | ||||
|   } | ||||
|   echo "validatorIpList=()" >> "$configFile" | ||||
|   gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey" | ||||
|   gcloud_ForEachInstance recordInstanceIp validatorIpList | ||||
|   gcloud_ForEachInstance waitForStartupComplete | ||||
|  | ||||
|   echo "clientIpList=()" >> "$configFile" | ||||
|   echo "Looking for client instances..." | ||||
|   gcloud_FindInstances "name~^$prefix-client" show | ||||
|   [[ ${#instances[@]} -eq 0 ]] || { | ||||
|     gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey" | ||||
|     gcloud_ForEachInstance recordInstanceIp clientIpList | ||||
|     gcloud_ForEachInstance waitForStartupComplete | ||||
|   } | ||||
|  | ||||
|   echo "Wrote $configFile" | ||||
| @@ -206,6 +243,9 @@ create) | ||||
|  | ||||
|   $metricsWriteDatapoint "testnet-deploy net-create-begin=1" | ||||
|  | ||||
|   rm -rf "$sshPrivateKey"{,.pub} | ||||
|   ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey" | ||||
|  | ||||
|   printNetworkInfo() { | ||||
|     cat <<EOF | ||||
| ======================================================================================== | ||||
| @@ -233,15 +273,29 @@ cat > /etc/motd <<EOM | ||||
|   See "startup-script" log messages in /var/log/syslog for status: | ||||
|     $ sudo cat /var/log/syslog | grep startup-script | ||||
|  | ||||
|   To block until setup is complete, run: | ||||
|     $ until [[ -f /.gce-startup-complete ]]; do sleep 1; done | ||||
|  | ||||
| !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||||
| EOM | ||||
|  | ||||
| # Place the generated private key at /solana-id_ecdsa so it's retrievable by anybody | ||||
| # who is able to log into this machine | ||||
| cat > /solana-id_ecdsa <<EOK | ||||
| $(cat "$sshPrivateKey") | ||||
| EOK | ||||
| cat > /solana-id_ecdsa.pub <<EOK | ||||
| $(cat "$sshPrivateKey.pub") | ||||
| EOK | ||||
| chmod 444 /solana-id_ecdsa | ||||
|  | ||||
| USER=\$(id -un) | ||||
|  | ||||
| $( | ||||
|   cd "$here"/scripts/ | ||||
|   cat \ | ||||
|     disable-background-upgrades.sh \ | ||||
|     create-solana-user.sh \ | ||||
|     install-earlyoom.sh \ | ||||
|     install-rsync.sh \ | ||||
|     install-libssl-compatability.sh \ | ||||
| @@ -251,6 +305,8 @@ cat > /etc/motd <<EOM | ||||
| $(printNetworkInfo) | ||||
| EOM | ||||
|  | ||||
| touch /.gce-startup-complete | ||||
|  | ||||
| EOF | ||||
|  | ||||
|   gcloud_CreateInstances "$prefix-leader" 1 "$zone" \ | ||||
|   | ||||
							
								
								
									
										16
									
								
								net/net.sh
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								net/net.sh
									
									
									
									
									
								
							| @@ -116,16 +116,7 @@ build() { | ||||
| startCommon() { | ||||
|   declare ipAddress=$1 | ||||
|   test -d "$SOLANA_ROOT" | ||||
|   ssh "${sshOptions[@]}" "$ipAddress" " | ||||
|     mkdir -p ~/solana ~/.cargo/bin | ||||
|  | ||||
|     # Help other users of the machine locate network logs | ||||
|     [[ -d /tmp/solana/ ]] || { | ||||
|       mkdir /tmp/solana/ | ||||
|       chmod go+w /tmp/solana/ | ||||
|     } | ||||
|     ln -sfT ~/solana /tmp/solana/= | ||||
|   " | ||||
|   ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin" | ||||
|   rsync -vPrc -e "ssh ${sshOptions[*]}" \ | ||||
|     "$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \ | ||||
|     "$ipAddress":~/solana/ | ||||
| @@ -231,7 +222,10 @@ start() { | ||||
|           " | ||||
|         ) | ||||
|       else | ||||
|         snap download --channel="$snapChannel" solana | ||||
|         ( | ||||
|           cd "$SOLANA_ROOT" | ||||
|           snap download --channel="$snapChannel" solana | ||||
|         ) | ||||
|       fi | ||||
|       snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)" | ||||
|       [[ -r $snapFilename ]] || { | ||||
|   | ||||
| @@ -2,6 +2,8 @@ | ||||
|  | ||||
| cd "$(dirname "$0")"/../.. | ||||
|  | ||||
| echo "$(date) | $0 $*" > client.log | ||||
|  | ||||
| deployMethod="$1" | ||||
| entrypointIp="$2" | ||||
| numNodes="$3" | ||||
|   | ||||
							
								
								
									
										27
									
								
								net/scripts/create-solana-user.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										27
									
								
								net/scripts/create-solana-user.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,27 @@ | ||||
#!/bin/bash -ex
#
# Creates the common |solana| user with passwordless sudo, and installs the
# keypair found at /solana-id_ecdsa{,.pub} as both its authorized key and its
# default ssh identity.
#
# Must be run as root on Linux.  Safe to run more than once.
#

[[ $(uname) = Linux ]] || exit 1
# Check the effective uid rather than trusting the USER environment variable,
# which is not guaranteed to be set
[[ $(id -u) -eq 0 ]] || exit 1

# Validate the prerequisites before modifying any system account so a missing
# key does not leave a half-configured user behind
[[ -r /solana-id_ecdsa ]] || exit 1
[[ -r /solana-id_ecdsa.pub ]] || exit 1

if ! id solana > /dev/null 2>&1; then
  adduser solana --gecos "" --disabled-password --quiet
fi
adduser solana sudo

# Grant passwordless sudo exactly once, even across re-runs
sudoersRule="solana ALL=(ALL) NOPASSWD:ALL"
grep -qxF "$sudoersRule" /etc/sudoers || echo "$sudoersRule" >> /etc/sudoers
id solana

# Populate ~solana/.ssh as the solana user so ownership is correct.  The
# umask is tightened only after authorized_keys is copied, so the private key
# and the ssh config are created owner-read-only.
sudo -u solana bash -c "
  mkdir -p /home/solana/.ssh/
  cd /home/solana/.ssh/
  cp /solana-id_ecdsa.pub authorized_keys
  umask 377
  cp /solana-id_ecdsa id_ecdsa
  echo \"
    Host *
    BatchMode yes
    IdentityFile ~/.ssh/id_ecdsa
    StrictHostKeyChecking no
  \" > config
"
|  | ||||
| @@ -1,5 +1,5 @@ | ||||
| #!/bin/bash -ex | ||||
|  | ||||
| # | ||||
| # Prevent background upgrades that block |apt-get| | ||||
| # | ||||
| # TODO: This approach is pretty uncompromising.  An alternative solution that | ||||
| @@ -18,4 +18,3 @@ while fuser /var/lib/dpkg/lock; do | ||||
|   sleep 1 | ||||
| done | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -185,149 +185,3 @@ gcloud_DeleteInstances() { | ||||
|   ) | ||||
| } | ||||
|  | ||||
#
# gcloud_FigureRemoteUsername [instanceInfo]
#
# The remote username when ssh-ing into GCP instances tends to not be the same
# as the user's local username, so it is discovered by ssh-ing into an
# instance and asking the remote shell for $USER.
#
# On success the gcloud_username global variable is updated.  If
# gcloud_username is already set the function returns immediately.  If the
# username cannot be determined the script exits with status 1.
#
# instanceInfo  - an entry from the `instances` array, "name:zone:publicIp:..."
#
# example:
#   gcloud_FigureRemoteUsername "name:zone:..."
#
gcloud_FigureRemoteUsername() {
  if [[ -n $gcloud_username ]]; then
    return
  fi

  declare instanceInfo="$1"
  declare name zone publicIp
  IFS=: read -r name zone publicIp _ < <(echo "$instanceInfo")

  echo "Detecting remote username using $name in $zone:"

  # Unpredictable scratch file for the remote shell's reply
  declare whoamiFile
  whoamiFile=$(mktemp) || {
    echo "Unable to create temporary file"
    exit 1
  }

  # Figure the gcp ssh username
  (
    set -x

    # Try to ping the machine first.  There can be a delay between when the
    # instance is reported as RUNNING and when it's reachable over the network
    timeout 30s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"

    # Try to ssh in a couple times, sshd may not yet be up even though the
    # machine can be pinged...
    set -o pipefail
    for i in $(seq 1 10); do
      if gcloud compute ssh "$name" \
          --zone "$zone" -- "echo whoami:\$USER:iamwho" \
          | tr -d $'\r ' | tee "$whoamiFile"; then
        break
      fi
      sleep 1
      echo "Retry $i..."
    done
  )

  # Accept only a line framed by the whoami/iamwho sentinels; anything else
  # in the output (warnings, banners) must never end up in gcloud_username.
  declare whoami candidate iamwho
  while IFS=: read -r whoami candidate iamwho || [[ -n $whoami ]]; do
    if [[ $whoami == "whoami" && $iamwho == "iamwho" ]]; then
      gcloud_username=$candidate
      break
    fi
  done < "$whoamiFile"
  rm -f "$whoamiFile"

  if [[ -z $gcloud_username ]]; then
      echo Unable to figure remote user name
      exit 1
  fi

  echo "Remote username: $gcloud_username"
}
|  | ||||
#
# gcloud_PrepInstancesForSsh [username] [privateKey]
#
# Prepares all the instances in the `instances` array for ssh with the specified
# keypair.  This eliminates the need to use the restrictive |gcloud compute ssh|,
# use plain |ssh| instead.
#
# username    - gcp ssh username as computed by gcloud_FigureRemoteUsername
# privateKey  - private key to install on all the instances; the matching
#               public key is expected at "$privateKey.pub"
#
# Per-instance output is written to log/gcloud_PrepInstancesForSsh-NAME.log
# (relative to the current directory).  If copying the key to any instance
# fails, that instance's log is printed and the script exits with status 1.
#
gcloud_PrepInstancesForSsh() {
  declare username="$1"
  declare privateKey="$2"
  declare publicKey="$privateKey".pub
  declare logDir=log/

  mkdir -p "$logDir"
  rm -rf "$logDir"gcloud_PrepInstancesForSsh-*

  [[ -r $publicKey ]] || {
    echo "Unable to read public key: $publicKey"
    exit 1
  }

  [[ -r $privateKey ]] || {
    echo "Unable to read private key: $privateKey"
    exit 1
  }

  [[ -d $logDir ]] || {
    echo "logDir does not exist: $logDir"
    exit 1
  }

  # pids[i] is the background copy job whose output lives in logFiles[i].
  # (A symlink named after the pid was used for this before, but its relative
  # target resolved inside $logDir and therefore dangled.)
  declare -a pids=()
  declare -a logFiles=()
  declare instanceInfo logFile
  for instanceInfo in "${instances[@]}"; do
    declare name zone publicIp
    IFS=: read -r name zone publicIp _ < <(echo "$instanceInfo")

    logFile="${logDir}gcloud_PrepInstancesForSsh-$name.log"

    # TODO: This next subshell runs in series because, for an unknown reason,
    # running multiple |gcloud compute ssh| commands in parallel causes the
    # macOS terminal to misbehave
    (
      set -x

      # Try to ping the machine first.  There can be a delay between when the
      # instance is reported as RUNNING and when it's reachable over the network
      timeout 60s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"

      # Authorize the public key and point the instance's own ssh client at
      # the private key that the scp below installs as ~/.ssh/id_testnet
      gcloud compute ssh --zone "$zone" "$name" -- "
        set -x;
        mkdir -p .ssh;
        echo \"$(cat "$publicKey")\" >> .ssh/authorized_keys;
        echo \"
          Host *
          BatchMode yes
          IdentityFile ~/.ssh/id_testnet
          StrictHostKeyChecking no
        \" > .ssh/config;
      "
    ) >> "$logFile" 2>&1

    # The key copy authenticates with the key authorized above, so it also
    # fails if the previous step did not succeed.  Runs in the background.
    (
      set -x
      scp \
        -o StrictHostKeyChecking=no \
        -o UserKnownHostsFile=/dev/null \
        -i "$privateKey" \
        "$privateKey" "$username@$publicIp:.ssh/id_testnet"
    ) >> "$logFile" 2>&1 &

    pids+=("$!")
    logFiles+=("$logFile")
  done

  declare idx
  for idx in "${!pids[@]}"; do
    if ! wait "${pids[$idx]}"; then
      cat "${logFiles[$idx]}"
      echo ^^^ +++
      exit 1
    fi
  done
}
|   | ||||
| @@ -46,7 +46,7 @@ fi | ||||
| printNode() { | ||||
|   declare nodeType=$1 | ||||
|   declare ip=$2 | ||||
|   printf "  %-25s | For logs run: $0 $ip tail -f /tmp/solana/=/$nodeType.log\n" "$0 $ip" | ||||
|   printf "  %-25s | For logs run: $0 $ip tail -f solana/$nodeType.log\n" "$0 $ip" | ||||
| } | ||||
|  | ||||
| echo Leader: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user