Use a common solana user on all testnet instances

This commit is contained in:
Michael Vines
2018-09-08 19:19:12 -07:00
committed by Grimes
parent 7029e4395c
commit ebcac3c2d1
8 changed files with 116 additions and 181 deletions

View File

@ -106,6 +106,7 @@ done
shift $((OPTIND - 1))
[[ -z $1 ]] || usage "Unexpected argument: $1"
sshPrivateKey="$netConfigDir/id_$prefix"
prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
@ -114,15 +115,10 @@ prepareInstancesAndWriteConfigFile() {
# autogenerated at $(date)
netBasename=$prefix
publicNetwork=$publicNetwork
sshPrivateKey=$sshPrivateKey
EOF
declare sshPrivateKey="$netConfigDir/id_$prefix"
rm -rf "$sshPrivateKey"{,.pub}
(
set -x
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
)
echo "sshPrivateKey=$sshPrivateKey" >> "$configFile"
buildSshOptions
recordInstanceIp() {
declare name="$1"
@ -141,38 +137,79 @@ EOF
fi
}
# Poll an instance over ssh until /.gce-startup-complete exists on it.
#
# Used as a gcloud_ForEachInstance callback; only the first and third
# positional arguments are read here.
#
# Arguments:
#   $1 - instance name (used in log messages only)
#   $3 - address handed to ssh
# Globals:
#   sshOptions (read) - array of options passed to ssh
# Outputs:
#   Progress messages on stdout; a warning on stderr when the marker file
#   was not seen within the attempt budget.
# Returns:
#   0 in all cases. The wait is best-effort, so a timeout is reported but
#   does not change the exit status callers observe.
waitForStartupComplete() {
  declare name="$1"
  declare publicIp="$3"
  declare -i maxAttempts=30
  declare -i attempt

  echo "Waiting for $name to finish booting..."
  for ((attempt = 1; attempt <= maxAttempts; attempt++)); do
    # The inner subshell confines `set -x` to the ssh invocation itself
    if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
      return 0
    fi
    sleep 2
    echo "Retry $attempt..."
  done

  # Make the timeout visible instead of silently looking like a success
  echo "Warning: $name did not report /.gce-startup-complete after $maxAttempts attempts" >&2
  return 0
}
echo "Looking for leader instance..."
gcloud_FindInstances "name=$prefix-leader" show
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to start leader"
echo "Unable to find leader"
exit 1
}
gcloud_FigureRemoteUsername "${instances[0]}"
sshUsername=$gcloud_username
echo "sshUsername=$sshUsername" >> "$configFile"
buildSshOptions
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
# Fetch the ssh private key from the leader instance into $sshPrivateKey.
#
# Runs in a subshell so the leader* variables and `set -x` do not leak into
# the caller. Reads instances[0] (colon-separated; the first three fields
# are taken as name, zone and ip) and sshPrivateKey. On success the key file
# is left with mode 400; if every copy attempt fails the subshell ends with
# a non-zero status.
(
  declare leaderName
  declare leaderZone
  declare leaderIp
  IFS=: read -r leaderName leaderZone leaderIp _ <<< "${instances[0]}"

  # Announce only once the leader name has actually been parsed
  echo "Fetching $sshPrivateKey from $leaderName"

  # Remove any stale key pair: both the private key and its .pub companion
  rm -rf "$sshPrivateKey"{,.pub}

  set -x

  # Try to ping the machine first. There can be a delay between when the
  # instance is reported as RUNNING and when it's reachable over the network
  timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"

  # Try to scp in a couple times, sshd may not yet be up even though the
  # machine can be pinged...
  declare fetched=
  declare -i attempt
  for ((attempt = 1; attempt <= 10; attempt++)); do
    if gcloud compute scp --zone "$leaderZone" \
      "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
      fetched=1
      break
    fi
    sleep 1
    echo "Retry $attempt..."
  done

  # Report the real cause rather than letting chmod fail on a missing file
  [[ -n $fetched ]] || {
    echo "Unable to fetch $sshPrivateKey from $leaderName" >&2
    exit 1
  }

  chmod 400 "$sshPrivateKey"
)
echo "leaderIp=()" >> "$configFile"
gcloud_ForEachInstance recordInstanceIp leaderIp
gcloud_ForEachInstance waitForStartupComplete
echo "Looking for validator instances..."
gcloud_FindInstances "name~^$prefix-validator" show
[[ ${#instances[@]} -gt 0 ]] || {
echo "Unable to start validators"
echo "Unable to find validators"
exit 1
}
echo "validatorIpList=()" >> "$configFile"
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
gcloud_ForEachInstance recordInstanceIp validatorIpList
gcloud_ForEachInstance waitForStartupComplete
echo "clientIpList=()" >> "$configFile"
echo "Looking for client instances..."
gcloud_FindInstances "name~^$prefix-client" show
[[ ${#instances[@]} -eq 0 ]] || {
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
gcloud_ForEachInstance recordInstanceIp clientIpList
gcloud_ForEachInstance waitForStartupComplete
}
echo "Wrote $configFile"
@ -206,6 +243,9 @@ create)
$metricsWriteDatapoint "testnet-deploy net-create-begin=1"
rm -rf "$sshPrivateKey"{,.pub}
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
printNetworkInfo() {
cat <<EOF
========================================================================================
@ -233,15 +273,29 @@ cat > /etc/motd <<EOM
See "startup-script" log messages in /var/log/syslog for status:
$ sudo cat /var/log/syslog | grep startup-script
To block until setup is complete, run:
$ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOM
# Place the generated private key at /solana-id_ecdsa so it's retrievable by anybody
# who is able to log into this machine
cat > /solana-id_ecdsa <<EOK
$(cat "$sshPrivateKey")
EOK
cat > /solana-id_ecdsa.pub <<EOK
$(cat "$sshPrivateKey.pub")
EOK
chmod 444 /solana-id_ecdsa
USER=\$(id -un)
$(
cd "$here"/scripts/
cat \
disable-background-upgrades.sh \
create-solana-user.sh \
install-earlyoom.sh \
install-rsync.sh \
install-libssl-compatability.sh \
@ -251,6 +305,8 @@ cat > /etc/motd <<EOM
$(printNetworkInfo)
EOM
touch /.gce-startup-complete
EOF
gcloud_CreateInstances "$prefix-leader" 1 "$zone" \