From f853595efb44cd1518cbcfe9b1ea0639d4c8cdd2 Mon Sep 17 00:00:00 2001 From: Michael Vines Date: Sun, 28 Apr 2019 19:50:02 -0700 Subject: [PATCH] testnet-demo now runs across more GCE zones (#4053) * testnet-demo now runs across more GCE zones * Save zone info to config file * Add geoip whitelist for common data centers * Skip more of start * Include -x for config * Fetch private key from first validator node if necessary * Correct -r propagation --- ci/testnet-deploy.sh | 63 ++++++++++-------- ci/testnet-manager.sh | 82 ++++++++++++++++++----- net/common.sh | 4 ++ net/gce.sh | 134 +++++++++++++++++++++++++++++++------- net/remote/remote-node.sh | 1 + 5 files changed, 215 insertions(+), 69 deletions(-) diff --git a/ci/testnet-deploy.sh b/ci/testnet-deploy.sh index f60ae6c8c8..c788548075 100755 --- a/ci/testnet-deploy.sh +++ b/ci/testnet-deploy.sh @@ -11,7 +11,8 @@ clientNodeCount=0 additionalFullNodeCount=10 publicNetwork=false stopNetwork=false -skipSetup=false +reuseLedger=false +skipCreate=false skipStart=false externalNode=false failOnValidatorBootupFailure=true @@ -56,6 +57,7 @@ Deploys a CD testnet -r - Reuse existing node/ledger configuration from a previous |start| (ie, don't run ./multinode-demo/setup.sh). -x - External node. Default: false + -e - Skip create. Assume the nodes have already been created -s - Skip start. Nodes will still be created or configured, but network software will not be started. -S - Stop network software without tearing down nodes. -f - Discard validator nodes that didn't bootup successfully @@ -68,7 +70,7 @@ EOF zone=() -while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sf" opt; do +while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sfe" opt; do case $opt in h | \?) usage @@ -121,7 +123,10 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sf" opt; do delete=true ;; r) - skipSetup=true + reuseLedger=true + ;; + e) + skipCreate=true ;; s) skipStart=true @@ -175,15 +180,15 @@ for val in "${zone[@]}"; do done if $stopNetwork; then - skipSetup=true + skipCreate=true fi if $delete; then - skipSetup=false + skipCreate=false fi # Create the network -if ! $skipSetup; then +if ! $skipCreate; then echo "--- $cloudProvider.sh delete" # shellcheck disable=SC2068 time net/"$cloudProvider".sh delete ${zone_args[@]} -p "$netName" ${externalNode:+-x} @@ -245,6 +250,10 @@ else config_args+=(-P) fi + if $externalNode; then + config_args+=(-x) + fi + if ! $failOnValidatorBootupFailure; then config_args+=(-f) fi @@ -262,35 +271,35 @@ if $stopNetwork; then exit 0 fi -echo --- net.sh start -maybeRejectExtraNodes= -if ! $publicNetwork; then - maybeRejectExtraNodes="-o rejectExtraNodes" -fi -maybeNoValidatorSanity= -if [[ -n $NO_VALIDATOR_SANITY ]]; then - maybeNoValidatorSanity="-o noValidatorSanity" -fi -maybeNoLedgerVerify= -if [[ -n $NO_LEDGER_VERIFY ]]; then - maybeNoLedgerVerify="-o noLedgerVerify" -fi - -maybeSkipSetup= -if $skipSetup; then - maybeSkipSetup="-r" -fi - ok=true if ! $skipStart; then ( - if $skipSetup; then + if $skipCreate; then # TODO: Enable rolling updates #op=update op=restart else op=start fi + echo "--- net.sh $op" + + maybeRejectExtraNodes= + if ! $publicNetwork; then + maybeRejectExtraNodes="-o rejectExtraNodes" + fi + maybeNoValidatorSanity= + if [[ -n $NO_VALIDATOR_SANITY ]]; then + maybeNoValidatorSanity="-o noValidatorSanity" + fi + maybeNoLedgerVerify= + if [[ -n $NO_LEDGER_VERIFY ]]; then + maybeNoLedgerVerify="-o noLedgerVerify" + fi + + maybeReuseLedger= + if $reuseLedger; then + maybeReuseLedger="-r" + fi maybeUpdateManifestKeypairFile= # shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/ @@ -302,7 +311,7 @@ if ! $skipStart; then # shellcheck disable=SC2086 # Don't want to double quote the $maybeXYZ variables time net/net.sh $op -t "$tarChannelOrTag" \ $maybeUpdateManifestKeypairFile \ - $maybeSkipSetup \ + $maybeReuseLedger \ $maybeRejectExtraNodes \ $maybeNoValidatorSanity \ $maybeNoLedgerVerify diff --git a/ci/testnet-manager.sh b/ci/testnet-manager.sh index 0b0d298fb6..1bfbac792a 100755 --- a/ci/testnet-manager.sh +++ b/ci/testnet-manager.sh @@ -81,11 +81,38 @@ eval "$(ci/channel-info.sh)" EC2_ZONES=(us-west-1a sa-east-1a ap-northeast-2a eu-central-1a ca-central-1a) + +# GCE zones with _lots_ of quota GCE_ZONES=( - us-west1-a us-west1-b us-west1-c - us-central1-a us-central1-b - us-east1-b us-east1-c us-east1-d - europe-west4-a europe-west4-b europe-west4-c + us-west1-a + us-central1-a + us-east1-b + europe-west4-a + + us-west1-b + us-central1-b + us-east1-c + europe-west4-b + + us-west1-c + us-east1-d + europe-west4-c +) + +# GCE zones with enough quota for one CPU-only fullnode +GCE_LOW_QUOTA_ZONES=( + asia-east2-a + asia-northeast1-b + asia-northeast2-b + asia-south1-c + asia-southeast1-b + australia-southeast1-b + europe-north1-a + europe-west2-b + europe-west3-c + europe-west6-a + northamerica-northeast1-a + southamerica-east1-b ) case $TESTNET in @@ -113,7 +140,8 @@ testnet-perf) testnet-demo) CHANNEL_OR_TAG=beta CHANNEL_BRANCH=$BETA_CHANNEL - : "${GCE_NODE_COUNT:=200}" + : "${GCE_NODE_COUNT:=186}" + : "${GCE_LOW_QUOTA_NODE_COUNT:=12}" : "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}" ;; *) @@ -130,6 +158,10 @@ GCE_ZONE_ARGS=() for val in "${GCE_ZONES[@]}"; do GCE_ZONE_ARGS+=("-z $val") done +GCE_LOW_QUOTA_ZONE_ARGS=() +for val in "${GCE_LOW_QUOTA_ZONES[@]}"; do + GCE_LOW_QUOTA_ZONE_ARGS+=("-z $val") +done if [[ -n $TESTNET_DB_HOST ]]; then SOLANA_METRICS_PARTIAL_CONFIG="host=$TESTNET_DB_HOST,$SOLANA_METRICS_PARTIAL_CONFIG" @@ -158,6 +190,7 @@ steps: TESTNET_DB_HOST: "$TESTNET_DB_HOST" EC2_NODE_COUNT: "$EC2_NODE_COUNT" GCE_NODE_COUNT: "$GCE_NODE_COUNT" + GCE_LOW_QUOTA_NODE_COUNT: "$GCE_LOW_QUOTA_NODE_COUNT" EOF ) | buildkite-agent pipeline upload exit 0 @@ -277,7 +310,7 @@ deploy() { set -x ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -292,7 +325,7 @@ deploy() { ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \ -g -t "$CHANNEL_OR_TAG" -c 2 \ -b \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -305,7 +338,7 @@ deploy() { ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 -z us-west-1a \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \ -b \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -320,7 +353,7 @@ deploy() { ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \ -g -t "$CHANNEL_OR_TAG" -c 2 \ -b \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -337,7 +370,7 @@ deploy() { # shellcheck disable=SC2068 ci/testnet-deploy.sh -p testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \ -t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${maybeSkipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -346,11 +379,11 @@ deploy() { # shellcheck disable=SC2068 ci/testnet-deploy.sh -p testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \ -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} \ - ${EC2_NODE_COUNT:+-x} + -x fi ) ;; @@ -365,7 +398,7 @@ deploy() { -t "$CHANNEL_OR_TAG" -c 2 \ -b \ -d pd-ssd \ - ${skipCreate:+-r} \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} @@ -374,12 +407,25 @@ deploy() { testnet-demo) ( set -x - if [[ -n $GCE_NODE_COUNT ]]; then + + if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]] || [[ -n $skipStart ]]; then + maybeSkipStart="skip" + fi + + # shellcheck disable=SC2068 + ci/testnet-deploy.sh -p demo-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \ + -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \ + -a demo-testnet-solana-com \ + ${skipCreate:+-e} \ + ${maybeSkipStart:+-s} \ + ${maybeStop:+-S} \ + ${maybeDelete:+-D} + + if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]]; then # shellcheck disable=SC2068 - ci/testnet-deploy.sh -p testnet-demo -C gce ${GCE_ZONE_ARGS[@]} \ - -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \ - -a demo-testnet-solana-com \ - ${skipCreate:+-r} \ + ci/testnet-deploy.sh -p demo-testnet-solana-com2 -C gce ${GCE_LOW_QUOTA_ZONE_ARGS[@]} \ + -t "$CHANNEL_OR_TAG" -n "$GCE_LOW_QUOTA_NODE_COUNT" -c 0 -P -f -x \ + ${skipCreate:+-e} \ ${skipStart:+-s} \ ${maybeStop:+-S} \ ${maybeDelete:+-D} diff --git a/net/common.sh b/net/common.sh index 3378395908..69fd847679 100644 --- a/net/common.sh +++ b/net/common.sh @@ -19,6 +19,7 @@ mkdir -p "$netConfigDir" "$netLogDir" source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh configFile="$netConfigDir/config" +geoipConfigFile="$netConfigDir/geoip.yml" entrypointIp= publicNetwork= @@ -28,10 +29,13 @@ externalNodeSshKey= sshOptions=() fullnodeIpList=() fullnodeIpListPrivate=() +fullnodeIpListZone=() clientIpList=() clientIpListPrivate=() +clientIpListZone=() blockstreamerIpList=() blockstreamerIpListPrivate=() +blockstreamerIpListZone=() leaderRotation= buildSshOptions() { diff --git a/net/gce.sh b/net/gce.sh index de1c3c82c0..aeae569560 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -227,6 +227,7 @@ esac # name - name of the instance # publicIp - The public IP address of this instance # privateIp - The private IP address of this instance +# zone - Zone of this instance # count - Monotonically increasing count for each # invocation of cmd, starting at 1 # ... - Extra args to cmd.. @@ -242,11 +243,70 @@ cloud_ForEachInstance() { declare name publicIp privateIp IFS=: read -r name publicIp privateIp zone < <(echo "$info") - eval "$cmd" "$name" "$publicIp" "$privateIp" "$count" "$@" + eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@" count=$((count + 1)) done } +# Given a cloud provider zone, return an approximate lat,long location for the +# data center. Normal geoip lookups for cloud provider IP addresses are +# sometimes widely inaccurate. +zoneLocation() { + declare zone="$1" + case "$zone" in + us-west1*) + echo "[45.5946, -121.1787]" + ;; + us-central1*) + echo "[41.2619, -95.8608]" + ;; + us-east1*) + echo "[33.1960, -80.0131]" + ;; + asia-east2*) + echo "[22.3193, 114.1694]" + ;; + asia-northeast1*) + echo "[35.6762, 139.6503]" + ;; + asia-northeast2*) + echo "[34.6937, 135.5023]" + ;; + asia-south1*) + echo "[19.0760, 72.8777]" + ;; + asia-southeast1*) + echo "[1.3404, 103.7090]" + ;; + australia-southeast1*) + echo "[-33.8688, 151.2093]" + ;; + europe-north1*) + echo "[60.5693, 27.1878]" + ;; + europe-west2*) + echo "[51.5074, -0.1278]" + ;; + europe-west3*) + echo "[50.1109, 8.6821]" + ;; + europe-west4*) + echo "[53.4386, 6.8355]" + ;; + europe-west6*) + echo "[47.3769, 8.5417]" + ;; + northamerica-northeast1*) + echo "[45.5017, -73.5673]" + ;; + southamerica-east1*) + echo "[-23.5505, -46.6333]" + ;; + *) + ;; + esac +} + prepareInstancesAndWriteConfigFile() { $metricsWriteDatapoint "testnet-deploy net-config-begin=1" @@ -254,6 +314,7 @@ prepareInstancesAndWriteConfigFile() { echo "Appending to existing config file" echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile" else + rm -f "$geoipConfigFile" cat >> "$configFile" <> "$configFile" - echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile" + { + echo "$arrayName+=($publicIp) # $name" + echo "${arrayName}Private+=($privateIp) # $name" + echo "${arrayName}Zone+=($zone) # $name" + } >> "$configFile" + + declare latlng= + latlng=$(zoneLocation "$zone") + if [[ -n $latlng ]]; then + echo "$publicIp: $latlng" >> "$geoipConfigFile" + fi fi } - if $externalNodes; then - echo "Bootstrap leader is already configured" - else - echo "Looking for bootstrap leader instance..." - cloud_FindInstance "$prefix-bootstrap-leader" - [[ ${#instances[@]} -eq 1 ]] || { - echo "Unable to find bootstrap leader" - exit 1 - } - + fetchPrivateKey() { ( declare nodeName declare nodeIp @@ -338,7 +402,9 @@ EOF set -x -o pipefail for i in $(seq 1 30); do if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then - break + if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then + break + fi fi sleep 1 @@ -350,6 +416,20 @@ EOF fi ) + } + + if $externalNodes; then + echo "Bootstrap leader is already configured" + else + echo "Looking for bootstrap leader instance..." + cloud_FindInstance "$prefix-bootstrap-leader" + [[ ${#instances[@]} -eq 1 ]] || { + echo "Unable to find bootstrap leader" + exit 1 + } + + fetchPrivateKey + echo "fullnodeIpList=()" >> "$configFile" echo "fullnodeIpListPrivate=()" >> "$configFile" cloud_ForEachInstance recordInstanceIp true fullnodeIpList @@ -363,6 +443,8 @@ EOF echo "Unable to find additional fullnodes" exit 1 } + + fetchPrivateKey cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList done fi @@ -588,29 +670,33 @@ info) declare nodeType=$1 declare ip=$2 declare ipPrivate=$3 - printf " %-16s | %-15s | %-15s\n" "$nodeType" "$ip" "$ipPrivate" + declare zone=$4 + printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone" } - printNode "Node Type" "Public IP" "Private IP" - echo "-------------------+-----------------+-----------------" + printNode "Node Type" "Public IP" "Private IP" "Zone" + echo "-------------------+-----------------+-----------------+--------------" nodeType=bootstrap-leader for i in $(seq 0 $(( ${#fullnodeIpList[@]} - 1)) ); do ipAddress=${fullnodeIpList[$i]} ipAddressPrivate=${fullnodeIpListPrivate[$i]} - printNode $nodeType "$ipAddress" "$ipAddressPrivate" + zone=${fullnodeIpListZone[$i]} + printNode $nodeType "$ipAddress" "$ipAddressPrivate" "$zone" nodeType=fullnode done for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do ipAddress=${clientIpList[$i]} ipAddressPrivate=${clientIpListPrivate[$i]} - printNode bench-tps "$ipAddress" "$ipAddressPrivate" + zone=${clientIpListZone[$i]} + printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone" done for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do ipAddress=${blockstreamerIpList[$i]} ipAddressPrivate=${blockstreamerIpListPrivate[$i]} - printNode blockstreamer "$ipAddress" "$ipAddressPrivate" + zone=${blockstreamerIpListZone[$i]} + printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone" done ;; *) diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index e416189e69..81d6426ad5 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -135,6 +135,7 @@ local|tar) scp "$entrypointIp":~/solana/config-local/mint-id.json config-local/ ./multinode-demo/drone.sh > drone.log 2>&1 & + export BLOCKEXPLORER_GEOIP_WHITELIST=$PWD/net/config/geoip.yml npm install @solana/blockexplorer@1 npx solana-blockexplorer > blockexplorer.log 2>&1 &