rework of network rendezvous

* rename NodeInfo field of Node from "data" to "info"
      (touches a lot of files)

  * update client to use gossip to find leader, a la drone

  * rework multinode scripts
      * move more stuff into rust
      * added usage to all
      * no more rsync unless you're a validator (TODO: whack that, too)
  * fullnode doesn't bail if drone isn't up yet, just keeps trying
  * drone doesn't bail if network isn't up yet, just keeps trying
This commit is contained in:
Rob Walker
2018-08-31 00:10:39 -07:00
parent eb4e5a7bd0
commit 176e806d94
19 changed files with 385 additions and 400 deletions

View File

@ -1,68 +1,37 @@
#!/bin/bash -e
#
USAGE=" usage: $0 [leader_url] [num_nodes] [--loop] [extra args]
Run bench-tps against the specified network
leader_url URL to the leader (defaults to ..)
num_nodes Minimum number of nodes to look for while converging
--loop Add this flag to cause the program to loop infinitely
\"extra args\" Any additional arguments are pass along to solana-bench-tps
"
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
leader=$1
if [[ -n $leader ]]; then
if [[ $leader == "-h" || $leader == "--help" ]]; then
echo "$USAGE"
exit 0
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
shift
echo "usage: $0 [network entry point] [extra args]"
echo
echo " Run bench-tps against the specified network"
echo
echo " extra args: additional arguments are pass along to solana-bench-tps"
echo
exit 1
}
# this is a little hacky
if [[ ${1:0:2} != "--" ]]; then
read -r _ leader_address shift < <(find_leader "${@:1:1}")
else
if [[ -d "$SNAP" ]]; then
leader=testnet.solana.com # Default to testnet when running as a Snap
else
leader=$here/.. # Default to local solana repo
fi
read -r _ leader_address shift < <(find_leader)
fi
shift "$shift"
count=$1
if [[ -n $count ]]; then
shift
else
count=1
fi
loop=
if [[ $1 = --loop ]]; then
loop=1
shift
fi
client_json="$SOLANA_CONFIG_CLIENT_DIR"/client.json
[[ -r $client_json ]] || $solana_keygen -o "$client_json"
rsync_leader_url=$(rsync_url "$leader")
(
set -x
mkdir -p "$SOLANA_CONFIG_CLIENT_DIR"
$rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_CLIENT_DIR"/
client_json="$SOLANA_CONFIG_CLIENT_DIR"/client.json
[[ -r $client_json ]] || $solana_keygen -o "$client_json"
)
iteration=0
set -x
while true; do
$solana_bench_tps \
-n "$count" \
-l "$SOLANA_CONFIG_CLIENT_DIR"/leader.json \
-k "$SOLANA_CONFIG_CLIENT_DIR"/client.json \
"$@"
[[ -n $loop ]] || exit 0
iteration=$((iteration + 1))
echo ------------------------------------------------------------------------
echo "Iteration: $iteration"
echo ------------------------------------------------------------------------
done
$solana_bench_tps \
--network "$leader_address" \
--keypair "$SOLANA_CONFIG_CLIENT_DIR"/client.json \
"$@"

View File

@ -159,3 +159,46 @@ rsync_url() { # adds the 'rsync://` prefix to URLs that need it
# Default to rsync:// URL
echo "rsync://$url"
}
# called from drone, validator, client
#
# Determines which leader to use and prints a single line on stdout:
#
#   <leader> <leader_address> <shift>
#
#   leader          value to use as the rsync source for the leader
#   leader_address  ip:port network entry point of the leader
#   shift           number of positional arguments consumed (0, 1 or 2), so
#                   the caller can `shift` them away
#
# Arguments:
#   $1 - (optional) leader location; the part before the last ':' is resolved
#        with `dig` when $2 is not given
#   $2 - (optional) explicit leader network address, used verbatim
#
# Globals read: SNAP, here. Also relies on a `usage` function provided by the
# calling script.
#
# stdout is reserved for the result line because callers parse it with
# `read -r ... < <(find_leader ...)`; diagnostics therefore go to stderr.
find_leader() {
  declare leader leader_address
  declare shift=0

  if [[ -d $SNAP ]]; then
    # Exit if mode is not yet configured
    # (typically the case after the Snap is first installed)
    # NOTE(review): when invoked through process substitution this `exit`
    # only ends that subshell, and the caller then reads an empty result.
    [[ -n $(snapctl get mode) ]] || exit 0

    # Select leader from the Snap configuration
    leader_address=$(snapctl get leader-address)
    if [[ -z $leader_address ]]; then
      # Assume public testnet by default
      leader_address=35.227.93.37:8001 # testnet.solana.com
    fi
    leader=$leader_address
  else
    if [[ -z $1 ]]; then
      leader=${here}/..             # Default to local tree for rsync
      leader_address=127.0.0.1:8001 # Default to local leader
    elif [[ -z $2 ]]; then
      leader=$1

      declare leader_ip
      # Strip everything from the last ':' onwards before resolving, and keep
      # only the first line that dig reports.
      leader_ip=$(dig +short "${leader%:*}" | head -n1)

      if [[ -z $leader_ip ]]; then
        # Send the message to stderr: stdout is captured by the caller, and
        # the error text would otherwise be parsed as the result fields.
        usage "Error: unable to resolve IP address for $leader" >&2
      fi
      leader_address=${leader_ip}:8001
      shift=1
    else
      leader=$1
      leader_address=$2
      shift=2
    fi
  fi

  printf '%s %s %s\n' "$leader" "$leader_address" "$shift"
}

View File

@ -2,32 +2,25 @@
#
# Starts an instance of solana-drone
#
# usage: $0 <rsync network path to solana repo on leader machine>
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
SOLANA_CONFIG_DIR="$SOLANA_CONFIG_DIR"-drone
# shellcheck source=scripts/oom-score-adj.sh
source "$here"/../scripts/oom-score-adj.sh
if [[ -d "$SNAP" ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)
[[ -n "$(snapctl get mode)" ]] || exit 0
# Select leader from the Snap configuration
leader_address="$(snapctl get leader-address)"
if [[ -z "$leader_address" ]]; then
# Assume drone is running on the same node as the leader by default
leader_address="localhost"
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
leader="$leader_address"
else
leader=${1:-${here}/..} # Default to local tree for data
fi
echo "usage: $0 [network entry point]"
echo
echo " Run an airdrop drone for the specified network"
echo
exit 1
}
read -r _ leader_address shift < <(find_leader "${@:1:1}")
shift "$shift"
[[ -f "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json ]] || {
echo "$SOLANA_CONFIG_PRIVATE_DIR/mint.json not found, create it by running:"
@ -36,17 +29,12 @@ fi
exit 1
}
rsync_leader_url=$(rsync_url "$leader")
set -ex
mkdir -p "$SOLANA_CONFIG_DIR"
$rsync -vPz "$rsync_leader_url"/config/leader.json "$SOLANA_CONFIG_DIR"/
trap 'kill "$pid" && wait "$pid"' INT TERM
$solana_drone \
-l "$SOLANA_CONFIG_DIR"/leader.json -k "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
--timeout 120 \
--keypair "$SOLANA_CONFIG_PRIVATE_DIR"/mint.json \
--network "$leader_address" \
> >($drone_logger) 2>&1 &
pid=$!
oom_score_adj "$pid" 1000
wait "$pid"

View File

@ -14,9 +14,12 @@ usage() {
echo "$*"
echo
fi
echo "usage: $0 [-x] [rsync network path to solana repo on leader machine] [network ip address of leader]"
echo ""
echo " -x: runs a new, dynamically-configured validator"
echo "usage: $0 [-x] [rsync network path to leader] [network entry point]"
echo
echo " Start a validator on the specified network"
echo
echo " -x: runs a new, dynamically-configured validator"
echo
exit 1
}
@ -35,34 +38,8 @@ if [[ -n $3 ]]; then
usage
fi
if [[ -d $SNAP ]]; then
# Exit if mode is not yet configured
# (typically the case after the Snap is first installed)
[[ -n $(snapctl get mode) ]] || exit 0
# Select leader from the Snap configuration
leader_address=$(snapctl get leader-address)
if [[ -z $leader_address ]]; then
# Assume public testnet by default
leader_address=35.227.93.37 # testnet.solana.com
fi
leader=$leader_address
else
if [[ -z $1 ]]; then
leader=${1:-${here}/..} # Default to local tree for data
leader_address=${2:-127.0.0.1} # Default to local leader
elif [[ -z $2 ]]; then
leader=$1
leader_address=$(dig +short "${leader%:*}" | head -n1)
if [[ -z $leader_address ]]; then
usage "Error: unable to resolve IP address for $leader"
fi
else
leader=$1
leader_address=$2
fi
fi
leader_port=8001
read -r leader leader_address shift < <(find_leader "${@:1:2}")
shift "$shift"
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_fullnode_cuda
@ -109,7 +86,7 @@ $rsync -vPr "$rsync_leader_url"/config/ "$SOLANA_LEADER_CONFIG_DIR"
trap 'kill "$pid" && wait "$pid"' INT TERM
$program \
--identity "$validator_json_path" \
--testnet "$leader_address:$leader_port" \
--network "$leader_address" \
--ledger "$SOLANA_LEADER_CONFIG_DIR"/ledger \
> >($validator_logger) 2>&1 &
pid=$!