solana/multinode-demo/validator.sh
Ryo Onodera cb8661bd49
Persistent tower (#10718)
* Save/restore Tower

* Avoid unwrap()

* Rebase cleanups

* Forcibly pass test

* Correct reconcilation of votes after validator resume

* d b g

* Add more tests

* fsync and fix test

* Add test

* Fix fmt

* Debug

* Fix tests...

* save

* Clarify error message and code cleaning around it

* Move most of code out of tower save hot codepath

* Proper comment for the lack of fsync on tower

* Clean up

* Clean up

* Simpler type alias

* Manage tower-restored ancestor slots without banks

* Add comment

* Extract long code blocks...

* Add comment

* Simplify returned tuple...

* Tweak too aggresive log

* Fix typo...

* Add test

* Update comment

* Improve test to require non-empty stray restored slots

* Measure tower save and dump all tower contents

* Log adjust and add threshold related assertions

* cleanup adjust

* Properly lower stray restored slots priority...

* Rust fmt

* Fix test....

* Clarify comments a bit and add TowerError::TooNew

* Further clean-up arround TowerError

* Truly create ancestors by excluding last vote slot

* Add comment for stray_restored_slots

* Add comment for stray_restored_slots

* Use BTreeSet

* Consider root_slot into post-replay adjustment

* Tweak logging

* Add test for stray_restored_ancestors

* Reorder some code

* Better names for unit tests

* Add frozen_abi to SavedTower

* Fold long lines

* Tweak stray ancestors and too old slot history

* Re-adjust error conditon of too old slot history

* Test normal ancestors is checked before stray ones

* Fix conflict, update tests, adjust behavior a bit

* Fix test

* Address review comments

* Last touch!

* Immediately after creating cleaning pr

* Revert stray slots

* Revert comment...

* Report error as metrics

* Revert not to panic! and ignore unfixable test...

* Normalize lockouts.root_slot more strictly

* Add comments for panic! and more assertions

* Proper initialize root without vote account

* Clarify code and comments based on review feedback

* Fix rebase

* Further simplify based on assured tower root

* Reorder code for more readability

Co-authored-by: Michael Vines <mvines@gmail.com>
2020-09-19 14:03:54 +09:00

325 lines
7.8 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Start a validator
#
here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
args=(
--max-genesis-archive-unpacked-size 1073741824
)
airdrops_enabled=1
node_sol=500 # 500 SOL: number of SOL to airdrop the node for transaction fees and vote account rent exemption (ignored if airdrops_enabled=0)
label=
identity=
vote_account=
no_restart=0
gossip_entrypoint=
ledger_dir=
usage() {
if [[ -n $1 ]]; then
echo "$*"
echo
fi
cat <<EOF
usage: $0 [OPTIONS] [cluster entry point hostname]
Start a validator with no stake
OPTIONS:
--ledger PATH - store ledger under this PATH
--init-complete-file FILE - create this file, if it doesn't already exist, once node initialization is complete
--label LABEL - Append the given label to the configuration files, useful when running
multiple validators in the same workspace
--node-sol SOL - Number of SOL this node has been funded from the genesis config (default: $node_sol)
--no-voting - start node without vote signer
--rpc-port port - custom RPC port for this node
--no-restart - do not restart the node if it exits
--no-airdrop - The genesis config has an account for the node. Airdrops are not required.
EOF
exit 1
}
positional_args=()
while [[ -n $1 ]]; do
if [[ ${1:0:1} = - ]]; then
# validator.sh-only options
if [[ $1 = --label ]]; then
label="-$2"
shift 2
elif [[ $1 = --no-restart ]]; then
no_restart=1
shift
elif [[ $1 = --node-sol ]]; then
node_sol="$2"
shift 2
elif [[ $1 = --no-airdrop ]]; then
airdrops_enabled=0
shift
# solana-validator options
elif [[ $1 = --expected-genesis-hash ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --expected-shred-version ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --identity ]]; then
identity=$2
args+=("$1" "$2")
shift 2
elif [[ $1 = --authorized-voter ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --vote-account ]]; then
vote_account=$2
args+=("$1" "$2")
shift 2
elif [[ $1 = --init-complete-file ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --ledger ]]; then
ledger_dir=$2
shift 2
elif [[ $1 = --entrypoint ]]; then
gossip_entrypoint=$2
args+=("$1" "$2")
shift 2
elif [[ $1 = --no-snapshot-fetch ]]; then
args+=("$1")
shift
elif [[ $1 = --no-voting ]]; then
args+=("$1")
shift
elif [[ $1 = --dev-no-sigverify ]]; then
args+=("$1")
shift
elif [[ $1 = --dev-halt-at-slot ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --rpc-port ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --enable-rpc-exit ]]; then
args+=("$1")
shift
elif [[ $1 = --rpc-faucet-address ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --vote-signer-address ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --accounts ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --gossip-port ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --dynamic-port-range ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --snapshot-interval-slots ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --limit-ledger-size ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --no-rocksdb-compaction ]]; then
args+=("$1")
shift
elif [[ $1 = --enable-rpc-transaction-history ]]; then
args+=("$1")
shift
elif [[ $1 = --skip-poh-verify ]]; then
args+=("$1")
shift
elif [[ $1 = --log ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --trusted-validator ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --halt-on-trusted-validators-accounts-hash-mismatch ]]; then
args+=("$1")
shift
elif [[ $1 = --max-genesis-archive-unpacked-size ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 == --wait-for-supermajority ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 == --expected-bank-hash ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = -h ]]; then
usage "$@"
else
echo "Unknown argument: $1"
exit 1
fi
else
positional_args+=("$1")
shift
fi
done
if [[ "$SOLANA_GPU_MISSING" -eq 1 ]]; then
echo "Testnet requires GPUs, but none were found! Aborting..."
exit 1
fi
if [[ ${#positional_args[@]} -gt 1 ]]; then
usage "$@"
fi
if [[ -n $REQUIRE_LEDGER_DIR ]]; then
if [[ -z $ledger_dir ]]; then
usage "Error: --ledger not specified"
fi
SOLANA_CONFIG_DIR="$ledger_dir"
fi
if [[ -n $REQUIRE_KEYPAIRS ]]; then
if [[ -z $identity ]]; then
usage "Error: --identity not specified"
fi
if [[ -z $vote_account ]]; then
usage "Error: --vote-account not specified"
fi
fi
if [[ -z "$ledger_dir" ]]; then
ledger_dir="$SOLANA_CONFIG_DIR/validator$label"
fi
mkdir -p "$ledger_dir"
if [[ -n $gossip_entrypoint ]]; then
# Prefer the --entrypoint argument if supplied...
if [[ ${#positional_args[@]} -gt 0 ]]; then
usage "$@"
fi
else
# ...but also support providing the entrypoint's hostname as the first
# positional argument
entrypoint_hostname=${positional_args[0]}
if [[ -z $entrypoint_hostname ]]; then
gossip_entrypoint=127.0.0.1:8001
else
gossip_entrypoint="$entrypoint_hostname":8001
fi
fi
faucet_address="${gossip_entrypoint%:*}":9900
: "${identity:=$ledger_dir/identity.json}"
: "${vote_account:=$ledger_dir/vote-account.json}"
default_arg --entrypoint "$gossip_entrypoint"
if ((airdrops_enabled)); then
default_arg --rpc-faucet-address "$faucet_address"
fi
default_arg --identity "$identity"
default_arg --vote-account "$vote_account"
default_arg --ledger "$ledger_dir"
default_arg --log -
default_arg --enable-rpc-exit
default_arg --enable-rpc-set-log-filter
default_arg --require-tower
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_validator_cuda
else
program=$solana_validator
fi
set -e
PS4="$(basename "$0"): "
pid=
kill_node() {
# Note: do not echo anything from this function to ensure $pid is actually
# killed when stdout/stderr are redirected
set +ex
if [[ -n $pid ]]; then
declare _pid=$pid
pid=
kill "$_pid" || true
wait "$_pid" || true
fi
}
kill_node_and_exit() {
kill_node
exit
}
trap 'kill_node_and_exit' INT TERM ERR
wallet() {
(
set -x
$solana_cli --keypair "$identity" --url "$rpc_url" "$@"
)
}
setup_validator_accounts() {
declare node_sol=$1
if [[ -n "$SKIP_ACCOUNTS_CREATION" ]]; then
return 0
fi
if ! wallet vote-account "$vote_account"; then
if ((airdrops_enabled)); then
echo "Adding $node_sol to validator identity account:"
(
set -x
$solana_cli --keypair "$SOLANA_CONFIG_DIR/faucet.json" --url "$rpc_url" transfer "$identity" "$node_sol"
) || return $?
fi
echo "Creating validator vote account"
wallet create-vote-account "$vote_account" "$identity" || return $?
fi
echo "Validator vote account configured"
echo "Validator identity account balance:"
wallet balance || return $?
return 0
}
rpc_url=$($solana_gossip rpc-url --timeout 180 --entrypoint "$gossip_entrypoint")
[[ -r "$identity" ]] || $solana_keygen new --no-passphrase -so "$identity"
[[ -r "$vote_account" ]] || $solana_keygen new --no-passphrase -so "$vote_account"
setup_validator_accounts "$node_sol"
while true; do
echo "$PS4$program ${args[*]}"
$program "${args[@]}" &
pid=$!
echo "pid: $pid"
if ((no_restart)); then
wait "$pid"
exit $?
fi
while true; do
if [[ -z $pid ]] || ! kill -0 "$pid"; then
echo "############## validator exited, restarting ##############"
break
fi
sleep 1
done
kill_node
done