solana/multinode-demo/bootstrap-validator.sh
Ryo Onodera cb8661bd49
Persistent tower (#10718)
* Save/restore Tower

* Avoid unwrap()

* Rebase cleanups

* Forcibly pass test

* Correct reconciliation of votes after validator resume

* d b g

* Add more tests

* fsync and fix test

* Add test

* Fix fmt

* Debug

* Fix tests...

* save

* Clarify error message and code cleaning around it

* Move most of code out of tower save hot codepath

* Proper comment for the lack of fsync on tower

* Clean up

* Clean up

* Simpler type alias

* Manage tower-restored ancestor slots without banks

* Add comment

* Extract long code blocks...

* Add comment

* Simplify returned tuple...

* Tweak too aggressive log

* Fix typo...

* Add test

* Update comment

* Improve test to require non-empty stray restored slots

* Measure tower save and dump all tower contents

* Log adjust and add threshold related assertions

* cleanup adjust

* Properly lower stray restored slots priority...

* Rust fmt

* Fix test....

* Clarify comments a bit and add TowerError::TooNew

* Further clean-up around TowerError

* Truly create ancestors by excluding last vote slot

* Add comment for stray_restored_slots

* Add comment for stray_restored_slots

* Use BTreeSet

* Consider root_slot into post-replay adjustment

* Tweak logging

* Add test for stray_restored_ancestors

* Reorder some code

* Better names for unit tests

* Add frozen_abi to SavedTower

* Fold long lines

* Tweak stray ancestors and too old slot history

* Re-adjust error condition of too old slot history

* Test normal ancestors is checked before stray ones

* Fix conflict, update tests, adjust behavior a bit

* Fix test

* Address review comments

* Last touch!

* Immediately after creating cleaning pr

* Revert stray slots

* Revert comment...

* Report error as metrics

* Revert not to panic! and ignore unfixable test...

* Normalize lockouts.root_slot more strictly

* Add comments for panic! and more assertions

* Proper initialize root without vote account

* Clarify code and comments based on review feedback

* Fix rebase

* Further simplify based on assured tower root

* Reorder code for more readability

Co-authored-by: Michael Vines <mvines@gmail.com>
2020-09-19 14:03:54 +09:00

149 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Start the bootstrap validator node
#
# This preamble enables errexit, sources common.sh from the directory this
# script lives in, aborts when SOLANA_GPU_MISSING equals 1, and picks the
# command to launch: $solana_validator_cuda when SOLANA_CUDA is non-empty,
# $solana_validator otherwise (both presumably provided by common.sh).
#
set -e

here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here/common.sh"

# Refuse to continue on a GPU-requiring setup that has no GPU.
[[ "$SOLANA_GPU_MISSING" -eq 1 ]] && {
  echo "Testnet requires GPUs, but none were found! Aborting..."
  exit 1
}

# Plain validator unless CUDA was requested.
if [[ -z $SOLANA_CUDA ]]; then
  program=$solana_validator
else
  program=$solana_validator_cuda
fi
# Parse the command line.
#
# Results:
#   args       - array of options (and their values) forwarded verbatim to
#                the validator
#   no_restart - set to 1 when --no-restart is given (it is not forwarded)
#
# Parsing stops at the first empty argument. An option that needs a value but
# is the last argument is reported and the script exits with status 1; without
# this check an empty value would be queued and `shift 2` would fail, ending
# the script under `set -e` with no explanation. Any unrecognised argument
# prints the validator's --help and exits with status 1.
no_restart=0
args=()
while [[ -n $1 ]]; do
  case $1 in
  # Forwarded options that take exactly one value.
  --init-complete-file | --gossip-host | --gossip-port | --dev-halt-at-slot | \
    --dynamic-port-range | --limit-ledger-size | --log | \
    --wait-for-supermajority | --expected-bank-hash)
    if (($# < 2)); then
      echo "Error: $1 requires a value" >&2
      exit 1
    fi
    args+=("$1" "$2")
    shift 2
    ;;
  # Forwarded boolean flags.
  --no-rocksdb-compaction | --enable-rpc-transaction-history | \
    --enable-rpc-bigtable-ledger-storage | --skip-poh-verify)
    args+=("$1")
    shift
    ;;
  # Handled by this script only.
  --no-restart)
    no_restart=1
    shift
    ;;
  *)
    echo "Unknown argument: $1"
    $program --help
    exit 1
    ;;
  esac
done
# Everything the bootstrap validator needs lives in one config directory,
# which doubles as its ledger directory.
ledger_dir="$SOLANA_CONFIG_DIR"/bootstrap-validator

# These keypairs are created by ./setup.sh and included in the genesis config
identity="$ledger_dir"/identity.json
vote_account="$ledger_dir"/vote-account.json

if [[ ! -d "$ledger_dir" ]]; then
  echo "$ledger_dir does not exist"
  echo
  echo "Please run: $here/setup.sh"
  exit 1
fi

# Fixed options, appended after whatever was collected from the command line.
args+=(
  --enable-rpc-exit
  --enable-rpc-set-log-filter
  --require-tower
  --ledger "$ledger_dir"
  --rpc-port 8899
  --snapshot-interval-slots 200
  --identity "$identity"
  --vote-account "$vote_account"
  --rpc-faucet-address 127.0.0.1:9900
)

# NOTE(review): default_arg is not defined in this file (presumably it comes
# from common.sh and only adds the option when it is not already present in
# args) -- confirm there.
default_arg --gossip-port 8001
default_arg --log -

# PID of the currently running validator; empty while none is running.
pid=
# Stop the validator recorded in the global $pid, if there is one.
#
# Globals: pid (read, then cleared before the process is signalled)
# Returns: always 0; failures of kill and wait are ignored.
# Side effect: errexit and xtrace are switched off and stay off afterwards.
kill_node() {
  # Note: do not echo anything from this function to ensure $pid is actually
  # killed when stdout/stderr are redirected
  set +ex

  # Nothing is running: nothing to do.
  [[ -n $pid ]] || return 0

  local victim=$pid
  pid=
  kill "$victim" || true
  wait "$victim" || true
}
# Handler helper: stop the validator through kill_node, then leave the script.
# `exit` is given no argument, so the script's exit status is the status of
# the last command executed before it (here, kill_node).
kill_node_and_exit() { kill_node; exit; }
# Stop the validator and leave the script on Ctrl-C, on SIGTERM, or when a
# command fails.
trap 'kill_node_and_exit' INT TERM ERR

# Supervisor loop: launch the validator in the background and, unless
# --no-restart was given, relaunch it every time it goes away.
while true; do
  echo "$program ${args[*]}"
  # NOTE(review): $program is unquoted and therefore word-split -- presumably
  # it may hold a multi-word command; confirm against common.sh before quoting.
  $program "${args[@]}" &
  pid=$!
  echo "pid: $pid"

  if ((no_restart)); then
    # Capture the status through `||`. A bare failing `wait` would trigger
    # errexit and the ERR trap first, and the handler's own `exit` would
    # replace the validator's exit status before `exit $?` could report it.
    validator_status=0
    wait "$pid" || validator_status=$?
    exit "$validator_status"
  fi

  # Poll once per second until the validator process can no longer be
  # signalled (or $pid has been cleared).
  while true; do
    if [[ -z $pid ]] || ! kill -0 "$pid"; then
      echo "############## validator exited, restarting ##############"
      break
    fi
    sleep 1
  done

  # Reap the old process and clear $pid before starting the next one.
  kill_node
done