Various snapshot generation improvements

* Only a single snapshot is maintained to avoid unbounded disk growth
* Snapshot is stored as a compressed tar archive for faster rsyncing
* Any validator node may now generate snapshots
* Updated testnet scripts to generate snapshots on the blockstreamer node
This commit is contained in:
Michael Vines
2019-06-18 13:07:59 -07:00
parent b4484b89c3
commit 465cd45833
2 changed files with 79 additions and 74 deletions

View File

@ -191,6 +191,7 @@ label=
identity_keypair_path= identity_keypair_path=
no_restart=0 no_restart=0
airdrops_enabled=1 airdrops_enabled=1
generate_snapshots=0
positional_args=() positional_args=()
while [[ -n $1 ]]; do while [[ -n $1 ]]; do
@ -203,6 +204,10 @@ while [[ -n $1 ]]; do
shift shift
elif [[ $1 = --bootstrap-leader ]]; then elif [[ $1 = --bootstrap-leader ]]; then
node_type=bootstrap_leader node_type=bootstrap_leader
generate_snapshots=1
shift
elif [[ $1 = --generate-snapshots ]]; then
generate_snapshots=1
shift shift
elif [[ $1 = --replicator ]]; then elif [[ $1 = --replicator ]]; then
node_type=replicator node_type=replicator
@ -314,8 +319,7 @@ elif [[ $node_type = bootstrap_leader ]]; then
: "${identity_keypair_path:=$SOLANA_CONFIG_DIR/bootstrap-leader-keypair.json}" : "${identity_keypair_path:=$SOLANA_CONFIG_DIR/bootstrap-leader-keypair.json}"
vote_keypair_path="$SOLANA_CONFIG_DIR"/bootstrap-leader-vote-keypair.json vote_keypair_path="$SOLANA_CONFIG_DIR"/bootstrap-leader-vote-keypair.json
ledger_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-ledger ledger_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-ledger
accounts_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-accounts state_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-state
snapshot_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-snapshots
storage_keypair_path=$SOLANA_CONFIG_DIR/bootstrap-leader-storage-keypair.json storage_keypair_path=$SOLANA_CONFIG_DIR/bootstrap-leader-storage-keypair.json
configured_flag=$SOLANA_CONFIG_DIR/bootstrap-leader.configured configured_flag=$SOLANA_CONFIG_DIR/bootstrap-leader.configured
@ -335,11 +339,10 @@ elif [[ $node_type = validator ]]; then
: "${identity_keypair_path:=$SOLANA_CONFIG_DIR/validator-keypair$label.json}" : "${identity_keypair_path:=$SOLANA_CONFIG_DIR/validator-keypair$label.json}"
vote_keypair_path=$SOLANA_CONFIG_DIR/validator-vote-keypair$label.json vote_keypair_path=$SOLANA_CONFIG_DIR/validator-vote-keypair$label.json
stake_keypair_path=$SOLANA_CONFIG_DIR/validator-stake-keypair$label.json
storage_keypair_path=$SOLANA_CONFIG_DIR/validator-storage-keypair$label.json
ledger_config_dir=$SOLANA_CONFIG_DIR/validator-ledger$label ledger_config_dir=$SOLANA_CONFIG_DIR/validator-ledger$label
accounts_config_dir=$SOLANA_CONFIG_DIR/validator-accounts$label state_dir="$SOLANA_CONFIG_DIR"/validator-state$label
snapshot_config_dir="$SOLANA_CONFIG_DIR"/validator-snapshots$label storage_keypair_path=$SOLANA_CONFIG_DIR/validator-storage-keypair$label.json
stake_keypair_path=$SOLANA_CONFIG_DIR/validator-stake-keypair$label.json
configured_flag=$SOLANA_CONFIG_DIR/validator$label.configured configured_flag=$SOLANA_CONFIG_DIR/validator$label.configured
mkdir -p "$SOLANA_CONFIG_DIR" mkdir -p "$SOLANA_CONFIG_DIR"
@ -361,6 +364,9 @@ fi
if [[ $node_type != replicator ]]; then if [[ $node_type != replicator ]]; then
accounts_config_dir="$state_dir"/accounts
snapshot_config_dir="$state_dir"/snapshots
identity_pubkey=$($solana_keygen pubkey "$identity_keypair_path") identity_pubkey=$($solana_keygen pubkey "$identity_keypair_path")
vote_pubkey=$($solana_keygen pubkey "$vote_keypair_path") vote_pubkey=$($solana_keygen pubkey "$vote_keypair_path")
storage_pubkey=$($solana_keygen pubkey "$storage_keypair_path") storage_pubkey=$($solana_keygen pubkey "$storage_keypair_path")
@ -402,48 +408,47 @@ new_gensis_block() {
set -e set -e
PS4="$(basename "$0"): " PS4="$(basename "$0"): "
pid=
trap '[[ -n $pid ]] && kill "$pid" >/dev/null 2>&1 && wait "$pid"' INT TERM ERR
while true; do while true; do
if [[ ! -d "$SOLANA_RSYNC_CONFIG_DIR"/ledger ]]; then
if [[ $node_type = bootstrap_leader ]]; then
ledger_not_setup "$SOLANA_RSYNC_CONFIG_DIR/ledger does not exist"
fi
(
set -x
$rsync -qvPr "${rsync_entrypoint_url:?}"/config/ledger "$SOLANA_RSYNC_CONFIG_DIR"
$rsync -qvPr "${rsync_entrypoint_url:?}"/config/snapshot_dir "$SOLANA_RSYNC_CONFIG_DIR"
current_snapshot_dir=$(cat "$SOLANA_RSYNC_CONFIG_DIR"/snapshot_dir)
$rsync -vqPr "${rsync_entrypoint_url:?}"/config/"$current_snapshot_dir"/snapshots "$SOLANA_RSYNC_CONFIG_DIR"
$rsync -vqPr "${rsync_entrypoint_url:?}"/config/"$current_snapshot_dir"/accounts "$SOLANA_RSYNC_CONFIG_DIR"
) || true
fi
if new_gensis_block; then if new_gensis_block; then
# If the genesis block has changed remove the now stale ledger and vote # If the genesis block has changed remove the now stale ledger and vote
# keypair for the node and start all over again # keypair for the node and start all over again
( (
set -x set -x
rm -rf "$ledger_config_dir" "$accounts_config_dir" "$snapshot_config_dir" "$configured_flag" rm -rf "$ledger_config_dir" "$state_dir" "$configured_flag"
) )
fi fi
if [[ ! -d "$SOLANA_RSYNC_CONFIG_DIR"/ledger ]]; then
if [[ $node_type = bootstrap_leader ]]; then
ledger_not_setup "$SOLANA_RSYNC_CONFIG_DIR/ledger does not exist"
elif [[ $node_type = validator ]]; then
(
SECONDS=0
set -x
cd "$SOLANA_RSYNC_CONFIG_DIR"
$rsync -qPr "${rsync_entrypoint_url:?}"/config/{ledger,state.tgz} .
echo "Fetched snapshot in $SECONDS seconds"
) || true
fi
fi
( (
set -x set -x
if [[ $node_type = validator ]]; then if [[ $node_type = validator ]]; then
rm -rf "$ledger_config_dir" if [[ -f "$SOLANA_RSYNC_CONFIG_DIR"/state.tgz ]]; then
if [[ -d "$SOLANA_RSYNC_CONFIG_DIR"/snapshots ]]; then mkdir -p "$state_dir"
rm -rf "$snapshot_config_dir" "$accounts_config_dir" SECONDS=
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/snapshots/ "$snapshot_config_dir" tar -C "$state_dir" -zxf "$SOLANA_RSYNC_CONFIG_DIR"/state.tgz
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/accounts/ "$accounts_config_dir" echo "Extracted snapshot in $SECONDS seconds"
fi fi
fi fi
if [[ ! -d "$ledger_config_dir" ]]; then if [[ ! -d "$ledger_config_dir" ]]; then
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/ledger/ "$ledger_config_dir" cp -a "$SOLANA_RSYNC_CONFIG_DIR"/ledger/ "$ledger_config_dir"
$solana_ledger_tool --ledger "$ledger_config_dir" verify
fi fi
) )
trap '[[ -n $pid ]] && kill "$pid" >/dev/null 2>&1 && wait "$pid"' INT TERM ERR
if ((stake_lamports)); then if ((stake_lamports)); then
if [[ $node_type = validator ]]; then if [[ $node_type = validator ]]; then
setup_validator_accounts "${entrypoint_address%:*}" \ setup_validator_accounts "${entrypoint_address%:*}" \
@ -471,59 +476,58 @@ while true; do
exit $? exit $?
fi fi
if [[ $node_type = bootstrap_leader ]]; then secs_to_next_genesis_poll=5
snapshot_dir=0 secs_to_next_snapshot=30
secs_to_next_sync_poll=30 while true; do
while true; do if ! kill -0 "$pid"; then
if ! kill -0 "$pid"; then wait "$pid" || true
wait "$pid" echo "############## $node_type exited, restarting ##############"
exit 0 break
fi fi
sleep 1 sleep 1
((secs_to_next_sync_poll--)) && continue if ((generate_snapshots && --secs_to_next_snapshot == 0)); then
( (
if [[ -d $snapshot_config_dir ]]; then SECONDS=
current_config_dir="$SOLANA_RSYNC_CONFIG_DIR"/$snapshot_dir new_state_dir="$SOLANA_RSYNC_CONFIG_DIR"/new_state
mkdir -p "$current_config_dir" new_state_archive="$SOLANA_RSYNC_CONFIG_DIR"/new_state.tgz
cp -a "$snapshot_config_dir"/ "$current_config_dir"/snapshots (
cp -a "$accounts_config_dir"/ "$current_config_dir"/accounts rm -rf "$new_state_dir" "$new_state_archive"
echo $snapshot_dir > "$SOLANA_RSYNC_CONFIG_DIR"/snapshot_dir cp -a "$state_dir" "$new_state_dir"
fi cd "$new_state_dir"
) || true tar zcf "$new_state_archive" ./*
secs_to_next_sync_poll=60 )
snapshot_dir=$((snapshot_dir+1)) ln -f "$new_state_archive" "$SOLANA_RSYNC_CONFIG_DIR"/state.tgz
done rm -rf "$new_state_dir" "$new_state_archive"
else ls -hl "$SOLANA_RSYNC_CONFIG_DIR"/state.tgz
secs_to_next_genesis_poll=1 echo "Snapshot generated in $SECONDS seconds"
while true; do ) || (
if ! kill -0 "$pid"; then echo "Error: failed to generate snapshot"
wait "$pid" || true )
echo "############## $node_type exited, restarting ##############" secs_to_next_snapshot=60
break fi
fi
sleep 1 if ((poll_for_new_genesis_block && --secs_to_next_genesis_poll == 0)); then
echo "Polling for new genesis block..."
((poll_for_new_genesis_block)) || continue
((secs_to_next_genesis_poll--)) && continue
( (
set -x set -x
$rsync -r "${rsync_entrypoint_url:?}"/config/ledger "$SOLANA_RSYNC_CONFIG_DIR" $rsync -r "${rsync_entrypoint_url:?}"/config/ledger "$SOLANA_RSYNC_CONFIG_DIR"
) || true ) || (
echo "Error: failed to rsync ledger"
)
new_gensis_block && break new_gensis_block && break
secs_to_next_genesis_poll=60 secs_to_next_genesis_poll=60
done fi
echo "############## New genesis detected, restarting $node_type ##############" done
kill "$pid" || true
wait "$pid" || true echo "############## New genesis detected, restarting $node_type ##############"
# give the cluster time to come back up kill "$pid" || true
( wait "$pid" || true
set -x # give the cluster time to come back up
sleep 60 (
) set -x
fi sleep 60
)
done done

View File

@ -194,6 +194,7 @@ local|tar)
--blockstream /tmp/solana-blockstream.sock --blockstream /tmp/solana-blockstream.sock
--no-voting --no-voting
--stake 0 --stake 0
--generate-snapshots
) )
else else
args+=(--stake "$stake") args+=(--stake "$stake")