Improve error monitoring
This commit is contained in:
14
net/remote/remote-client.sh
Normal file → Executable file
14
net/remote/remote-client.sh
Normal file → Executable file
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/bash -ex
|
||||
|
||||
cd "$(dirname "$0")"/../..
|
||||
|
||||
@@ -22,12 +22,13 @@ scripts/install-earlyoom.sh
|
||||
|
||||
case $deployMethod in
|
||||
snap)
|
||||
rsync -vPr "$leaderIp:~/solana/solana.snap" .
|
||||
rsync -vPrc "$leaderIp:~/solana/solana.snap" .
|
||||
sudo snap install solana.snap --devmode --dangerous
|
||||
rm solana.snap
|
||||
|
||||
nodeConfig="\
|
||||
leader-ip=$leaderIp \
|
||||
default-metrics-rate=1 \
|
||||
metrics-config=$SOLANA_METRICS_CONFIG \
|
||||
rust-log=$RUST_LOG \
|
||||
"
|
||||
@@ -39,9 +40,10 @@ snap)
|
||||
local)
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
export USE_INSTALL=1
|
||||
export SOLANA_DEFAULT_METRICS_RATE=1
|
||||
export RUST_LOG
|
||||
|
||||
rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
||||
rsync -vPrc "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
||||
solana_bench_tps="multinode-demo/client.sh $leaderIp:~/solana"
|
||||
;;
|
||||
*)
|
||||
@@ -49,14 +51,16 @@ local)
|
||||
exit 1
|
||||
esac
|
||||
|
||||
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
|
||||
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
|
||||
|
||||
while true; do
|
||||
echo "=== Client start: $(date)" >> client.log
|
||||
clientCommand="$solana_bench_tps --num-nodes $numNodes --loop -s 600 --sustained -t threadCount"
|
||||
clientCommand="$solana_bench_tps --num-nodes $numNodes --seconds 600 --sustained --threads $threadCount"
|
||||
echo "$ $clientCommand" >> client.log
|
||||
|
||||
set +e
|
||||
$clientCommand >> client.log 2>&1
|
||||
set -e
|
||||
|
||||
$metricsWriteDatapoint "testnet-deploy,name=$netBasename clientexit=1"
|
||||
echo Error: bench-tps should never exit | tee -a client.log
|
||||
|
@@ -28,11 +28,12 @@ scripts/install-earlyoom.sh
|
||||
case $deployMethod in
|
||||
snap)
|
||||
SECONDS=0
|
||||
rsync -vPr "$leaderIp:~/solana/solana.snap" .
|
||||
rsync -vPrc "$leaderIp:~/solana/solana.snap" .
|
||||
sudo snap install solana.snap --devmode --dangerous
|
||||
|
||||
commonNodeConfig="\
|
||||
leader-ip=$leaderIp \
|
||||
default-metrics-rate=1 \
|
||||
metrics-config=$SOLANA_METRICS_CONFIG \
|
||||
rust-log=$RUST_LOG \
|
||||
setup-args=$setupArgs \
|
||||
@@ -65,6 +66,7 @@ local)
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
export USE_INSTALL=1
|
||||
export RUST_LOG
|
||||
export SOLANA_DEFAULT_METRICS_RATE=1
|
||||
if [[ -e /dev/nvidia0 ]]; then
|
||||
export SOLANA_CUDA=1
|
||||
fi
|
||||
@@ -80,7 +82,7 @@ local)
|
||||
./multinode-demo/leader.sh > leader.log 2>&1 &
|
||||
;;
|
||||
validator)
|
||||
rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
||||
rsync -vPrc "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
||||
|
||||
# shellcheck disable=SC2086 # Don't want to double quote "$setupArgs"
|
||||
./multinode-demo/setup.sh -t validator -p $setupArgs
|
||||
|
@@ -100,10 +100,13 @@ fi
|
||||
echo "--- $leaderIp: validator sanity"
|
||||
if $validatorSanity; then
|
||||
(
|
||||
set -ex -o pipefail
|
||||
./multinode-demo/setup.sh -t validator
|
||||
set -e pipefail
|
||||
timeout 10s ./multinode-demo/validator.sh "$leaderIp" 2>&1 | tee validator.log
|
||||
)
|
||||
timeout 10s ./multinode-demo/validator.sh "$leaderIp" "$leaderIp:8001" 2>&1 | tee validator.log
|
||||
) || {
|
||||
exitcode=$?
|
||||
[[ $exitcode -eq 124 ]] || exit $exitcode
|
||||
}
|
||||
wc -l validator.log
|
||||
if grep -C100 panic validator.log; then
|
||||
echo "^^^ +++"
|
||||
|
Reference in New Issue
Block a user