Add partial network outage before test functionality (#17291)
* Add partial network outage before testing functionality Allow a percentage of the stake to go offline before an automation test starts * code review fixes
This commit is contained in:
@@ -142,8 +142,12 @@ function launch_testnet() {
|
||||
-c idle=$NUMBER_OF_CLIENT_NODES $maybeStartAllowBootFailures \
|
||||
--gpu-mode $startGpuMode $maybeWarpSlot $maybeAsyncNodeInit $maybeExtraPrimordialStakes
|
||||
|
||||
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
|
||||
wait_for_bootstrap_validator_stake_drop "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"
|
||||
if [[ -n "$WAIT_FOR_EQUAL_STAKE" ]]; then
|
||||
wait_for_equal_stake
|
||||
else
|
||||
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
|
||||
wait_for_max_stake "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"
|
||||
fi
|
||||
|
||||
if [[ $NUMBER_OF_CLIENT_NODES -gt 0 ]]; then
|
||||
execution_step "Starting ${NUMBER_OF_CLIENT_NODES} client nodes"
|
||||
@@ -153,6 +157,24 @@ function launch_testnet() {
|
||||
sleep 180
|
||||
fi
|
||||
|
||||
if [[ -n "$WARMUP_SLOTS_BEFORE_TEST" ]]; then
|
||||
# Allow the network to run for a bit before beginning the test
|
||||
while [[ "$WARMUP_SLOTS_BEFORE_TEST" -gt $(get_slot) ]]; do
|
||||
sleep 5
|
||||
done
|
||||
fi
|
||||
|
||||
# Stop the specified number of nodes
|
||||
num_online_nodes=$(( NUMBER_OF_VALIDATOR_NODES + 1 ))
|
||||
if [[ -n "$NUMBER_OF_OFFLINE_NODES" ]]; then
|
||||
execution_step "Stopping $NUMBER_OF_OFFLINE_NODES nodes"
|
||||
for (( i=NUMBER_OF_VALIDATOR_NODES; i>$(( NUMBER_OF_VALIDATOR_NODES - NUMBER_OF_OFFLINE_NODES )); i-- )); do
|
||||
# shellcheck disable=SC2154
|
||||
"${REPO_ROOT}"/net/net.sh stopnode -i "${validatorIpList[$i]}"
|
||||
done
|
||||
num_online_nodes=$(( num_online_nodes - NUMBER_OF_OFFLINE_NODES ))
|
||||
fi
|
||||
|
||||
SECONDS=0
|
||||
START_SLOT=$(get_slot)
|
||||
SLOT_COUNT_START_SECONDS=$SECONDS
|
||||
@@ -170,11 +192,11 @@ function launch_testnet() {
|
||||
for (( i=1; i<=PARTITION_ITERATION_COUNT; i++ )); do
|
||||
execution_step "Partition Iteration $i of $PARTITION_ITERATION_COUNT"
|
||||
execution_step "Applying netem config $NETEM_CONFIG_FILE for $PARTITION_ACTIVE_DURATION seconds"
|
||||
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE"
|
||||
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE" -n $num_online_nodes
|
||||
sleep "$PARTITION_ACTIVE_DURATION"
|
||||
|
||||
execution_step "Resolving partitions for $PARTITION_INACTIVE_DURATION seconds"
|
||||
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE" --netem-cmd cleanup
|
||||
"${REPO_ROOT}"/net/net.sh netem --config-file "$NETEM_CONFIG_FILE" --netem-cmd cleanup -n $num_online_nodes
|
||||
sleep "$PARTITION_INACTIVE_DURATION"
|
||||
done
|
||||
STATS_FINISH_SECONDS=$SECONDS
|
||||
@@ -325,6 +347,9 @@ TEST_PARAMS_TO_DISPLAY=(CLOUD_PROVIDER \
|
||||
ADDITIONAL_FLAGS \
|
||||
APPLY_PARTITIONS \
|
||||
NETEM_CONFIG_FILE \
|
||||
WAIT_FOR_EQUAL_STAKE \
|
||||
WARMUP_SLOTS_BEFORE_TEST \
|
||||
NUMBER_OF_OFFLINE_NODES \
|
||||
PARTITION_ACTIVE_DURATION \
|
||||
PARTITION_INACTIVE_DURATION \
|
||||
PARTITION_ITERATION_COUNT \
|
||||
|
Reference in New Issue
Block a user