Fix the flaky test test_restart_tower_rollback (backport #23129) (#23155)

* Fix the flaky test test_restart_tower_rollback (#23129)

* Add flag to disable voting until a slot to avoid duplicate voting

* Fix the tower rollback test and remove it from flaky.

(cherry picked from commit ab92578b02)

* Resolve conflicts

Co-authored-by: Ashwin Sekar <ashwin@solana.com>
mergify[bot] authored and GitHub committed on 2022-02-17 20:31:27 +00:00
commit 02f8651a9c (parent 0fdbec9735)
5 changed files with 61 additions and 19 deletions
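The substance of the change: a new wait_to_vote_slot setting is threaded from ValidatorConfig through Tvu into ReplayStage, where generate_vote_tx refuses to build a vote transaction until the bank being voted on has reached the configured slot. A minimal, self-contained sketch of that gating pattern (stand-in types and names, not the actual solana APIs):

    // Stand-in alias; the real Slot type lives in solana_sdk.
    type Slot = u64;

    // Mirrors the early return added to generate_vote_tx in the diff
    // below: no vote is produced while the bank's slot is below the
    // configured threshold.
    fn maybe_vote(bank_slot: Slot, wait_to_vote_slot: Option<Slot>) -> Option<String> {
        if let Some(slot) = wait_to_vote_slot {
            if bank_slot < slot {
                return None; // still waiting; suppress the vote
            }
        }
        Some(format!("vote for slot {}", bank_slot))
    }

    fn main() {
        assert_eq!(maybe_vote(5, Some(10)), None); // gated
        assert!(maybe_vote(10, Some(10)).is_some()); // threshold reached
        assert!(maybe_vote(5, None).is_some()); // None disables the gate
    }

Using an Option keeps the gate strictly opt-in: None (the default) leaves voting untouched.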

core/src/replay_stage.rs

@@ -138,6 +138,9 @@ pub struct ReplayStageConfig {
     pub wait_for_vote_to_start_leader: bool,
     pub ancestor_hashes_replay_update_sender: AncestorHashesReplayUpdateSender,
     pub tower_storage: Arc<dyn TowerStorage>,
+    // Stops voting until this slot has been reached. Should be used to avoid
+    // duplicate voting which can lead to slashing.
+    pub wait_to_vote_slot: Option<Slot>,
 }

 #[derive(Default)]
@@ -374,6 +377,7 @@ impl ReplayStage {
             wait_for_vote_to_start_leader,
             ancestor_hashes_replay_update_sender,
             tower_storage,
+            wait_to_vote_slot,
         } = config;

         trace!("replay stage");
@@ -595,6 +599,7 @@ impl ReplayStage {
                         has_new_vote_been_rooted,
                         &mut last_vote_refresh_time,
                         &voting_sender,
+                        wait_to_vote_slot,
                     );
                 }
             }
@@ -678,6 +683,7 @@ impl ReplayStage {
                     &voting_sender,
                     &mut epoch_slots_frozen_slots,
                     &drop_bank_sender,
+                    wait_to_vote_slot,
                 );
             };
             voting_time.stop();
@@ -1654,6 +1660,7 @@ impl ReplayStage {
         voting_sender: &Sender<VoteOp>,
         epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots,
         bank_drop_sender: &Sender<Vec<Arc<Bank>>>,
+        wait_to_vote_slot: Option<Slot>,
     ) {
         if bank.is_empty() {
             inc_new_counter_info!("replay_stage-voted_empty_bank", 1);
@@ -1742,6 +1749,7 @@ impl ReplayStage {
             *has_new_vote_been_rooted,
             replay_timing,
             voting_sender,
+            wait_to_vote_slot,
         );
     }

@@ -1754,10 +1762,16 @@ impl ReplayStage {
         switch_fork_decision: &SwitchForkDecision,
         vote_signatures: &mut Vec<Signature>,
         has_new_vote_been_rooted: bool,
+        wait_to_vote_slot: Option<Slot>,
     ) -> Option<Transaction> {
         if authorized_voter_keypairs.is_empty() {
             return None;
         }
+        if let Some(slot) = wait_to_vote_slot {
+            if bank.slot() < slot {
+                return None;
+            }
+        }
         let vote_account = match bank.get_vote_account(vote_account_pubkey) {
             None => {
                 warn!(
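Note that the gate sits inside generate_vote_tx itself, so every vote path hits the same early return: the initial vote pushed from handle_votable_bank via push_vote, and the gossip refresh in refresh_last_vote. With wait_to_vote_slot set to None, which is what every pre-existing call site below passes, the gate is skipped and behavior is unchanged.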
@@ -1852,6 +1866,7 @@ impl ReplayStage {
         has_new_vote_been_rooted: bool,
         last_vote_refresh_time: &mut LastVoteRefreshTime,
         voting_sender: &Sender<VoteOp>,
+        wait_to_vote_slot: Option<Slot>,
     ) {
         let last_voted_slot = tower.last_voted_slot();
         if last_voted_slot.is_none() {
@@ -1894,6 +1909,7 @@ impl ReplayStage {
             &SwitchForkDecision::SameFork,
             vote_signatures,
             has_new_vote_been_rooted,
+            wait_to_vote_slot,
         );

         if let Some(vote_tx) = vote_tx {
@@ -1931,6 +1947,7 @@ impl ReplayStage {
         has_new_vote_been_rooted: bool,
         replay_timing: &mut ReplayTiming,
         voting_sender: &Sender<VoteOp>,
+        wait_to_vote_slot: Option<Slot>,
     ) {
         let mut generate_time = Measure::start("generate_vote");
         let vote_tx = Self::generate_vote_tx(
@@ -1942,6 +1959,7 @@ impl ReplayStage {
             switch_fork_decision,
             vote_signatures,
             has_new_vote_been_rooted,
+            wait_to_vote_slot,
         );
         generate_time.stop();
         replay_timing.generate_vote_us += generate_time.as_us();
@@ -5730,6 +5748,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut ReplayTiming::default(),
             &voting_sender,
+            None,
         );
         let vote_info = voting_receiver
             .recv_timeout(Duration::from_secs(1))
@@ -5769,6 +5788,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut last_vote_refresh_time,
             &voting_sender,
+            None,
         );

         // No new votes have been submitted to gossip
@@ -5794,6 +5814,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut ReplayTiming::default(),
             &voting_sender,
+            None,
         );
         let vote_info = voting_receiver
             .recv_timeout(Duration::from_secs(1))
@@ -5825,6 +5846,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut last_vote_refresh_time,
             &voting_sender,
+            None,
         );

         // No new votes have been submitted to gossip
@@ -5862,6 +5884,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut last_vote_refresh_time,
             &voting_sender,
+            None,
         );
         let vote_info = voting_receiver
             .recv_timeout(Duration::from_secs(1))
@@ -5929,6 +5952,7 @@ pub mod tests {
             has_new_vote_been_rooted,
             &mut last_vote_refresh_time,
             &voting_sender,
+            None,
         );

         let votes = cluster_info.get_votes(&mut cursor);

core/src/tvu.rs

@@ -149,6 +149,7 @@ impl Tvu {
         accounts_package_channel: (AccountsPackageSender, AccountsPackageReceiver),
         last_full_snapshot_slot: Option<Slot>,
         block_metadata_notifier: Option<BlockMetadataNotifierLock>,
+        wait_to_vote_slot: Option<Slot>,
     ) -> Self {
         let Sockets {
             repair: repair_socket,
@@ -297,6 +298,7 @@ impl Tvu {
             wait_for_vote_to_start_leader: tvu_config.wait_for_vote_to_start_leader,
             ancestor_hashes_replay_update_sender,
             tower_storage: tower_storage.clone(),
+            wait_to_vote_slot,
         };

         let (voting_sender, voting_receiver) = channel();
@@ -517,6 +519,7 @@ pub mod tests {
             accounts_package_channel,
             None,
             None,
+            None,
         );
         exit.store(true, Ordering::Relaxed);
         tvu.join().unwrap();

core/src/validator.rs

@@ -164,6 +164,7 @@ pub struct ValidatorConfig {
     pub validator_exit: Arc<RwLock<Exit>>,
     pub no_wait_for_vote_to_start_leader: bool,
     pub accounts_shrink_ratio: AccountShrinkThreshold,
+    pub wait_to_vote_slot: Option<Slot>,
 }

 impl Default for ValidatorConfig {
@@ -223,6 +224,7 @@ impl Default for ValidatorConfig {
             no_wait_for_vote_to_start_leader: true,
             accounts_shrink_ratio: AccountShrinkThreshold::default(),
             accounts_db_config: None,
+            wait_to_vote_slot: None,
         }
     }
 }
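The new field is what lets the restart test hold a freshly restarted validator's votes until it has replayed past its previous tower, avoiding the duplicate voting behind the flakiness. A hypothetical test-side sketch (stand-in struct carrying only the new field; the real ValidatorConfig has many more):

    type Slot = u64;

    // Stand-in for ValidatorConfig, reduced to the field added above.
    #[derive(Default)]
    struct Config {
        wait_to_vote_slot: Option<Slot>,
    }

    fn main() {
        // Illustrative value; the actual test would derive this from the
        // slot the validator last voted on before being restarted.
        let last_voted_slot: Slot = 42;
        let mut config = Config::default();
        // Hold all votes until replay is past the old tower, so the
        // restarted node cannot vote twice on slots it already voted for.
        config.wait_to_vote_slot = Some(last_voted_slot + 1);
        assert_eq!(config.wait_to_vote_slot, Some(43));
    }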
@@ -893,6 +895,7 @@ impl Validator {
             accounts_package_channel,
             last_full_snapshot_slot,
             block_metadata_notifier,
+            config.wait_to_vote_slot,
         );

         let tpu = Tpu::new(