diff --git a/core/src/ancestor_hashes_service.rs b/core/src/ancestor_hashes_service.rs index f431241887..cf35c2ae27 100644 --- a/core/src/ancestor_hashes_service.rs +++ b/core/src/ancestor_hashes_service.rs @@ -1367,7 +1367,7 @@ mod test { // Simulate Replay dumping this slot let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); - duplicate_slots_to_repair.insert((dead_slot, Hash::new_unique())); + duplicate_slots_to_repair.insert(dead_slot, Hash::new_unique()); ReplayStage::dump_then_repair_correct_slots( &mut duplicate_slots_to_repair, &mut bank_forks.read().unwrap().ancestors(), diff --git a/core/src/cluster_slot_state_verifier.rs b/core/src/cluster_slot_state_verifier.rs index 422a37b48b..b29a9608ab 100644 --- a/core/src/cluster_slot_state_verifier.rs +++ b/core/src/cluster_slot_state_verifier.rs @@ -5,12 +5,21 @@ use crate::{ }; use solana_ledger::blockstore::Blockstore; use solana_sdk::{clock::Slot, hash::Hash}; -use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; pub(crate) type DuplicateSlotsTracker = BTreeSet; -pub(crate) type DuplicateSlotsToRepair = HashSet<(Slot, Hash)>; +pub(crate) type DuplicateSlotsToRepair = HashMap; +pub(crate) type EpochSlotsFrozenSlots = BTreeMap; pub(crate) type GossipDuplicateConfirmedSlots = BTreeMap; +#[derive(PartialEq, Clone, Debug)] +pub enum ClusterConfirmedHash { + // Ordered from strongest confirmation to weakest. Stronger + // confirmations take precedence over weaker ones. 
+ DuplicateConfirmed(Hash), + EpochSlotsFrozen(Hash), +} + #[derive(PartialEq, Clone, Debug)] pub enum BankStatus { Frozen(Hash), @@ -64,7 +73,7 @@ impl BankStatus { pub struct DeadState { // Keep fields private, forces construction // via constructor - duplicate_confirmed_hash: Option, + cluster_confirmed_hash: Option, is_slot_duplicate: bool, } @@ -74,20 +83,22 @@ impl DeadState { duplicate_slots_tracker: &mut DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, fork_choice: &mut HeaviestSubtreeForkChoice, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, ) -> Self { - let duplicate_confirmed_hash = get_duplicate_confirmed_hash_from_state( + let cluster_confirmed_hash = get_cluster_confirmed_hash_from_state( slot, gossip_duplicate_confirmed_slots, + epoch_slots_frozen_slots, fork_choice, Some(Hash::default()), ); let is_slot_duplicate = duplicate_slots_tracker.contains(&slot); - Self::new(duplicate_confirmed_hash, is_slot_duplicate) + Self::new(cluster_confirmed_hash, is_slot_duplicate) } - fn new(duplicate_confirmed_hash: Option, is_slot_duplicate: bool) -> Self { + fn new(cluster_confirmed_hash: Option, is_slot_duplicate: bool) -> Self { Self { - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, } } @@ -98,7 +109,7 @@ pub struct BankFrozenState { // Keep fields private, forces construction // via constructor frozen_hash: Hash, - duplicate_confirmed_hash: Option, + cluster_confirmed_hash: Option, is_slot_duplicate: bool, } @@ -109,26 +120,28 @@ impl BankFrozenState { duplicate_slots_tracker: &mut DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, fork_choice: &mut HeaviestSubtreeForkChoice, + epoch_slots_frozen_slots: &EpochSlotsFrozenSlots, ) -> Self { - let duplicate_confirmed_hash = get_duplicate_confirmed_hash_from_state( + let cluster_confirmed_hash = get_cluster_confirmed_hash_from_state( slot, gossip_duplicate_confirmed_slots, + epoch_slots_frozen_slots, 
fork_choice, Some(frozen_hash), ); let is_slot_duplicate = duplicate_slots_tracker.contains(&slot); - Self::new(frozen_hash, duplicate_confirmed_hash, is_slot_duplicate) + Self::new(frozen_hash, cluster_confirmed_hash, is_slot_duplicate) } fn new( frozen_hash: Hash, - duplicate_confirmed_hash: Option, + cluster_confirmed_hash: Option, is_slot_duplicate: bool, ) -> Self { assert!(frozen_hash != Hash::default()); Self { frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, } } @@ -175,6 +188,10 @@ impl DuplicateState { get_hash: impl Fn() -> Option, ) -> Self { let bank_status = BankStatus::new(is_dead, get_hash); + + // We can only skip marking duplicate if this slot has already been + // duplicate confirmed, any weaker confirmation levels are not sufficient + // to skip marking the slot as duplicate. let duplicate_confirmed_hash = get_duplicate_confirmed_hash_from_state( slot, gossip_duplicate_confirmed_slots, @@ -192,12 +209,57 @@ impl DuplicateState { } } +#[derive(PartialEq, Debug)] +pub struct EpochSlotsFrozenState { + // Keep fields private, forces construction + // via constructor + epoch_slots_frozen_hash: Hash, + duplicate_confirmed_hash: Option, + bank_status: BankStatus, +} +impl EpochSlotsFrozenState { + pub fn new_from_state( + slot: Slot, + epoch_slots_frozen_hash: Hash, + gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + fork_choice: &mut HeaviestSubtreeForkChoice, + is_dead: impl Fn() -> bool, + get_hash: impl Fn() -> Option, + ) -> Self { + let bank_status = BankStatus::new(is_dead, get_hash); + let duplicate_confirmed_hash = get_duplicate_confirmed_hash_from_state( + slot, + gossip_duplicate_confirmed_slots, + fork_choice, + bank_status.bank_hash(), + ); + Self::new( + epoch_slots_frozen_hash, + duplicate_confirmed_hash, + bank_status, + ) + } + + fn new( + epoch_slots_frozen_hash: Hash, + duplicate_confirmed_hash: Option, + bank_status: BankStatus, + ) -> Self { + Self { + epoch_slots_frozen_hash, 
+ duplicate_confirmed_hash, + bank_status, + } + } +} + #[derive(PartialEq, Debug)] pub enum SlotStateUpdate { BankFrozen(BankFrozenState), DuplicateConfirmed(DuplicateConfirmedState), Dead(DeadState), Duplicate(DuplicateState), + EpochSlotsFrozen(EpochSlotsFrozenState), } impl SlotStateUpdate { @@ -209,6 +271,9 @@ impl SlotStateUpdate { } SlotStateUpdate::Dead(_) => Some(Hash::default()), SlotStateUpdate::Duplicate(duplicate_state) => duplicate_state.bank_status.bank_hash(), + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state) => { + epoch_slots_frozen_state.bank_status.bank_hash() + } } } } @@ -219,8 +284,10 @@ pub enum ResultingStateChange { BankFrozen(Hash), // Hash of our current frozen version of the slot MarkSlotDuplicate(Hash), - // Hash of the cluster confirmed slot that is not equivalent - // to our frozen version of the slot + // Hash of either: + // 1) Cluster duplicate confirmed slot + // 2) Epoch Slots frozen sampled slot + // that is not equivalent to our frozen version of the slot RepairDuplicateConfirmedVersion(Hash), // Hash of our current frozen version of the slot DuplicateConfirmedSlotMatchesCluster(Hash), @@ -245,6 +312,9 @@ impl SlotStateUpdate { on_duplicate_confirmed(slot, duplicate_confirmed_state) } SlotStateUpdate::Duplicate(duplicate_state) => on_duplicate(duplicate_state), + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state) => { + on_epoch_slots_frozen(slot, epoch_slots_frozen_state) + } } } } @@ -261,7 +331,7 @@ fn check_duplicate_confirmed_hash_against_frozen_hash( // If the cluster duplicate confirmed some version of this slot, then // there's another version of our dead slot warn!( - "Cluster duplicate_confirmed slot {} with hash {}, but we marked slot dead", + "Cluster duplicate confirmed slot {} with hash {}, but we marked slot dead", slot, duplicate_confirmed_hash ); } else { @@ -269,7 +339,7 @@ fn check_duplicate_confirmed_hash_against_frozen_hash( // Modify fork choice rule to exclude our version from
being voted // on and also repair the correct version warn!( - "Cluster duplicate_confirmed slot {} with hash {}, but we froze slot with hash {}", + "Cluster duplicate confirmed slot {} with hash {}, but our version has hash {}", slot, duplicate_confirmed_hash, bank_frozen_hash ); } @@ -287,28 +357,77 @@ fn check_duplicate_confirmed_hash_against_frozen_hash( } } +fn check_epoch_slots_hash_against_frozen_hash( + state_changes: &mut Vec, + slot: Slot, + epoch_slots_frozen_hash: Hash, + bank_frozen_hash: Hash, + is_dead: bool, +) { + if epoch_slots_frozen_hash != bank_frozen_hash { + if is_dead { + // If the EpochSlots sample reported a frozen hash for this slot, then + // there's another version of our dead slot + warn!( + "EpochSlots sample returned slot {} with hash {}, but we marked slot dead", + slot, epoch_slots_frozen_hash + ); + } else { + // The EpochSlots sampled hash does not match our frozen hash. + // Modify fork choice rule to exclude our version from being voted + // on and also repair the correct version + warn!( + "EpochSlots sample returned slot {} with hash {}, but our version + has hash {}", + slot, epoch_slots_frozen_hash, bank_frozen_hash + ); + } + state_changes.push(ResultingStateChange::MarkSlotDuplicate(bank_frozen_hash)); + state_changes.push(ResultingStateChange::RepairDuplicateConfirmedVersion( + epoch_slots_frozen_hash, + )); + } +} + fn on_dead_slot(slot: Slot, dead_state: DeadState) -> Vec { let DeadState { - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, } = dead_state; let mut state_changes = vec![]; - if let Some(duplicate_confirmed_hash) = duplicate_confirmed_hash { - // If the cluster duplicate_confirmed some version of this slot, then - // check if our version agrees with the cluster, - let bank_hash = Hash::default(); - let is_dead = true; - state_changes.push(ResultingStateChange::SendAncestorHashesReplayUpdate( - AncestorHashesReplayUpdate::DeadDuplicateConfirmed(slot), - )); -
check_duplicate_confirmed_hash_against_frozen_hash( - &mut state_changes, - slot, - duplicate_confirmed_hash, - bank_hash, - is_dead, - ); + if let Some(cluster_confirmed_hash) = cluster_confirmed_hash { + match cluster_confirmed_hash { + ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash) => { + // If the cluster duplicate_confirmed some version of this slot, then + // check if our version agrees with the cluster, + let bank_hash = Hash::default(); + let is_dead = true; + state_changes.push(ResultingStateChange::SendAncestorHashesReplayUpdate( + AncestorHashesReplayUpdate::DeadDuplicateConfirmed(slot), + )); + check_duplicate_confirmed_hash_against_frozen_hash( + &mut state_changes, + slot, + duplicate_confirmed_hash, + bank_hash, + is_dead, + ); + } + ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash) => { + // Lower priority than having seen an actual duplicate confirmed hash in the + // match arm above. + let bank_hash = Hash::default(); + let is_dead = true; + check_epoch_slots_hash_against_frozen_hash( + &mut state_changes, + slot, + epoch_slots_frozen_hash, + bank_hash, + is_dead, + ); + } + } } else { state_changes.push(ResultingStateChange::SendAncestorHashesReplayUpdate( AncestorHashesReplayUpdate::Dead(slot), @@ -324,22 +443,41 @@ fn on_dead_slot(slot: Slot, dead_state: DeadState) -> Vec fn on_frozen_slot(slot: Slot, bank_frozen_state: BankFrozenState) -> Vec { let BankFrozenState { frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, } = bank_frozen_state; let mut state_changes = vec![ResultingStateChange::BankFrozen(frozen_hash)]; - if let Some(duplicate_confirmed_hash) = duplicate_confirmed_hash { - // If the cluster duplicate_confirmed some version of this slot, then - // check if our version agrees with the cluster, - let is_dead = false; - check_duplicate_confirmed_hash_against_frozen_hash( - &mut state_changes, - slot, - duplicate_confirmed_hash, - frozen_hash, - is_dead, - ); - } else 
if is_slot_duplicate { + if let Some(cluster_confirmed_hash) = cluster_confirmed_hash { + match cluster_confirmed_hash { + ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash) => { + // If the cluster duplicate_confirmed some version of this slot, then + // check if our version agrees with the cluster, + let is_dead = false; + check_duplicate_confirmed_hash_against_frozen_hash( + &mut state_changes, + slot, + duplicate_confirmed_hash, + frozen_hash, + is_dead, + ); + } + ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash) => { + // Lower priority than having seen an actual duplicate confirmed hash in the + // match arm above. + let is_dead = false; + check_epoch_slots_hash_against_frozen_hash( + &mut state_changes, + slot, + epoch_slots_frozen_hash, + frozen_hash, + is_dead, + ); + } + } + } + // If `cluster_confirmed_hash` is Some above we should have already pushed a + // `MarkSlotDuplicate` state change + else if is_slot_duplicate { state_changes.push(ResultingStateChange::MarkSlotDuplicate(frozen_hash)); } @@ -412,6 +550,81 @@ fn on_duplicate(duplicate_state: DuplicateState) -> Vec { vec![] } +fn on_epoch_slots_frozen( + slot: Slot, + epoch_slots_frozen_state: EpochSlotsFrozenState, +) -> Vec { + let EpochSlotsFrozenState { + bank_status, + epoch_slots_frozen_hash, + duplicate_confirmed_hash, + } = epoch_slots_frozen_state; + + if let Some(duplicate_confirmed_hash) = duplicate_confirmed_hash { + if epoch_slots_frozen_hash != duplicate_confirmed_hash { + warn!( + "EpochSlots sample returned slot {} with hash {}, but we already saw + duplicate confirmation on hash: {:?}", + slot, epoch_slots_frozen_hash, duplicate_confirmed_hash + ); + } + return vec![]; + } + + match bank_status { + BankStatus::Dead | BankStatus::Frozen(_) => (), + // No action to be taken yet + BankStatus::Unprocessed => { + return vec![]; + } + } + + let frozen_hash = bank_status.bank_hash().expect("bank hash must exist"); + let is_dead = bank_status.is_dead(); + let 
mut state_changes = vec![]; + check_epoch_slots_hash_against_frozen_hash( + &mut state_changes, + slot, + epoch_slots_frozen_hash, + frozen_hash, + is_dead, + ); + + state_changes +} + +fn get_cluster_confirmed_hash_from_state( + slot: Slot, + gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &EpochSlotsFrozenSlots, + fork_choice: &mut HeaviestSubtreeForkChoice, + bank_frozen_hash: Option, +) -> Option { + let gossip_duplicate_confirmed_hash = gossip_duplicate_confirmed_slots.get(&slot).cloned(); + // If the bank hasn't been frozen yet, then we haven't duplicate confirmed a local version of + // this slot through replay yet. + let is_local_replay_duplicate_confirmed = if let Some(bank_frozen_hash) = bank_frozen_hash { + fork_choice + .is_duplicate_confirmed(&(slot, bank_frozen_hash)) + .unwrap_or(false) + } else { + false + }; + + get_duplicate_confirmed_hash( + slot, + gossip_duplicate_confirmed_hash, + bank_frozen_hash, + is_local_replay_duplicate_confirmed, + ) + .map(ClusterConfirmedHash::DuplicateConfirmed) + .or_else(|| { + epoch_slots_frozen_slots + .get(&slot) + .map(|hash| ClusterConfirmedHash::EpochSlotsFrozen(*hash)) + }) +} + fn get_duplicate_confirmed_hash_from_state( slot: Slot, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, @@ -498,7 +711,7 @@ fn apply_state_changes( fork_choice.mark_fork_invalid_candidate(&(slot, bank_frozen_hash)); } ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash) => { - duplicate_slots_to_repair.insert((slot, duplicate_confirmed_hash)); + duplicate_slots_to_repair.insert(slot, duplicate_confirmed_hash); } ResultingStateChange::DuplicateConfirmedSlotMatchesCluster(bank_frozen_hash) => { not_duplicate_confirmed_frozen_hash = None; @@ -512,6 +725,7 @@ fn apply_state_changes( new_duplicate_confirmed_slot_hashes.into_iter(), ) .unwrap(); + duplicate_slots_to_repair.remove(&slot); }
ResultingStateChange::SendAncestorHashesReplayUpdate(ancestor_hashes_replay_update) => { let _ = ancestor_hashes_replay_update_sender.send(ancestor_hashes_replay_update); @@ -530,8 +744,9 @@ pub(crate) fn check_slot_agrees_with_cluster( root: Slot, blockstore: &Blockstore, duplicate_slots_tracker: &mut DuplicateSlotsTracker, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, fork_choice: &mut HeaviestSubtreeForkChoice, - duplicate_slots_to_repair: &mut HashSet<(Slot, Hash)>, + duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, slot_state_update: SlotStateUpdate, ) { @@ -566,6 +781,17 @@ pub(crate) fn check_slot_agrees_with_cluster( } } + if let SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state) = &slot_state_update { + if let Some(old_epoch_slots_frozen_hash) = + epoch_slots_frozen_slots.insert(slot, epoch_slots_frozen_state.epoch_slots_frozen_hash) + { + if old_epoch_slots_frozen_hash == epoch_slots_frozen_state.epoch_slots_frozen_hash { + // If EpochSlots has already told us this same hash was frozen, return + return; + } + } + } + let state_changes = slot_state_update.into_state_changes(slot); apply_state_changes( slot, @@ -606,11 +832,11 @@ mod test { bank_frozen_state_update_0: { // frozen hash has to be non-default for frozen state transition let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = None; + let cluster_confirmed_hash = None; let is_slot_duplicate = false; let bank_frozen_state = BankFrozenState::new( frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -621,11 +847,11 @@ mod test { bank_frozen_state_update_1: { // frozen hash has to be non-default for frozen state transition let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = None; + let cluster_confirmed_hash = None; let is_slot_duplicate = true; let bank_frozen_state = BankFrozenState::new( frozen_hash, - 
duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -636,11 +862,11 @@ mod test { bank_frozen_state_update_2: { // frozen hash has to be non-default for frozen state transition let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = Some(frozen_hash); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(frozen_hash)); let is_slot_duplicate = false; let bank_frozen_state = BankFrozenState::new( frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -652,11 +878,26 @@ mod test { bank_frozen_state_update_3: { // frozen hash has to be non-default for frozen state transition let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = Some(frozen_hash); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(frozen_hash)); + let is_slot_duplicate = false; + let bank_frozen_state = BankFrozenState::new( + frozen_hash, + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::BankFrozen(bank_frozen_state), + vec![ResultingStateChange::BankFrozen(frozen_hash)] + ) + }, + bank_frozen_state_update_4: { + // frozen hash has to be non-default for frozen state transition + let frozen_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(frozen_hash)); let is_slot_duplicate = true; let bank_frozen_state = BankFrozenState::new( frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -665,38 +906,91 @@ mod test { ResultingStateChange::DuplicateConfirmedSlotMatchesCluster(frozen_hash)] ) }, - bank_frozen_state_update_4: { - // frozen hash has to be non-default for frozen state transition - let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = Some(Hash::new_unique()); - let is_slot_duplicate = false; - let bank_frozen_state = BankFrozenState::new( - frozen_hash, - duplicate_confirmed_hash, - is_slot_duplicate, - ); - ( - 
SlotStateUpdate::BankFrozen(bank_frozen_state), - vec![ResultingStateChange::BankFrozen(frozen_hash), - ResultingStateChange::MarkSlotDuplicate(frozen_hash), - ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash.unwrap())], - ) - }, bank_frozen_state_update_5: { // frozen hash has to be non-default for frozen state transition let frozen_hash = Hash::new_unique(); - let duplicate_confirmed_hash = Some(Hash::new_unique()); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(frozen_hash)); let is_slot_duplicate = true; let bank_frozen_state = BankFrozenState::new( frozen_hash, - duplicate_confirmed_hash, + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::BankFrozen(bank_frozen_state), + vec![ResultingStateChange::BankFrozen(frozen_hash)], + ) + }, + bank_frozen_state_update_6: { + // frozen hash has to be non-default for frozen state transition + let frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash)); + let is_slot_duplicate = false; + let bank_frozen_state = BankFrozenState::new( + frozen_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( SlotStateUpdate::BankFrozen(bank_frozen_state), vec![ResultingStateChange::BankFrozen(frozen_hash), ResultingStateChange::MarkSlotDuplicate(frozen_hash), - ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash.unwrap())], + ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash)], + ) + }, + bank_frozen_state_update_7: { + // frozen hash has to be non-default for frozen state transition + let frozen_hash = Hash::new_unique(); + let epoch_slots_frozen_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash)); + let is_slot_duplicate = false; + let bank_frozen_state = BankFrozenState::new( + 
frozen_hash, + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::BankFrozen(bank_frozen_state), + vec![ResultingStateChange::BankFrozen(frozen_hash), + ResultingStateChange::MarkSlotDuplicate(frozen_hash), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], + ) + }, + bank_frozen_state_update_8: { + // frozen hash has to be non-default for frozen state transition + let frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash)); + let is_slot_duplicate = true; + let bank_frozen_state = BankFrozenState::new( + frozen_hash, + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::BankFrozen(bank_frozen_state), + vec![ResultingStateChange::BankFrozen(frozen_hash), + ResultingStateChange::MarkSlotDuplicate(frozen_hash), + ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash)], + ) + }, + bank_frozen_state_update_9: { + // frozen hash has to be non-default for frozen state transition + let frozen_hash = Hash::new_unique(); + let epoch_slots_frozen_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash)); + let is_slot_duplicate = true; + let bank_frozen_state = BankFrozenState::new( + frozen_hash, + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::BankFrozen(bank_frozen_state), + vec![ResultingStateChange::BankFrozen(frozen_hash), + ResultingStateChange::MarkSlotDuplicate(frozen_hash), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], ) }, duplicate_confirmed_state_update_0: { @@ -755,10 +1049,10 @@ mod test { ) }, dead_state_update_0: { - let duplicate_confirmed_hash = None; + let cluster_confirmed_hash = None; let is_slot_duplicate = false; let dead_state = DeadState::new( - duplicate_confirmed_hash, + 
cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -769,10 +1063,10 @@ mod test { ) }, dead_state_update_1: { - let duplicate_confirmed_hash = None; + let cluster_confirmed_hash = None; let is_slot_duplicate = true; let dead_state = DeadState::new( - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -782,10 +1076,11 @@ mod test { ) }, dead_state_update_2: { - let duplicate_confirmed_hash = Some(Hash::new_unique()); + let duplicate_confirmed_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash)); let is_slot_duplicate = false; let dead_state = DeadState::new( - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( @@ -793,22 +1088,54 @@ mod test { vec![ ResultingStateChange::SendAncestorHashesReplayUpdate(AncestorHashesReplayUpdate::DeadDuplicateConfirmed(10)), ResultingStateChange::MarkSlotDuplicate(Hash::default()), - ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash.unwrap())], + ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash)], ) }, dead_state_update_3: { - let duplicate_confirmed_hash = Some(Hash::new_unique()); - let is_slot_duplicate = true; + let epoch_slots_frozen_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash)); + let is_slot_duplicate = false; let dead_state = DeadState::new( - duplicate_confirmed_hash, + cluster_confirmed_hash, is_slot_duplicate, ); ( SlotStateUpdate::Dead(dead_state), vec![ + ResultingStateChange::MarkSlotDuplicate(Hash::default()), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], + ) + }, + dead_state_update_4: { + let duplicate_confirmed_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::DuplicateConfirmed(duplicate_confirmed_hash)); + let is_slot_duplicate = true; + let dead_state = DeadState::new( + 
cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::Dead(dead_state), + vec![ + ResultingStateChange::SendAncestorHashesReplayUpdate(AncestorHashesReplayUpdate::DeadDuplicateConfirmed(10)), ResultingStateChange::MarkSlotDuplicate(Hash::default()), - ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash.unwrap())], + ResultingStateChange::RepairDuplicateConfirmedVersion(duplicate_confirmed_hash)], + ) + }, + dead_state_update_5: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let cluster_confirmed_hash = Some(ClusterConfirmedHash::EpochSlotsFrozen(epoch_slots_frozen_hash)); + let is_slot_duplicate = true; + let dead_state = DeadState::new( + cluster_confirmed_hash, + is_slot_duplicate, + ); + ( + SlotStateUpdate::Dead(dead_state), + vec![ + ResultingStateChange::MarkSlotDuplicate(Hash::default()), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], ) }, duplicate_state_update_0: { @@ -876,6 +1203,121 @@ mod test { Vec::::new(), ) }, + epoch_slots_frozen_state_update_0: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = None; + let bank_status = BankStatus::Unprocessed; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_1: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(Hash::new_unique()); + let bank_status = BankStatus::Unprocessed; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_2: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(epoch_slots_frozen_hash); + let 
bank_status = BankStatus::Unprocessed; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_3: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = None; + let bank_status = BankStatus::Dead; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + vec![ + ResultingStateChange::MarkSlotDuplicate(Hash::default()), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], + ) + }, + epoch_slots_frozen_state_update_4: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(Hash::new_unique()); + let bank_status = BankStatus::Dead; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_5: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(epoch_slots_frozen_hash); + let bank_status = BankStatus::Dead; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_6: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = None; + let frozen_hash = Hash::new_unique(); + let bank_status = BankStatus::Frozen(frozen_hash); + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + 
SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + vec![ + ResultingStateChange::MarkSlotDuplicate(frozen_hash), + ResultingStateChange::RepairDuplicateConfirmedVersion(epoch_slots_frozen_hash)], + ) + }, + epoch_slots_frozen_state_update_7: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = None; + let bank_status = BankStatus::Frozen(epoch_slots_frozen_hash); + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_8: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(Hash::new_unique()); + let bank_status = BankStatus::Frozen(Hash::new_unique()); + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_9: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(Hash::new_unique()); + let bank_status = BankStatus::Frozen(epoch_slots_frozen_hash); + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, + epoch_slots_frozen_state_update_10: { + let epoch_slots_frozen_hash = Hash::new_unique(); + let duplicate_confirmed_hash = Some(Hash::new_unique()); + let bank_status = BankStatus::Frozen(duplicate_confirmed_hash.unwrap()); + let epoch_slots_frozen_state = EpochSlotsFrozenState::new(epoch_slots_frozen_hash, duplicate_confirmed_hash, bank_status); + ( + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + Vec::::new() + ) + }, } struct InitialState { @@ -976,7 +1418,10 @@ mod test { )], ); 
assert_eq!(duplicate_slots_to_repair.len(), 1); - assert!(duplicate_slots_to_repair.contains(&(duplicate_slot, correct_hash))); + assert_eq!( + *duplicate_slots_to_repair.get(&duplicate_slot).unwrap(), + correct_hash + ); } #[test] @@ -989,6 +1434,8 @@ mod test { .. } = setup(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let duplicate_slot = bank_forks.read().unwrap().root() + 1; let duplicate_slot_hash = bank_forks .read() @@ -996,7 +1443,6 @@ mod test { .get(duplicate_slot) .unwrap() .hash(); - let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); // Simulate ReplayStage freezing a Bank with the given hash. // BankFrozen should mark it down in Blockstore. @@ -1053,6 +1499,8 @@ mod test { .. } = setup(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let duplicate_slot = bank_forks.read().unwrap().root() + 1; let our_duplicate_slot_hash = bank_forks .read() @@ -1060,15 +1508,17 @@ mod test { .get(duplicate_slot) .unwrap() .hash(); - let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); // Setup and check the state that is about to change. 
+ duplicate_slots_to_repair.insert(duplicate_slot, Hash::new_unique()); assert!(blockstore.get_bank_hash(duplicate_slot).is_none()); assert!(!blockstore.is_duplicate_confirmed(duplicate_slot)); // DuplicateConfirmedSlotMatchesCluster should: // 1) Re-enable fork choice - // 2) Set the status to duplicate confirmed in Blockstore + // 2) Clear any pending repairs from `duplicate_slots_to_repair` since we have the + // right version now + // 3) Set the status to duplicate confirmed in Blockstore let mut state_changes = vec![ResultingStateChange::DuplicateConfirmedSlotMatchesCluster( our_duplicate_slot_hash, )]; @@ -1099,6 +1549,7 @@ mod test { assert!(heaviest_subtree_fork_choice .is_candidate(&(duplicate_slot, our_duplicate_slot_hash)) .unwrap()); + assert!(duplicate_slots_to_repair.is_empty()); assert_eq!( blockstore.get_bank_hash(duplicate_slot).unwrap(), our_duplicate_slot_hash @@ -1139,6 +1590,7 @@ mod test { let root = 0; let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); let duplicate_slot = 2; let duplicate_state = DuplicateState::new_from_state( @@ -1155,6 +1607,7 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, @@ -1182,12 +1635,14 @@ mod test { &mut duplicate_slots_tracker, &gossip_duplicate_confirmed_slots, &mut heaviest_subtree_fork_choice, + &epoch_slots_frozen_slots, ); check_slot_agrees_with_cluster( duplicate_slot, root, &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, @@ -1257,6 +1712,7 @@ mod test { root, &blockstore, &mut 
duplicate_slots_tracker, + &mut EpochSlotsFrozenSlots::default(), &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -1293,6 +1749,7 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut EpochSlotsFrozenSlots::default(), &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -1362,6 +1819,7 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut EpochSlotsFrozenSlots::default(), &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -1396,6 +1854,7 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut EpochSlotsFrozenSlots::default(), &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -1416,6 +1875,29 @@ mod test { ); } + fn verify_all_slots_duplicate_confirmed( + bank_forks: &RwLock, + heaviest_subtree_fork_choice: &HeaviestSubtreeForkChoice, + upper_bound: Slot, + expected_is_duplicate_confirmed: bool, + ) { + for slot in 0..upper_bound { + let slot_hash = bank_forks.read().unwrap().get(slot).unwrap().hash(); + let expected_is_duplicate_confirmed = expected_is_duplicate_confirmed || + // root is always duplicate confirmed + slot == 0; + assert_eq!( + heaviest_subtree_fork_choice + .is_duplicate_confirmed(&(slot, slot_hash)) + .unwrap(), + expected_is_duplicate_confirmed + ); + assert!(heaviest_subtree_fork_choice + .latest_invalid_ancestor(&(slot, slot_hash)) + .is_none()); + } + } + #[test] fn test_state_descendant_confirmed_ancestor_duplicate() { // Common state @@ -1435,6 +1917,7 @@ mod test { let root = 0; let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = 
DuplicateSlotsToRepair::default(); // Mark 3 as duplicate confirmed @@ -1451,25 +1934,13 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); - let verify_all_slots_duplicate_confirmed = - |bank_forks: &RwLock, - heaviest_subtree_fork_choice: &HeaviestSubtreeForkChoice| { - for slot in 0..=3 { - let slot_hash = bank_forks.read().unwrap().get(slot).unwrap().hash(); - assert!(heaviest_subtree_fork_choice - .is_duplicate_confirmed(&(slot, slot_hash)) - .unwrap()); - assert!(heaviest_subtree_fork_choice - .latest_invalid_ancestor(&(slot, slot_hash)) - .is_none()); - } - }; - verify_all_slots_duplicate_confirmed(&bank_forks, &heaviest_subtree_fork_choice); + verify_all_slots_duplicate_confirmed(&bank_forks, &heaviest_subtree_fork_choice, 3, true); assert_eq!( heaviest_subtree_fork_choice.best_overall_slot(), (3, slot3_hash) @@ -1491,13 +1962,194 @@ mod test { root, &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, SlotStateUpdate::Duplicate(duplicate_state), ); assert!(duplicate_slots_tracker.contains(&1)); - verify_all_slots_duplicate_confirmed(&bank_forks, &heaviest_subtree_fork_choice); + verify_all_slots_duplicate_confirmed(&bank_forks, &heaviest_subtree_fork_choice, 3, true); + assert_eq!( + heaviest_subtree_fork_choice.best_overall_slot(), + (3, slot3_hash) + ); + } + + #[test] + fn test_duplicate_confirmed_and_epoch_slots_frozen() { + // Common state + let InitialState { + mut heaviest_subtree_fork_choice, + progress, + bank_forks, + blockstore, + .. 
+ } = setup(); + + let slot3_hash = bank_forks.read().unwrap().get(3).unwrap().hash(); + assert_eq!( + heaviest_subtree_fork_choice.best_overall_slot(), + (3, slot3_hash) + ); + let root = 0; + let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); + let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + + // Mark 3 as only epoch slots frozen, matching our `slot3_hash`, should not duplicate + // confirm the slot + let mut expected_is_duplicate_confirmed = false; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new_from_state( + 3, + slot3_hash, + &gossip_duplicate_confirmed_slots, + &mut heaviest_subtree_fork_choice, + || progress.is_dead(3).unwrap_or(false), + || Some(slot3_hash), + ); + let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) = + unbounded(); + check_slot_agrees_with_cluster( + 3, + root, + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut heaviest_subtree_fork_choice, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + ); + verify_all_slots_duplicate_confirmed( + &bank_forks, + &heaviest_subtree_fork_choice, + 3, + expected_is_duplicate_confirmed, + ); + + // Mark 3 as duplicate confirmed and epoch slots frozen with the same hash. 
Should + // duplicate confirm all ancestors of 3 + gossip_duplicate_confirmed_slots.insert(3, slot3_hash); + expected_is_duplicate_confirmed = true; + let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state( + slot3_hash, + || progress.is_dead(2).unwrap_or(false), + || Some(slot3_hash), + ); + let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) = + unbounded(); + check_slot_agrees_with_cluster( + 3, + root, + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut heaviest_subtree_fork_choice, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), + ); + assert_eq!(*epoch_slots_frozen_slots.get(&3).unwrap(), slot3_hash); + verify_all_slots_duplicate_confirmed( + &bank_forks, + &heaviest_subtree_fork_choice, + 3, + expected_is_duplicate_confirmed, + ); + assert_eq!( + heaviest_subtree_fork_choice.best_overall_slot(), + (3, slot3_hash) + ); + } + + #[test] + fn test_duplicate_confirmed_and_epoch_slots_frozen_mismatched() { + // Common state + let InitialState { + mut heaviest_subtree_fork_choice, + progress, + bank_forks, + blockstore, + .. + } = setup(); + + let slot3_hash = bank_forks.read().unwrap().get(3).unwrap().hash(); + assert_eq!( + heaviest_subtree_fork_choice.best_overall_slot(), + (3, slot3_hash) + ); + let root = 0; + let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); + let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + + // Mark 3 as only epoch slots frozen with a different hash than our + // locally replayed `slot3_hash`.
This should not duplicate confirm the slot, + // but should add the epoch slots frozen hash to the repair set + let mismatched_hash = Hash::new_unique(); + let mut expected_is_duplicate_confirmed = false; + let epoch_slots_frozen_state = EpochSlotsFrozenState::new_from_state( + 3, + mismatched_hash, + &gossip_duplicate_confirmed_slots, + &mut heaviest_subtree_fork_choice, + || progress.is_dead(3).unwrap_or(false), + || Some(slot3_hash), + ); + let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) = + unbounded(); + check_slot_agrees_with_cluster( + 3, + root, + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut heaviest_subtree_fork_choice, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + ); + assert_eq!(*duplicate_slots_to_repair.get(&3).unwrap(), mismatched_hash); + verify_all_slots_duplicate_confirmed( + &bank_forks, + &heaviest_subtree_fork_choice, + 3, + expected_is_duplicate_confirmed, + ); + + // Mark our version of slot 3 as duplicate confirmed with a hash different than + // the epoch slots frozen hash above. 
Should duplicate confirm all descendants of + // 3 and remove the mismatched hash from `duplicate_slots_to_repair`, since we + // have the right version now, no need to repair + gossip_duplicate_confirmed_slots.insert(3, slot3_hash); + expected_is_duplicate_confirmed = true; + let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state( + slot3_hash, + || progress.is_dead(3).unwrap_or(false), + || Some(slot3_hash), + ); + check_slot_agrees_with_cluster( + 3, + root, + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut heaviest_subtree_fork_choice, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), + ); + assert!(duplicate_slots_to_repair.is_empty()); + assert_eq!(*epoch_slots_frozen_slots.get(&3).unwrap(), mismatched_hash); + verify_all_slots_duplicate_confirmed( + &bank_forks, + &heaviest_subtree_fork_choice, + 3, + expected_is_duplicate_confirmed, + ); assert_eq!( heaviest_subtree_fork_choice.best_overall_slot(), (3, slot3_hash) diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index a280912acc..d90352b5ae 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -317,7 +317,7 @@ impl ReplayStage { vote_tracker: Arc, cluster_slots: Arc, retransmit_slots_sender: RetransmitSlotsSender, - _duplicate_slots_reset_receiver: DuplicateSlotsResetReceiver, + epoch_slots_frozen_receiver: DuplicateSlotsResetReceiver, replay_vote_sender: ReplayVoteSender, gossip_duplicate_confirmed_slots_receiver: GossipDuplicateConfirmedSlotsReceiver, gossip_verified_vote_hash_receiver: GossipVerifiedVoteHashReceiver, @@ -374,7 +374,8 @@ impl ReplayStage { let mut replay_timing = ReplayTiming::default(); let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let mut gossip_duplicate_confirmed_slots: GossipDuplicateConfirmedSlots = GossipDuplicateConfirmedSlots::default(); - let mut duplicate_slots_to_repair = 
HashSet::new(); + let mut epoch_slots_frozen_slots: EpochSlotsFrozenSlots = EpochSlotsFrozenSlots::default(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); let mut unfrozen_gossip_verified_vote_hashes: UnfrozenGossipVerifiedVoteHashes = UnfrozenGossipVerifiedVoteHashes::default(); let mut latest_validator_votes_for_frozen_banks: LatestValidatorVotesForFrozenBanks = LatestValidatorVotesForFrozenBanks::default(); let mut voted_signatures = Vec::new(); @@ -421,6 +422,7 @@ impl ReplayStage { &rpc_subscriptions, &mut duplicate_slots_tracker, &gossip_duplicate_confirmed_slots, + &mut epoch_slots_frozen_slots, &mut unfrozen_gossip_verified_vote_hashes, &mut latest_validator_votes_for_frozen_banks, &cluster_slots_update_sender, @@ -432,6 +434,24 @@ impl ReplayStage { let forks_root = bank_forks.read().unwrap().root(); + // Reset any dead slots that have been frozen by a sufficient portion of + // the network. Signalled by repair_service. + let mut purge_dead_slots_time = Measure::start("purge_dead_slots"); + Self::process_epoch_slots_frozen_dead_slots( + &my_pubkey, + &blockstore, + &epoch_slots_frozen_receiver, + &mut duplicate_slots_tracker, + &gossip_duplicate_confirmed_slots, + &mut epoch_slots_frozen_slots, + &mut progress, + &mut heaviest_subtree_fork_choice, + &bank_forks, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender + ); + purge_dead_slots_time.stop(); + // Check for any newly confirmed slots detected from gossip. 
let mut process_gossip_duplicate_confirmed_slots_time = Measure::start("process_gossip_duplicate_confirmed_slots"); Self::process_gossip_duplicate_confirmed_slots( @@ -439,6 +459,7 @@ impl ReplayStage { &blockstore, &mut duplicate_slots_tracker, &mut gossip_duplicate_confirmed_slots, + &mut epoch_slots_frozen_slots, &bank_forks, &mut progress, &mut heaviest_subtree_fork_choice, @@ -470,6 +491,7 @@ impl ReplayStage { &duplicate_slots_receiver, &mut duplicate_slots_tracker, &gossip_duplicate_confirmed_slots, + &mut epoch_slots_frozen_slots, &bank_forks, &mut progress, &mut heaviest_subtree_fork_choice, @@ -516,7 +538,7 @@ impl ReplayStage { &bank_forks, ); - Self::mark_slots_confirmed(&confirmed_forks, &blockstore, &bank_forks, &mut progress, &mut duplicate_slots_tracker, &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender); + Self::mark_slots_confirmed(&confirmed_forks, &blockstore, &bank_forks, &mut progress, &mut duplicate_slots_tracker, &mut heaviest_subtree_fork_choice, &mut epoch_slots_frozen_slots, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender); } compute_slot_stats_time.stop(); @@ -618,6 +640,7 @@ impl ReplayStage { &mut has_new_vote_been_rooted, &mut replay_timing, &voting_sender, + &mut epoch_slots_frozen_slots, ); }; voting_time.stop(); @@ -875,7 +898,7 @@ impl ReplayStage { // TODO: handle if alternate version of descendant also got confirmed after ancestor was // confirmed, what happens then? Should probably keep track of purged list and skip things // in `duplicate_slots_to_repair` that have already been purged. Add test. - duplicate_slots_to_repair.retain(|(duplicate_slot, correct_hash)| { + duplicate_slots_to_repair.retain(|duplicate_slot, correct_hash| { // Should not purge duplicate slots if there is currently a poh bank building // on top of that slot, as BankingStage might still be referencing/touching that state // concurrently. 
@@ -934,6 +957,10 @@ impl ReplayStage { bank_forks, blockstore, ); + warn!( + "Notifying repair service to repair duplicate slot: {}", + *duplicate_slot, + ); true // TODO: Send signal to repair to repair the correct version of // `duplicate_slot` with hash == `correct_hash` @@ -952,6 +979,58 @@ impl ReplayStage { }); } + #[allow(clippy::too_many_arguments)] + fn process_epoch_slots_frozen_dead_slots( + pubkey: &Pubkey, + blockstore: &Blockstore, + epoch_slots_frozen_receiver: &DuplicateSlotsResetReceiver, + duplicate_slots_tracker: &mut DuplicateSlotsTracker, + gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, + progress: &mut ProgressMap, + fork_choice: &mut HeaviestSubtreeForkChoice, + bank_forks: &RwLock, + duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, + ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + ) { + let root = bank_forks.read().unwrap().root(); + for maybe_purgeable_duplicate_slots in epoch_slots_frozen_receiver.try_iter() { + warn!( + "{} ReplayStage notified of epoch slots duplicate frozen dead slots: {:?}", + pubkey, maybe_purgeable_duplicate_slots + ); + for (epoch_slots_frozen_slot, epoch_slots_frozen_hash) in + maybe_purgeable_duplicate_slots.into_iter() + { + let epoch_slots_frozen_state = EpochSlotsFrozenState::new_from_state( + epoch_slots_frozen_slot, + epoch_slots_frozen_hash, + gossip_duplicate_confirmed_slots, + fork_choice, + || progress.is_dead(epoch_slots_frozen_slot).unwrap_or(false), + || { + bank_forks + .read() + .unwrap() + .get(epoch_slots_frozen_slot) + .map(|b| b.hash()) + }, + ); + check_slot_agrees_with_cluster( + epoch_slots_frozen_slot, + root, + blockstore, + duplicate_slots_tracker, + epoch_slots_frozen_slots, + fork_choice, + duplicate_slots_to_repair, + ancestor_hashes_replay_update_sender, + SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), + ); + } + } + } + fn purge_unconfirmed_duplicate_slot( 
duplicate_slot: Slot, ancestors: &mut HashMap>, @@ -1077,11 +1156,13 @@ impl ReplayStage { // optimistic and in the future, duplicate slot confirmations on the exact // single slots and does not account for votes on their descendants. Used solely // for duplicate slot recovery. + #[allow(clippy::too_many_arguments)] fn process_gossip_duplicate_confirmed_slots( gossip_duplicate_confirmed_slots_receiver: &GossipDuplicateConfirmedSlotsReceiver, blockstore: &Blockstore, duplicate_slots_tracker: &mut DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &mut GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, bank_forks: &RwLock, progress: &mut ProgressMap, fork_choice: &mut HeaviestSubtreeForkChoice, @@ -1111,6 +1192,7 @@ impl ReplayStage { root, blockstore, duplicate_slots_tracker, + epoch_slots_frozen_slots, fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, @@ -1140,11 +1222,13 @@ impl ReplayStage { } // Checks for and handle forks with duplicate slots. 
+ #[allow(clippy::too_many_arguments)] fn process_duplicate_slots( blockstore: &Blockstore, duplicate_slots_receiver: &DuplicateSlotReceiver, duplicate_slots_tracker: &mut DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, bank_forks: &RwLock, progress: &mut ProgressMap, fork_choice: &mut HeaviestSubtreeForkChoice, @@ -1177,6 +1261,7 @@ impl ReplayStage { root_slot, blockstore, duplicate_slots_tracker, + epoch_slots_frozen_slots, fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, @@ -1425,6 +1510,7 @@ impl ReplayStage { rpc_subscriptions: &Arc, duplicate_slots_tracker: &mut DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, progress: &mut ProgressMap, heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, @@ -1468,12 +1554,14 @@ impl ReplayStage { duplicate_slots_tracker, gossip_duplicate_confirmed_slots, heaviest_subtree_fork_choice, + epoch_slots_frozen_slots, ); check_slot_agrees_with_cluster( slot, root, blockstore, duplicate_slots_tracker, + epoch_slots_frozen_slots, heaviest_subtree_fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, @@ -1507,6 +1595,7 @@ impl ReplayStage { has_new_vote_been_rooted: &mut bool, replay_timing: &mut ReplayTiming, voting_sender: &Sender, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, ) { if bank.is_empty() { inc_new_counter_info!("replay_stage-voted_empty_bank", 1); @@ -1556,6 +1645,7 @@ impl ReplayStage { unfrozen_gossip_verified_vote_hashes, has_new_vote_been_rooted, vote_signatures, + epoch_slots_frozen_slots, ); rpc_subscriptions.notify_roots(rooted_slots); if let Some(sender) = bank_notification_sender { @@ -1874,6 +1964,7 @@ impl ReplayStage { rpc_subscriptions: &Arc, duplicate_slots_tracker: &mut 
DuplicateSlotsTracker, gossip_duplicate_confirmed_slots: &GossipDuplicateConfirmedSlots, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, unfrozen_gossip_verified_vote_hashes: &mut UnfrozenGossipVerifiedVoteHashes, latest_validator_votes_for_frozen_banks: &mut LatestValidatorVotesForFrozenBanks, cluster_slots_update_sender: &ClusterSlotsUpdateSender, @@ -1943,6 +2034,7 @@ impl ReplayStage { rpc_subscriptions, duplicate_slots_tracker, gossip_duplicate_confirmed_slots, + epoch_slots_frozen_slots, progress, heaviest_subtree_fork_choice, duplicate_slots_to_repair, @@ -1993,12 +2085,14 @@ impl ReplayStage { duplicate_slots_tracker, gossip_duplicate_confirmed_slots, heaviest_subtree_fork_choice, + epoch_slots_frozen_slots, ); check_slot_agrees_with_cluster( bank.slot(), bank_forks.read().unwrap().root(), blockstore, duplicate_slots_tracker, + epoch_slots_frozen_slots, heaviest_subtree_fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, @@ -2519,6 +2613,7 @@ impl ReplayStage { progress: &mut ProgressMap, duplicate_slots_tracker: &mut DuplicateSlotsTracker, fork_choice: &mut HeaviestSubtreeForkChoice, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, ) { @@ -2544,6 +2639,7 @@ impl ReplayStage { root_slot, blockstore, duplicate_slots_tracker, + epoch_slots_frozen_slots, fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, @@ -2600,6 +2696,7 @@ impl ReplayStage { unfrozen_gossip_verified_vote_hashes: &mut UnfrozenGossipVerifiedVoteHashes, has_new_vote_been_rooted: &mut bool, voted_signatures: &mut Vec, + epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, ) { bank_forks.write().unwrap().set_root( new_root, @@ -2630,6 +2727,9 @@ impl ReplayStage { std::mem::swap(gossip_duplicate_confirmed_slots, &mut slots_ge_root); unfrozen_gossip_verified_vote_hashes.set_root(new_root); + let mut 
slots_ge_root = epoch_slots_frozen_slots.split_off(&new_root); + // epoch_slots_frozen_slots now only contains entries >= `new_root` + std::mem::swap(epoch_slots_frozen_slots, &mut slots_ge_root); } fn generate_new_bank_forks( @@ -3066,6 +3166,10 @@ pub mod tests { .map(|s| (s, HashMap::new())) .collect(), }; + let mut epoch_slots_frozen_slots: EpochSlotsFrozenSlots = vec![root - 1, root, root + 1] + .into_iter() + .map(|slot| (slot, Hash::default())) + .collect(); ReplayStage::handle_new_root( root, &bank_forks, @@ -3078,6 +3182,7 @@ pub mod tests { &mut unfrozen_gossip_verified_vote_hashes, &mut true, &mut Vec::new(), + &mut epoch_slots_frozen_slots, ); assert_eq!(bank_forks.read().unwrap().root(), root); assert_eq!(progress.len(), 1); @@ -3102,6 +3207,13 @@ pub mod tests { .collect::>(), vec![root, root + 1] ); + assert_eq!( + epoch_slots_frozen_slots + .into_iter() + .map(|(slot, _hash)| slot) + .collect::>(), + vec![root, root + 1] + ); } #[test] @@ -3149,6 +3261,7 @@ pub mod tests { &mut UnfrozenGossipVerifiedVoteHashes::default(), &mut true, &mut Vec::new(), + &mut EpochSlotsFrozenSlots::default(), ); assert_eq!(bank_forks.read().unwrap().root(), root); assert!(bank_forks.read().unwrap().get(confirmed_root).is_some()); @@ -3434,7 +3547,8 @@ pub mod tests { err, &rpc_subscriptions, &mut DuplicateSlotsTracker::default(), - &GossipDuplicateConfirmedSlots::default(), + &GossipDuplicateConfirmedSlots::new(), + &mut EpochSlotsFrozenSlots::default(), &mut progress, &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), @@ -4867,6 +4981,7 @@ pub mod tests { blockstore.store_duplicate_slot(4, vec![], vec![]).unwrap(); let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let bank4_hash = bank_forks.read().unwrap().bank_hash(4).unwrap(); assert_ne!(bank4_hash, Hash::default()); let 
duplicate_state = DuplicateState::new_from_state( @@ -4883,6 +4998,7 @@ pub mod tests { bank_forks.read().unwrap().root(), &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut vote_simulator.heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -4915,6 +5031,7 @@ pub mod tests { bank_forks.read().unwrap().root(), &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut vote_simulator.heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, @@ -4936,7 +5053,7 @@ pub mod tests { // If slot 4 is marked as confirmed, then this confirms slot 2 and 4, and // then slot 4 is now the heaviest bank again - let mut duplicate_slots_to_repair = HashSet::new(); + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); gossip_duplicate_confirmed_slots.insert(4, bank4_hash); let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state( bank4_hash, @@ -4948,6 +5065,7 @@ pub mod tests { bank_forks.read().unwrap().root(), &blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, &mut vote_simulator.heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, @@ -4996,8 +5114,9 @@ pub mod tests { // Insert different versions of both 1 and 2. 
Both slots 1 and 2 should // then be purged let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); - duplicate_slots_to_repair.insert((1, Hash::new_unique())); - duplicate_slots_to_repair.insert((2, Hash::new_unique())); + duplicate_slots_to_repair.insert(1, Hash::new_unique()); + duplicate_slots_to_repair.insert(2, Hash::new_unique()); + ReplayStage::dump_then_repair_correct_slots( &mut duplicate_slots_to_repair, &mut ancestors, @@ -5079,6 +5198,7 @@ pub mod tests { gossip_duplicate_confirmed_slots.insert(2, duplicate_confirmed_bank2_hash); let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); // Mark fork choice branch as invalid so select forks below doesn't panic // on a nonexistent `heaviest_bank_on_same_fork` after we dump the duplciate fork. @@ -5094,12 +5214,16 @@ pub mod tests { bank_forks.read().unwrap().root(), blockstore, &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); - assert!(duplicate_slots_to_repair.contains(&(2, duplicate_confirmed_bank2_hash))); + assert_eq!( + *duplicate_slots_to_repair.get(&2).unwrap(), + duplicate_confirmed_bank2_hash + ); let mut ancestors = bank_forks.read().unwrap().ancestors(); let mut descendants = bank_forks.read().unwrap().descendants().clone(); let old_descendants_of_2 = descendants.get(&2).unwrap().clone(); diff --git a/core/src/vote_simulator.rs b/core/src/vote_simulator.rs index 2b1f46b96d..b03fd5ec78 100644 --- a/core/src/vote_simulator.rs +++ b/core/src/vote_simulator.rs @@ -1,6 +1,8 @@ use crate::{ cluster_info_vote_listener::VoteTracker, - cluster_slot_state_verifier::{DuplicateSlotsTracker, GossipDuplicateConfirmedSlots}, + cluster_slot_state_verifier::{ + 
DuplicateSlotsTracker, EpochSlotsFrozenSlots, GossipDuplicateConfirmedSlots, + }, cluster_slots::ClusterSlots, consensus::Tower, fork_choice::SelectVoteAndResetForkResult, @@ -212,6 +214,7 @@ impl VoteSimulator { &mut UnfrozenGossipVerifiedVoteHashes::default(), &mut true, &mut Vec::new(), + &mut EpochSlotsFrozenSlots::default(), ) } diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 31a6133baf..4830da9459 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -2008,8 +2008,13 @@ fn test_snapshots_restart_validity() { #[allow(unused_attributes)] #[ignore] fn test_fail_entry_verification_leader() { - let (cluster, _) = - test_faulty_node(BroadcastStageType::FailEntryVerification, vec![60, 50, 60]); + let leader_stake = (DUPLICATE_THRESHOLD * 100.0) as u64 + 1; + let validator_stake1 = (100 - leader_stake) / 2; + let validator_stake2 = 100 - leader_stake - validator_stake1; + let (cluster, _) = test_faulty_node( + BroadcastStageType::FailEntryVerification, + vec![leader_stake, validator_stake1, validator_stake2], + ); cluster.check_for_new_roots( 16, "test_fail_entry_verification_leader",