Introduce AncestorHashesService (#18812)
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
//! The `repair_service` module implements the tools necessary to generate a thread which
|
||||
//! regularly finds missing shreds in the ledger and sends repair requests for those shreds
|
||||
use crate::{
|
||||
ancestor_hashes_service::AncestorHashesService,
|
||||
cluster_info_vote_listener::VerifiedVoteReceiver,
|
||||
cluster_slots::ClusterSlots,
|
||||
duplicate_repair_status::DuplicateSlotRepairStatus,
|
||||
@ -33,8 +34,8 @@ use std::{
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
pub type DuplicateSlotsResetSender = CrossbeamSender<Slot>;
|
||||
pub type DuplicateSlotsResetReceiver = CrossbeamReceiver<Slot>;
|
||||
pub type DuplicateSlotsResetSender = CrossbeamSender<Vec<(Slot, Hash)>>;
|
||||
pub type DuplicateSlotsResetReceiver = CrossbeamReceiver<Vec<(Slot, Hash)>>;
|
||||
pub type ConfirmedSlotsSender = CrossbeamSender<Vec<Slot>>;
|
||||
pub type ConfirmedSlotsReceiver = CrossbeamReceiver<Vec<Slot>>;
|
||||
pub type OutstandingShredRepairs = OutstandingRequests<ShredRepairType>;
|
||||
@ -46,6 +47,12 @@ pub struct SlotRepairs {
|
||||
pubkey_repairs: HashMap<Pubkey, u64>,
|
||||
}
|
||||
|
||||
impl SlotRepairs {
|
||||
pub fn pubkey_repairs(&self) -> &HashMap<Pubkey, u64> {
|
||||
&self.pubkey_repairs
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct RepairStatsGroup {
|
||||
pub count: u64,
|
||||
@ -111,8 +118,11 @@ pub const MAX_DUPLICATE_WAIT_MS: usize = 10_000;
|
||||
pub const REPAIR_MS: u64 = 100;
|
||||
pub const MAX_ORPHANS: usize = 5;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RepairInfo {
|
||||
pub bank_forks: Arc<RwLock<BankForks>>,
|
||||
pub cluster_info: Arc<ClusterInfo>,
|
||||
pub cluster_slots: Arc<ClusterSlots>,
|
||||
pub epoch_schedule: EpochSchedule,
|
||||
pub duplicate_slots_reset_sender: DuplicateSlotsResetSender,
|
||||
pub repair_validators: Option<HashSet<Pubkey>>,
|
||||
@ -134,6 +144,7 @@ impl Default for RepairSlotRange {
|
||||
|
||||
pub struct RepairService {
|
||||
t_repair: JoinHandle<()>,
|
||||
ancestor_hashes_service: AncestorHashesService,
|
||||
}
|
||||
|
||||
impl RepairService {
|
||||
@ -141,44 +152,54 @@ impl RepairService {
|
||||
blockstore: Arc<Blockstore>,
|
||||
exit: Arc<AtomicBool>,
|
||||
repair_socket: Arc<UdpSocket>,
|
||||
cluster_info: Arc<ClusterInfo>,
|
||||
repair_info: RepairInfo,
|
||||
cluster_slots: Arc<ClusterSlots>,
|
||||
verified_vote_receiver: VerifiedVoteReceiver,
|
||||
outstanding_requests: Arc<RwLock<OutstandingShredRepairs>>,
|
||||
) -> Self {
|
||||
let t_repair = Builder::new()
|
||||
.name("solana-repair-service".to_string())
|
||||
.spawn(move || {
|
||||
Self::run(
|
||||
&blockstore,
|
||||
&exit,
|
||||
&repair_socket,
|
||||
cluster_info,
|
||||
repair_info,
|
||||
&cluster_slots,
|
||||
verified_vote_receiver,
|
||||
&outstanding_requests,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
let t_repair = {
|
||||
let blockstore = blockstore.clone();
|
||||
let exit = exit.clone();
|
||||
let repair_info = repair_info.clone();
|
||||
Builder::new()
|
||||
.name("solana-repair-service".to_string())
|
||||
.spawn(move || {
|
||||
Self::run(
|
||||
&blockstore,
|
||||
&exit,
|
||||
&repair_socket,
|
||||
repair_info,
|
||||
verified_vote_receiver,
|
||||
&outstanding_requests,
|
||||
)
|
||||
})
|
||||
.unwrap()
|
||||
};
|
||||
|
||||
RepairService { t_repair }
|
||||
let ancestor_hashes_request_socket = Arc::new(UdpSocket::bind("0.0.0.0:0").unwrap());
|
||||
let ancestor_hashes_service = AncestorHashesService::new(
|
||||
exit,
|
||||
blockstore,
|
||||
ancestor_hashes_request_socket,
|
||||
repair_info,
|
||||
);
|
||||
|
||||
RepairService {
|
||||
t_repair,
|
||||
ancestor_hashes_service,
|
||||
}
|
||||
}
|
||||
|
||||
fn run(
|
||||
blockstore: &Blockstore,
|
||||
exit: &AtomicBool,
|
||||
repair_socket: &UdpSocket,
|
||||
cluster_info: Arc<ClusterInfo>,
|
||||
repair_info: RepairInfo,
|
||||
cluster_slots: &ClusterSlots,
|
||||
verified_vote_receiver: VerifiedVoteReceiver,
|
||||
outstanding_requests: &RwLock<OutstandingShredRepairs>,
|
||||
) {
|
||||
let mut repair_weight = RepairWeight::new(repair_info.bank_forks.read().unwrap().root());
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone());
|
||||
let id = cluster_info.id();
|
||||
let serve_repair = ServeRepair::new(repair_info.cluster_info.clone());
|
||||
let id = repair_info.cluster_info.id();
|
||||
let mut repair_stats = RepairStats::default();
|
||||
let mut repair_timing = RepairTiming::default();
|
||||
let mut last_stats = Instant::now();
|
||||
@ -243,7 +264,7 @@ impl RepairService {
|
||||
let mut outstanding_requests = outstanding_requests.write().unwrap();
|
||||
repairs.into_iter().for_each(|repair_request| {
|
||||
if let Ok((to, req)) = serve_repair.repair_request(
|
||||
cluster_slots,
|
||||
&repair_info.cluster_slots,
|
||||
repair_request,
|
||||
&mut peers_cache,
|
||||
&mut repair_stats,
|
||||
@ -389,12 +410,12 @@ impl RepairService {
|
||||
repairs: &mut Vec<ShredRepairType>,
|
||||
max_repairs: usize,
|
||||
slot: Slot,
|
||||
ancestor_hashes_request_statuses: &impl Contains<'a, Slot>,
|
||||
duplicate_slot_repair_statuses: &impl Contains<'a, Slot>,
|
||||
) {
|
||||
let mut pending_slots = vec![slot];
|
||||
while repairs.len() < max_repairs && !pending_slots.is_empty() {
|
||||
let slot = pending_slots.pop().unwrap();
|
||||
if ancestor_hashes_request_statuses.contains(&slot) {
|
||||
if duplicate_slot_repair_statuses.contains(&slot) {
|
||||
// These are repaired through a different path
|
||||
continue;
|
||||
}
|
||||
@ -554,7 +575,8 @@ impl RepairService {
|
||||
}
|
||||
|
||||
pub fn join(self) -> thread::Result<()> {
|
||||
self.t_repair.join()
|
||||
self.t_repair.join()?;
|
||||
self.ancestor_hashes_service.join()
|
||||
}
|
||||
}
|
||||
|
||||
@ -875,7 +897,7 @@ mod test {
|
||||
let cluster_slots = ClusterSlots::default();
|
||||
let serve_repair =
|
||||
ServeRepair::new(Arc::new(new_test_cluster_info(Node::new_localhost().info)));
|
||||
let mut ancestor_hashes_request_statuses = HashMap::new();
|
||||
let mut duplicate_slot_repair_statuses = HashMap::new();
|
||||
let dead_slot = 9;
|
||||
let receive_socket = &UdpSocket::bind("0.0.0.0:0").unwrap();
|
||||
let duplicate_status = DuplicateSlotRepairStatus {
|
||||
@ -891,12 +913,12 @@ mod test {
|
||||
.insert_shreds(shreds[..shreds.len() - 1].to_vec(), None, false)
|
||||
.unwrap();
|
||||
|
||||
ancestor_hashes_request_statuses.insert(dead_slot, duplicate_status);
|
||||
duplicate_slot_repair_statuses.insert(dead_slot, duplicate_status);
|
||||
|
||||
// There is no repair_addr, so should not get filtered because the timeout
|
||||
// `std::u64::MAX` has not expired
|
||||
RepairService::generate_and_send_duplicate_repairs(
|
||||
&mut ancestor_hashes_request_statuses,
|
||||
&mut duplicate_slot_repair_statuses,
|
||||
&cluster_slots,
|
||||
&blockstore,
|
||||
&serve_repair,
|
||||
@ -905,23 +927,23 @@ mod test {
|
||||
&None,
|
||||
&RwLock::new(OutstandingRequests::default()),
|
||||
);
|
||||
assert!(ancestor_hashes_request_statuses
|
||||
assert!(duplicate_slot_repair_statuses
|
||||
.get(&dead_slot)
|
||||
.unwrap()
|
||||
.repair_pubkey_and_addr
|
||||
.is_none());
|
||||
assert!(ancestor_hashes_request_statuses.get(&dead_slot).is_some());
|
||||
assert!(duplicate_slot_repair_statuses.get(&dead_slot).is_some());
|
||||
|
||||
// Give the slot a repair address
|
||||
ancestor_hashes_request_statuses
|
||||
duplicate_slot_repair_statuses
|
||||
.get_mut(&dead_slot)
|
||||
.unwrap()
|
||||
.repair_pubkey_and_addr =
|
||||
Some((Pubkey::default(), receive_socket.local_addr().unwrap()));
|
||||
|
||||
// Slot is not yet full, should not get filtered from `ancestor_hashes_request_statuses`
|
||||
// Slot is not yet full, should not get filtered from `duplicate_slot_repair_statuses`
|
||||
RepairService::generate_and_send_duplicate_repairs(
|
||||
&mut ancestor_hashes_request_statuses,
|
||||
&mut duplicate_slot_repair_statuses,
|
||||
&cluster_slots,
|
||||
&blockstore,
|
||||
&serve_repair,
|
||||
@ -930,16 +952,16 @@ mod test {
|
||||
&None,
|
||||
&RwLock::new(OutstandingRequests::default()),
|
||||
);
|
||||
assert_eq!(ancestor_hashes_request_statuses.len(), 1);
|
||||
assert!(ancestor_hashes_request_statuses.get(&dead_slot).is_some());
|
||||
assert_eq!(duplicate_slot_repair_statuses.len(), 1);
|
||||
assert!(duplicate_slot_repair_statuses.get(&dead_slot).is_some());
|
||||
|
||||
// Insert rest of shreds. Slot is full, should get filtered from
|
||||
// `ancestor_hashes_request_statuses`
|
||||
// `duplicate_slot_repair_statuses`
|
||||
blockstore
|
||||
.insert_shreds(vec![shreds.pop().unwrap()], None, false)
|
||||
.unwrap();
|
||||
RepairService::generate_and_send_duplicate_repairs(
|
||||
&mut ancestor_hashes_request_statuses,
|
||||
&mut duplicate_slot_repair_statuses,
|
||||
&cluster_slots,
|
||||
&blockstore,
|
||||
&serve_repair,
|
||||
@ -948,7 +970,7 @@ mod test {
|
||||
&None,
|
||||
&RwLock::new(OutstandingRequests::default()),
|
||||
);
|
||||
assert!(ancestor_hashes_request_statuses.is_empty());
|
||||
assert!(duplicate_slot_repair_statuses.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Reference in New Issue
Block a user