Upgrade Repair to be more intelligent and aggressive (#6789)

* Upgrade Repair to be more intelligent and aggressive

* Fix u64 casts

* Fix missing bracket

* Add 1 second delay to test to allow repair to kick in
This commit is contained in:
Sagar Dhawan
2019-11-07 11:08:09 -08:00
committed by GitHub
parent a9c4cd6cbe
commit 67d1e2903c
5 changed files with 104 additions and 36 deletions

View File

@ -834,7 +834,7 @@ impl ClusterInfo {
}
pub fn map_repair_request(&self, repair_request: &RepairType) -> Result<Vec<u8>> {
match repair_request {
RepairType::Blob(slot, blob_index) => {
RepairType::Shred(slot, blob_index) => {
datapoint_debug!(
"cluster_info-repair",
("repair-slot", *slot, i64),
@ -1896,7 +1896,7 @@ mod tests {
fn window_index_request() {
let me = ContactInfo::new_localhost(&Pubkey::new_rand(), timestamp());
let mut cluster_info = ClusterInfo::new_with_invalid_keypair(me);
let rv = cluster_info.repair_request(&RepairType::Blob(0, 0));
let rv = cluster_info.repair_request(&RepairType::Shred(0, 0));
assert_matches!(rv, Err(Error::ClusterInfoError(ClusterInfoError::NoPeers)));
let gossip_addr = socketaddr!([127, 0, 0, 1], 1234);
@ -1915,7 +1915,7 @@ mod tests {
);
cluster_info.insert_info(nxt.clone());
let rv = cluster_info
.repair_request(&RepairType::Blob(0, 0))
.repair_request(&RepairType::Shred(0, 0))
.unwrap();
assert_eq!(nxt.gossip, gossip_addr);
assert_eq!(rv.0, nxt.gossip);
@ -1940,7 +1940,7 @@ mod tests {
while !one || !two {
//this randomly picks an option, so eventually it should pick both
let rv = cluster_info
.repair_request(&RepairType::Blob(0, 0))
.repair_request(&RepairType::Shred(0, 0))
.unwrap();
if rv.0 == gossip_addr {
one = true;

View File

@ -20,8 +20,8 @@ use std::{
time::Duration,
};
pub const MAX_REPAIR_LENGTH: usize = 16;
pub const REPAIR_MS: u64 = 100;
pub const MAX_REPAIR_LENGTH: usize = 1024;
pub const REPAIR_MS: u64 = 50;
pub const MAX_ORPHANS: usize = 5;
pub enum RepairStrategy {
@ -37,7 +37,7 @@ pub enum RepairStrategy {
pub enum RepairType {
Orphan(u64),
HighestBlob(u64, u64),
Blob(u64, u64),
Shred(u64, u64),
}
pub struct RepairSlotRange {
@ -254,13 +254,13 @@ impl RepairService {
} else {
let reqs = blocktree.find_missing_data_indexes(
slot,
slot_meta.first_shred_timestamp,
slot_meta.consumed,
slot_meta.received,
max_repairs,
);
reqs.into_iter()
.map(|i| RepairType::Blob(slot, i))
.map(|i| RepairType::Shred(slot, i))
.collect()
}
}
@ -480,12 +480,13 @@ mod test {
}
}
blocktree.insert_shreds(shreds_to_write, None).unwrap();
// sleep so that the holes are ready for repair
sleep(Duration::from_secs(1));
let expected: Vec<RepairType> = (0..num_slots)
.flat_map(|slot| {
missing_indexes_per_slot
.iter()
.map(move |blob_index| RepairType::Blob(slot as u64, *blob_index))
.map(move |blob_index| RepairType::Shred(slot as u64, *blob_index))
})
.collect();
@ -545,7 +546,8 @@ mod test {
slot_shreds.remove(0);
blocktree.insert_shreds(slot_shreds, None).unwrap();
}
// sleep to make slot eligible for repair
sleep(Duration::from_secs(1));
// Iterate through all possible combinations of start..end (inclusive on both
// sides of the range)
for start in 0..slots.len() {
@ -557,7 +559,7 @@ mod test {
..=repair_slot_range.end)
.map(|slot_index| {
if slots.contains(&(slot_index as u64)) {
RepairType::Blob(slot_index as u64, 0)
RepairType::Shred(slot_index as u64, 0)
} else {
RepairType::HighestBlob(slot_index as u64, 0)
}