Cluster has no way to know which slots are available (#8732)

automerge
This commit is contained in:
anatoly yakovenko
2020-03-11 21:31:50 -07:00
committed by GitHub
parent fe1c99c0cf
commit f64ab49307
9 changed files with 1026 additions and 782 deletions

View File

@ -2,6 +2,7 @@
//! regularly finds missing shreds in the ledger and sends repair requests for those shreds
use crate::{
cluster_info::ClusterInfo,
cluster_slots::ClusterSlots,
result::Result,
serve_repair::{RepairType, ServeRepair},
};
@ -9,13 +10,11 @@ use solana_ledger::{
bank_forks::BankForks,
blockstore::{Blockstore, CompletedSlotsReceiver, SlotMeta},
};
use solana_sdk::clock::DEFAULT_SLOTS_PER_EPOCH;
use solana_sdk::{clock::Slot, epoch_schedule::EpochSchedule, pubkey::Pubkey};
use std::{
collections::{BTreeSet, HashSet},
iter::Iterator,
net::SocketAddr,
net::UdpSocket,
ops::Bound::{Included, Unbounded},
sync::atomic::{AtomicBool, Ordering},
sync::{Arc, RwLock},
thread::sleep,
@ -27,9 +26,6 @@ pub const MAX_REPAIR_LENGTH: usize = 512;
pub const REPAIR_MS: u64 = 100;
pub const MAX_ORPHANS: usize = 5;
const MAX_COMPLETED_SLOT_CACHE_LEN: usize = 256;
const COMPLETED_SLOT_CACHE_FLUSH_TRIGGER: usize = 512;
pub enum RepairStrategy {
RepairRange(RepairSlotRange),
RepairAll {
@ -89,23 +85,10 @@ impl RepairService {
repair_strategy: RepairStrategy,
) {
let serve_repair = ServeRepair::new(cluster_info.clone());
let mut epoch_slots: BTreeSet<Slot> = BTreeSet::new();
let mut old_incomplete_slots: BTreeSet<Slot> = BTreeSet::new();
let id = cluster_info.read().unwrap().id();
if let RepairStrategy::RepairAll {
ref epoch_schedule, ..
} = repair_strategy
{
let current_root = blockstore.last_root();
Self::initialize_epoch_slots(
id,
blockstore,
&mut epoch_slots,
&old_incomplete_slots,
current_root,
epoch_schedule,
cluster_info,
);
let mut cluster_slots = ClusterSlots::default();
if let RepairStrategy::RepairAll { .. } = repair_strategy {
Self::initialize_lowest_slot(id, blockstore, cluster_info);
}
loop {
if exit.load(Ordering::Relaxed) {
@ -125,26 +108,28 @@ impl RepairService {
RepairStrategy::RepairAll {
ref completed_slots_receiver,
ref bank_forks,
..
} => {
let new_root = blockstore.last_root();
let lowest_slot = blockstore.lowest_slot();
Self::update_epoch_slots(
id,
Self::update_lowest_slot(&id, lowest_slot, &cluster_info);
Self::update_completed_slots(
&id,
new_root,
lowest_slot,
&mut epoch_slots,
&mut old_incomplete_slots,
&cluster_info,
&mut cluster_slots,
blockstore,
completed_slots_receiver,
&cluster_info,
);
cluster_slots.update(new_root, cluster_info, bank_forks);
Self::generate_repairs(blockstore, new_root, MAX_REPAIR_LENGTH)
}
}
};
if let Ok(repairs) = repairs {
let reqs: Vec<_> = repairs
let reqs: Vec<((SocketAddr, Vec<u8>), RepairType)> = repairs
.into_iter()
.filter_map(|repair_request| {
serve_repair
@ -272,150 +257,54 @@ impl RepairService {
}
}
fn get_completed_slots_past_root(
blockstore: &Blockstore,
slots_in_gossip: &mut BTreeSet<Slot>,
root: Slot,
epoch_schedule: &EpochSchedule,
) {
let last_confirmed_epoch = epoch_schedule.get_leader_schedule_epoch(root);
let last_epoch_slot = epoch_schedule.get_last_slot_in_epoch(last_confirmed_epoch);
let meta_iter = blockstore
.slot_meta_iterator(root + 1)
.expect("Couldn't get db iterator");
for (current_slot, meta) in meta_iter {
if current_slot > last_epoch_slot {
break;
}
if meta.is_full() {
slots_in_gossip.insert(current_slot);
}
}
}
fn initialize_epoch_slots(
fn initialize_lowest_slot(
id: Pubkey,
blockstore: &Blockstore,
slots_in_gossip: &mut BTreeSet<Slot>,
old_incomplete_slots: &BTreeSet<Slot>,
root: Slot,
epoch_schedule: &EpochSchedule,
cluster_info: &RwLock<ClusterInfo>,
) {
Self::get_completed_slots_past_root(blockstore, slots_in_gossip, root, epoch_schedule);
// Safe to set into gossip because by this time, the leader schedule cache should
// also be updated with the latest root (done in blockstore_processor) and thus
// will provide a schedule to window_service for any incoming shreds up to the
// last_confirmed_epoch.
cluster_info.write().unwrap().push_epoch_slots(
id,
root,
blockstore.lowest_slot(),
slots_in_gossip.clone(),
old_incomplete_slots,
);
cluster_info
.write()
.unwrap()
.push_lowest_slot(id, blockstore.lowest_slot());
}
// Update the gossiped structure used for the "Repairmen" repair protocol. See docs
// for details.
fn update_epoch_slots(
id: Pubkey,
latest_known_root: Slot,
lowest_slot: Slot,
completed_slot_cache: &mut BTreeSet<Slot>,
incomplete_slot_stash: &mut BTreeSet<Slot>,
cluster_info: &RwLock<ClusterInfo>,
fn update_completed_slots(
id: &Pubkey,
root: Slot,
cluster_slots: &mut ClusterSlots,
blockstore: &Blockstore,
completed_slots_receiver: &CompletedSlotsReceiver,
cluster_info: &RwLock<ClusterInfo>,
) {
let mut should_update = false;
while let Ok(completed_slots) = completed_slots_receiver.try_recv() {
for slot in completed_slots {
let last_slot_in_stash = *incomplete_slot_stash.iter().next_back().unwrap_or(&0);
let removed_from_stash = incomplete_slot_stash.remove(&slot);
// If the newly completed slot was not being tracked in stash, and is > last
// slot being tracked in stash, add it to cache. Also, update gossip
if !removed_from_stash && slot >= last_slot_in_stash {
should_update |= completed_slot_cache.insert(slot);
}
// If the slot was removed from stash, update gossip
should_update |= removed_from_stash;
}
let mine = cluster_slots.collect(id);
let mut slots: Vec<Slot> = vec![];
while let Ok(mut more) = completed_slots_receiver.try_recv() {
more.retain(|x| !mine.contains(x));
slots.append(&mut more);
}
if should_update {
if completed_slot_cache.len() >= COMPLETED_SLOT_CACHE_FLUSH_TRIGGER {
Self::stash_old_incomplete_slots(completed_slot_cache, incomplete_slot_stash);
let lowest_completed_slot_in_cache =
*completed_slot_cache.iter().next().unwrap_or(&0);
Self::prune_incomplete_slot_stash(
incomplete_slot_stash,
lowest_completed_slot_in_cache,
);
}
cluster_info.write().unwrap().push_epoch_slots(
id,
latest_known_root,
lowest_slot,
completed_slot_cache.clone(),
incomplete_slot_stash,
);
}
}
fn stash_old_incomplete_slots(cache: &mut BTreeSet<Slot>, stash: &mut BTreeSet<Slot>) {
if cache.len() > MAX_COMPLETED_SLOT_CACHE_LEN {
let mut prev = *cache.iter().next().expect("Expected to find some slot");
cache.remove(&prev);
while cache.len() >= MAX_COMPLETED_SLOT_CACHE_LEN {
let next = *cache.iter().next().expect("Expected to find some slot");
cache.remove(&next);
// Prev slot and next slot are not included in incomplete slot list.
(prev + 1..next).for_each(|slot| {
stash.insert(slot);
});
prev = next;
}
}
}
fn prune_incomplete_slot_stash(
stash: &mut BTreeSet<Slot>,
lowest_completed_slot_in_cache: Slot,
) {
if let Some(oldest_incomplete_slot) = stash.iter().next() {
// Prune old slots
// Prune in batches to reduce overhead. Pruning starts when oldest slot is 1.5 epochs
// earlier than the new root. But, we prune all the slots that are older than 1 epoch.
// So slots in a batch of half epoch are getting pruned
if oldest_incomplete_slot + DEFAULT_SLOTS_PER_EPOCH + DEFAULT_SLOTS_PER_EPOCH / 2
< lowest_completed_slot_in_cache
{
let oldest_slot_to_retain =
lowest_completed_slot_in_cache.saturating_sub(DEFAULT_SLOTS_PER_EPOCH);
*stash = stash
.range((Included(&oldest_slot_to_retain), Unbounded))
.cloned()
.collect();
}
}
}
#[allow(dead_code)]
fn find_incomplete_slots(blockstore: &Blockstore, root: Slot) -> HashSet<Slot> {
blockstore
.live_slots_iterator(root)
.filter_map(|(slot, slot_meta)| {
if !slot_meta.is_full() {
Some(slot)
} else {
None
.for_each(|(slot, slot_meta)| {
if slot_meta.is_full() && !mine.contains(&slot) {
slots.push(slot)
}
})
.collect()
});
slots.sort();
slots.dedup();
if !slots.is_empty() {
cluster_info.write().unwrap().push_epoch_slots(&slots);
}
}
fn update_lowest_slot(id: &Pubkey, lowest_slot: Slot, cluster_info: &RwLock<ClusterInfo>) {
cluster_info
.write()
.unwrap()
.push_lowest_slot(*id, lowest_slot);
}
pub fn join(self) -> thread::Result<()> {
@ -427,15 +316,11 @@ impl RepairService {
mod test {
use super::*;
use crate::cluster_info::Node;
use itertools::Itertools;
use rand::seq::SliceRandom;
use rand::{thread_rng, Rng};
use solana_ledger::blockstore::{
make_chaining_slot_entries, make_many_slot_entries, make_slot_entries,
};
use solana_ledger::shred::max_ticks_per_n_shreds;
use solana_ledger::{blockstore::Blockstore, get_tmp_ledger_path};
use std::thread::Builder;
#[test]
pub fn test_repair_orphan() {
@ -651,342 +536,19 @@ mod test {
}
#[test]
pub fn test_get_completed_slots_past_root() {
let blockstore_path = get_tmp_ledger_path!();
{
let blockstore = Blockstore::open(&blockstore_path).unwrap();
let num_entries_per_slot = 10;
let root = 10;
let fork1 = vec![5, 7, root, 15, 20, 21];
let fork1_shreds: Vec<_> = make_chaining_slot_entries(&fork1, num_entries_per_slot)
.into_iter()
.flat_map(|(shreds, _)| shreds)
.collect();
let fork2 = vec![8, 12];
let fork2_shreds = make_chaining_slot_entries(&fork2, num_entries_per_slot);
// Remove the last shred from each slot to make an incomplete slot
let fork2_incomplete_shreds: Vec<_> = fork2_shreds
.into_iter()
.flat_map(|(mut shreds, _)| {
shreds.pop();
shreds
})
.collect();
let mut full_slots = BTreeSet::new();
blockstore.insert_shreds(fork1_shreds, None, false).unwrap();
blockstore
.insert_shreds(fork2_incomplete_shreds, None, false)
.unwrap();
// Test that only slots > root from fork1 were included
let epoch_schedule = EpochSchedule::custom(32, 32, false);
RepairService::get_completed_slots_past_root(
&blockstore,
&mut full_slots,
root,
&epoch_schedule,
);
let mut expected: BTreeSet<_> = fork1.into_iter().filter(|x| *x > root).collect();
assert_eq!(full_slots, expected);
// Test that slots past the last confirmed epoch boundary don't get included
let last_epoch = epoch_schedule.get_leader_schedule_epoch(root);
let last_slot = epoch_schedule.get_last_slot_in_epoch(last_epoch);
let fork3 = vec![last_slot, last_slot + 1];
let fork3_shreds: Vec<_> = make_chaining_slot_entries(&fork3, num_entries_per_slot)
.into_iter()
.flat_map(|(shreds, _)| shreds)
.collect();
blockstore.insert_shreds(fork3_shreds, None, false).unwrap();
RepairService::get_completed_slots_past_root(
&blockstore,
&mut full_slots,
root,
&epoch_schedule,
);
expected.insert(last_slot);
assert_eq!(full_slots, expected);
}
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
}
#[test]
pub fn test_update_epoch_slots() {
let blockstore_path = get_tmp_ledger_path!();
{
// Create blockstore
let (blockstore, _, completed_slots_receiver) =
Blockstore::open_with_signal(&blockstore_path).unwrap();
let blockstore = Arc::new(blockstore);
let mut root = 0;
let num_slots = 100;
let entries_per_slot = 5;
let blockstore_ = blockstore.clone();
// Spin up thread to write to blockstore
let writer = Builder::new()
.name("writer".to_string())
.spawn(move || {
let slots: Vec<_> = (1..num_slots + 1).collect();
let mut shreds: Vec<_> = make_chaining_slot_entries(&slots, entries_per_slot)
.into_iter()
.flat_map(|(shreds, _)| shreds)
.collect();
shreds.shuffle(&mut thread_rng());
let mut i = 0;
let max_step = entries_per_slot * 4;
let repair_interval_ms = 10;
let mut rng = rand::thread_rng();
let num_shreds = shreds.len();
while i < num_shreds {
let step = rng.gen_range(1, max_step + 1) as usize;
let step = std::cmp::min(step, num_shreds - i);
let shreds_to_insert = shreds.drain(..step).collect_vec();
blockstore_
.insert_shreds(shreds_to_insert, None, false)
.unwrap();
sleep(Duration::from_millis(repair_interval_ms));
i += step;
}
})
.unwrap();
let mut completed_slots = BTreeSet::new();
let node_info = Node::new_localhost_with_pubkey(&Pubkey::default());
let cluster_info = RwLock::new(ClusterInfo::new_with_invalid_keypair(
node_info.info.clone(),
));
let mut old_incomplete_slots: BTreeSet<Slot> = BTreeSet::new();
while completed_slots.len() < num_slots as usize {
RepairService::update_epoch_slots(
Pubkey::default(),
root,
blockstore.lowest_slot(),
&mut completed_slots,
&mut old_incomplete_slots,
&cluster_info,
&completed_slots_receiver,
);
}
let mut expected: BTreeSet<_> = (1..num_slots + 1).collect();
assert_eq!(completed_slots, expected);
// Update with new root, should filter out the slots <= root
root = num_slots / 2;
let (shreds, _) = make_slot_entries(num_slots + 2, num_slots + 1, entries_per_slot);
blockstore.insert_shreds(shreds, None, false).unwrap();
RepairService::update_epoch_slots(
Pubkey::default(),
root,
0,
&mut completed_slots,
&mut old_incomplete_slots,
&cluster_info,
&completed_slots_receiver,
);
expected.insert(num_slots + 2);
assert_eq!(completed_slots, expected);
writer.join().unwrap();
}
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
}
#[test]
fn test_stash_old_incomplete_slots() {
let mut cache: BTreeSet<Slot> = BTreeSet::new();
let mut stash: BTreeSet<Slot> = BTreeSet::new();
// When cache is empty.
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
assert_eq!(stash.len(), 0);
// Insert some slots in cache ( < MAX_COMPLETED_SLOT_CACHE_LEN + 1)
cache.insert(101);
cache.insert(102);
cache.insert(104);
cache.insert(105);
// Not enough slots in cache. So stash should remain empty.
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
assert_eq!(stash.len(), 0);
assert_eq!(cache.len(), 4);
// Insert slots in cache ( = MAX_COMPLETED_SLOT_CACHE_LEN)
let mut cache: BTreeSet<Slot> = BTreeSet::new();
(0..MAX_COMPLETED_SLOT_CACHE_LEN as u64)
.into_iter()
.for_each(|slot| {
cache.insert(slot);
});
// Not enough slots in cache. So stash should remain empty.
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
assert_eq!(stash.len(), 0);
assert_eq!(cache.len(), MAX_COMPLETED_SLOT_CACHE_LEN);
// Insert 1 more to cross the threshold
cache.insert(MAX_COMPLETED_SLOT_CACHE_LEN as u64);
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
// Stash is still empty, as no missing slots
assert_eq!(stash.len(), 0);
// It removed some entries from cache
assert_eq!(cache.len(), MAX_COMPLETED_SLOT_CACHE_LEN - 1);
// Insert more slots to create a missing slot
let mut cache: BTreeSet<Slot> = BTreeSet::new();
cache.insert(0);
(2..=MAX_COMPLETED_SLOT_CACHE_LEN as u64 + 2)
.into_iter()
.for_each(|slot| {
cache.insert(slot);
});
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
// Stash is not empty
assert!(stash.contains(&1));
// It removed some entries from cache
assert_eq!(cache.len(), MAX_COMPLETED_SLOT_CACHE_LEN - 1);
// Test multiple missing slots at dispersed locations
let mut cache: BTreeSet<Slot> = BTreeSet::new();
(0..MAX_COMPLETED_SLOT_CACHE_LEN as u64 * 2)
.into_iter()
.for_each(|slot| {
cache.insert(slot);
});
cache.remove(&10);
cache.remove(&11);
cache.remove(&28);
cache.remove(&29);
cache.remove(&148);
cache.remove(&149);
cache.remove(&150);
cache.remove(&151);
RepairService::stash_old_incomplete_slots(&mut cache, &mut stash);
// Stash is not empty
assert!(stash.contains(&10));
assert!(stash.contains(&11));
assert!(stash.contains(&28));
assert!(stash.contains(&29));
assert!(stash.contains(&148));
assert!(stash.contains(&149));
assert!(stash.contains(&150));
assert!(stash.contains(&151));
assert!(!stash.contains(&147));
assert!(!stash.contains(&152));
// It removed some entries from cache
assert_eq!(cache.len(), MAX_COMPLETED_SLOT_CACHE_LEN - 1);
(MAX_COMPLETED_SLOT_CACHE_LEN + 1..MAX_COMPLETED_SLOT_CACHE_LEN * 2)
.into_iter()
.for_each(|slot| {
let slot: u64 = slot as u64;
assert!(cache.contains(&slot));
});
}
#[test]
fn test_prune_incomplete_slot_stash() {
// Prune empty stash
let mut stash: BTreeSet<Slot> = BTreeSet::new();
RepairService::prune_incomplete_slot_stash(&mut stash, 0);
assert!(stash.is_empty());
// Prune stash with slots < DEFAULT_SLOTS_PER_EPOCH
stash.insert(0);
stash.insert(10);
stash.insert(11);
stash.insert(50);
assert_eq!(stash.len(), 4);
RepairService::prune_incomplete_slot_stash(&mut stash, 100);
assert_eq!(stash.len(), 4);
// Prune stash with slots > DEFAULT_SLOTS_PER_EPOCH, but < 1.5 * DEFAULT_SLOTS_PER_EPOCH
stash.insert(DEFAULT_SLOTS_PER_EPOCH + 50);
assert_eq!(stash.len(), 5);
RepairService::prune_incomplete_slot_stash(&mut stash, DEFAULT_SLOTS_PER_EPOCH + 100);
assert_eq!(stash.len(), 5);
// Prune stash with slots > 1.5 * DEFAULT_SLOTS_PER_EPOCH
stash.insert(DEFAULT_SLOTS_PER_EPOCH + DEFAULT_SLOTS_PER_EPOCH / 2);
assert_eq!(stash.len(), 6);
RepairService::prune_incomplete_slot_stash(
&mut stash,
DEFAULT_SLOTS_PER_EPOCH + DEFAULT_SLOTS_PER_EPOCH / 2 + 1,
);
assert_eq!(stash.len(), 2);
}
#[test]
fn test_find_incomplete_slots() {
let blockstore_path = get_tmp_ledger_path!();
{
let blockstore = Blockstore::open(&blockstore_path).unwrap();
let num_entries_per_slot = 100;
let (mut shreds, _) = make_slot_entries(0, 0, num_entries_per_slot);
assert!(shreds.len() > 1);
let (shreds4, _) = make_slot_entries(4, 0, num_entries_per_slot);
shreds.extend(shreds4);
blockstore.insert_shreds(shreds, None, false).unwrap();
// Nothing is incomplete
assert!(RepairService::find_incomplete_slots(&blockstore, 0).is_empty());
// Insert a slot 5 that chains to an incomplete orphan slot 3
let (shreds5, _) = make_slot_entries(5, 3, num_entries_per_slot);
blockstore.insert_shreds(shreds5, None, false).unwrap();
assert_eq!(
RepairService::find_incomplete_slots(&blockstore, 0),
vec![3].into_iter().collect()
);
// Insert another incomplete orphan slot 2 that is the parent of slot 3.
// Both should be incomplete
let (shreds3, _) = make_slot_entries(3, 2, num_entries_per_slot);
blockstore
.insert_shreds(shreds3[1..].to_vec(), None, false)
.unwrap();
assert_eq!(
RepairService::find_incomplete_slots(&blockstore, 0),
vec![2, 3].into_iter().collect()
);
// Insert a incomplete slot 6 that chains to the root 0,
// should also be incomplete
let (shreds6, _) = make_slot_entries(6, 0, num_entries_per_slot);
blockstore
.insert_shreds(shreds6[1..].to_vec(), None, false)
.unwrap();
assert_eq!(
RepairService::find_incomplete_slots(&blockstore, 0),
vec![2, 3, 6].into_iter().collect()
);
// Complete slot 3, should no longer be marked incomplete
blockstore
.insert_shreds(shreds3[..].to_vec(), None, false)
.unwrap();
assert_eq!(
RepairService::find_incomplete_slots(&blockstore, 0),
vec![2, 6].into_iter().collect()
);
}
Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
pub fn test_update_lowest_slot() {
let node_info = Node::new_localhost_with_pubkey(&Pubkey::default());
let cluster_info = RwLock::new(ClusterInfo::new_with_invalid_keypair(
node_info.info.clone(),
));
RepairService::update_lowest_slot(&Pubkey::default(), 5, &cluster_info);
let lowest = cluster_info
.read()
.unwrap()
.get_lowest_slot_for_node(&Pubkey::default(), None)
.unwrap()
.0
.clone();
assert_eq!(lowest.lowest, 5);
}
}