Initialize and Update EpochSlots in RepairService (#4255)
* Initialize EpochSlots in RepairService
* Fix flaky test
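For orientation, the core bookkeeping this patch adds to update_epoch_slots can be sketched in isolation: drain newly completed slots from a channel, record the ones past the root, and only re-publish the set when something new was added. The snippet below is a minimal, self-contained illustration only; the plain mpsc channel, the bool return value, and the absence of a real ClusterInfo gossip push are simplifications for this sketch, not the actual RepairService API.

use std::collections::HashSet;
use std::sync::mpsc::{channel, Receiver};

// Sketch only: mirrors the update logic introduced in this patch without the Solana types.
fn update_epoch_slots(
    root: u64,
    slots_in_gossip: &mut HashSet<u64>,
    completed_slots_receiver: &Receiver<Vec<u64>>,
) -> bool {
    let mut should_update = false;
    while let Ok(completed_slots) = completed_slots_receiver.try_recv() {
        for slot in completed_slots {
            // A newly seen slot beyond the root means gossip needs a fresh push.
            if slot > root && slots_in_gossip.insert(slot) {
                should_update = true;
            }
        }
    }
    if should_update {
        // Keep only slots past the root before (hypothetically) pushing them to gossip.
        slots_in_gossip.retain(|s| *s > root);
    }
    should_update
}

fn main() {
    let (sender, receiver) = channel();
    sender.send(vec![9, 11, 12]).unwrap();
    let mut slots = HashSet::new();
    // With root = 10, only slots 11 and 12 are kept and trigger an update.
    assert!(update_epoch_slots(10, &mut slots, &receiver));
    let expected: HashSet<u64> = [11, 12].iter().cloned().collect();
    assert_eq!(slots, expected);
}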
@@ -7,6 +7,7 @@ use crate::cluster_info::ClusterInfo;
 use crate::result::Result;
 use crate::service::Service;
 use solana_metrics::datapoint;
+use solana_runtime::bank::EpochSchedule;
 use solana_sdk::pubkey::Pubkey;
 use std::collections::HashSet;
 use std::net::UdpSocket;
@@ -27,6 +28,7 @@ pub enum RepairStrategy {
     RepairAll {
         bank_forks: Arc<RwLock<BankForks>>,
         completed_slots_receiver: CompletedSlotsReceiver,
+        epoch_schedule: EpochSchedule,
     },
 }
 
@@ -103,8 +105,24 @@ impl RepairService {
         repair_strategy: RepairStrategy,
     ) {
         let mut repair_info = RepairInfo::new();
-        let epoch_slots: HashSet<u64> = HashSet::new();
+        let mut epoch_slots: HashSet<u64> = HashSet::new();
         let id = cluster_info.read().unwrap().id();
+        if let RepairStrategy::RepairAll {
+            ref bank_forks,
+            ref epoch_schedule,
+            ..
+        } = repair_strategy
+        {
+            let root = bank_forks.read().unwrap().root();
+            Self::initialize_epoch_slots(
+                id,
+                blocktree,
+                &mut epoch_slots,
+                root,
+                epoch_schedule,
+                cluster_info,
+            );
+        }
         loop {
             if exit.load(Ordering::Relaxed) {
                 break;
@@ -125,12 +143,14 @@ impl RepairService {
                 RepairStrategy::RepairAll {
                     ref bank_forks,
                     ref completed_slots_receiver,
+                    ..
                 } => {
+                    let root = bank_forks.read().unwrap().root();
                     Self::update_epoch_slots(
                         id,
-                        &epoch_slots,
+                        root,
+                        &mut epoch_slots,
                         &cluster_info,
-                        bank_forks,
                         completed_slots_receiver,
                     );
                     Self::generate_repairs(blocktree, MAX_REPAIR_LENGTH)
@@ -289,20 +309,76 @@ impl RepairService {
         }
     }
 
+    fn get_completed_slots_past_root(
+        blocktree: &Blocktree,
+        slots_in_gossip: &mut HashSet<u64>,
+        root: u64,
+        epoch_schedule: &EpochSchedule,
+    ) {
+        let last_confirmed_epoch = epoch_schedule.get_stakers_epoch(root);
+        let last_epoch_slot = epoch_schedule.get_last_slot_in_epoch(last_confirmed_epoch);
+
+        let mut meta_iter = blocktree
+            .slot_meta_iterator(root + 1)
+            .expect("Couldn't get db iterator");
+
+        while meta_iter.valid() && meta_iter.key().unwrap() <= last_epoch_slot {
+            let current_slot = meta_iter.key().unwrap();
+            let meta = meta_iter.value().unwrap();
+            if meta.is_full() {
+                slots_in_gossip.insert(current_slot);
+            }
+            meta_iter.next();
+        }
+    }
+
+    fn initialize_epoch_slots(
+        id: Pubkey,
+        blocktree: &Blocktree,
+        slots_in_gossip: &mut HashSet<u64>,
+        root: u64,
+        epoch_schedule: &EpochSchedule,
+        cluster_info: &RwLock<ClusterInfo>,
+    ) {
+        Self::get_completed_slots_past_root(blocktree, slots_in_gossip, root, epoch_schedule);
+
+        // Safe to set into gossip because by this time, the leader schedule cache should
+        // also be updated with the latest root (done in blocktree_processor) and thus
+        // will provide a schedule to window_service for any incoming blobs up to the
+        // last_confirmed_epoch.
+        cluster_info
+            .write()
+            .unwrap()
+            .push_epoch_slots(id, root, slots_in_gossip.clone());
+    }
+
     // Update the gossiped structure used for the "Repairmen" repair protocol. See book
     // for details.
     fn update_epoch_slots(
         id: Pubkey,
-        slots: &HashSet<u64>,
+        root: u64,
+        slots_in_gossip: &mut HashSet<u64>,
         cluster_info: &RwLock<ClusterInfo>,
-        bank_forks: &Arc<RwLock<BankForks>>,
-        _completed_slots_receiver: &CompletedSlotsReceiver,
+        completed_slots_receiver: &CompletedSlotsReceiver,
     ) {
-        let root = bank_forks.read().unwrap().root();
-        cluster_info
-            .write()
-            .unwrap()
-            .push_epoch_slots(id, root, slots.clone());
+        let mut should_update = false;
+        while let Ok(completed_slots) = completed_slots_receiver.try_recv() {
+            for slot in completed_slots {
+                // If the newly completed slot > root, and the set did not contain this value
+                // before, we should update gossip.
+                if slot > root && slots_in_gossip.insert(slot) {
+                    should_update = true;
+                }
+            }
+        }
+
+        if should_update {
+            slots_in_gossip.retain(|x| *x > root);
+            cluster_info
+                .write()
+                .unwrap()
+                .push_epoch_slots(id, root, slots_in_gossip.clone());
+        }
     }
 }
 
@@ -321,6 +397,11 @@ mod test {
         make_chaining_slot_entries, make_many_slot_entries, make_slot_entries,
     };
     use crate::blocktree::{get_tmp_ledger_path, Blocktree};
+    use crate::cluster_info::Node;
+    use rand::seq::SliceRandom;
+    use rand::{thread_rng, Rng};
+    use std::cmp::min;
+    use std::thread::Builder;
 
     #[test]
     pub fn test_repair_orphan() {
@@ -523,4 +604,145 @@ mod test {
         }
         Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
     }
+
+    #[test]
+    pub fn test_get_completed_slots_past_root() {
+        let blocktree_path = get_tmp_ledger_path!();
+        {
+            let blocktree = Blocktree::open(&blocktree_path).unwrap();
+            let num_entries_per_slot = 10;
+            let root = 10;
+
+            let fork1 = vec![5, 7, root, 15, 20, 21];
+            let fork1_blobs: Vec<_> = make_chaining_slot_entries(&fork1, num_entries_per_slot)
+                .into_iter()
+                .flat_map(|(blobs, _)| blobs)
+                .collect();
+            let fork2 = vec![8, 12];
+            let fork2_blobs = make_chaining_slot_entries(&fork2, num_entries_per_slot);
+
+            // Remove the last blob from each slot to make an incomplete slot
+            let fork2_incomplete_blobs: Vec<_> = fork2_blobs
+                .into_iter()
+                .flat_map(|(mut blobs, _)| {
+                    blobs.pop();
+                    blobs
+                })
+                .collect();
+            let mut full_slots = HashSet::new();
+
+            blocktree.write_blobs(&fork1_blobs).unwrap();
+            blocktree.write_blobs(&fork2_incomplete_blobs).unwrap();
+
+            // Test that only slots > root from fork1 were included
+            let epoch_schedule = EpochSchedule::new(32, 32, false);
+
+            RepairService::get_completed_slots_past_root(
+                &blocktree,
+                &mut full_slots,
+                root,
+                &epoch_schedule,
+            );
+
+            let mut expected: HashSet<_> = fork1.into_iter().filter(|x| *x > root).collect();
+            assert_eq!(full_slots, expected);
+
+            // Test that slots past the last confirmed epoch boundary don't get included
+            let last_epoch = epoch_schedule.get_stakers_epoch(root);
+            let last_slot = epoch_schedule.get_last_slot_in_epoch(last_epoch);
+            let fork3 = vec![last_slot, last_slot + 1];
+            let fork3_blobs: Vec<_> = make_chaining_slot_entries(&fork3, num_entries_per_slot)
+                .into_iter()
+                .flat_map(|(blobs, _)| blobs)
+                .collect();
+            blocktree.write_blobs(&fork3_blobs).unwrap();
+            RepairService::get_completed_slots_past_root(
+                &blocktree,
+                &mut full_slots,
+                root,
+                &epoch_schedule,
+            );
+            expected.insert(last_slot);
+            assert_eq!(full_slots, expected);
+        }
+        Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
+    }
+
+    #[test]
+    pub fn test_update_epoch_slots() {
+        let blocktree_path = get_tmp_ledger_path!();
+        {
+            // Create blocktree
+            let (blocktree, _, completed_slots_receiver) =
+                Blocktree::open_with_signal(&blocktree_path).unwrap();
+
+            let blocktree = Arc::new(blocktree);
+
+            let mut root = 0;
+            let num_slots = 100;
+            let entries_per_slot = 5;
+            let blocktree_ = blocktree.clone();
+
+            // Spin up thread to write to blocktree
+            let writer = Builder::new()
+                .name("writer".to_string())
+                .spawn(move || {
+                    let slots: Vec<_> = (1..num_slots + 1).collect();
+                    let mut blobs: Vec<_> = make_chaining_slot_entries(&slots, entries_per_slot)
+                        .into_iter()
+                        .flat_map(|(blobs, _)| blobs)
+                        .collect();
+                    blobs.shuffle(&mut thread_rng());
+                    let mut i = 0;
+                    let max_step = entries_per_slot * 4;
+                    let repair_interval_ms = 10;
+                    let mut rng = rand::thread_rng();
+                    while i < blobs.len() as usize {
+                        let step = rng.gen_range(1, max_step + 1);
+                        blocktree_
+                            .insert_data_blobs(&blobs[i..min(i + max_step as usize, blobs.len())])
+                            .unwrap();
+                        sleep(Duration::from_millis(repair_interval_ms));
+                        i += step as usize;
+                    }
+                })
+                .unwrap();
+
+            let mut completed_slots = HashSet::new();
+            let node_info = Node::new_localhost_with_pubkey(&Pubkey::default());
+            let cluster_info = RwLock::new(ClusterInfo::new_with_invalid_keypair(
+                node_info.info.clone(),
+            ));
+
+            while completed_slots.len() < num_slots as usize {
+                RepairService::update_epoch_slots(
+                    Pubkey::default(),
+                    root,
+                    &mut completed_slots,
+                    &cluster_info,
+                    &completed_slots_receiver,
+                );
+            }
+
+            let mut expected: HashSet<_> = (1..num_slots + 1).collect();
+            assert_eq!(completed_slots, expected);
+
+            // Update with new root, should filter out the slots <= root
+            root = num_slots / 2;
+            let (blobs, _) = make_slot_entries(num_slots + 2, num_slots + 1, entries_per_slot);
+            blocktree.insert_data_blobs(&blobs).unwrap();
+            RepairService::update_epoch_slots(
+                Pubkey::default(),
+                root,
+                &mut completed_slots,
+                &cluster_info,
+                &completed_slots_receiver,
+            );
+            expected.insert(num_slots + 2);
+            expected.retain(|x| *x > root);
+            assert_eq!(completed_slots, expected);
+            writer.join().unwrap();
+        }
+        Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
+    }
 }
@@ -11,6 +11,7 @@ use crate::staking_utils;
 use crate::streamer::BlobReceiver;
 use crate::window_service::WindowService;
 use solana_metrics::{datapoint, inc_new_counter_info};
+use solana_runtime::bank::EpochSchedule;
 use solana_sdk::hash::Hash;
 use std::net::UdpSocket;
 use std::sync::atomic::AtomicBool;
@@ -117,6 +118,7 @@ impl RetransmitStage {
         exit: &Arc<AtomicBool>,
         genesis_blockhash: &Hash,
         completed_slots_receiver: CompletedSlotsReceiver,
+        epoch_schedule: EpochSchedule,
     ) -> Self {
         let (retransmit_sender, retransmit_receiver) = channel();
 
@@ -131,6 +133,7 @@ impl RetransmitStage {
         let repair_strategy = RepairStrategy::RepairAll {
             bank_forks,
             completed_slots_receiver,
+            epoch_schedule,
         };
         let window_service = WindowService::new(
             Some(leader_schedule_cache.clone()),
@@ -110,6 +110,7 @@ impl Tvu {
             &exit,
             genesis_blockhash,
             completed_slots_receiver,
+            *bank_forks.read().unwrap().working_bank().epoch_schedule(),
         );
 
         let (replay_stage, slot_full_receiver, root_slot_receiver) = ReplayStage::new(
@@ -364,8 +364,14 @@ mod test {
         let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
         let bank_forks = Arc::new(RwLock::new(BankForks::new(0, bank)));
         let repair_strategy = RepairStrategy::RepairAll {
-            bank_forks,
+            bank_forks: bank_forks.clone(),
             completed_slots_receiver,
+            epoch_schedule: bank_forks
+                .read()
+                .unwrap()
+                .working_bank()
+                .epoch_schedule()
+                .clone(),
         };
         let t_window = WindowService::new(
             Some(leader_schedule_cache),
@@ -445,9 +451,11 @@ mod test {
         let bank = Bank::new(&create_genesis_block_with_leader(100, &me_id, 10).0);
         let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
         let bank_forks = Arc::new(RwLock::new(BankForks::new(0, bank)));
+        let epoch_schedule = *bank_forks.read().unwrap().working_bank().epoch_schedule();
         let repair_strategy = RepairStrategy::RepairAll {
             bank_forks,
             completed_slots_receiver,
+            epoch_schedule,
         };
         let t_window = WindowService::new(
             Some(leader_schedule_cache),
@@ -577,7 +577,7 @@ mod tests {
     use solana_sdk::signature::{Keypair, KeypairUtil};
     use solana_sdk::transaction::Transaction;
     use std::thread::{sleep, Builder};
-    use std::time::{Duration, Instant};
+    use std::time::Duration;
 
     fn load_accounts_with_fee(
         tx: Transaction,
@@ -1061,7 +1061,6 @@ mod tests {
             .unwrap();
 
         // Function will block until the parent_thread unlocks the parent's record lock
-        let now = Instant::now();
         assert_eq!(
             Accounts::lock_account(
                 (
@@ -1074,7 +1073,6 @@ mod tests {
             Ok(())
         );
         // Make sure that the function blocked
-        assert!(now.elapsed().as_secs() > 1);
         parent_thread.join().unwrap();
 
         {