leader_scheduler: remove bootstrap_height

This commit is contained in:
Michael Vines
2019-02-05 08:03:52 -08:00
parent 73979d8f5a
commit c5a74ada05
27 changed files with 1591 additions and 1789 deletions

View File

@@ -31,7 +31,6 @@ pub fn repair(
leader_scheduler_option: &Arc<RwLock<LeaderScheduler>>,
) -> Result<Vec<(SocketAddr, Vec<u8>)>> {
let rcluster_info = cluster_info.read().unwrap();
let mut is_next_leader = false;
let meta = db_ledger.meta()?;
if meta.is_none() {
return Ok(vec![]);
@@ -43,35 +42,33 @@ pub fn repair(
// Repair should only be called when received > consumed, enforced in window_service
assert!(received > consumed);
{
let ls_lock = leader_scheduler_option.read().unwrap();
if !ls_lock.use_only_bootstrap_leader {
// Calculate the next leader rotation height and check if we are the leader
if let Some(next_leader_rotation_height) = ls_lock.max_height_for_leader(tick_height) {
match ls_lock.get_scheduled_leader(next_leader_rotation_height) {
Some((leader_id, _)) if leader_id == *id => is_next_leader = true,
// In the case that we are not in the current scope of the leader schedule
// window then either:
//
// 1) The replay stage hasn't caught up to the "consumed" entries we sent,
// in which case it will eventually catch up
//
// 2) We are on the border between seed_rotation_intervals, so the
// schedule won't be known until the entry on that cusp is received
// by the replay stage (which comes after this stage). Hence, the next
// leader at the beginning of that next epoch will not know they are the
// leader until they receive that last "cusp" entry. The leader also won't ask for repairs
// for that entry because "is_next_leader" won't be set here. In this case,
// everybody will be blocking waiting for that "cusp" entry instead of repairing,
// until the leader hits "times" >= the max times in calculate_max_repair_entry_height().
// The impact of this, along with the similar problem from broadcast for the transitioning
// leader, can be observed in the multinode test, test_full_leader_validator_network(),
None => (),
_ => (),
}
}
// Check if we are the leader of the next slot
let is_next_leader = {
let leader_scheduler = leader_scheduler_option.read().unwrap();
let next_slot = leader_scheduler.tick_height_to_slot(tick_height) + 1;
match leader_scheduler.get_leader_for_slot(next_slot) {
Some(leader_id) if leader_id == *id => true,
// In the case that we are not in the current scope of the leader schedule
// window then either:
//
// 1) The replay stage hasn't caught up to the "consumed" entries we sent,
// in which case it will eventually catch up
//
// 2) We are on the border between seed_rotation_intervals, so the
// schedule won't be known until the entry on that cusp is received
// by the replay stage (which comes after this stage). Hence, the next
// leader at the beginning of that next epoch will not know they are the
// leader until they receive that last "cusp" entry. The leader also won't ask for repairs
// for that entry because "is_next_leader" won't be set here. In this case,
// everybody will be blocking waiting for that "cusp" entry instead of repairing,
// until the leader hits "times" >= the max times in calculate_max_repair_entry_height().
// The impact of this, along with the similar problem from broadcast for the transitioning
// leader, can be observed in the multinode test, test_full_leader_validator_network().
None => false,
_ => false,
}
}
};
let num_peers = rcluster_info.repair_peers().len() as u64;
@@ -195,7 +192,8 @@ pub fn process_blob(
// TODO: Once the original leader signature is added to the blob, make sure that
// the blob was originally generated by the expected leader for this slot
if leader.is_none() {
return Ok(());
warn!("No leader for slot {}, blob dropped", slot);
return Ok(()); // Occurs as a leader is rotating into a validator
}
// Insert the new blob into the window
@@ -393,8 +391,9 @@ mod test {
pub fn test_retransmit() {
let leader = Keypair::new().pubkey();
let nonleader = Keypair::new().pubkey();
let leader_scheduler =
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(leader)));
let mut leader_scheduler = LeaderScheduler::default();
leader_scheduler.set_leader_schedule(vec![leader]);
let leader_scheduler = Arc::new(RwLock::new(leader_scheduler));
let blob = SharedBlob::default();
let (blob_sender, blob_receiver) = channel();
@@ -655,17 +654,12 @@ mod test {
#[test]
fn test_process_blob() {
// Create the leader scheduler
let leader_keypair = Keypair::new();
let mut leader_scheduler = LeaderScheduler::from_bootstrap_leader(leader_keypair.pubkey());
let mut leader_scheduler = LeaderScheduler::default();
leader_scheduler.set_leader_schedule(vec![Keypair::new().pubkey()]);
let db_ledger_path = get_tmp_ledger_path("test_process_blob");
let db_ledger = Arc::new(DbLedger::open(&db_ledger_path).unwrap());
// Mock the tick height to look like the tick height right after a leader transition
leader_scheduler.last_seed_height = None;
leader_scheduler.use_only_bootstrap_leader = false;
let leader_scheduler = Arc::new(RwLock::new(leader_scheduler));
let num_entries = 10;
let original_entries = make_tiny_test_entries(num_entries);