Leader scheduler plumbing (#1440)
* Added LeaderScheduler module and tests (a rough sketch of the scheduler interface appears after this list)
* Plumbing for LeaderScheduler in Fullnode + tests. Add vote processing for the active set to ReplicateStage and WriteStage
* Add LeaderScheduler plumbing for Tvu, window, and tests
* Fix bank and switch tests to use the new LeaderScheduler
* Move the leader rotation check from the window service to the replicate stage
* Add replicate_stage leader rotation exit test
* Removed the leader scheduler from the window service and associated modules/tests
* Corrected the is_leader calculation in the repair() function in window.rs
* Integrate LeaderScheduler with write_stage for leader-to-validator transitions
* Integrated LeaderScheduler with BroadcastStage
* Removed gossip leader rotation from crdt
* Add multi-validator, leader test
* Comments and cleanup
* Remove unneeded checks from broadcast stage
* Fix the case where a validator/leader needs to immediately transition on startup after reading the ledger and seeing it is not in the correct role
* Set the new leader in validator -> validator transitions
* Clean up for PR comments, refactor LeaderScheduler from process_entry/process_ledger_tail
* Cleaned out LeaderScheduler options, implemented a LeaderScheduler strategy that only picks the bootstrap leader, to support existing tests and drone/airdrops
* Ignore the test_full_leader_validator_network test due to a bug where the next leader in line fails to get the last entry before rotation (because it hasn't started up yet). Added a test, test_dropped_handoff_recovery, to track this bug
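For orientation, the sketch below shows the shape of the scheduler handle that this commit threads through the pipeline stages as an `Arc<RwLock<LeaderScheduler>>`. The field and method names (`use_only_bootstrap_leader`, `get_scheduled_leader`, `entries_until_next_leader_rotation`) come from the diff below, but the struct layout, the `Pubkey` placeholder, and the fixed-interval schedule are illustrative assumptions, not the actual leader_scheduler module:

```rust
use std::sync::{Arc, RwLock};

// Placeholder for the real solana Pubkey type; illustrative only.
type Pubkey = [u8; 32];

/// Illustrative stand-in for the LeaderScheduler plumbed through Fullnode/Tvu.
/// Assumes a fixed rotation interval and a precomputed per-slot schedule.
pub struct LeaderScheduler {
    /// When true, every height maps to the bootstrap leader (the fallback
    /// strategy the commit message mentions for existing tests and airdrops).
    pub use_only_bootstrap_leader: bool,
    pub bootstrap_leader: Pubkey,
    pub leader_rotation_interval: u64,
    /// Hypothetical precomputed schedule: one leader per rotation slot.
    pub schedule: Vec<Pubkey>,
}

impl LeaderScheduler {
    /// Entries remaining from `height` to the next rotation boundary.
    /// Returns None in bootstrap-only mode, matching the `.expect(...)`
    /// guard seen in repair() below.
    pub fn entries_until_next_leader_rotation(&self, height: u64) -> Option<u64> {
        if self.use_only_bootstrap_leader {
            return None;
        }
        let interval = self.leader_rotation_interval;
        Some(interval - (height % interval))
    }

    /// Leader scheduled at `height`, if the schedule covers that slot.
    pub fn get_scheduled_leader(&self, height: u64) -> Option<Pubkey> {
        if self.use_only_bootstrap_leader {
            return Some(self.bootstrap_leader);
        }
        let slot = (height / self.leader_rotation_interval) as usize;
        self.schedule.get(slot).copied()
    }
}

fn main() {
    // The plumbing passes one shared, lockable handle into the window,
    // replicate, write, and broadcast stages rather than per-stage copies.
    let leader_scheduler = Arc::new(RwLock::new(LeaderScheduler {
        use_only_bootstrap_leader: true,
        bootstrap_leader: [1u8; 32],
        leader_rotation_interval: 100,
        schedule: vec![],
    }));
    let ls = leader_scheduler.read().unwrap();
    assert_eq!(ls.get_scheduled_leader(0), Some([1u8; 32]));
    assert_eq!(ls.entries_until_next_leader_rotation(0), None);
}
```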
@@ -5,6 +5,7 @@ use counter::Counter;
 use entry::Entry;
 #[cfg(feature = "erasure")]
 use erasure;
+use leader_scheduler::LeaderScheduler;
 use ledger::{reconstruct_entries_from_blobs, Block};
 use log::Level;
 use packet::SharedBlob;
@@ -51,6 +52,7 @@ pub trait WindowUtil {
     /// Finds available slots, clears them, and returns their indices.
     fn clear_slots(&mut self, consumed: u64, received: u64) -> Vec<u64>;
 
+    #[cfg_attr(feature = "cargo-clippy", allow(too_many_arguments))]
     fn repair(
         &mut self,
         cluster_info: &Arc<RwLock<ClusterInfo>>,
@@ -59,6 +61,7 @@ pub trait WindowUtil {
         consumed: u64,
         received: u64,
         max_entry_height: u64,
+        leader_scheduler_option: &Arc<RwLock<LeaderScheduler>>,
     ) -> Vec<(SocketAddr, Vec<u8>)>;
 
     fn print(&self, id: &Pubkey, consumed: u64) -> String;
@@ -67,14 +70,12 @@
     fn process_blob(
         &mut self,
         id: &Pubkey,
-        cluster_info: &Arc<RwLock<ClusterInfo>>,
         blob: SharedBlob,
         pix: u64,
         consume_queue: &mut Vec<Entry>,
         consumed: &mut u64,
         leader_unknown: bool,
         pending_retransmits: &mut bool,
-        leader_rotation_interval: u64,
     );
 }
 
@@ -101,13 +102,40 @@ impl WindowUtil for Window {
         consumed: u64,
         received: u64,
         max_entry_height: u64,
+        leader_scheduler_option: &Arc<RwLock<LeaderScheduler>>,
     ) -> Vec<(SocketAddr, Vec<u8>)> {
         let rcluster_info = cluster_info.read().unwrap();
-        let leader_rotation_interval = rcluster_info.get_leader_rotation_interval();
-        // Calculate the next leader rotation height and check if we are the leader
-        let next_leader_rotation =
-            consumed + leader_rotation_interval - (consumed % leader_rotation_interval);
-        let is_next_leader = rcluster_info.get_scheduled_leader(next_leader_rotation) == Some(*id);
+        let mut is_next_leader = false;
+        {
+            let ls_lock = leader_scheduler_option.read().unwrap();
+            if !ls_lock.use_only_bootstrap_leader {
+                // Calculate the next leader rotation height and check if we are the leader
+                let next_leader_rotation_height = consumed + ls_lock.entries_until_next_leader_rotation(consumed).expect("Leader rotation should exist when not using default implementation of LeaderScheduler");
+
+                match ls_lock.get_scheduled_leader(next_leader_rotation_height) {
+                    Some(leader_id) if leader_id == *id => is_next_leader = true,
+                    // In the case that we are not in the current scope of the leader schedule
+                    // window then either:
+                    //
+                    // 1) The replicate stage hasn't caught up to the "consumed" entries we sent,
+                    // in which case it will eventually catch up
+                    //
+                    // 2) We are on the border between seed_rotation_intervals, so the
+                    // schedule won't be known until the entry on that cusp is received
+                    // by the replicate stage (which comes after this stage). Hence, the next
+                    // leader at the beginning of that next epoch will not know he is the
+                    // leader until he receives that last "cusp" entry. He also won't ask for repairs
+                    // for that entry because "is_next_leader" won't be set here. In this case,
+                    // everybody will be blocking waiting for that "cusp" entry instead of repairing,
+                    // until the leader hits "times" >= the max times in calculate_max_repair().
+                    // The impact of this, along with the similar problem from broadcast for the transitioning
+                    // leader, can be observed in the multinode test, test_full_leader_validator_network(),
+                    None => (),
+                    _ => (),
+                }
+            }
+        }
 
         let num_peers = rcluster_info.table.len() as u64;
 
         let max_repair = if max_entry_height == 0 {
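The removed and the added code in this repair() hunk compute the same quantity, the entry height of the next leader rotation boundary; the new version asks the scheduler for the remaining-entry count instead of recomputing it from a locally cached interval. Below is a small worked example of the round-up arithmetic, assuming fixed-interval rotation (illustrative, not the commit's code):

```rust
/// Next rotation boundary strictly above `consumed`, mirroring the removed
/// expression `consumed + leader_rotation_interval - (consumed % leader_rotation_interval)`.
fn next_leader_rotation_height(consumed: u64, leader_rotation_interval: u64) -> u64 {
    consumed + leader_rotation_interval - (consumed % leader_rotation_interval)
}

fn main() {
    // With an interval of 100 entries and 250 entries consumed, the next
    // rotation boundary is at height 300.
    assert_eq!(next_leader_rotation_height(250, 100), 300);
    // Exactly on a boundary, the formula advances a full interval (300 -> 400).
    // The new code obtains the same offset from the scheduler via
    // entries_until_next_leader_rotation(consumed) and adds it to `consumed`.
    assert_eq!(next_leader_rotation_height(300, 100), 400);
}
```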
@@ -196,14 +224,12 @@ impl WindowUtil for Window {
     fn process_blob(
         &mut self,
         id: &Pubkey,
-        cluster_info: &Arc<RwLock<ClusterInfo>>,
         blob: SharedBlob,
         pix: u64,
         consume_queue: &mut Vec<Entry>,
         consumed: &mut u64,
         leader_unknown: bool,
         pending_retransmits: &mut bool,
-        leader_rotation_interval: u64,
     ) {
         let w = (pix % WINDOW_SIZE) as usize;
 
@@ -258,18 +284,6 @@ impl WindowUtil for Window {
 
         // push all contiguous blobs into consumed queue, increment consumed
         loop {
-            if *consumed != 0 && *consumed % (leader_rotation_interval as u64) == 0 {
-                let rcluster_info = cluster_info.read().unwrap();
-                let my_id = rcluster_info.my_data().id;
-                match rcluster_info.get_scheduled_leader(*consumed) {
-                    // If we are the next leader, exit
-                    Some(id) if id == my_id => {
-                        break;
-                    }
-                    _ => (),
-                }
-            }
-
             let k = (*consumed % WINDOW_SIZE) as usize;
             trace!("{}: k: {} consumed: {}", id, k, *consumed,);
 
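For reference, the check deleted from the consume loop above can be summarized as the predicate below: at every rotation boundary, stop consuming blobs if this node is the scheduled leader at that height, so the role transition can proceed. Per the commit message this responsibility moved to the replicate stage; the sketch is illustrative, with `Pubkey` as a placeholder type and the scheduler lookup passed in as a closure rather than the real crdt/cluster_info call:

```rust
// Placeholder for the real Pubkey type; illustrative only.
type Pubkey = [u8; 32];

/// Returns true when the consume loop should break because this node is the
/// scheduled leader at the current rotation boundary.
fn should_stop_consuming(
    consumed: u64,
    leader_rotation_interval: u64,
    my_id: Pubkey,
    get_scheduled_leader: impl Fn(u64) -> Option<Pubkey>,
) -> bool {
    // Only check exactly at rotation boundaries (never at height 0).
    if consumed != 0 && consumed % leader_rotation_interval == 0 {
        if let Some(leader_id) = get_scheduled_leader(consumed) {
            return leader_id == my_id;
        }
    }
    false
}

fn main() {
    let me: Pubkey = [7u8; 32];
    let schedule = |_height: u64| Some(me);
    assert!(should_stop_consuming(200, 100, me, schedule));
    assert!(!should_stop_consuming(150, 100, me, schedule));
}
```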