Leader scheduler plumbing (#1440)

* Added LeaderScheduler module and tests

* Add plumbing for LeaderScheduler in Fullnode + tests. Add vote processing for active set to ReplicateStage and WriteStage

* Add LeaderScheduler plumbing for Tvu, window, and tests

* Fix bank and switch tests to use new LeaderScheduler

* move leader rotation check from window service to replicate stage

* Add replicate_stage leader rotation exit test

* removed leader scheduler from the window service and associated modules/tests

* Corrected is_leader calculation in repair() function in window.rs

* Integrate LeaderScheduler with write_stage for leader to validator transitions

* Integrated LeaderScheduler with BroadcastStage

* Removed gossip leader rotation from crdt

* Add multi validator, leader test

* Comments and cleanup

* Remove unneeded checks from broadcast stage

* Fix case where a validator/leader needs to transition immediately on startup after reading the ledger and seeing that it is not in the correct role

* Set new leader in validator -> validator transitions

* Clean up for PR comments, refactor LeaderScheduler from process_entry/process_ledger_tail

* Cleaned out LeaderScheduler options, implemented LeaderScheduler strategy that only picks the bootstrap leader to support existing tests, drone/airdrops

* Ignore test_full_leader_validator_network test due to a bug where the next leader in line fails to get the last entry before rotation (because it hasn't started up yet). Added a test, test_dropped_handoff_recovery, to track this bug
This commit is contained in:
carllin
2018-10-10 16:49:41 -07:00
committed by GitHub
parent 2ba2bc72ca
commit 9931ac9780
22 changed files with 1743 additions and 898 deletions

View File

@@ -12,6 +12,7 @@ use solana::client::mk_client;
use solana::cluster_info::Node;
use solana::drone::DRONE_PORT;
use solana::fullnode::{Config, Fullnode, FullnodeReturnType};
use solana::leader_scheduler::LeaderScheduler;
use solana::logger;
use solana::metrics::set_panic_hook;
use solana::signature::{Keypair, KeypairUtil};
@@ -83,9 +84,6 @@ fn main() -> () {
let node_info = node.info.clone();
let pubkey = keypair.pubkey();
let mut fullnode = Fullnode::new(node, ledger_path, keypair, network, false, None);
// airdrop stuff, probably goes away at some point
let leader = match network {
Some(network) => {
poll_gossip_for_leader(network, None).expect("can't find leader on network")
@@ -93,6 +91,14 @@ fn main() -> () {
None => node_info,
};
let mut fullnode = Fullnode::new(
node,
ledger_path,
keypair,
network,
false,
LeaderScheduler::from_bootstrap_leader(leader.id),
);
let mut client = mk_client(&leader);
// TODO: maybe have the drone put itself in gossip somewhere instead of hardcoding?
@@ -126,7 +132,8 @@ fn main() -> () {
loop {
let status = fullnode.handle_role_transition();
match status {
Ok(Some(FullnodeReturnType::LeaderRotation)) => (),
Ok(Some(FullnodeReturnType::LeaderToValidatorRotation)) => (),
Ok(Some(FullnodeReturnType::ValidatorToLeaderRotation)) => (),
_ => {
// Fullnode tpu/tvu exited for some unexpected
// reason, so exit