Leader scheduler plumbing (#1440)

* Add LeaderScheduler module and tests

* Add plumbing for LeaderScheduler in Fullnode, plus tests. Add vote processing for the active set to ReplicateStage and WriteStage

* Add LeaderScheduler plumbing for Tvu, window, and tests

* Fix bank and switch tests to use the new LeaderScheduler

* Move the leader rotation check from the window service to the replicate stage

* Add replicate_stage leader rotation exit test

* Remove the leader scheduler from the window service and associated modules/tests

* Correct the is_leader calculation in the repair() function in window.rs

* Integrate LeaderScheduler with write_stage for leader-to-validator transitions

* Integrate LeaderScheduler with BroadcastStage

* Remove gossip leader rotation from crdt

* Add a test with multiple validators and a leader

* Comments and cleanup

* Remove unneeded checks from broadcast stage

* Fix the case where a validator/leader needs to transition immediately on startup, after reading the ledger and seeing it is not in the correct role

* Set the new leader in validator -> validator transitions

* Clean up per PR comments; refactor LeaderScheduler out of process_entry/process_ledger_tail

* Clean out LeaderScheduler options; implement a LeaderScheduler strategy that only picks the bootstrap leader, to support existing tests and drone/airdrops (see the sketch after this list)

* Ignore the test_full_leader_validator_network test due to a bug where the next leader in line fails to receive the last entry before rotation (because it hasn't started up yet). Add a test, test_dropped_handoff_recovery, to track this bug
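
The bootstrap-leader-only strategy mentioned above can be pictured with a minimal sketch. This is illustrative only, assuming a bare Pubkey alias and simplified method names rather than the exact module API:

    // Illustrative sketch: a scheduler that always selects the bootstrap
    // leader. `Pubkey` is a hypothetical stand-in alias for the node id type.
    type Pubkey = [u8; 32];

    pub struct LeaderScheduler {
        bootstrap_leader: Pubkey,
    }

    impl LeaderScheduler {
        pub fn from_bootstrap_leader(bootstrap_leader: Pubkey) -> Self {
            LeaderScheduler { bootstrap_leader }
        }

        // Every entry height maps to the bootstrap leader, so existing
        // tests and drone/airdrop flows that assume one fixed leader
        // keep working while real rotation strategies are developed.
        pub fn get_scheduled_leader(&self, _entry_height: u64) -> Option<Pubkey> {
            Some(self.bootstrap_leader)
        }
    }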
carllin authored on 2018-10-10 16:49:41 -07:00; committed by GitHub
parent 2ba2bc72ca
commit 9931ac9780
22 changed files with 1743 additions and 898 deletions


@@ -1,6 +1,7 @@
 use blob_fetch_stage::BlobFetchStage;
 use cluster_info::{ClusterInfo, Node, NodeInfo};
 use hash::{Hash, Hasher};
+use leader_scheduler::LeaderScheduler;
 use ncp::Ncp;
 use service::Service;
 use std::fs::File;
@@ -22,13 +23,13 @@ use std::time::Duration;
 use store_ledger_stage::StoreLedgerStage;
 use streamer::BlobReceiver;
 use window;
-use window_service::{window_service, WindowServiceReturnType};
+use window_service::window_service;

 pub struct Replicator {
     ncp: Ncp,
     fetch_stage: BlobFetchStage,
     store_ledger_stage: StoreLedgerStage,
-    t_window: JoinHandle<Option<WindowServiceReturnType>>,
+    t_window: JoinHandle<()>,
     pub retransmit_receiver: BlobReceiver,
 }
@@ -82,8 +83,9 @@ impl Replicator {
         ));

         let leader_info = network_addr.map(|i| NodeInfo::new_entry_point(&i));
+        let leader_pubkey;
         if let Some(leader_info) = leader_info.as_ref() {
+            leader_pubkey = leader_info.id;
             cluster_info.write().unwrap().insert(leader_info);
         } else {
             panic!("No leader info!");
@@ -108,6 +110,9 @@ impl Replicator {
             entry_window_sender,
             retransmit_sender,
             repair_socket,
+            Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
+                leader_pubkey,
+            ))),
             done,
         );
@@ -152,6 +157,7 @@ mod tests {
     use cluster_info::Node;
     use fullnode::Fullnode;
     use hash::Hash;
+    use leader_scheduler::LeaderScheduler;
     use ledger::{genesis, read_ledger, tmp_ledger_path};
     use logger;
     use replicator::sample_file;
@@ -185,14 +191,13 @@ mod tests {
         let leader_node = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
         let network_addr = leader_node.sockets.gossip.local_addr().unwrap();
         let leader_info = leader_node.info.clone();
-        let leader_rotation_interval = 20;

         let leader = Fullnode::new(
             leader_node,
             &leader_ledger_path,
             leader_keypair,
             None,
             false,
-            Some(leader_rotation_interval),
+            LeaderScheduler::from_bootstrap_leader(leader_info.id),
         );

         let mut leader_client = mk_client(&leader_info);
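
As the hunks above show, callers now pass a LeaderScheduler into Fullnode::new instead of an optional leader_rotation_interval, and the replicator shares its scheduler behind Arc<RwLock<...>> so the window service and other stages can consult it concurrently. A minimal sketch of that sharing pattern, using a hypothetical stand-in type rather than the real module:

    use std::sync::{Arc, RwLock};

    // Hypothetical stand-in for the real LeaderScheduler, kept minimal
    // so the sketch is self-contained and runnable.
    struct LeaderScheduler {
        bootstrap_leader: [u8; 32],
    }

    impl LeaderScheduler {
        fn from_bootstrap_leader(id: [u8; 32]) -> Self {
            LeaderScheduler { bootstrap_leader: id }
        }
    }

    fn main() {
        let leader_pubkey = [0u8; 32];
        // Build the scheduler once, then wrap it for shared ownership;
        // each consumer (window service, replicate stage, ...) clones
        // the Arc and takes the RwLock briefly when it needs the schedule.
        let scheduler = Arc::new(RwLock::new(
            LeaderScheduler::from_bootstrap_leader(leader_pubkey),
        ));
        let for_window_service = Arc::clone(&scheduler);
        assert_eq!(
            for_window_service.read().unwrap().bootstrap_leader,
            leader_pubkey
        );
    }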