Add Fullnode::run() to optionally manage node role transitions automatically

Michael Vines
2019-02-01 18:09:38 -08:00
parent 5f565c92c9
commit f90d96367d
3 changed files with 68 additions and 50 deletions
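Taken together, the changes below give a Fullnode owner a fire-and-forget mode: run() consumes the node, spawns a thread that loops on handle_role_transition(), and hands back a closure that shuts everything down. A minimal sketch of the intended call pattern, assuming a hypothetical new_fullnode() helper in place of the full Fullnode::new(..) construction shown in the tests:

use std::sync::mpsc::channel;

fn main() {
    // Hypothetical helper; stands in for the Fullnode::new(..) call seen in the tests below.
    let fullnode = new_fullnode();

    // Optional channel for observing each completed role transition.
    let (rotation_sender, rotation_receiver) = channel();

    // run() consumes the node, spawns the transition-management thread, and
    // returns a closure that signals exit and blocks until shutdown completes.
    let fullnode_exit = fullnode.run(Some(rotation_sender));

    // Each rotation is reported as a FullnodeReturnType.
    println!("rotation complete: {:?}", rotation_receiver.recv().unwrap());

    // Tear down the node.
    fullnode_exit();
}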

src/fullnode.rs

@@ -23,11 +23,9 @@ use solana_sdk::timing::{duration_as_ms, timestamp};
use std::net::UdpSocket;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-use std::sync::mpsc::channel;
-use std::sync::mpsc::{Receiver, Sender, SyncSender};
+use std::sync::mpsc::{channel, Receiver, Sender, SyncSender};
use std::sync::{Arc, RwLock};
-use std::thread::sleep;
-use std::thread::Result;
+use std::thread::{sleep, spawn, Result};
use std::time::Duration;
use std::time::Instant;
@@ -67,6 +65,7 @@ impl NodeServices {
pub enum FullnodeReturnType {
LeaderToValidatorRotation,
ValidatorToLeaderRotation,
+LeaderToLeaderRotation,
}
pub struct FullnodeConfig {
@@ -286,8 +285,8 @@ impl Fullnode {
}
}
-pub fn leader_to_validator(&mut self, tick_height: u64) -> Result<()> {
-trace!("leader_to_validator");
+pub fn leader_to_validator(&mut self, tick_height: u64) -> FullnodeReturnType {
+trace!("leader_to_validator: tick_height={}", tick_height);
while self.bank.tick_height() < tick_height {
sleep(Duration::from_millis(10));
@@ -305,14 +304,11 @@ impl Fullnode {
.write()
.unwrap()
.set_leader(scheduled_leader);
-// In the rare case that the leader exited on a multiple of seed_rotation_interval
-// when the new leader schedule was being generated, and there are no other validators
-// in the active set, then the leader scheduler will pick the same leader again, so
-// check for that
if scheduled_leader == self.id {
let (last_entry_id, entry_height) = self.node_services.tvu.get_state();
self.validator_to_leader(tick_height, entry_height, last_entry_id);
-Ok(())
+FullnodeReturnType::LeaderToLeaderRotation
} else {
self.node_services.tpu.switch_to_forwarder(
self.tpu_sockets
@@ -321,7 +317,7 @@
.collect(),
self.cluster_info.clone(),
);
-Ok(())
+FullnodeReturnType::LeaderToValidatorRotation
}
}
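Since leader_to_validator() now reports which rotation actually happened rather than Result<()>, callers can distinguish re-election from a genuine demotion. A sketch of branching on the new return value, where node and tick_height are placeholders for values the real call sites already have:

match node.leader_to_validator(tick_height) {
    // Scheduler picked this node again; it remains leader.
    FullnodeReturnType::LeaderToLeaderRotation => (),
    // TPU was switched to forwarder mode; now following a new leader.
    FullnodeReturnType::LeaderToValidatorRotation => (),
    // leader_to_validator never yields this variant.
    FullnodeReturnType::ValidatorToLeaderRotation => unreachable!(),
}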
@@ -357,22 +353,21 @@
)
}
-pub fn handle_role_transition(&mut self) -> Result<Option<FullnodeReturnType>> {
+pub fn handle_role_transition(&mut self) -> Option<FullnodeReturnType> {
loop {
if self.exit.load(Ordering::Relaxed) {
-return Ok(None);
+return None;
}
let should_be_forwarder = self.role_notifiers.1.try_recv();
let should_be_leader = self.role_notifiers.0.try_recv();
match should_be_leader {
Ok(TvuReturnType::LeaderRotation(tick_height, entry_height, last_entry_id)) => {
self.validator_to_leader(tick_height, entry_height, last_entry_id);
-return Ok(Some(FullnodeReturnType::ValidatorToLeaderRotation));
+return Some(FullnodeReturnType::ValidatorToLeaderRotation);
}
_ => match should_be_forwarder {
Ok(TpuReturnType::LeaderRotation(tick_height)) => {
-self.leader_to_validator(tick_height)?;
-return Ok(Some(FullnodeReturnType::LeaderToValidatorRotation));
+return Some(self.leader_to_validator(tick_height))
}
_ => {
continue;
@@ -382,6 +377,35 @@ impl Fullnode {
}
}
+// Runs a thread to manage node role transitions. The returned closure can be used to signal the
+// node to exit.
+pub fn run(mut self, rotation_notifier: Option<Sender<FullnodeReturnType>>) -> impl FnOnce() {
+let (sender, receiver) = channel();
+let exit = self.exit.clone();
+spawn(move || loop {
+let status = self.handle_role_transition();
+match status {
+None => {
+debug!("node shutdown requested");
+self.close().expect("Unable to close node");
+sender.send(true).expect("Unable to signal exit");
+break;
+}
+Some(transition) => {
+debug!("role_transition complete: {:?}", transition);
+if let Some(ref rotation_notifier) = rotation_notifier {
+rotation_notifier.send(transition).unwrap();
+}
+}
+};
+});
+move || {
+exit.store(true, Ordering::Relaxed);
+receiver.recv().unwrap();
+debug!("node shutdown complete");
+}
+}
// Used for notifying many nodes in parallel to exit
pub fn exit(&self) {
self.exit.store(true, Ordering::Relaxed);
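The shutdown handshake inside run() is worth noting: the shared AtomicBool exit flag makes handle_role_transition() return None, and the mpsc channel lets the returned closure block until close() has actually finished. A distilled, self-contained illustration of that pattern (not Solana code; a plain worker thread under the same scheme):

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::channel;
use std::sync::Arc;
use std::thread::spawn;

fn run_worker() -> impl FnOnce() {
    let exit = Arc::new(AtomicBool::new(false));
    let (done_sender, done_receiver) = channel();

    let thread_exit = exit.clone();
    spawn(move || {
        // Stand-in for the handle_role_transition() loop.
        while !thread_exit.load(Ordering::Relaxed) {}
        // Cleanup done; unblock the exit closure.
        done_sender.send(true).expect("Unable to signal exit");
    });

    move || {
        exit.store(true, Ordering::Relaxed);
        // Returns only once the worker has fully shut down.
        done_receiver.recv().unwrap();
    }
}

fn main() {
    let worker_exit = run_worker();
    worker_exit();
}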
@@ -597,7 +621,7 @@ mod tests {
let bootstrap_leader_keypair = Arc::new(bootstrap_leader_keypair);
let voting_keypair = VotingKeypair::new_local(&bootstrap_leader_keypair);
// Start up the leader
-let mut bootstrap_leader = Fullnode::new(
+let bootstrap_leader = Fullnode::new(
bootstrap_leader_node,
&bootstrap_leader_keypair,
&bootstrap_leader_ledger_path,
@@ -607,16 +631,16 @@
&FullnodeConfig::default(),
);
-// Wait for the leader to transition, ticks should cause the leader to
-// reach the height for leader rotation
-match bootstrap_leader.handle_role_transition().unwrap() {
-Some(FullnodeReturnType::LeaderToValidatorRotation) => (),
-_ => {
-panic!("Expected a leader transition");
-}
-}
-assert!(bootstrap_leader.node_services.tpu.is_leader());
-bootstrap_leader.close().unwrap();
+let (rotation_sender, rotation_receiver) = channel();
+let bootstrap_leader_exit = bootstrap_leader.run(Some(rotation_sender));
+// Wait for the bootstrap leader to transition. Since there are no other nodes in the
+// cluster it will continue to be the leader
+assert_eq!(
+rotation_receiver.recv().unwrap(),
+FullnodeReturnType::LeaderToLeaderRotation
+);
+bootstrap_leader_exit();
}
#[test]
@@ -860,7 +884,7 @@
// Release tvu bank lock, tvu should start making progress again and
// handle_role_transition should successfully rotate the leader to a validator
assert_eq!(
-leader.handle_role_transition().unwrap().unwrap(),
+leader.handle_role_transition().unwrap(),
FullnodeReturnType::LeaderToValidatorRotation
);
assert_eq!(