Validator to leader (#1303)

* Add a check in window_service to exit when leader rotation is detected, and propagate that service exit up to the fullnode

* Add logic to shut down the Tvu once ReplicateStage finishes

* Add a test for successfully shutting down the validator and starting up the leader

* Add a test for leader-validator interaction

* Fix the streamer to check for the exit signal before reading from the socket again, so that streamer threads on busy leaders eventually return (see the sketch after this list)

* Address PR comments: rewrite the make_consecutive_blobs() function, revert the genesis function change
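
The streamer fix above follows a standard shutdown-flag pattern. Below is a minimal sketch of that pattern under assumed names (`receive_loop`, the buffer size, and the timeout are all hypothetical), not the actual streamer.rs code; the point is only that the exit flag is tested before every read:

```rust
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;

// Hypothetical receive loop illustrating the fix: test the exit flag
// *before* each read, so a socket that always has data ready cannot
// starve the shutdown check.
fn receive_loop(socket: UdpSocket, exit: Arc<AtomicBool>) {
    socket
        .set_read_timeout(Some(Duration::from_millis(100)))
        .expect("set_read_timeout");
    let mut buf = [0u8; 1024];
    loop {
        // A busy leader would otherwise loop on successful reads forever
        // and never reach an exit check placed after the recv.
        if exit.load(Ordering::Relaxed) {
            return;
        }
        match socket.recv_from(&mut buf) {
            Ok((_nrecv, _from)) => (), // hand the packet to the next stage here
            Err(_) => (),              // timeout: loop around and re-check exit
        }
    }
}
```
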
Author: carllin
Date: 2018-09-25 15:41:29 -07:00
Committed by: GitHub
Parent: 8a7545197f
Commit: e7383a7e66
13 changed files with 629 additions and 109 deletions

src/fullnode.rs

@@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, RwLock};
 use std::thread::Result;
 use tpu::{Tpu, TpuReturnType};
-use tvu::Tvu;
+use tvu::{Tvu, TvuReturnType};
 use untrusted::Input;
 use window;
@@ -58,12 +58,12 @@ impl ValidatorServices {
         ValidatorServices { tvu }
     }
 
-    pub fn join(self) -> Result<()> {
+    pub fn join(self) -> Result<Option<TvuReturnType>> {
         self.tvu.join()
     }
 
     pub fn exit(&self) -> () {
-        //TODO: implement exit for Tvu
+        self.tvu.exit()
     }
 }
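
For context when reading the new `join()` signature: `TvuReturnType` lives in tvu.rs, which is not part of this excerpt. Judging from its use in `handle_role_transition()` below, where `TvuReturnType::LeaderRotation(entry_height)` carries a `u64`, its definition is presumably along these lines (a sketch, not the committed code):

```rust
// Inferred from `TvuReturnType::LeaderRotation(entry_height)` below;
// see tvu.rs in this commit for the authoritative definition.
pub enum TvuReturnType {
    LeaderRotation(u64),
}
```
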
@@ -81,10 +81,13 @@ pub struct Fullnode {
     bank: Arc<Bank>,
     crdt: Arc<RwLock<Crdt>>,
     ledger_path: String,
+    sigverify_disabled: bool,
     shared_window: window::SharedWindow,
     replicate_socket: Vec<UdpSocket>,
     repair_socket: UdpSocket,
     retransmit_socket: UdpSocket,
+    transaction_sockets: Vec<UdpSocket>,
+    broadcast_socket: UdpSocket,
     requests_socket: UdpSocket,
     respond_socket: UdpSocket,
 }
@@ -307,7 +310,6 @@
                 .try_clone()
                 .expect("Failed to clone retransmit socket"),
             Some(ledger_path),
-            exit.clone(),
         );
         let validator_state = ValidatorServices::new(tvu);
         node_role = Some(NodeRole::Validator(validator_state));
@@ -353,6 +355,7 @@
             crdt,
             shared_window,
             bank,
+            sigverify_disabled,
             rpu,
             ncp,
             rpc_service,
@@ -362,6 +365,8 @@
             replicate_socket: node.sockets.replicate,
             repair_socket: node.sockets.repair,
             retransmit_socket: node.sockets.retransmit,
+            transaction_sockets: node.sockets.transaction,
+            broadcast_socket: node.sockets.broadcast,
             requests_socket: node.sockets.requests,
             respond_socket: node.sockets.respond,
         }
@@ -417,13 +422,45 @@
                 .try_clone()
                 .expect("Failed to clone retransmit socket"),
             Some(&self.ledger_path),
-            self.exit.clone(),
         );
         let validator_state = ValidatorServices::new(tvu);
         self.node_role = Some(NodeRole::Validator(validator_state));
         Ok(())
     }
 
+    fn validator_to_leader(&mut self, entry_height: u64) {
+        self.crdt.write().unwrap().set_leader(self.keypair.pubkey());
+        let tick_duration = None;
+        // TODO: To light up PoH, uncomment the following line:
+        //let tick_duration = Some(Duration::from_millis(1000));
+        let (tpu, blob_receiver, tpu_exit) = Tpu::new(
+            self.keypair.clone(),
+            &self.bank,
+            &self.crdt,
+            tick_duration,
+            self.transaction_sockets
+                .iter()
+                .map(|s| s.try_clone().expect("Failed to clone transaction sockets"))
+                .collect(),
+            &self.ledger_path,
+            self.sigverify_disabled,
+            entry_height,
+        );
+
+        let broadcast_stage = BroadcastStage::new(
+            self.broadcast_socket
+                .try_clone()
+                .expect("Failed to clone broadcast socket"),
+            self.crdt.clone(),
+            self.shared_window.clone(),
+            entry_height,
+            blob_receiver,
+            tpu_exit,
+        );
+        let leader_state = LeaderServices::new(tpu, broadcast_stage);
+        self.node_role = Some(NodeRole::Leader(leader_state));
+    }
+
     pub fn handle_role_transition(&mut self) -> Result<Option<FullnodeReturnType>> {
         let node_role = self.node_role.take();
         match node_role {
@@ -435,6 +472,10 @@
                 _ => Ok(None),
             },
             Some(NodeRole::Validator(validator_services)) => match validator_services.join()? {
+                Some(TvuReturnType::LeaderRotation(entry_height)) => {
+                    self.validator_to_leader(entry_height);
+                    Ok(Some(FullnodeReturnType::LeaderRotation))
+                }
                 _ => Ok(None),
             },
             None => Ok(None),
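
To show how the new return value is meant to be consumed, here is a hypothetical driver for `handle_role_transition()`; this loop is an illustration under assumed names (and this module's imports), not code from this commit:

```rust
// Keep the node alive across role changes: every time a Tvu or Tpu
// exits because of leader rotation, the node is re-dispatched in its
// new role; any other exit shuts the node down.
fn run_node(mut fullnode: Fullnode) -> Result<()> {
    loop {
        match fullnode.handle_role_transition()? {
            Some(FullnodeReturnType::LeaderRotation) => continue,
            _ => return Ok(()),
        }
    }
}
```
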
@@ -494,7 +535,9 @@
         match self.node_role {
             Some(NodeRole::Validator(validator_service)) => {
-                validator_service.join()?;
+                if let Some(TvuReturnType::LeaderRotation(_)) = validator_service.join()? {
+                    return Ok(Some(FullnodeReturnType::LeaderRotation));
+                }
             }
             Some(NodeRole::Leader(leader_service)) => {
                 if let Some(TpuReturnType::LeaderRotation) = leader_service.join()? {
@@ -512,11 +555,17 @@
 mod tests {
     use bank::Bank;
     use crdt::Node;
-    use fullnode::Fullnode;
+    use fullnode::{Fullnode, FullnodeReturnType};
     use ledger::genesis;
+    use packet::{make_consecutive_blobs, BlobRecycler};
     use service::Service;
     use signature::{Keypair, KeypairUtil};
+    use std::cmp;
     use std::fs::remove_dir_all;
+    use std::net::UdpSocket;
+    use std::sync::mpsc::channel;
+    use std::sync::Arc;
+    use streamer::responder;
 
     #[test]
     fn validator_exit() {
@@ -540,6 +589,7 @@
         v.close().unwrap();
         remove_dir_all(validator_ledger_path).unwrap();
     }
+
     #[test]
     fn validator_parallel_exit() {
         let mut ledger_paths = vec![];
@@ -578,4 +628,98 @@
             remove_dir_all(path).unwrap();
         }
     }
+
+    #[test]
+    fn test_validator_to_leader_transition() {
+        // Make a leader identity
+        let leader_keypair = Keypair::new();
+        let leader_node = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
+        let leader_id = leader_node.info.id;
+        let leader_ncp = leader_node.info.contact_info.ncp;
+
+        // Start the validator node
+        let leader_rotation_interval = 10;
+        let (mint, validator_ledger_path) = genesis("test_validator_to_leader_transition", 10_000);
+        let validator_keypair = Keypair::new();
+        let validator_node = Node::new_localhost_with_pubkey(validator_keypair.pubkey());
+        let validator_info = validator_node.info.clone();
+        let mut validator = Fullnode::new(
+            validator_node,
+            &validator_ledger_path,
+            validator_keypair,
+            Some(leader_ncp),
+            false,
+            Some(leader_rotation_interval),
+        );
+
+        // Set the leader schedule for the validator
+        let my_leader_begin_epoch = 2;
+        for i in 0..my_leader_begin_epoch {
+            validator.set_scheduled_leader(leader_id, leader_rotation_interval * i);
+        }
+        validator.set_scheduled_leader(
+            validator_info.id,
+            my_leader_begin_epoch * leader_rotation_interval,
+        );
+
+        // Send blobs to the validator from our mock leader
+        let resp_recycler = BlobRecycler::default();
+        let t_responder = {
+            let (s_responder, r_responder) = channel();
+            let blob_sockets: Vec<Arc<UdpSocket>> = leader_node
+                .sockets
+                .replicate
+                .into_iter()
+                .map(Arc::new)
+                .collect();
+            let t_responder = responder(
+                "test_validator_to_leader_transition",
+                blob_sockets[0].clone(),
+                r_responder,
+            );
+
+            // Send the blobs out of order, in reverse. Also send an extra
+            // "extra_blobs" number of blobs to make sure the window stops in the right place.
+            let extra_blobs = cmp::max(leader_rotation_interval / 3, 1);
+            let total_blobs_to_send =
+                my_leader_begin_epoch * leader_rotation_interval + extra_blobs;
+            let genesis_entries = mint.create_entries();
+            let last_id = genesis_entries
+                .last()
+                .expect("expected at least one genesis entry")
+                .id;
+            let tvu_address = &validator_info.contact_info.tvu;
+            let msgs = make_consecutive_blobs(
+                leader_id,
+                total_blobs_to_send,
+                last_id,
+                &tvu_address,
+                &resp_recycler,
+            ).into_iter()
+            .rev()
+            .collect();
+            s_responder.send(msgs).expect("send");
+            t_responder
+        };
+
+        // Wait for validator to shut down tvu and restart tpu
+        match validator.handle_role_transition().unwrap() {
+            Some(FullnodeReturnType::LeaderRotation) => (),
+            _ => panic!("Expected reason for exit to be leader rotation"),
+        }
+
+        // Check the validator ledger to make sure it's the right height
+        let (_, entry_height, _) = Fullnode::new_bank_from_ledger(&validator_ledger_path);
+
+        assert_eq!(
+            entry_height,
+            my_leader_begin_epoch * leader_rotation_interval
+        );
+
+        // Shut down
+        t_responder.join().expect("responder thread join");
+        validator.close().unwrap();
+        remove_dir_all(&validator_ledger_path).unwrap();
+    }
 }
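
To make the test's expected ledger height concrete: with `leader_rotation_interval = 10` and `my_leader_begin_epoch = 2`, the mock leader sends `2 * 10 + max(10 / 3, 1) = 23` blobs, yet the window should stop consuming at entry 20, where the validator's own leader slot begins. A standalone check of that arithmetic:

```rust
use std::cmp;

fn main() {
    let leader_rotation_interval: u64 = 10;
    let my_leader_begin_epoch: u64 = 2;

    // Blobs sent past the rotation point, proving the window stops on time.
    let extra_blobs = cmp::max(leader_rotation_interval / 3, 1); // integer division: 3
    let total_blobs_to_send = my_leader_begin_epoch * leader_rotation_interval + extra_blobs;
    assert_eq!(total_blobs_to_send, 23);

    // The ledger must end exactly where the validator's leader slot starts.
    let expected_entry_height = my_leader_begin_epoch * leader_rotation_interval;
    assert_eq!(expected_entry_height, 20);
}
```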