Validator to leader (#1303)
* Add a check in window_service to exit on leader rotation, and propagate that service exit up to the fullnode
* Add logic to shut down the Tvu once ReplicateStage finishes
* Add a test for successfully shutting down the validator and starting up as the leader
* Add a test for leader/validator interaction
* Fix the streamer to check for the exit signal before polling the socket again, so that busy leaders always return (see the sketch below)
* PR comments: rewrite the make_consecutive_blobs() function; revert the genesis function change
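The streamer fix above re-checks the exit flag before each socket read so a node flooded with packets can still observe a shutdown request. Here is a minimal, self-contained sketch of that pattern using only the Rust standard library; the names and the 100ms timeout are illustrative, not the crate's actual streamer code:

```rust
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

fn main() -> std::io::Result<()> {
    let socket = UdpSocket::bind("127.0.0.1:0")?;
    // Bounded blocking: recv_from wakes up periodically so the loop can
    // observe the exit flag even when packets keep arriving.
    socket.set_read_timeout(Some(Duration::from_millis(100)))?;
    let exit = Arc::new(AtomicBool::new(false));

    let thread_exit = exit.clone();
    let receiver = thread::spawn(move || {
        let mut buf = [0u8; 1024];
        loop {
            // Check the exit signal BEFORE touching the socket again;
            // otherwise a busy node may never notice it was asked to stop.
            if thread_exit.load(Ordering::Relaxed) {
                break;
            }
            match socket.recv_from(&mut buf) {
                Ok((n, from)) => println!("got {} bytes from {}", n, from),
                Err(_) => continue, // timeout: loop around and re-check exit
            }
        }
    });

    exit.store(true, Ordering::Relaxed);
    receiver.join().unwrap();
    Ok(())
}
```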
src/fullnode.rs (158 changed lines)
@@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, RwLock};
 use std::thread::Result;
 use tpu::{Tpu, TpuReturnType};
-use tvu::Tvu;
+use tvu::{Tvu, TvuReturnType};
 use untrusted::Input;
 use window;
@@ -58,12 +58,12 @@ impl ValidatorServices {
         ValidatorServices { tvu }
     }

-    pub fn join(self) -> Result<()> {
+    pub fn join(self) -> Result<Option<TvuReturnType>> {
         self.tvu.join()
     }

     pub fn exit(&self) -> () {
-        //TODO: implement exit for Tvu
+        self.tvu.exit()
     }
 }
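The `Option<TvuReturnType>` return value is how the Tvu now reports *why* it exited. Below is a self-contained sketch of the idea, with a hypothetical `MockTvu` thread standing in for the real service; only the `LeaderRotation(entry_height)` variant is taken from this diff:

```rust
use std::thread::{self, JoinHandle};

// Mirrors the variant used later in this diff:
// TvuReturnType::LeaderRotation(entry_height).
pub enum TvuReturnType {
    LeaderRotation(u64),
}

// Hypothetical stand-in for the real Tvu: the join handle now yields an
// optional exit reason instead of ().
pub struct MockTvu {
    handle: JoinHandle<Option<TvuReturnType>>,
}

impl MockTvu {
    pub fn join(self) -> thread::Result<Option<TvuReturnType>> {
        // Propagates a panic as Err; otherwise returns the exit reason.
        self.handle.join()
    }
}

fn main() {
    let tvu = MockTvu {
        // Pretend ReplicateStage noticed it reached the rotation height.
        handle: thread::spawn(|| Some(TvuReturnType::LeaderRotation(20))),
    };
    match tvu.join().unwrap() {
        Some(TvuReturnType::LeaderRotation(h)) => println!("rotate at entry {}", h),
        None => println!("normal shutdown"),
    }
}
```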
@@ -81,10 +81,13 @@ pub struct Fullnode {
     bank: Arc<Bank>,
     crdt: Arc<RwLock<Crdt>>,
     ledger_path: String,
+    sigverify_disabled: bool,
     shared_window: window::SharedWindow,
     replicate_socket: Vec<UdpSocket>,
     repair_socket: UdpSocket,
     retransmit_socket: UdpSocket,
+    transaction_sockets: Vec<UdpSocket>,
+    broadcast_socket: UdpSocket,
     requests_socket: UdpSocket,
     respond_socket: UdpSocket,
 }
@@ -307,7 +310,6 @@ impl Fullnode {
                 .try_clone()
                 .expect("Failed to clone retransmit socket"),
             Some(ledger_path),
             exit.clone(),
         );
         let validator_state = ValidatorServices::new(tvu);
         node_role = Some(NodeRole::Validator(validator_state));
@@ -353,6 +355,7 @@ impl Fullnode {
             crdt,
             shared_window,
             bank,
+            sigverify_disabled,
             rpu,
             ncp,
             rpc_service,
@@ -362,6 +365,8 @@ impl Fullnode {
             replicate_socket: node.sockets.replicate,
             repair_socket: node.sockets.repair,
             retransmit_socket: node.sockets.retransmit,
+            transaction_sockets: node.sockets.transaction,
+            broadcast_socket: node.sockets.broadcast,
             requests_socket: node.sockets.requests,
             respond_socket: node.sockets.respond,
         }
@@ -417,13 +422,45 @@ impl Fullnode {
                 .try_clone()
                 .expect("Failed to clone retransmit socket"),
             Some(&self.ledger_path),
             self.exit.clone(),
         );
         let validator_state = ValidatorServices::new(tvu);
         self.node_role = Some(NodeRole::Validator(validator_state));
         Ok(())
     }
+
+    fn validator_to_leader(&mut self, entry_height: u64) {
+        self.crdt.write().unwrap().set_leader(self.keypair.pubkey());
+        let tick_duration = None;
+        // TODO: To light up PoH, uncomment the following line:
+        //let tick_duration = Some(Duration::from_millis(1000));
+        let (tpu, blob_receiver, tpu_exit) = Tpu::new(
+            self.keypair.clone(),
+            &self.bank,
+            &self.crdt,
+            tick_duration,
+            self.transaction_sockets
+                .iter()
+                .map(|s| s.try_clone().expect("Failed to clone transaction sockets"))
+                .collect(),
+            &self.ledger_path,
+            self.sigverify_disabled,
+            entry_height,
+        );
+
+        let broadcast_stage = BroadcastStage::new(
+            self.broadcast_socket
+                .try_clone()
+                .expect("Failed to clone broadcast socket"),
+            self.crdt.clone(),
+            self.shared_window.clone(),
+            entry_height,
+            blob_receiver,
+            tpu_exit,
+        );
+        let leader_state = LeaderServices::new(tpu, broadcast_stage);
+        self.node_role = Some(NodeRole::Leader(leader_state));
+    }

     pub fn handle_role_transition(&mut self) -> Result<Option<FullnodeReturnType>> {
         let node_role = self.node_role.take();
         match node_role {
@@ -435,6 +472,10 @@ impl Fullnode {
                 _ => Ok(None),
             },
             Some(NodeRole::Validator(validator_services)) => match validator_services.join()? {
+                Some(TvuReturnType::LeaderRotation(entry_height)) => {
+                    self.validator_to_leader(entry_height);
+                    Ok(Some(FullnodeReturnType::LeaderRotation))
+                }
                 _ => Ok(None),
             },
             None => Ok(None),
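For illustration, here is a self-contained sketch of how a caller might drive `handle_role_transition()` in a loop, continuing on each rotation and stopping otherwise. `MockNode` and its error type are hypothetical stand-ins; only the `FullnodeReturnType::LeaderRotation` variant comes from this diff:

```rust
// Mirrors the return type used by handle_role_transition() in this diff.
#[derive(Debug)]
pub enum FullnodeReturnType {
    LeaderRotation,
}

// Hypothetical node stand-in: the first transition is a rotation,
// the second is a normal shutdown.
struct MockNode {
    transitions_left: u32,
}

impl MockNode {
    fn handle_role_transition(&mut self) -> Result<Option<FullnodeReturnType>, String> {
        if self.transitions_left > 0 {
            self.transitions_left -= 1;
            Ok(Some(FullnodeReturnType::LeaderRotation))
        } else {
            Ok(None) // services exited without a rotation
        }
    }
}

fn main() -> Result<(), String> {
    let mut node = MockNode { transitions_left: 1 };
    loop {
        match node.handle_role_transition()? {
            // A rotation restarts the node in its new role; keep looping.
            Some(FullnodeReturnType::LeaderRotation) => println!("rotated roles"),
            // Anything else means the services shut down for good.
            None => break,
        }
    }
    Ok(())
}
```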
@@ -494,7 +535,9 @@ impl Service for Fullnode {

         match self.node_role {
             Some(NodeRole::Validator(validator_service)) => {
-                validator_service.join()?;
+                if let Some(TvuReturnType::LeaderRotation(_)) = validator_service.join()? {
+                    return Ok(Some(FullnodeReturnType::LeaderRotation));
+                }
             }
             Some(NodeRole::Leader(leader_service)) => {
                 if let Some(TpuReturnType::LeaderRotation) = leader_service.join()? {
@@ -512,11 +555,17 @@
 mod tests {
     use bank::Bank;
     use crdt::Node;
-    use fullnode::Fullnode;
+    use fullnode::{Fullnode, FullnodeReturnType};
     use ledger::genesis;
+    use packet::{make_consecutive_blobs, BlobRecycler};
     use service::Service;
     use signature::{Keypair, KeypairUtil};
+    use std::cmp;
     use std::fs::remove_dir_all;
+    use std::net::UdpSocket;
+    use std::sync::mpsc::channel;
+    use std::sync::Arc;
+    use streamer::responder;

     #[test]
     fn validator_exit() {
@@ -540,6 +589,7 @@ mod tests {
         v.close().unwrap();
         remove_dir_all(validator_ledger_path).unwrap();
     }

     #[test]
     fn validator_parallel_exit() {
         let mut ledger_paths = vec![];
@@ -578,4 +628,98 @@ mod tests {
             remove_dir_all(path).unwrap();
         }
     }
+
+    #[test]
+    fn test_validator_to_leader_transition() {
+        // Make a leader identity
+        let leader_keypair = Keypair::new();
+        let leader_node = Node::new_localhost_with_pubkey(leader_keypair.pubkey());
+        let leader_id = leader_node.info.id;
+        let leader_ncp = leader_node.info.contact_info.ncp;
+
+        // Start the validator node
+        let leader_rotation_interval = 10;
+        let (mint, validator_ledger_path) = genesis("test_validator_to_leader_transition", 10_000);
+        let validator_keypair = Keypair::new();
+        let validator_node = Node::new_localhost_with_pubkey(validator_keypair.pubkey());
+        let validator_info = validator_node.info.clone();
+        let mut validator = Fullnode::new(
+            validator_node,
+            &validator_ledger_path,
+            validator_keypair,
+            Some(leader_ncp),
+            false,
+            Some(leader_rotation_interval),
+        );
+
+        // Set the leader schedule for the validator
+        let my_leader_begin_epoch = 2;
+        for i in 0..my_leader_begin_epoch {
+            validator.set_scheduled_leader(leader_id, leader_rotation_interval * i);
+        }
+        validator.set_scheduled_leader(
+            validator_info.id,
+            my_leader_begin_epoch * leader_rotation_interval,
+        );
+
+        // Send blobs to the validator from our mock leader
+        let resp_recycler = BlobRecycler::default();
+        let t_responder = {
+            let (s_responder, r_responder) = channel();
+            let blob_sockets: Vec<Arc<UdpSocket>> = leader_node
+                .sockets
+                .replicate
+                .into_iter()
+                .map(Arc::new)
+                .collect();

+            let t_responder = responder(
+                "test_validator_to_leader_transition",
+                blob_sockets[0].clone(),
+                r_responder,
+            );
+
+            // Send the blobs out of order, in reverse. Also send an extra
+            // "extra_blobs" number of blobs to make sure the window stops in the right place.
+            let extra_blobs = cmp::max(leader_rotation_interval / 3, 1);
+            let total_blobs_to_send =
+                my_leader_begin_epoch * leader_rotation_interval + extra_blobs;
+            let genesis_entries = mint.create_entries();
+            let last_id = genesis_entries
+                .last()
+                .expect("expected at least one genesis entry")
+                .id;
+            let tvu_address = &validator_info.contact_info.tvu;
+            let msgs = make_consecutive_blobs(
+                leader_id,
+                total_blobs_to_send,
+                last_id,
+                &tvu_address,
+                &resp_recycler,
+            ).into_iter()
+                .rev()
+                .collect();
+            s_responder.send(msgs).expect("send");
+            t_responder
+        };
+
+        // Wait for validator to shut down tvu and restart tpu
+        match validator.handle_role_transition().unwrap() {
+            Some(FullnodeReturnType::LeaderRotation) => (),
+            _ => panic!("Expected reason for exit to be leader rotation"),
+        }
+
+        // Check the validator ledger to make sure it's the right height
+        let (_, entry_height, _) = Fullnode::new_bank_from_ledger(&validator_ledger_path);
+
+        assert_eq!(
+            entry_height,
+            my_leader_begin_epoch * leader_rotation_interval
+        );
+
+        // Shut down
+        t_responder.join().expect("responder thread join");
+        validator.close().unwrap();
+        remove_dir_all(&validator_ledger_path).unwrap();
+    }
 }
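The commit message mentions that `make_consecutive_blobs()` was rewritten, but its body is not part of this diff. The toy sketch below shows only the contract the test above relies on: a helper yielding blobs with consecutive window indices, which the caller reverses to simulate out-of-order delivery. The `Blob` struct and the helper's signature here are illustrative, not the crate's real `packet` types:

```rust
use std::net::SocketAddr;

// Illustrative stand-in for the crate's blob type: just enough state to
// show the consecutive-index contract.
#[derive(Debug)]
struct Blob {
    index: u64,       // position in the receiver's window
    addr: SocketAddr, // where the blob should be sent
}

// Toy version of make_consecutive_blobs(): indices 0..total, all aimed at
// the same destination. The real helper also carries entry data and ids.
fn make_consecutive_blobs(total: u64, addr: SocketAddr) -> Vec<Blob> {
    (0..total).map(|index| Blob { index, addr }).collect()
}

fn main() {
    let addr: SocketAddr = "127.0.0.1:9000".parse().unwrap();
    // As in the test above: build consecutive blobs, then send them in
    // reverse so the window has to reorder them.
    let msgs: Vec<Blob> = make_consecutive_blobs(5, addr)
        .into_iter()
        .rev()
        .collect();
    assert_eq!(msgs.first().unwrap().index, 4);
    assert_eq!(msgs.last().unwrap().index, 0);
    println!("{:?}", msgs);
}
```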