Speed up rotation (#2468)

Speed up leader-to-validator transitions
This commit is contained in:
Sagar Dhawan
2019-01-26 13:58:08 +05:30
committed by GitHub
parent 4bb6549895
commit d65e7b9fcc
17 changed files with 712 additions and 548 deletions

View File

@ -15,6 +15,8 @@ use solana::poh_service::NUM_TICKS_PER_SECOND;
use solana::result;
use solana::service::Service;
use solana::thin_client::{retry_get_balance, ThinClient};
use solana::tpu::TpuReturnType;
use solana::tvu::TvuReturnType;
use solana::vote_signer_proxy::VoteSignerProxy;
use solana_sdk::hash::Hash;
use solana_sdk::pubkey::Pubkey;
@ -160,7 +162,9 @@ fn test_multi_node_ledger_window() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
None,
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
@ -178,7 +182,9 @@ fn test_multi_node_ledger_window() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
@ -260,7 +266,9 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
None,
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
@ -292,7 +300,9 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
nodes.push(val);
@ -353,7 +363,9 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
nodes.push(val);
@ -441,7 +453,9 @@ fn test_multi_node_basic() {
Some(Arc::new(signer_proxy)),
None,
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
@ -469,7 +483,9 @@ fn test_multi_node_basic() {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
nodes.push(val);
@ -547,7 +563,9 @@ fn test_boot_validator_from_file() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
None,
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
let leader_balance =
@ -570,7 +588,9 @@ fn test_boot_validator_from_file() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
let mut client = mk_client(&validator_data);
@ -601,7 +621,9 @@ fn create_leader(
Some(signer),
None,
false,
LeaderScheduler::from_bootstrap_leader(leader_data.id),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_data.id,
))),
None,
);
(leader_data, leader_fullnode)
@ -679,7 +701,9 @@ fn test_leader_restart_validator_start_from_old_ledger() -> result::Result<()> {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
false,
LeaderScheduler::from_bootstrap_leader(leader_data.id),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_data.id,
))),
None,
);
@ -746,7 +770,9 @@ fn test_multi_node_dynamic_network() {
Some(Arc::new(signer_proxy)),
None,
true,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
@ -817,7 +843,9 @@ fn test_multi_node_dynamic_network() {
Some(Arc::new(signer_proxy)),
Some(leader_data.gossip),
true,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader(
leader_pubkey,
))),
None,
);
(rd, val)
@ -998,7 +1026,7 @@ fn test_leader_to_validator_transition() {
Some(Arc::new(signer_proxy)),
Some(leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1154,7 +1182,7 @@ fn test_leader_validator_basic() {
Some(Arc::new(signer_proxy)),
Some(leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1167,7 +1195,7 @@ fn test_leader_validator_basic() {
Some(Arc::new(signer_proxy)),
Some(leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1216,10 +1244,13 @@ fn test_leader_validator_basic() {
}
// Shut down
// stop the leader first so no more ticks/txs are created
leader.exit();
validator.exit();
leader.join().expect("Expected successful leader close");
validator
.close()
.join()
.expect("Expected successful validator close");
leader.close().expect("Expected successful leader close");
// Check the ledger of the validator to make sure the entry height is correct
// and that the old leader and the new leader's ledgers agree up to the point
@ -1242,27 +1273,31 @@ fn test_leader_validator_basic() {
}
}
fn run_node(
id: Pubkey,
fullnode: Arc<RwLock<Fullnode>>,
should_exit: Arc<AtomicBool>,
) -> JoinHandle<()> {
fn run_node(id: Pubkey, mut fullnode: Fullnode, should_exit: Arc<AtomicBool>) -> JoinHandle<()> {
Builder::new()
.name(format!("run_node-{:?}", id).to_string())
.spawn(move || loop {
if should_exit.load(Ordering::Relaxed) {
fullnode.close().expect("failed to close");
return;
}
if fullnode.read().unwrap().check_role_exited() {
match fullnode.write().unwrap().handle_role_transition().unwrap() {
Some(FullnodeReturnType::LeaderToValidatorRotation) => (),
Some(FullnodeReturnType::ValidatorToLeaderRotation) => (),
_ => {
panic!("Expected reason for exit to be leader rotation");
let should_be_fwdr = fullnode.role_notifiers.1.try_recv();
let should_be_leader = fullnode.role_notifiers.0.try_recv();
match should_be_leader {
Ok(TvuReturnType::LeaderRotation(tick_height, entry_height, last_entry_id)) => {
fullnode.validator_to_leader(tick_height, entry_height, last_entry_id);
}
Err(_) => match should_be_fwdr {
Ok(TpuReturnType::LeaderRotation) => {
fullnode
.leader_to_validator()
.expect("failed when transitioning to validator");
}
};
Err(_) => {
sleep(Duration::new(1, 0));
}
},
}
sleep(Duration::new(1, 0));
})
.unwrap()
}
@ -1358,7 +1393,7 @@ fn test_dropped_handoff_recovery() {
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1381,7 +1416,7 @@ fn test_dropped_handoff_recovery() {
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1409,7 +1444,7 @@ fn test_dropped_handoff_recovery() {
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);
@ -1535,7 +1570,7 @@ fn test_full_leader_validator_network() {
// during startup
let leader_keypair = node_keypairs.pop_front().unwrap();
let _leader_vote_keypair = vote_account_keypairs.pop_front().unwrap();
let mut nodes: Vec<Arc<RwLock<Fullnode>>> = vec![];
let mut schedules: Vec<Arc<RwLock<LeaderScheduler>>> = vec![];
let mut t_nodes = vec![];
info!("Start up the validators");
@ -1550,35 +1585,38 @@ fn test_full_leader_validator_network() {
let validator_id = kp.pubkey();
let validator_node = Node::new_localhost_with_pubkey(validator_id);
let signer_proxy = VoteSignerProxy::new(&kp, Box::new(LocalVoteSigner::default()));
let validator = Arc::new(RwLock::new(Fullnode::new(
let leader_scheduler =
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config)));
let validator = Fullnode::new(
validator_node,
&validator_ledger_path,
kp.clone(),
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
leader_scheduler.clone(),
None,
)));
);
nodes.push(validator.clone());
schedules.push(leader_scheduler);
t_nodes.push(run_node(validator_id, validator, exit.clone()));
}
info!("Start up the bootstrap leader");
let signer_proxy = VoteSignerProxy::new(&leader_keypair, Box::new(LocalVoteSigner::default()));
let bootstrap_leader = Arc::new(RwLock::new(Fullnode::new(
let leader_scheduler = Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config)));
let bootstrap_leader = Fullnode::new(
bootstrap_leader_node,
&bootstrap_leader_ledger_path,
leader_keypair.clone(),
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
leader_scheduler.clone(),
None,
)));
);
nodes.push(bootstrap_leader.clone());
schedules.push(leader_scheduler);
t_nodes.push(run_node(
bootstrap_leader_info.id,
bootstrap_leader,
@ -1600,10 +1638,9 @@ fn test_full_leader_validator_network() {
while num_reached_target_height != N + 1 {
num_reached_target_height = 0;
for n in nodes.iter() {
let node_lock = n.read().unwrap();
let ls_lock = node_lock.get_leader_scheduler();
if let Some(sh) = ls_lock.read().unwrap().last_seed_height {
for n in schedules.iter() {
let ls_lock = n.read().unwrap().last_seed_height;
if let Some(sh) = ls_lock {
if sh >= target_height {
num_reached_target_height += 1;
}
@ -1621,20 +1658,6 @@ fn test_full_leader_validator_network() {
t.join().unwrap();
}
info!("Exit all fullnodes");
for n in nodes {
let result = Arc::try_unwrap(n);
match result {
Ok(lock) => {
let f = lock
.into_inner()
.expect("RwLock for fullnode is still locked");
f.close().unwrap();
}
Err(_) => panic!("Multiple references to RwLock<FullNode> still exist"),
}
}
let mut node_entries = vec![];
info!("Check that all the ledgers match");
for ledger_path in ledger_paths.iter() {
@ -1699,6 +1722,8 @@ fn test_full_leader_validator_network() {
}
#[test]
#[ignore]
// TODO: This test relies on the TPU managing the ledger, which it no longer does. It cannot work without real TVUs.
fn test_broadcast_last_tick() {
solana_logger::setup();
// The number of validators
@ -1768,7 +1793,7 @@ fn test_broadcast_last_tick() {
Some(Arc::new(signer_proxy)),
Some(bootstrap_leader_info.gossip),
false,
LeaderScheduler::new(&leader_scheduler_config),
Arc::new(RwLock::new(LeaderScheduler::new(&leader_scheduler_config))),
None,
);