Skip leader slots until a vote lands (#15607) (#16147)

(cherry picked from commit b99ae8f334)

Co-authored-by: sakridge <sakridge@gmail.com>
Author: mergify[bot]
Date: 2021-03-26 19:07:24 +00:00 (committed by GitHub)
Parent: 1be045df94
Commit: d940c5b1a3

9 changed files with 117 additions and 25 deletions
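
In outline: a validator started with the new wait-for-vote behavior records the signatures of the votes it submits and refuses to produce blocks in its leader slots until one of those signatures shows up in a rooted bank. Validators that waited for supermajority at a cluster restart, or that pass --no-wait-for-vote-to-start-leader, skip the wait. The sketch below is a simplified model of that state machine, not the actual ReplayStage interfaces; u64 stands in for Signature and a closure for the rooted-bank lookup.

    // Simplified model of the gate this commit adds. The field and constant
    // names mirror the diff; everything around them is a stand-in.
    const MAX_VOTE_SIGNATURES: usize = 200;

    struct VoteGate {
        voted_signatures: Vec<u64>, // stand-in for solana_sdk::signature::Signature
        has_new_vote_been_rooted: bool,
    }

    impl VoteGate {
        fn new(wait_for_vote_to_start_leader: bool) -> Self {
            Self {
                voted_signatures: Vec::new(),
                // No waiting requested: behave as if a vote had already rooted.
                has_new_vote_been_rooted: !wait_for_vote_to_start_leader,
            }
        }

        // Mirrors the push/cap/clear logic added around vote transaction signing.
        fn record_vote(&mut self, signature: u64) {
            if !self.has_new_vote_been_rooted {
                self.voted_signatures.push(signature);
                if self.voted_signatures.len() > MAX_VOTE_SIGNATURES {
                    self.voted_signatures.remove(0);
                }
            } else {
                self.voted_signatures.clear();
            }
        }

        // Mirrors the check added to handle_new_root(); `landed` stands in for
        // new_root_bank.get_signature_status(signature).is_some().
        fn on_new_root(&mut self, landed: impl Fn(&u64) -> bool) {
            if !self.has_new_vote_been_rooted && self.voted_signatures.iter().any(|s| landed(s)) {
                self.has_new_vote_been_rooted = true;
                self.voted_signatures = Vec::new();
            }
        }

        // Mirrors the early return added to maybe_start_leader().
        fn may_start_leader(&self) -> bool {
            self.has_new_vote_been_rooted
        }
    }

    fn main() {
        let mut gate = VoteGate::new(true);
        assert!(!gate.may_start_leader()); // skip leader slots at first
        gate.record_vote(42);
        gate.on_new_root(|signature| *signature == 42); // the vote landed in a rooted bank
        assert!(gate.may_start_leader()); // leader slots are taken normally from here on
    }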

@@ -1423,6 +1423,8 @@ pub mod test {
             &AbsRequestSender::default(),
             None,
             &mut self.heaviest_subtree_fork_choice,
+            &mut true,
+            &mut Vec::new(),
         )
     }

@@ -37,6 +37,7 @@ use solana_sdk::{
     genesis_config::ClusterType,
     hash::Hash,
     pubkey::Pubkey,
+    signature::Signature,
     signature::{Keypair, Signer},
     timing::timestamp,
     transaction::Transaction,
@@ -57,6 +58,7 @@ use std::{
 pub const MAX_ENTRY_RECV_PER_ITER: usize = 512;
 pub const SUPERMINORITY_THRESHOLD: f64 = 1f64 / 3f64;
 pub const MAX_UNCONFIRMED_SLOTS: usize = 5;
+const MAX_VOTE_SIGNATURES: usize = 200;

 #[derive(PartialEq, Debug)]
 pub(crate) enum HeaviestForkFailures {
@@ -105,6 +107,7 @@ pub struct ReplayStageConfig {
     pub rewards_recorder_sender: Option<RewardsRecorderSender>,
     pub cache_block_time_sender: Option<CacheBlockTimeSender>,
     pub bank_notification_sender: Option<BankNotificationSender>,
+    pub wait_for_vote_to_start_leader: bool,
 }

 #[derive(Default)]
@@ -265,6 +268,7 @@
             rewards_recorder_sender,
             cache_block_time_sender,
             bank_notification_sender,
+            wait_for_vote_to_start_leader,
         } = config;

         trace!("replay stage");
@@ -294,6 +298,8 @@
         let mut partition_exists = false;
         let mut skipped_slots_info = SkippedSlotsInfo::default();
         let mut replay_timing = ReplayTiming::default();
+        let mut voted_signatures = Vec::new();
+        let mut has_new_vote_been_rooted = !wait_for_vote_to_start_leader;
         loop {
             let allocated = thread_mem_usage::Allocatedp::default();
@@ -481,6 +487,8 @@
                 &mut heaviest_subtree_fork_choice,
                 &cache_block_time_sender,
                 &bank_notification_sender,
+                &mut voted_signatures,
+                &mut has_new_vote_been_rooted,
             );
         };
         voting_time.stop();
@@ -572,6 +580,7 @@
             &progress,
             &retransmit_slots_sender,
             &mut skipped_slots_info,
+            has_new_vote_been_rooted,
         );

         let poh_bank = poh_recorder.lock().unwrap().bank();
@@ -887,7 +896,12 @@
         progress_map: &ProgressMap,
         retransmit_slots_sender: &RetransmitSlotsSender,
         skipped_slots_info: &mut SkippedSlotsInfo,
+        has_new_vote_been_rooted: bool,
     ) {
+        if !has_new_vote_been_rooted {
+            info!("Haven't landed a vote, so skipping my leader slot");
+            return;
+        }

         // all the individual calls to poh_recorder.lock() are designed to
         // increase granularity, decrease contention
@@ -1102,6 +1116,8 @@
         heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice,
         cache_block_time_sender: &Option<CacheBlockTimeSender>,
         bank_notification_sender: &Option<BankNotificationSender>,
+        vote_signatures: &mut Vec<Signature>,
+        has_new_vote_been_rooted: &mut bool,
     ) {
         if bank.is_empty() {
             inc_new_counter_info!("replay_stage-voted_empty_bank", 1);
@@ -1154,6 +1170,8 @@
             accounts_background_request_sender,
             highest_confirmed_root,
             heaviest_subtree_fork_choice,
+            has_new_vote_been_rooted,
+            vote_signatures,
         );
         subscriptions.notify_roots(rooted_slots);
         if let Some(sender) = bank_notification_sender {
@@ -1183,6 +1201,8 @@
             last_vote,
             &tower_slots,
             switch_fork_decision,
+            vote_signatures,
+            *has_new_vote_been_rooted,
         );
     }
@@ -1194,6 +1214,8 @@
         vote: Vote,
         tower: &[Slot],
         switch_fork_decision: &SwitchForkDecision,
+        vote_signatures: &mut Vec<Signature>,
+        has_new_vote_been_rooted: bool,
     ) {
         if authorized_voter_keypairs.is_empty() {
             return;
@@ -1263,6 +1285,14 @@
         let mut vote_tx = Transaction::new_with_payer(&[vote_ix], Some(&node_keypair.pubkey()));

+        if !has_new_vote_been_rooted {
+            vote_signatures.push(vote_tx.signatures[0]);
+            if vote_signatures.len() > MAX_VOTE_SIGNATURES {
+                vote_signatures.remove(0);
+            }
+        } else {
+            vote_signatures.clear();
+        }
         let blockhash = bank.last_blockhash();
         vote_tx.partial_sign(&[node_keypair.as_ref()], blockhash);
         vote_tx.partial_sign(&[authorized_voter_keypair.as_ref()], blockhash);
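
The tracked signature list is only needed while has_new_vote_been_rooted is false, and it is bounded: once it grows past MAX_VOTE_SIGNATURES, the oldest entry is dropped. A small self-contained illustration of that eviction behavior, with u64 standing in for Signature:

    const MAX_VOTE_SIGNATURES: usize = 200;

    fn main() {
        let mut vote_signatures: Vec<u64> = Vec::new();
        for signature in 0..=MAX_VOTE_SIGNATURES as u64 {
            // Same push/trim pattern as the diff: cap the list, dropping the oldest.
            vote_signatures.push(signature);
            if vote_signatures.len() > MAX_VOTE_SIGNATURES {
                vote_signatures.remove(0);
            }
        }
        assert_eq!(vote_signatures.len(), MAX_VOTE_SIGNATURES);
        assert_eq!(vote_signatures.first(), Some(&1)); // signature 0 was evicted
        assert_eq!(vote_signatures.last(), Some(&(MAX_VOTE_SIGNATURES as u64)));
    }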
@@ -1866,6 +1896,8 @@
         accounts_background_request_sender: &AbsRequestSender,
         highest_confirmed_root: Option<Slot>,
         heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice,
+        has_new_vote_been_rooted: &mut bool,
+        voted_signatures: &mut Vec<Signature>,
     ) {
         bank_forks.write().unwrap().set_root(
             new_root,
@@ -1873,6 +1905,18 @@
             highest_confirmed_root,
         );
         let r_bank_forks = bank_forks.read().unwrap();
+        let new_root_bank = &r_bank_forks[new_root];
+        if !*has_new_vote_been_rooted {
+            for signature in voted_signatures.iter() {
+                if new_root_bank.get_signature_status(signature).is_some() {
+                    *has_new_vote_been_rooted = true;
+                    break;
+                }
+            }
+            if *has_new_vote_been_rooted {
+                std::mem::take(voted_signatures);
+            }
+        }
         progress.handle_new_root(&r_bank_forks);
         heaviest_subtree_fork_choice.set_root(new_root);
     }
@@ -2280,6 +2324,8 @@ pub(crate) mod tests {
             &AbsRequestSender::default(),
             None,
             &mut heaviest_subtree_fork_choice,
+            &mut true,
+            &mut Vec::new(),
         );
         assert_eq!(bank_forks.read().unwrap().root(), root);
         assert_eq!(progress.len(), 1);
@@ -2324,6 +2370,8 @@
             &AbsRequestSender::default(),
             Some(confirmed_root),
             &mut heaviest_subtree_fork_choice,
+            &mut true,
+            &mut Vec::new(),
         );
         assert_eq!(bank_forks.read().unwrap().root(), root);
         assert!(bank_forks.read().unwrap().get(confirmed_root).is_some());

@@ -422,6 +422,7 @@ impl TestValidator {
             warp_slot: config.warp_slot,
             bpf_jit: !config.no_bpf_jit,
             validator_exit: config.validator_exit.clone(),
+            no_wait_for_vote_to_start_leader: true,
             ..ValidatorConfig::default()
         };

@@ -84,6 +84,7 @@ pub struct TvuConfig {
     pub use_index_hash_calculation: bool,
     pub rocksdb_compaction_interval: Option<u64>,
     pub rocksdb_max_compaction_jitter: Option<u64>,
+    pub wait_for_vote_to_start_leader: bool,
 }

 impl Tvu {
@@ -254,6 +255,7 @@ impl Tvu {
             rewards_recorder_sender,
             cache_block_time_sender,
             bank_notification_sender,
+            wait_for_vote_to_start_leader: tvu_config.wait_for_vote_to_start_leader,
         };

         let replay_stage = ReplayStage::new(

@@ -130,6 +130,7 @@ pub struct ValidatorConfig {
     pub accounts_db_use_index_hash_calculation: bool,
     pub tpu_coalesce_ms: u64,
     pub validator_exit: Arc<RwLock<ValidatorExit>>,
+    pub no_wait_for_vote_to_start_leader: bool,
 }

 impl Default for ValidatorConfig {
@@ -184,6 +185,7 @@ impl Default for ValidatorConfig {
             accounts_db_use_index_hash_calculation: true,
             tpu_coalesce_ms: DEFAULT_TPU_COALESCE_MS,
             validator_exit: Arc::new(RwLock::new(ValidatorExit::default())),
+            no_wait_for_vote_to_start_leader: true,
         }
     }
 }
@@ -628,15 +630,20 @@ impl Validator {
             check_poh_speed(&genesis_config, None);
         }

-        if wait_for_supermajority(
+        let waited_for_supermajority = if let Ok(waited) = wait_for_supermajority(
             config,
             &bank,
             &cluster_info,
             rpc_override_health_check,
             &start_progress,
         ) {
+            waited
+        } else {
             abort();
-        }
+        };
+
+        let wait_for_vote_to_start_leader =
+            !waited_for_supermajority && !config.no_wait_for_vote_to_start_leader;

         let poh_service = PohService::new(
             poh_recorder.clone(),
@@ -721,6 +728,7 @@
                 use_index_hash_calculation: config.accounts_db_use_index_hash_calculation,
                 rocksdb_compaction_interval: config.rocksdb_compaction_interval,
                 rocksdb_max_compaction_jitter: config.rocksdb_compaction_interval,
+                wait_for_vote_to_start_leader,
             },
             &max_slots,
         );
@@ -1287,17 +1295,28 @@ fn initialize_rpc_transaction_history_services(
     }
 }

-// Return true on error, indicating the validator should exit.
+#[derive(Debug, PartialEq)]
+enum ValidatorError {
+    BadExpectedBankHash,
+    NotEnoughLedgerData,
+}
+
+// Return if the validator waited on other nodes to start. In this case
+// it should not wait for one of it's votes to land to produce blocks
+// because if the whole network is waiting, then it will stall.
+//
+// Error indicates that a bad hash was encountered or another condition
+// that is unrecoverable and the validator should exit.
 fn wait_for_supermajority(
     config: &ValidatorConfig,
     bank: &Bank,
     cluster_info: &ClusterInfo,
     rpc_override_health_check: Arc<AtomicBool>,
     start_progress: &Arc<RwLock<ValidatorStartProgress>>,
-) -> bool {
+) -> Result<bool, ValidatorError> {
     if let Some(wait_for_supermajority) = config.wait_for_supermajority {
         match wait_for_supermajority.cmp(&bank.slot()) {
-            std::cmp::Ordering::Less => return false,
+            std::cmp::Ordering::Less => return Ok(false),
             std::cmp::Ordering::Greater => {
                 error!(
                     "Ledger does not have enough data to wait for supermajority, \
@@ -1305,12 +1324,12 @@ fn wait_for_supermajority(
                     bank.slot(),
                     wait_for_supermajority
                 );
-                return true;
+                return Err(ValidatorError::NotEnoughLedgerData);
             }
             _ => {}
         }
     } else {
-        return false;
+        return Ok(false);
     }

     if let Some(expected_bank_hash) = config.expected_bank_hash {
@@ -1320,7 +1339,7 @@ fn wait_for_supermajority(
                 bank.hash(),
                 expected_bank_hash
             );
-            return true;
+            return Err(ValidatorError::BadExpectedBankHash);
         }
     }
@@ -1345,7 +1364,7 @@ fn wait_for_supermajority(
         sleep(Duration::new(1, 0));
     }
     rpc_override_health_check.store(false, Ordering::Relaxed);
-    false
+    Ok(true)
 }

 fn report_target_features() {
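
With the signature change, wait_for_supermajority distinguishes three outcomes: Ok(false) when there was nothing to wait for, Ok(true) when the validator actually waited on the cluster, and Err(..) for unrecoverable misconfiguration. A hedged sketch of how a caller consumes that result, mirroring the Validator::new change above; the stub function and panic! here stand in for the real call and abort():

    #[derive(Debug, PartialEq)]
    #[allow(dead_code)]
    enum ValidatorError {
        BadExpectedBankHash,
        NotEnoughLedgerData,
    }

    // Stand-in for the real wait_for_supermajority(), which takes the config,
    // bank, cluster_info, and so on; here the outcome is simply passed through.
    fn wait_for_supermajority_stub(
        outcome: Result<bool, ValidatorError>,
    ) -> Result<bool, ValidatorError> {
        outcome
    }

    fn main() {
        let no_wait_for_vote_to_start_leader = false;

        let waited_for_supermajority = if let Ok(waited) = wait_for_supermajority_stub(Ok(true)) {
            waited
        } else {
            panic!("unrecoverable startup error"); // the real code calls abort()
        };

        // If the whole cluster restarted together, do not also wait for a vote
        // to land, or every node would be waiting on every other node.
        let wait_for_vote_to_start_leader =
            !waited_for_supermajority && !no_wait_for_vote_to_start_leader;
        assert!(!wait_for_vote_to_start_leader);
    }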
@@ -1636,17 +1655,21 @@ mod tests {
             &cluster_info,
             rpc_override_health_check.clone(),
             &start_progress,
-        ));
+        )
+        .unwrap());

         // bank=0, wait=1, should fail
         config.wait_for_supermajority = Some(1);
-        assert!(wait_for_supermajority(
-            &config,
-            &bank,
-            &cluster_info,
-            rpc_override_health_check.clone(),
-            &start_progress,
-        ));
+        assert_eq!(
+            wait_for_supermajority(
+                &config,
+                &bank,
+                &cluster_info,
+                rpc_override_health_check.clone(),
+                &start_progress,
+            ),
+            Err(ValidatorError::NotEnoughLedgerData)
+        );

         // bank=1, wait=0, should pass, bank is past the wait slot
         let bank = Bank::new_from_parent(&bank, &Pubkey::default(), 1);
@@ -1657,18 +1680,22 @@
             &cluster_info,
             rpc_override_health_check.clone(),
             &start_progress,
-        ));
+        )
+        .unwrap());

         // bank=1, wait=1, equal, but bad hash provided
         config.wait_for_supermajority = Some(1);
         config.expected_bank_hash = Some(hash(&[1]));
-        assert!(wait_for_supermajority(
-            &config,
-            &bank,
-            &cluster_info,
-            rpc_override_health_check,
-            &start_progress,
-        ));
+        assert_eq!(
+            wait_for_supermajority(
+                &config,
+                &bank,
+                &cluster_info,
+                rpc_override_health_check,
+                &start_progress,
+            ),
+            Err(ValidatorError::BadExpectedBankHash)
+        );
     }

     #[test]

@@ -53,6 +53,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
         tpu_coalesce_ms: config.tpu_coalesce_ms,
         validator_exit: Arc::new(RwLock::new(ValidatorExit::default())),
         poh_hashes_per_batch: config.poh_hashes_per_batch,
+        no_wait_for_vote_to_start_leader: config.no_wait_for_vote_to_start_leader,
     }
 }

@@ -105,6 +105,7 @@ args+=(
   --vote-account "$vote_account"
   --rpc-faucet-address 127.0.0.1:9900
   --no-poh-speed-test
+  --no-wait-for-vote-to-start-leader
 )
 default_arg --gossip-port 8001
 default_arg --log -

run.sh

@@ -105,6 +105,7 @@ args=(
   --init-complete-file "$dataDir"/init-completed
   --snapshot-compression none
   --require-tower
+  --no-wait-for-vote-to-start-leader
 )
 # shellcheck disable=SC2086
 solana-validator "${args[@]}" $SOLANA_RUN_SH_VALIDATOR_ARGS &

@@ -1368,6 +1368,14 @@ pub fn main() {
                 .help("After processing the ledger and the next slot is SLOT, wait until a \
                        supermajority of stake is visible on gossip before starting PoH"),
         )
+        .arg(
+            Arg::with_name("no_wait_for_vote_to_start_leader")
+                .hidden(true)
+                .long("no-wait-for-vote-to-start-leader")
+                .help("If the validator starts up with no ledger, it will wait to start block
+                      production until it sees a vote land in a rooted slot. This prevents
+                      double signing. Turn off to risk double signing a block."),
+        )
         .arg(
             Arg::with_name("hard_forks")
                 .long("hard-fork")
@@ -1997,6 +2005,7 @@ pub fn main() {
         accounts_db_test_hash_calculation: matches.is_present("accounts_db_test_hash_calculation"),
         accounts_db_use_index_hash_calculation: matches.is_present("accounts_db_index_hashing"),
         tpu_coalesce_ms,
+        no_wait_for_vote_to_start_leader: matches.is_present("no_wait_for_vote_to_start_leader"),
         ..ValidatorConfig::default()
     };