diff --git a/book/src/implemented-proposals/tower-bft.md b/book/src/implemented-proposals/tower-bft.md index 8acf2f12ba..d991a75c9e 100644 --- a/book/src/implemented-proposals/tower-bft.md +++ b/book/src/implemented-proposals/tower-bft.md @@ -2,7 +2,7 @@ This design describes Solana's _Tower BFT_ algorithm. It addresses the following problems: -* Some forks may not end up accepted by the super-majority of the cluster, and voters need to recover from voting on such forks. +* Some forks may not end up accepted by the supermajority of the cluster, and voters need to recover from voting on such forks. * Many forks may be votable by different voters, and each voter may see a different set of votable forks. The selected forks should eventually converge for the cluster. * Reward based votes have an associated risk. Voters should have the ability to configure how much risk they take on. * The [cost of rollback](tower-bft.md#cost-of-rollback) needs to be computable. It is important to clients that rely on some measurable form of Consistency. The costs to break consistency need to be computable, and increase super-linearly for older votes. diff --git a/book/src/proposals/staking-rewards.md b/book/src/proposals/staking-rewards.md index 4ce69cce11..d6465f6985 100644 --- a/book/src/proposals/staking-rewards.md +++ b/book/src/proposals/staking-rewards.md @@ -91,5 +91,5 @@ This is an area currently under exploration As discussed in the [Economic Design](../implemented-proposals/ed_overview/) section, annual validator interest rates are to be specified as a function of total percentage of circulating supply that has been staked. The cluster rewards validators who are online and actively participating in the validation process throughout the entirety of their _validation period_. For validators that go offline/fail to validate transactions during this period, their annual reward is effectively reduced. 
-Similarly, we may consider an algorithmic reduction in a validator's active amount staked amount in the case that they are offline. I.e. if a validator is inactive for some amount of time, either due to a partition or otherwise, the amount of their stake that is considered ‘active’ \(eligible to earn rewards\) may be reduced. This design would be structured to help long-lived partitions to eventually reach finality on their respective chains as the % of non-voting total stake is reduced over time until a super-majority can be achieved by the active validators in each partition. Similarly, upon re-engaging, the ‘active’ amount staked will come back online at some defined rate. Different rates of stake reduction may be considered depending on the size of the partition/active set. +Similarly, we may consider an algorithmic reduction in a validator's active staked amount in the case that they are offline. I.e. if a validator is inactive for some amount of time, either due to a partition or otherwise, the amount of their stake that is considered ‘active’ \(eligible to earn rewards\) may be reduced. This design would be structured to help long-lived partitions to eventually reach finality on their respective chains as the % of non-voting total stake is reduced over time until a supermajority can be achieved by the active validators in each partition. Similarly, upon re-engaging, the ‘active’ amount staked will come back online at some defined rate. Different rates of stake reduction may be considered depending on the size of the partition/active set. 
diff --git a/core/src/validator.rs b/core/src/validator.rs index af504ebfa1..70b7e0d428 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -48,7 +48,8 @@ use std::{ sync::atomic::{AtomicBool, Ordering}, sync::mpsc::Receiver, sync::{Arc, Mutex, RwLock}, - thread::Result, + thread::{sleep, Result}, + time::Duration, }; #[derive(Clone, Debug)] @@ -67,6 +68,7 @@ pub struct ValidatorConfig { pub broadcast_stage_type: BroadcastStageType, pub partition_cfg: Option, pub fixed_leader_schedule: Option, + pub wait_for_supermajority: bool, } impl Default for ValidatorConfig { @@ -86,6 +88,7 @@ impl Default for ValidatorConfig { broadcast_stage_type: BroadcastStageType::Standard, partition_cfg: None, fixed_leader_schedule: None, + wait_for_supermajority: false, } } } @@ -138,27 +141,7 @@ impl Validator { warn!("identity pubkey: {:?}", id); warn!("vote pubkey: {:?}", vote_account); - warn!( - "CUDA is {}abled", - if solana_perf::perf_libs::api().is_some() { - "en" - } else { - "dis" - } - ); - - // Validator binaries built on a machine with AVX support will generate invalid opcodes - // when run on machines without AVX causing a non-obvious process abort. Instead detect - // the mismatch and error cleanly. - #[target_feature(enable = "avx")] - { - if is_x86_feature_detected!("avx") { - info!("AVX detected"); - } else { - error!("Your machine does not have AVX support, please rebuild from source on your machine"); - process::exit(1); - } - } + report_target_features(); info!("entrypoint: {:?}", entrypoint_info_option); @@ -293,14 +276,7 @@ impl Validator { if config.snapshot_config.is_some() { poh_recorder.set_bank(&bank); } - let poh_recorder = Arc::new(Mutex::new(poh_recorder)); - let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit); - assert_eq!( - blocktree.new_shreds_signals.len(), - 1, - "New shred signal for the TVU should be the same as the clear bank signal." 
-        );
 
         let ip_echo_server = solana_net_utils::ip_echo_server(node.sockets.ip_echo.unwrap());
 
@@ -321,6 +297,22 @@ impl Validator {
                 .set_entrypoint(entrypoint_info.clone());
         }
 
+        if config.wait_for_supermajority {
+            info!(
+                "Waiting more than 66% of activated stake at slot {} to be in gossip...",
+                bank.slot()
+            );
+            loop {
+                let gossip_stake_percent = get_stake_percent_in_gossip(&bank, &cluster_info);
+
+                info!("{}% of activated stake in gossip", gossip_stake_percent,);
+                if gossip_stake_percent > 66 {
+                    break;
+                }
+                sleep(Duration::new(1, 0));
+            }
+        }
+
         let sockets = Sockets {
             repair: node
                 .sockets
@@ -353,6 +345,13 @@ impl Validator {
             Some(voting_keypair)
         };
 
+        let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit);
+        assert_eq!(
+            blocktree.new_shreds_signals.len(),
+            1,
+            "New shred signal for the TVU should be the same as the clear bank signal."
+        );
+
         let tvu = Tvu::new(
             vote_account,
             voting_keypair,
@@ -579,6 +578,67 @@ pub fn new_validator_for_tests() -> (Validator, ContactInfo, Keypair, PathBuf) {
     (node, contact_info, mint_keypair, ledger_path)
 }
 
+fn report_target_features() {
+    warn!(
+        "CUDA is {}abled",
+        if solana_perf::perf_libs::api().is_some() {
+            "en"
+        } else {
+            "dis"
+        }
+    );
+
+    // Validator binaries built on a machine with AVX support will generate invalid opcodes
+    // when run on machines without AVX causing a non-obvious process abort.  Instead detect
+    // the mismatch and error cleanly.
+    #[target_feature(enable = "avx")]
+    {
+        if is_x86_feature_detected!("avx") {
+            info!("AVX detected");
+        } else {
+            error!("Your machine does not have AVX support, please rebuild from source on your machine");
+            process::exit(1);
+        }
+    }
+}
+
+// Get the activated stake percentage (based on the provided bank) that is visible in gossip.
+// Returns 0 when the bank has no activated stake, so callers keep waiting instead of panicking.
+fn get_stake_percent_in_gossip(
+    bank: &Arc<Bank>,
+    cluster_info: &Arc<RwLock<ClusterInfo>>,
+) -> u64 {
+    let mut gossip_stake: u64 = 0;
+    let mut total_activated_stake: u64 = 0;
+    let tvu_peers = cluster_info.read().unwrap().tvu_peers();
+
+    for (activated_stake, vote_account) in bank.vote_accounts().values() {
+        let vote_state =
+            solana_vote_program::vote_state::VoteState::from(&vote_account).unwrap_or_default();
+        total_activated_stake += activated_stake;
+        if tvu_peers
+            .iter()
+            .any(|peer| peer.id == vote_state.node_pubkey)
+        {
+            trace!(
+                "observed {} in gossip, (activated_stake={})",
+                vote_state.node_pubkey,
+                activated_stake
+            );
+            gossip_stake += activated_stake;
+        }
+    }
+
+    // Guard the division: a bank without any activated stake would otherwise panic here.
+    if total_activated_stake == 0 {
+        return 0;
+    }
+
+    // Widen to u128 so `gossip_stake * 100` cannot overflow u64 for very large stakes; the
+    // quotient is at most 100, so narrowing back to u64 is lossless.
+    (u128::from(gossip_stake) * 100 / u128::from(total_activated_stake)) as u64
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/validator/src/main.rs b/validator/src/main.rs
index 59b1ee3f9f..86fc793d0e 100644
--- a/validator/src/main.rs
+++ b/validator/src/main.rs
@@ -537,6 +537,12 @@ pub fn main() {
                 .takes_value(true)
                 .help("Redirect logging to the specified file, '-' for standard error"),
         )
+        .arg(
+            Arg::with_name("wait_for_supermajority")
+                .long("wait-for-supermajority")
+                .takes_value(false)
+                .help("After processing the ledger, wait until a supermajority of stake is visible on gossip before starting PoH"),
+        )
         .get_matches();
 
     let identity_keypair = Arc::new(
@@ -582,6 +588,7 @@ pub fn main() {
     validator_config.dev_halt_at_slot = value_t!(matches, "dev_halt_at_slot", Slot).ok();
 
     validator_config.rpc_config.enable_validator_exit = matches.is_present("enable_rpc_exit");
+    validator_config.wait_for_supermajority = matches.is_present("wait_for_supermajority");
 
     validator_config.rpc_config.drone_addr =
matches.value_of("rpc_drone_addr").map(|address| { solana_net_utils::parse_host_port(address).expect("failed to parse drone address")