filters for recent contact-infos when checking for live stake (#19204)
Contact-infos are saved to disk (https://github.com/solana-labs/solana/blob/9dfeee299/gossip/src/cluster_info.rs#L1678-L1683) and restored on validator start-up (https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L450). Entries for staked nodes will not expire until an epoch later. So when the validator checks for online stake, it erroneously picks up contact-infos restored from disk, which breaks the entire wait-for-supermajority logic (https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L1515-L1561). This commit adds an extra check for the age of contact-info entries and filters out old ones.
This commit is contained in:
		| @@ -1,6 +1,7 @@ | |||||||
| //! The `validator` module hosts all the validator microservices. | //! The `validator` module hosts all the validator microservices. | ||||||
|  |  | ||||||
| use crate::{ | use { | ||||||
|  |     crate::{ | ||||||
|         broadcast_stage::BroadcastStageType, |         broadcast_stage::BroadcastStageType, | ||||||
|         cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService}, |         cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService}, | ||||||
|         cluster_info_vote_listener::VoteTracker, |         cluster_info_vote_listener::VoteTracker, | ||||||
| @@ -16,33 +17,34 @@ use crate::{ | |||||||
|         tower_storage::TowerStorage, |         tower_storage::TowerStorage, | ||||||
|         tpu::{Tpu, DEFAULT_TPU_COALESCE_MS}, |         tpu::{Tpu, DEFAULT_TPU_COALESCE_MS}, | ||||||
|         tvu::{Sockets, Tvu, TvuConfig}, |         tvu::{Sockets, Tvu, TvuConfig}, | ||||||
| }; |     }, | ||||||
| use crossbeam_channel::{bounded, unbounded}; |     crossbeam_channel::{bounded, unbounded}, | ||||||
| use rand::{thread_rng, Rng}; |     rand::{thread_rng, Rng}, | ||||||
| use solana_entry::poh::compute_hash_time_ns; |     solana_entry::poh::compute_hash_time_ns, | ||||||
| use solana_gossip::{ |     solana_gossip::{ | ||||||
|         cluster_info::{ |         cluster_info::{ | ||||||
|             ClusterInfo, Node, DEFAULT_CONTACT_DEBUG_INTERVAL_MILLIS, |             ClusterInfo, Node, DEFAULT_CONTACT_DEBUG_INTERVAL_MILLIS, | ||||||
|             DEFAULT_CONTACT_SAVE_INTERVAL_MILLIS, |             DEFAULT_CONTACT_SAVE_INTERVAL_MILLIS, | ||||||
|         }, |         }, | ||||||
|         contact_info::ContactInfo, |         contact_info::ContactInfo, | ||||||
|  |         crds_gossip_pull::CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, | ||||||
|         gossip_service::GossipService, |         gossip_service::GossipService, | ||||||
| }; |     }, | ||||||
| use solana_ledger::{ |     solana_ledger::{ | ||||||
|         bank_forks_utils, |         bank_forks_utils, | ||||||
|         blockstore::{Blockstore, BlockstoreSignals, CompletedSlotsReceiver, PurgeType}, |         blockstore::{Blockstore, BlockstoreSignals, CompletedSlotsReceiver, PurgeType}, | ||||||
|         blockstore_db::BlockstoreRecoveryMode, |         blockstore_db::BlockstoreRecoveryMode, | ||||||
|         blockstore_processor::{self, TransactionStatusSender}, |         blockstore_processor::{self, TransactionStatusSender}, | ||||||
|         leader_schedule::FixedSchedule, |         leader_schedule::FixedSchedule, | ||||||
|         leader_schedule_cache::LeaderScheduleCache, |         leader_schedule_cache::LeaderScheduleCache, | ||||||
| }; |     }, | ||||||
| use solana_measure::measure::Measure; |     solana_measure::measure::Measure, | ||||||
| use solana_metrics::datapoint_info; |     solana_metrics::datapoint_info, | ||||||
| use solana_poh::{ |     solana_poh::{ | ||||||
|         poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS}, |         poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS}, | ||||||
|         poh_service::{self, PohService}, |         poh_service::{self, PohService}, | ||||||
| }; |     }, | ||||||
| use solana_rpc::{ |     solana_rpc::{ | ||||||
|         max_slots::MaxSlots, |         max_slots::MaxSlots, | ||||||
|         optimistically_confirmed_bank_tracker::{ |         optimistically_confirmed_bank_tracker::{ | ||||||
|             OptimisticallyConfirmedBank, OptimisticallyConfirmedBankTracker, |             OptimisticallyConfirmedBank, OptimisticallyConfirmedBankTracker, | ||||||
| @@ -53,8 +55,8 @@ use solana_rpc::{ | |||||||
|         rpc_service::JsonRpcService, |         rpc_service::JsonRpcService, | ||||||
|         rpc_subscriptions::RpcSubscriptions, |         rpc_subscriptions::RpcSubscriptions, | ||||||
|         transaction_status_service::TransactionStatusService, |         transaction_status_service::TransactionStatusService, | ||||||
| }; |     }, | ||||||
| use solana_runtime::{ |     solana_runtime::{ | ||||||
|         accounts_db::AccountShrinkThreshold, |         accounts_db::AccountShrinkThreshold, | ||||||
|         accounts_index::AccountSecondaryIndexes, |         accounts_index::AccountSecondaryIndexes, | ||||||
|         bank::Bank, |         bank::Bank, | ||||||
| @@ -64,8 +66,8 @@ use solana_runtime::{ | |||||||
|         snapshot_archive_info::SnapshotArchiveInfoGetter, |         snapshot_archive_info::SnapshotArchiveInfoGetter, | ||||||
|         snapshot_config::SnapshotConfig, |         snapshot_config::SnapshotConfig, | ||||||
|         snapshot_utils, |         snapshot_utils, | ||||||
| }; |     }, | ||||||
| use solana_sdk::{ |     solana_sdk::{ | ||||||
|         clock::Slot, |         clock::Slot, | ||||||
|         epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET, |         epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET, | ||||||
|         exit::Exit, |         exit::Exit, | ||||||
| @@ -75,19 +77,22 @@ use solana_sdk::{ | |||||||
|         shred_version::compute_shred_version, |         shred_version::compute_shred_version, | ||||||
|         signature::{Keypair, Signer}, |         signature::{Keypair, Signer}, | ||||||
|         timing::timestamp, |         timing::timestamp, | ||||||
| }; |     }, | ||||||
| use solana_streamer::socket::SocketAddrSpace; |     solana_streamer::socket::SocketAddrSpace, | ||||||
| use solana_vote_program::vote_state::VoteState; |     solana_vote_program::vote_state::VoteState, | ||||||
| use std::{ |     std::{ | ||||||
|     collections::HashSet, |         collections::{HashMap, HashSet}, | ||||||
|         net::SocketAddr, |         net::SocketAddr, | ||||||
|         ops::Deref, |         ops::Deref, | ||||||
|         path::{Path, PathBuf}, |         path::{Path, PathBuf}, | ||||||
|     sync::atomic::{AtomicBool, AtomicU64, Ordering}, |         sync::{ | ||||||
|     sync::mpsc::Receiver, |             atomic::{AtomicBool, AtomicU64, Ordering}, | ||||||
|     sync::{Arc, Mutex, RwLock}, |             mpsc::Receiver, | ||||||
|  |             Arc, Mutex, RwLock, | ||||||
|  |         }, | ||||||
|         thread::{sleep, Builder, JoinHandle}, |         thread::{sleep, Builder, JoinHandle}, | ||||||
|         time::{Duration, Instant}, |         time::{Duration, Instant}, | ||||||
|  |     }, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000; | const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000; | ||||||
| @@ -1521,7 +1526,20 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo | |||||||
|     let mut offline_nodes = vec![]; |     let mut offline_nodes = vec![]; | ||||||
|  |  | ||||||
|     let mut total_activated_stake = 0; |     let mut total_activated_stake = 0; | ||||||
|     let all_tvu_peers = cluster_info.all_tvu_peers(); |     let now = timestamp(); | ||||||
|  |     // Nodes contact infos are saved to disk and restored on validator startup. | ||||||
|  |     // Staked nodes entries will not expire until an epoch after. So it | ||||||
|  |     // is necessary here to filter for recent entries to establish liveness. | ||||||
|  |     let peers: HashMap<_, _> = cluster_info | ||||||
|  |         .all_tvu_peers() | ||||||
|  |         .into_iter() | ||||||
|  |         .filter(|node| { | ||||||
|  |             let age = now.saturating_sub(node.wallclock); | ||||||
|  |             // Contact infos are refreshed twice during this period. | ||||||
|  |             age < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS | ||||||
|  |         }) | ||||||
|  |         .map(|node| (node.id, node)) | ||||||
|  |         .collect(); | ||||||
|     let my_shred_version = cluster_info.my_shred_version(); |     let my_shred_version = cluster_info.my_shred_version(); | ||||||
|     let my_id = cluster_info.id(); |     let my_id = cluster_info.id(); | ||||||
|  |  | ||||||
| @@ -1537,10 +1555,7 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo | |||||||
|             .map(|vote_state| vote_state.node_pubkey) |             .map(|vote_state| vote_state.node_pubkey) | ||||||
|             .unwrap_or_default(); |             .unwrap_or_default(); | ||||||
|  |  | ||||||
|         if let Some(peer) = all_tvu_peers |         if let Some(peer) = peers.get(&vote_state_node_pubkey) { | ||||||
|             .iter() |  | ||||||
|             .find(|peer| peer.id == vote_state_node_pubkey) |  | ||||||
|         { |  | ||||||
|             if peer.shred_version == my_shred_version { |             if peer.shred_version == my_shred_version { | ||||||
|                 trace!( |                 trace!( | ||||||
|                     "observed {} in gossip, (activated_stake={})", |                     "observed {} in gossip, (activated_stake={})", | ||||||
|   | |||||||
| @@ -265,7 +265,7 @@ fn spy( | |||||||
|             .into_iter() |             .into_iter() | ||||||
|             .map(|x| x.0) |             .map(|x| x.0) | ||||||
|             .collect::<Vec<_>>(); |             .collect::<Vec<_>>(); | ||||||
|         tvu_peers = spy_ref.all_tvu_peers().into_iter().collect::<Vec<_>>(); |         tvu_peers = spy_ref.all_tvu_peers(); | ||||||
|  |  | ||||||
|         let found_node_by_pubkey = if let Some(pubkey) = find_node_by_pubkey { |         let found_node_by_pubkey = if let Some(pubkey) = find_node_by_pubkey { | ||||||
|             all_peers.iter().any(|x| x.id == pubkey) |             all_peers.iter().any(|x| x.id == pubkey) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user