filters for recent contact-infos when checking for live stake (#19204)
Contact-infos are saved to disk: https://github.com/solana-labs/solana/blob/9dfeee299/gossip/src/cluster_info.rs#L1678-L1683 and restored on validator start-up: https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L450 Staked nodes entries will not expire until an epoch after. So when the validator checks for online stake it is erroneously picking up contact-infos restored from disk, which breaks the entire wait-for-supermajority logic: https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L1515-L1561 This commit adds an extra check for the age of contact-info entries and filters out old ones.
This commit is contained in:
		@@ -1,93 +1,98 @@
 | 
			
		||||
//! The `validator` module hosts all the validator microservices.
 | 
			
		||||
 | 
			
		||||
use crate::{
 | 
			
		||||
    broadcast_stage::BroadcastStageType,
 | 
			
		||||
    cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService},
 | 
			
		||||
    cluster_info_vote_listener::VoteTracker,
 | 
			
		||||
    completed_data_sets_service::CompletedDataSetsService,
 | 
			
		||||
    consensus::{reconcile_blockstore_roots_with_tower, Tower},
 | 
			
		||||
    cost_model::CostModel,
 | 
			
		||||
    rewards_recorder_service::{RewardsRecorderSender, RewardsRecorderService},
 | 
			
		||||
    sample_performance_service::SamplePerformanceService,
 | 
			
		||||
    serve_repair::ServeRepair,
 | 
			
		||||
    serve_repair_service::ServeRepairService,
 | 
			
		||||
    sigverify,
 | 
			
		||||
    snapshot_packager_service::{PendingSnapshotPackage, SnapshotPackagerService},
 | 
			
		||||
    tower_storage::TowerStorage,
 | 
			
		||||
    tpu::{Tpu, DEFAULT_TPU_COALESCE_MS},
 | 
			
		||||
    tvu::{Sockets, Tvu, TvuConfig},
 | 
			
		||||
};
 | 
			
		||||
use crossbeam_channel::{bounded, unbounded};
 | 
			
		||||
use rand::{thread_rng, Rng};
 | 
			
		||||
use solana_entry::poh::compute_hash_time_ns;
 | 
			
		||||
use solana_gossip::{
 | 
			
		||||
    cluster_info::{
 | 
			
		||||
        ClusterInfo, Node, DEFAULT_CONTACT_DEBUG_INTERVAL_MILLIS,
 | 
			
		||||
        DEFAULT_CONTACT_SAVE_INTERVAL_MILLIS,
 | 
			
		||||
use {
 | 
			
		||||
    crate::{
 | 
			
		||||
        broadcast_stage::BroadcastStageType,
 | 
			
		||||
        cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService},
 | 
			
		||||
        cluster_info_vote_listener::VoteTracker,
 | 
			
		||||
        completed_data_sets_service::CompletedDataSetsService,
 | 
			
		||||
        consensus::{reconcile_blockstore_roots_with_tower, Tower},
 | 
			
		||||
        cost_model::CostModel,
 | 
			
		||||
        rewards_recorder_service::{RewardsRecorderSender, RewardsRecorderService},
 | 
			
		||||
        sample_performance_service::SamplePerformanceService,
 | 
			
		||||
        serve_repair::ServeRepair,
 | 
			
		||||
        serve_repair_service::ServeRepairService,
 | 
			
		||||
        sigverify,
 | 
			
		||||
        snapshot_packager_service::{PendingSnapshotPackage, SnapshotPackagerService},
 | 
			
		||||
        tower_storage::TowerStorage,
 | 
			
		||||
        tpu::{Tpu, DEFAULT_TPU_COALESCE_MS},
 | 
			
		||||
        tvu::{Sockets, Tvu, TvuConfig},
 | 
			
		||||
    },
 | 
			
		||||
    contact_info::ContactInfo,
 | 
			
		||||
    gossip_service::GossipService,
 | 
			
		||||
};
 | 
			
		||||
use solana_ledger::{
 | 
			
		||||
    bank_forks_utils,
 | 
			
		||||
    blockstore::{Blockstore, BlockstoreSignals, CompletedSlotsReceiver, PurgeType},
 | 
			
		||||
    blockstore_db::BlockstoreRecoveryMode,
 | 
			
		||||
    blockstore_processor::{self, TransactionStatusSender},
 | 
			
		||||
    leader_schedule::FixedSchedule,
 | 
			
		||||
    leader_schedule_cache::LeaderScheduleCache,
 | 
			
		||||
};
 | 
			
		||||
use solana_measure::measure::Measure;
 | 
			
		||||
use solana_metrics::datapoint_info;
 | 
			
		||||
use solana_poh::{
 | 
			
		||||
    poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS},
 | 
			
		||||
    poh_service::{self, PohService},
 | 
			
		||||
};
 | 
			
		||||
use solana_rpc::{
 | 
			
		||||
    max_slots::MaxSlots,
 | 
			
		||||
    optimistically_confirmed_bank_tracker::{
 | 
			
		||||
        OptimisticallyConfirmedBank, OptimisticallyConfirmedBankTracker,
 | 
			
		||||
    crossbeam_channel::{bounded, unbounded},
 | 
			
		||||
    rand::{thread_rng, Rng},
 | 
			
		||||
    solana_entry::poh::compute_hash_time_ns,
 | 
			
		||||
    solana_gossip::{
 | 
			
		||||
        cluster_info::{
 | 
			
		||||
            ClusterInfo, Node, DEFAULT_CONTACT_DEBUG_INTERVAL_MILLIS,
 | 
			
		||||
            DEFAULT_CONTACT_SAVE_INTERVAL_MILLIS,
 | 
			
		||||
        },
 | 
			
		||||
        contact_info::ContactInfo,
 | 
			
		||||
        crds_gossip_pull::CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS,
 | 
			
		||||
        gossip_service::GossipService,
 | 
			
		||||
    },
 | 
			
		||||
    solana_ledger::{
 | 
			
		||||
        bank_forks_utils,
 | 
			
		||||
        blockstore::{Blockstore, BlockstoreSignals, CompletedSlotsReceiver, PurgeType},
 | 
			
		||||
        blockstore_db::BlockstoreRecoveryMode,
 | 
			
		||||
        blockstore_processor::{self, TransactionStatusSender},
 | 
			
		||||
        leader_schedule::FixedSchedule,
 | 
			
		||||
        leader_schedule_cache::LeaderScheduleCache,
 | 
			
		||||
    },
 | 
			
		||||
    solana_measure::measure::Measure,
 | 
			
		||||
    solana_metrics::datapoint_info,
 | 
			
		||||
    solana_poh::{
 | 
			
		||||
        poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS},
 | 
			
		||||
        poh_service::{self, PohService},
 | 
			
		||||
    },
 | 
			
		||||
    solana_rpc::{
 | 
			
		||||
        max_slots::MaxSlots,
 | 
			
		||||
        optimistically_confirmed_bank_tracker::{
 | 
			
		||||
            OptimisticallyConfirmedBank, OptimisticallyConfirmedBankTracker,
 | 
			
		||||
        },
 | 
			
		||||
        rpc::JsonRpcConfig,
 | 
			
		||||
        rpc_completed_slots_service::RpcCompletedSlotsService,
 | 
			
		||||
        rpc_pubsub_service::{PubSubConfig, PubSubService},
 | 
			
		||||
        rpc_service::JsonRpcService,
 | 
			
		||||
        rpc_subscriptions::RpcSubscriptions,
 | 
			
		||||
        transaction_status_service::TransactionStatusService,
 | 
			
		||||
    },
 | 
			
		||||
    solana_runtime::{
 | 
			
		||||
        accounts_db::AccountShrinkThreshold,
 | 
			
		||||
        accounts_index::AccountSecondaryIndexes,
 | 
			
		||||
        bank::Bank,
 | 
			
		||||
        bank_forks::BankForks,
 | 
			
		||||
        commitment::BlockCommitmentCache,
 | 
			
		||||
        hardened_unpack::{open_genesis_config, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
 | 
			
		||||
        snapshot_archive_info::SnapshotArchiveInfoGetter,
 | 
			
		||||
        snapshot_config::SnapshotConfig,
 | 
			
		||||
        snapshot_utils,
 | 
			
		||||
    },
 | 
			
		||||
    solana_sdk::{
 | 
			
		||||
        clock::Slot,
 | 
			
		||||
        epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET,
 | 
			
		||||
        exit::Exit,
 | 
			
		||||
        genesis_config::GenesisConfig,
 | 
			
		||||
        hash::Hash,
 | 
			
		||||
        pubkey::Pubkey,
 | 
			
		||||
        shred_version::compute_shred_version,
 | 
			
		||||
        signature::{Keypair, Signer},
 | 
			
		||||
        timing::timestamp,
 | 
			
		||||
    },
 | 
			
		||||
    solana_streamer::socket::SocketAddrSpace,
 | 
			
		||||
    solana_vote_program::vote_state::VoteState,
 | 
			
		||||
    std::{
 | 
			
		||||
        collections::{HashMap, HashSet},
 | 
			
		||||
        net::SocketAddr,
 | 
			
		||||
        ops::Deref,
 | 
			
		||||
        path::{Path, PathBuf},
 | 
			
		||||
        sync::{
 | 
			
		||||
            atomic::{AtomicBool, AtomicU64, Ordering},
 | 
			
		||||
            mpsc::Receiver,
 | 
			
		||||
            Arc, Mutex, RwLock,
 | 
			
		||||
        },
 | 
			
		||||
        thread::{sleep, Builder, JoinHandle},
 | 
			
		||||
        time::{Duration, Instant},
 | 
			
		||||
    },
 | 
			
		||||
    rpc::JsonRpcConfig,
 | 
			
		||||
    rpc_completed_slots_service::RpcCompletedSlotsService,
 | 
			
		||||
    rpc_pubsub_service::{PubSubConfig, PubSubService},
 | 
			
		||||
    rpc_service::JsonRpcService,
 | 
			
		||||
    rpc_subscriptions::RpcSubscriptions,
 | 
			
		||||
    transaction_status_service::TransactionStatusService,
 | 
			
		||||
};
 | 
			
		||||
use solana_runtime::{
 | 
			
		||||
    accounts_db::AccountShrinkThreshold,
 | 
			
		||||
    accounts_index::AccountSecondaryIndexes,
 | 
			
		||||
    bank::Bank,
 | 
			
		||||
    bank_forks::BankForks,
 | 
			
		||||
    commitment::BlockCommitmentCache,
 | 
			
		||||
    hardened_unpack::{open_genesis_config, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
 | 
			
		||||
    snapshot_archive_info::SnapshotArchiveInfoGetter,
 | 
			
		||||
    snapshot_config::SnapshotConfig,
 | 
			
		||||
    snapshot_utils,
 | 
			
		||||
};
 | 
			
		||||
use solana_sdk::{
 | 
			
		||||
    clock::Slot,
 | 
			
		||||
    epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET,
 | 
			
		||||
    exit::Exit,
 | 
			
		||||
    genesis_config::GenesisConfig,
 | 
			
		||||
    hash::Hash,
 | 
			
		||||
    pubkey::Pubkey,
 | 
			
		||||
    shred_version::compute_shred_version,
 | 
			
		||||
    signature::{Keypair, Signer},
 | 
			
		||||
    timing::timestamp,
 | 
			
		||||
};
 | 
			
		||||
use solana_streamer::socket::SocketAddrSpace;
 | 
			
		||||
use solana_vote_program::vote_state::VoteState;
 | 
			
		||||
use std::{
 | 
			
		||||
    collections::HashSet,
 | 
			
		||||
    net::SocketAddr,
 | 
			
		||||
    ops::Deref,
 | 
			
		||||
    path::{Path, PathBuf},
 | 
			
		||||
    sync::atomic::{AtomicBool, AtomicU64, Ordering},
 | 
			
		||||
    sync::mpsc::Receiver,
 | 
			
		||||
    sync::{Arc, Mutex, RwLock},
 | 
			
		||||
    thread::{sleep, Builder, JoinHandle},
 | 
			
		||||
    time::{Duration, Instant},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000;
 | 
			
		||||
@@ -1521,7 +1526,20 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo
 | 
			
		||||
    let mut offline_nodes = vec![];
 | 
			
		||||
 | 
			
		||||
    let mut total_activated_stake = 0;
 | 
			
		||||
    let all_tvu_peers = cluster_info.all_tvu_peers();
 | 
			
		||||
    let now = timestamp();
 | 
			
		||||
    // Nodes contact infos are saved to disk and restored on validator startup.
 | 
			
		||||
    // Staked nodes entries will not expire until an epoch after. So it
 | 
			
		||||
    // is necessary here to filter for recent entries to establish liveness.
 | 
			
		||||
    let peers: HashMap<_, _> = cluster_info
 | 
			
		||||
        .all_tvu_peers()
 | 
			
		||||
        .into_iter()
 | 
			
		||||
        .filter(|node| {
 | 
			
		||||
            let age = now.saturating_sub(node.wallclock);
 | 
			
		||||
            // Contact infos are refreshed twice during this period.
 | 
			
		||||
            age < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS
 | 
			
		||||
        })
 | 
			
		||||
        .map(|node| (node.id, node))
 | 
			
		||||
        .collect();
 | 
			
		||||
    let my_shred_version = cluster_info.my_shred_version();
 | 
			
		||||
    let my_id = cluster_info.id();
 | 
			
		||||
 | 
			
		||||
@@ -1537,10 +1555,7 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo
 | 
			
		||||
            .map(|vote_state| vote_state.node_pubkey)
 | 
			
		||||
            .unwrap_or_default();
 | 
			
		||||
 | 
			
		||||
        if let Some(peer) = all_tvu_peers
 | 
			
		||||
            .iter()
 | 
			
		||||
            .find(|peer| peer.id == vote_state_node_pubkey)
 | 
			
		||||
        {
 | 
			
		||||
        if let Some(peer) = peers.get(&vote_state_node_pubkey) {
 | 
			
		||||
            if peer.shred_version == my_shred_version {
 | 
			
		||||
                trace!(
 | 
			
		||||
                    "observed {} in gossip, (activated_stake={})",
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user