Implement QUIC connection warmup service for future leaders (#24054)

* Increase connection timeouts

* Bump quic connection cache to 1024

* Use constant for quic connection timeout and add warm cache service

* Fixes to QUIC warmup service

* fix check failure

* fixes after rebase

* fix timeout test

Co-authored-by: Pankaj Garg <pankaj@solana.com>
This commit is contained in:
sakridge
2022-04-15 21:09:24 +02:00
committed by GitHub
parent 43d3f049e9
commit 1b7d1f78de
7 changed files with 153 additions and 11 deletions

View File

@ -73,6 +73,7 @@ pub mod verified_vote_packets;
pub mod vote_simulator;
pub mod vote_stake_tracker;
pub mod voting_service;
pub mod warm_quic_cache_service;
pub mod window_service;
#[macro_use]

View File

@ -25,6 +25,7 @@ use {
sigverify_stage::SigVerifyStage,
tower_storage::TowerStorage,
voting_service::VotingService,
warm_quic_cache_service::WarmQuicCacheService,
},
crossbeam_channel::{bounded, unbounded, Receiver, RecvTimeoutError},
solana_geyser_plugin_manager::block_metadata_notifier_interface::BlockMetadataNotifierLock,
@ -78,6 +79,7 @@ pub struct Tvu {
accounts_hash_verifier: AccountsHashVerifier,
cost_update_service: CostUpdateService,
voting_service: VotingService,
warm_quic_cache_service: WarmQuicCacheService,
drop_bank_service: DropBankService,
transaction_cost_metrics_service: TransactionCostMetricsService,
}
@ -283,6 +285,9 @@ impl Tvu {
bank_forks.clone(),
);
let warm_quic_cache_service =
WarmQuicCacheService::new(cluster_info.clone(), poh_recorder.clone(), exit.clone());
let (cost_update_sender, cost_update_receiver) = unbounded();
let cost_update_service =
CostUpdateService::new(blockstore.clone(), cost_model.clone(), cost_update_receiver);
@ -356,6 +361,7 @@ impl Tvu {
accounts_hash_verifier,
cost_update_service,
voting_service,
warm_quic_cache_service,
drop_bank_service,
transaction_cost_metrics_service,
}
@ -390,6 +396,7 @@ impl Tvu {
self.accounts_hash_verifier.join()?;
self.cost_update_service.join()?;
self.voting_service.join()?;
self.warm_quic_cache_service.join()?;
self.drop_bank_service.join()?;
self.transaction_cost_metrics_service.join()?;
Ok(())

View File

@ -0,0 +1,70 @@
// Connect to future leaders with some jitter so the quic connection is warm
// by the time we need it.
use {
rand::{thread_rng, Rng},
solana_client::connection_cache::send_wire_transaction,
solana_gossip::cluster_info::ClusterInfo,
solana_poh::poh_recorder::PohRecorder,
std::{
sync::{
atomic::{AtomicBool, Ordering},
Arc, Mutex,
},
thread::{self, sleep, Builder, JoinHandle},
time::Duration,
},
};
pub struct WarmQuicCacheService {
thread_hdl: JoinHandle<()>,
}
// ~50 seconds
const CACHE_OFFSET_SLOT: i64 = 100;
const CACHE_JITTER_SLOT: i64 = 20;
impl WarmQuicCacheService {
pub fn new(
cluster_info: Arc<ClusterInfo>,
poh_recorder: Arc<Mutex<PohRecorder>>,
exit: Arc<AtomicBool>,
) -> Self {
let thread_hdl = Builder::new()
.name("sol-warm-quic-service".to_string())
.spawn(move || {
let slot_jitter = thread_rng().gen_range(-CACHE_JITTER_SLOT, CACHE_JITTER_SLOT);
let mut maybe_last_leader = None;
while !exit.load(Ordering::Relaxed) {
if let Some(leader_pubkey) = poh_recorder
.lock()
.unwrap()
.leader_after_n_slots((CACHE_OFFSET_SLOT + slot_jitter) as u64)
{
if maybe_last_leader
.map_or(true, |last_leader| last_leader != leader_pubkey)
{
maybe_last_leader = Some(leader_pubkey);
if let Some(addr) = cluster_info
.lookup_contact_info(&leader_pubkey, |leader| leader.tpu)
{
if let Err(err) = send_wire_transaction(&[0u8], &addr) {
warn!(
"Failed to warmup QUIC connection to the leader {:?}, Error {:?}",
leader_pubkey, err
);
}
}
}
}
sleep(Duration::from_millis(200));
}
})
.unwrap();
Self { thread_hdl }
}
pub fn join(self) -> thread::Result<()> {
self.thread_hdl.join()
}
}