Throttle PoH ticks by cumulative slot time (#16139)
* Throttle PoH ticks by cumulative slot time
* respond to pr feedback
* saturating sub
* updated comment
Parent: a1f1f573d5
Commit: 4f4cffbd03
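Previously, PohService::tick_producer spun for a fixed target_tick_ns after every tick, so any overshoot from one wait pushed every later tick in the slot further behind. This commit moves the wait into PohRecorder::tick and aims it at a cumulative, slot-relative deadline (Poh::target_poh_time), so per-tick error no longer compounds across the slot. A minimal standalone illustration of the difference; the 50 µs overshoot and the 64-ticks-at-6.25 ms slot shape are assumed numbers for the arithmetic, not values from this commit:

fn main() {
    // Illustrative numbers only: 64 ticks/slot at 6_250_000 ns each (400 ms slots),
    // and an assumed 50_000 ns scheduler overshoot on every spin-wait.
    let ticks_per_slot: u64 = 64;
    let target_ns_per_tick: u64 = 6_250_000;
    let overshoot_ns: u64 = 50_000;

    // Old scheme: each tick waits target_ns_per_tick from "now", so every
    // overshoot delays the next tick's start and the error compounds.
    let per_tick_error_ns = ticks_per_slot * overshoot_ns;

    // New scheme: each tick waits until slot_start + tick_number * target_ns_per_tick,
    // re-anchoring to the slot start; only the final tick's overshoot remains.
    let cumulative_error_ns = overshoot_ns;

    let _ = target_ns_per_tick;
    println!("per-tick throttling:   slot runs {} ns long", per_tick_error_ns); // 3_200_000
    println!("cumulative throttling: slot runs {} ns long", cumulative_error_ns); // 50_000
}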
core/src/poh_recorder.rs

@@ -10,6 +10,7 @@
 //! For Entries:
 //! * recorded entry must be >= WorkingBank::min_tick_height && entry must be < WorkingBank::max_tick_height
 //!
+use crate::poh_service::PohService;
 use solana_ledger::blockstore::Blockstore;
 use solana_ledger::entry::Entry;
 use solana_ledger::leader_schedule_cache::LeaderScheduleCache;
@@ -151,6 +152,7 @@ pub struct PohRecorder {
     leader_schedule_cache: Arc<LeaderScheduleCache>,
     poh_config: Arc<PohConfig>,
     ticks_per_slot: u64,
+    target_ns_per_tick: u64,
     record_lock_contention_us: u64,
     flush_cache_no_tick_us: u64,
     flush_cache_tick_us: u64,
@@ -158,6 +160,7 @@ pub struct PohRecorder {
     send_us: u64,
     tick_lock_contention_us: u64,
     tick_overhead_us: u64,
+    total_sleep_us: u64,
     record_us: u64,
     ticks_from_record: u64,
     last_metric: Instant,
@@ -462,7 +465,17 @@ impl PohRecorder {
 
     pub fn tick(&mut self) {
         let now = Instant::now();
-        let poh_entry = self.poh.lock().unwrap().tick();
+        let (poh_entry, target_time) = {
+            let mut poh_l = self.poh.lock().unwrap();
+            let poh_entry = poh_l.tick();
+            let target_time = if poh_entry.is_some() {
+                Some(poh_l.target_poh_time(self.target_ns_per_tick))
+            } else {
+                None
+            };
+
+            (poh_entry, target_time)
+        };
         self.tick_lock_contention_us += timing::duration_as_us(&now.elapsed());
         let now = Instant::now();
         if let Some(poh_entry) = poh_entry {
@@ -485,6 +498,15 @@ impl PohRecorder {
             self.tick_cache.push((entry, self.tick_height));
             let _ = self.flush_cache(true);
             self.flush_cache_tick_us += timing::duration_as_us(&now.elapsed());
+            let target_time = target_time.unwrap();
+            // sleep is not accurate enough to get a predictable time.
+            // Kernel can not schedule the thread for a while.
+            let started_waiting = Instant::now();
+            while Instant::now() < target_time {
+                // TODO: a caller could possibly desire to reset or record while we're spinning here
+                std::hint::spin_loop();
+            }
+            self.total_sleep_us += started_waiting.elapsed().as_nanos() as u64 / 1000;
         }
     }
 
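Poh::target_poh_time and Poh::new_with_slot_info (used in the constructor hunk below) live in poh.rs and are not part of this diff. A minimal sketch of what the call sites imply, with every name and field here an assumption; the real method may also account for hashes already completed inside the current tick:

use std::time::{Duration, Instant};

// Assumed shape: the Poh state remembers when the slot started and which
// tick of the slot it is on, so deadlines can be anchored to the slot start.
struct PohSketch {
    slot_start_time: Instant,
    tick_number: u64,   // tick index within the current slot
    ticks_per_slot: u64, // mirrors the call site; not needed for the deadline math
}

impl PohSketch {
    // Sketch of new_with_slot_info: seed the slot-relative timing state.
    fn new_with_slot_info(ticks_per_slot: u64, tick_number: u64) -> Self {
        Self {
            slot_start_time: Instant::now(),
            tick_number,
            ticks_per_slot,
        }
    }

    // Sketch of target_poh_time: an absolute deadline for the current tick,
    // measured from the slot start rather than from the previous tick, so
    // per-tick overshoot does not accumulate across the slot.
    fn target_poh_time(&self, target_ns_per_tick: u64) -> Instant {
        self.slot_start_time + Duration::from_nanos(target_ns_per_tick * self.tick_number)
    }
}

fn main() {
    let mut poh = PohSketch::new_with_slot_info(64, 0);
    poh.tick_number += 1; // a tick was produced
    let deadline = poh.target_poh_time(6_250_000);
    while Instant::now() < deadline {
        std::hint::spin_loop(); // same busy-wait pattern as PohRecorder::tick above
    }
}

Busy-waiting with spin_loop rather than thread::sleep is deliberate: as the comment in the hunk above notes, the kernel cannot be relied on to reschedule a sleeping thread quickly enough for sub-millisecond deadlines.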
@@ -500,6 +522,7 @@ impl PohRecorder {
             ("prepare_send_us", self.prepare_send_us, i64),
             ("send_us", self.send_us, i64),
             ("ticks_from_record", self.ticks_from_record, i64),
+            ("total_sleep_us", self.total_sleep_us, i64),
             ("tick_overhead", self.tick_overhead_us, i64),
             (
                 "record_lock_contention",
@@ -511,6 +534,7 @@ impl PohRecorder {
             self.tick_lock_contention_us = 0;
             self.record_us = 0;
             self.tick_overhead_us = 0;
+            self.total_sleep_us = 0;
             self.record_lock_contention_us = 0;
             self.flush_cache_no_tick_us = 0;
             self.flush_cache_tick_us = 0;
@@ -588,10 +612,18 @@ impl PohRecorder {
         leader_schedule_cache: &Arc<LeaderScheduleCache>,
         poh_config: &Arc<PohConfig>,
     ) -> (Self, Receiver<WorkingBankEntry>, Receiver<Record>) {
-        let poh = Arc::new(Mutex::new(Poh::new(
+        let tick_number = 0;
+        let poh = Arc::new(Mutex::new(Poh::new_with_slot_info(
             last_entry_hash,
             poh_config.hashes_per_tick,
+            ticks_per_slot,
+            tick_number,
         )));
+
+        let target_ns_per_tick = PohService::target_ns_per_tick(
+            ticks_per_slot,
+            poh_config.target_tick_duration.as_nanos() as u64,
+        );
         let (sender, receiver) = channel();
         let (record_sender, record_receiver) = channel();
         let (leader_first_tick_height, leader_last_tick_height, grace_ticks) =
@@ -613,6 +645,7 @@ impl PohRecorder {
             blockstore: blockstore.clone(),
             leader_schedule_cache: leader_schedule_cache.clone(),
             ticks_per_slot,
+            target_ns_per_tick,
             poh_config: poh_config.clone(),
             record_lock_contention_us: 0,
             flush_cache_tick_us: 0,
@@ -622,6 +655,7 @@ impl PohRecorder {
             tick_lock_contention_us: 0,
             record_us: 0,
             tick_overhead_us: 0,
+            total_sleep_us: 0,
             ticks_from_record: 0,
             last_metric: Instant::now(),
             record_sender,
core/src/poh_service.rs

@@ -116,17 +116,9 @@ impl PohService {
                 if let Some(cores) = core_affinity::get_core_ids() {
                     core_affinity::set_for_current(cores[pinned_cpu_core]);
                 }
-                // Account for some extra time outside of PoH generation to account
-                // for processing time outside PoH.
-                let adjustment_per_tick = if ticks_per_slot > 0 {
-                    TARGET_SLOT_ADJUSTMENT_NS / ticks_per_slot
-                } else {
-                    0
-                };
                 Self::tick_producer(
                     poh_recorder,
                     &poh_exit_,
-                    poh_config.target_tick_duration.as_nanos() as u64 - adjustment_per_tick,
                     ticks_per_slot,
                     hashes_per_batch,
                     record_receiver,
@@ -139,6 +131,17 @@ impl PohService {
         Self { tick_producer }
     }
 
+    pub fn target_ns_per_tick(ticks_per_slot: u64, target_tick_duration_ns: u64) -> u64 {
+        // Account for some extra time outside of PoH generation to account
+        // for processing time outside PoH.
+        let adjustment_per_tick = if ticks_per_slot > 0 {
+            TARGET_SLOT_ADJUSTMENT_NS / ticks_per_slot
+        } else {
+            0
+        };
+        target_tick_duration_ns.saturating_sub(adjustment_per_tick)
+    }
+
     fn sleepy_tick_producer(
         poh_recorder: Arc<Mutex<PohRecorder>>,
         poh_config: &PohConfig,
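target_ns_per_tick shaves a per-tick share of TARGET_SLOT_ADJUSTMENT_NS off the configured tick duration, and saturating_sub (the "saturating sub" item in the commit message) keeps small test configurations from underflowing. A worked example; the constant's real value is defined elsewhere in poh_service.rs, and 20_000_000 here is assumed purely for the arithmetic:

// Standalone copy of the new helper with an assumed constant.
const TARGET_SLOT_ADJUSTMENT_NS: u64 = 20_000_000; // assumption for illustration

pub fn target_ns_per_tick(ticks_per_slot: u64, target_tick_duration_ns: u64) -> u64 {
    let adjustment_per_tick = if ticks_per_slot > 0 {
        TARGET_SLOT_ADJUSTMENT_NS / ticks_per_slot
    } else {
        0
    };
    target_tick_duration_ns.saturating_sub(adjustment_per_tick)
}

fn main() {
    // 64 ticks/slot at 6_250_000 ns/tick: adjustment = 20_000_000 / 64 = 312_500 ns,
    // so each tick targets 6_250_000 - 312_500 = 5_937_500 ns.
    assert_eq!(target_ns_per_tick(64, 6_250_000), 5_937_500);

    // A tick duration smaller than the adjustment clamps to 0 instead of underflowing.
    assert_eq!(target_ns_per_tick(1, 100), 0);
}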
@@ -199,6 +202,7 @@ impl PohService {
         }
     }
 
+    // returns true if we need to tick
     fn record_or_hash(
         next_record: &mut Option<Record>,
         poh_recorder: &Arc<Mutex<PohRecorder>>,
@@ -253,7 +257,8 @@ impl PohService {
                     hash_time.stop();
                     timing.total_hash_time_ns += hash_time.as_ns();
                     if should_tick {
-                        return true; // nothing else can be done. tick required.
+                        // nothing else can be done. tick required.
+                        return true;
                     }
                     // check to see if a record request has been sent
                     let get_again = record_receiver.try_recv();
@@ -276,13 +281,11 @@ impl PohService {
     fn tick_producer(
         poh_recorder: Arc<Mutex<PohRecorder>>,
         poh_exit: &AtomicBool,
-        target_tick_ns: u64,
         ticks_per_slot: u64,
         hashes_per_batch: u64,
         record_receiver: Receiver<Record>,
     ) {
         let poh = poh_recorder.lock().unwrap().poh.clone();
-        let mut now = Instant::now();
         let mut timing = PohTiming::new();
         let mut next_record = None;
         loop {
@@ -307,14 +310,6 @@ impl PohService {
                     timing.total_tick_time_ns += tick_time.as_ns();
                 }
                 timing.num_ticks += 1;
-                let elapsed_ns = now.elapsed().as_nanos() as u64;
-                // sleep is not accurate enough to get a predictable time.
-                // Kernel can not schedule the thread for a while.
-                while (now.elapsed().as_nanos() as u64) < target_tick_ns {
-                    std::hint::spin_loop();
-                }
-                timing.total_sleep_us += (now.elapsed().as_nanos() as u64 - elapsed_ns) / 1000;
-                now = Instant::now();
 
                 timing.report(ticks_per_slot);
                 if poh_exit.load(Ordering::Relaxed) {
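Net effect on tick_producer: the target_tick_ns parameter, the per-tick spin loop, and its total_sleep_us bookkeeping are all removed from PohService; PohRecorder::tick now owns the wait against the slot-relative deadline and reports total_sleep_us itself.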