Add poh speed check and tick speed calibration (#14292)
This commit is contained in:
		| @@ -1,7 +1,6 @@ | ||||
| //! The `poh_service` module implements a service that records the passing of | ||||
| //! "ticks", a measure of time in the PoH stream | ||||
| use crate::poh_recorder::PohRecorder; | ||||
| use solana_sdk::clock::DEFAULT_TICKS_PER_SLOT; | ||||
| use solana_sdk::poh_config::PohConfig; | ||||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||||
| use std::sync::{Arc, Mutex}; | ||||
| @@ -25,6 +24,7 @@ impl PohService { | ||||
|         poh_recorder: Arc<Mutex<PohRecorder>>, | ||||
|         poh_config: &Arc<PohConfig>, | ||||
|         poh_exit: &Arc<AtomicBool>, | ||||
|         ticks_per_slot: u64, | ||||
|     ) -> Self { | ||||
|         let poh_exit_ = poh_exit.clone(); | ||||
|         let poh_config = poh_config.clone(); | ||||
| @@ -49,7 +49,12 @@ impl PohService { | ||||
|                     if let Some(cores) = core_affinity::get_core_ids() { | ||||
|                         core_affinity::set_for_current(cores[0]); | ||||
|                     } | ||||
|                     Self::tick_producer(poh_recorder, &poh_exit_); | ||||
|                     Self::tick_producer( | ||||
|                         poh_recorder, | ||||
|                         &poh_exit_, | ||||
|                         poh_config.target_tick_duration.as_nanos() as u64, | ||||
|                         ticks_per_slot, | ||||
|                     ); | ||||
|                 } | ||||
|                 poh_exit_.store(true, Ordering::Relaxed); | ||||
|             }) | ||||
| @@ -85,27 +90,48 @@ impl PohService { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn tick_producer(poh_recorder: Arc<Mutex<PohRecorder>>, poh_exit: &AtomicBool) { | ||||
|     fn tick_producer( | ||||
|         poh_recorder: Arc<Mutex<PohRecorder>>, | ||||
|         poh_exit: &AtomicBool, | ||||
|         target_tick_ns: u64, | ||||
|         ticks_per_slot: u64, | ||||
|     ) { | ||||
|         info!("starting with target ns: {}", target_tick_ns); | ||||
|         let poh = poh_recorder.lock().unwrap().poh.clone(); | ||||
|         let mut now = Instant::now(); | ||||
|         let mut last_metric = Instant::now(); | ||||
|         let mut num_ticks = 0; | ||||
|         let mut num_hashes = 0; | ||||
|         let mut total_sleep_us = 0; | ||||
|         loop { | ||||
|             num_hashes += NUM_HASHES_PER_BATCH; | ||||
|             if poh.lock().unwrap().hash(NUM_HASHES_PER_BATCH) { | ||||
|                 // Lock PohRecorder only for the final hash... | ||||
|                 poh_recorder.lock().unwrap().tick(); | ||||
|                 num_ticks += 1; | ||||
|                 if num_ticks >= DEFAULT_TICKS_PER_SLOT * 2 { | ||||
|                 let elapsed_ns = now.elapsed().as_nanos() as u64; | ||||
|                 // sleep is not accurate enough to get a predictable time. | ||||
|                 // Kernel can not schedule the thread for a while. | ||||
|                 while (now.elapsed().as_nanos() as u64) < target_tick_ns { | ||||
|                     std::sync::atomic::spin_loop_hint(); | ||||
|                 } | ||||
|                 total_sleep_us += (now.elapsed().as_nanos() as u64 - elapsed_ns) / 1000; | ||||
|                 now = Instant::now(); | ||||
|  | ||||
|                 if last_metric.elapsed().as_millis() > 1000 { | ||||
|                     let elapsed_ms = last_metric.elapsed().as_millis() as u64; | ||||
|                     let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks; | ||||
|                     datapoint_info!( | ||||
|                         "poh-service", | ||||
|                         ("ticks", num_ticks as i64, i64), | ||||
|                         ("hashes", num_hashes as i64, i64), | ||||
|                         ("elapsed_ms", now.elapsed().as_millis() as i64, i64), | ||||
|                         ("elapsed_ms", ms_per_slot, i64), | ||||
|                         ("total_sleep_ms", total_sleep_us / 1000, i64), | ||||
|                     ); | ||||
|                     total_sleep_us = 0; | ||||
|                     num_ticks = 0; | ||||
|                     num_hashes = 0; | ||||
|                     now = Instant::now(); | ||||
|                     last_metric = Instant::now(); | ||||
|                 } | ||||
|                 if poh_exit.load(Ordering::Relaxed) { | ||||
|                     break; | ||||
| @@ -189,7 +215,7 @@ mod tests { | ||||
|                     .unwrap() | ||||
|             }; | ||||
|  | ||||
|             let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit); | ||||
|             let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit, 0); | ||||
|             poh_recorder.lock().unwrap().set_working_bank(working_bank); | ||||
|  | ||||
|             // get some events | ||||
|   | ||||
		Reference in New Issue
	
	Block a user