diff --git a/core/src/poh_recorder.rs b/core/src/poh_recorder.rs index 349e84ae81..432aabce83 100644 --- a/core/src/poh_recorder.rs +++ b/core/src/poh_recorder.rs @@ -80,6 +80,7 @@ pub struct PohRecorder { tick_lock_contention_us: u64, tick_overhead_us: u64, record_us: u64, + last_metric: Instant, } impl PohRecorder { @@ -262,14 +263,15 @@ impl PohRecorder { ) { self.clear_bank(); let mut cache = vec![]; - { + let poh_hash = { let mut poh = self.poh.lock().unwrap(); - info!( - "reset poh from: {},{},{} to: {},{}", - poh.hash, self.tick_height, self.start_slot, blockhash, start_slot - ); poh.reset(blockhash, self.poh_config.hashes_per_tick); - } + poh.hash + }; + info!( + "reset poh from: {},{},{} to: {},{}", + poh_hash, self.tick_height, self.start_slot, blockhash, start_slot + ); std::mem::swap(&mut cache, &mut self.tick_cache); @@ -396,23 +398,26 @@ impl PohRecorder { } fn report_metrics(&mut self, bank_slot: Slot) { - datapoint_info!( - "poh_recorder", - ("slot", bank_slot, i64), - ("tick_lock_contention", self.tick_lock_contention_us, i64), - ("record_us", self.record_us, i64), - ("tick_overhead", self.tick_overhead_us, i64), - ( - "record_lock_contention", - self.record_lock_contention_us, - i64 - ), - ); + if self.last_metric.elapsed().as_millis() > 1000 { + datapoint_info!( + "poh_recorder", + ("slot", bank_slot, i64), + ("tick_lock_contention", self.tick_lock_contention_us, i64), + ("record_us", self.record_us, i64), + ("tick_overhead", self.tick_overhead_us, i64), + ( + "record_lock_contention", + self.record_lock_contention_us, + i64 + ), + ); - self.tick_lock_contention_us = 0; - self.record_us = 0; - self.tick_overhead_us = 0; - self.record_lock_contention_us = 0; + self.tick_lock_contention_us = 0; + self.record_us = 0; + self.tick_overhead_us = 0; + self.record_lock_contention_us = 0; + self.last_metric = Instant::now(); + } } pub fn record( @@ -424,6 +429,7 @@ impl PohRecorder { // Entries without transactions are used to track real-time passing in the ledger and // cannot be generated by `record()` assert!(!transactions.is_empty(), "No transactions provided"); + self.report_metrics(bank_slot); loop { self.flush_cache(false)?; @@ -432,7 +438,6 @@ impl PohRecorder { .as_ref() .ok_or(PohRecorderError::MaxHeightReached)?; if bank_slot != working_bank.bank.slot() { - self.report_metrics(bank_slot); return Err(PohRecorderError::MaxHeightReached); } @@ -504,6 +509,7 @@ impl PohRecorder { tick_lock_contention_us: 0, record_us: 0, tick_overhead_us: 0, + last_metric: Instant::now(), }, receiver, ) diff --git a/core/src/poh_service.rs b/core/src/poh_service.rs index de152a68ef..cb3594c5ba 100644 --- a/core/src/poh_service.rs +++ b/core/src/poh_service.rs @@ -1,6 +1,7 @@ //! The `poh_service` module implements a service that records the passing of //! "ticks", a measure of time in the PoH stream use crate::poh_recorder::PohRecorder; +use solana_measure::measure::Measure; use solana_sdk::poh_config::PohConfig; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; @@ -117,11 +118,34 @@ impl PohService { let mut num_ticks = 0; let mut num_hashes = 0; let mut total_sleep_us = 0; + let mut total_lock_time_ns = 0; + let mut total_hash_time_ns = 0; + let mut total_tick_time_ns = 0; loop { num_hashes += hashes_per_batch; - if poh.lock().unwrap().hash(hashes_per_batch) { + let should_tick = { + let mut lock_time = Measure::start("lock"); + let mut poh_l = poh.lock().unwrap(); + lock_time.stop(); + total_lock_time_ns += lock_time.as_ns(); + let mut hash_time = Measure::start("hash"); + let r = poh_l.hash(hashes_per_batch); + hash_time.stop(); + total_hash_time_ns += hash_time.as_ns(); + r + }; + if should_tick { // Lock PohRecorder only for the final hash... - poh_recorder.lock().unwrap().tick(); + { + let mut lock_time = Measure::start("lock"); + let mut poh_recorder_l = poh_recorder.lock().unwrap(); + lock_time.stop(); + total_lock_time_ns += lock_time.as_ns(); + let mut tick_time = Measure::start("tick"); + poh_recorder_l.tick(); + tick_time.stop(); + total_tick_time_ns += tick_time.as_ns(); + } num_ticks += 1; let elapsed_ns = now.elapsed().as_nanos() as u64; // sleep is not accurate enough to get a predictable time. @@ -133,18 +157,24 @@ impl PohService { now = Instant::now(); if last_metric.elapsed().as_millis() > 1000 { - let elapsed_ms = last_metric.elapsed().as_millis() as u64; - let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks; + let elapsed_us = last_metric.elapsed().as_micros() as u64; + let us_per_slot = (elapsed_us * ticks_per_slot) / num_ticks; datapoint_info!( "poh-service", ("ticks", num_ticks as i64, i64), ("hashes", num_hashes as i64, i64), - ("elapsed_ms", ms_per_slot, i64), - ("total_sleep_ms", total_sleep_us / 1000, i64), + ("elapsed_us", us_per_slot, i64), + ("total_sleep_us", total_sleep_us, i64), + ("total_tick_time_us", total_tick_time_ns / 1000, i64), + ("total_lock_time_us", total_lock_time_ns / 1000, i64), + ("total_hash_time_us", total_hash_time_ns / 1000, i64), ); total_sleep_us = 0; num_ticks = 0; num_hashes = 0; + total_tick_time_ns = 0; + total_lock_time_ns = 0; + total_hash_time_ns = 0; last_metric = Instant::now(); } if poh_exit.load(Ordering::Relaxed) { diff --git a/measure/src/measure.rs b/measure/src/measure.rs index f8c9c92502..b44fb6ec03 100644 --- a/measure/src/measure.rs +++ b/measure/src/measure.rs @@ -20,6 +20,10 @@ impl Measure { self.duration = duration_as_ns(&self.start.elapsed()); } + pub fn as_ns(&self) -> u64 { + self.duration + } + pub fn as_us(&self) -> u64 { self.duration / 1000 }