add metrics for tick producer and poh_recorder (#15931)

This commit is contained in:
Jeff Washington (jwash)
2021-03-17 10:38:26 -05:00
committed by GitHub
parent 5460fb10a2
commit 40997d0aef
3 changed files with 69 additions and 29 deletions

View File

@ -80,6 +80,7 @@ pub struct PohRecorder {
tick_lock_contention_us: u64, tick_lock_contention_us: u64,
tick_overhead_us: u64, tick_overhead_us: u64,
record_us: u64, record_us: u64,
last_metric: Instant,
} }
impl PohRecorder { impl PohRecorder {
@ -262,14 +263,15 @@ impl PohRecorder {
) { ) {
self.clear_bank(); self.clear_bank();
let mut cache = vec![]; let mut cache = vec![];
{ let poh_hash = {
let mut poh = self.poh.lock().unwrap(); let mut poh = self.poh.lock().unwrap();
info!(
"reset poh from: {},{},{} to: {},{}",
poh.hash, self.tick_height, self.start_slot, blockhash, start_slot
);
poh.reset(blockhash, self.poh_config.hashes_per_tick); poh.reset(blockhash, self.poh_config.hashes_per_tick);
} poh.hash
};
info!(
"reset poh from: {},{},{} to: {},{}",
poh_hash, self.tick_height, self.start_slot, blockhash, start_slot
);
std::mem::swap(&mut cache, &mut self.tick_cache); std::mem::swap(&mut cache, &mut self.tick_cache);
@ -396,23 +398,26 @@ impl PohRecorder {
} }
fn report_metrics(&mut self, bank_slot: Slot) { fn report_metrics(&mut self, bank_slot: Slot) {
datapoint_info!( if self.last_metric.elapsed().as_millis() > 1000 {
"poh_recorder", datapoint_info!(
("slot", bank_slot, i64), "poh_recorder",
("tick_lock_contention", self.tick_lock_contention_us, i64), ("slot", bank_slot, i64),
("record_us", self.record_us, i64), ("tick_lock_contention", self.tick_lock_contention_us, i64),
("tick_overhead", self.tick_overhead_us, i64), ("record_us", self.record_us, i64),
( ("tick_overhead", self.tick_overhead_us, i64),
"record_lock_contention", (
self.record_lock_contention_us, "record_lock_contention",
i64 self.record_lock_contention_us,
), i64
); ),
);
self.tick_lock_contention_us = 0; self.tick_lock_contention_us = 0;
self.record_us = 0; self.record_us = 0;
self.tick_overhead_us = 0; self.tick_overhead_us = 0;
self.record_lock_contention_us = 0; self.record_lock_contention_us = 0;
self.last_metric = Instant::now();
}
} }
pub fn record( pub fn record(
@ -424,6 +429,7 @@ impl PohRecorder {
// Entries without transactions are used to track real-time passing in the ledger and // Entries without transactions are used to track real-time passing in the ledger and
// cannot be generated by `record()` // cannot be generated by `record()`
assert!(!transactions.is_empty(), "No transactions provided"); assert!(!transactions.is_empty(), "No transactions provided");
self.report_metrics(bank_slot);
loop { loop {
self.flush_cache(false)?; self.flush_cache(false)?;
@ -432,7 +438,6 @@ impl PohRecorder {
.as_ref() .as_ref()
.ok_or(PohRecorderError::MaxHeightReached)?; .ok_or(PohRecorderError::MaxHeightReached)?;
if bank_slot != working_bank.bank.slot() { if bank_slot != working_bank.bank.slot() {
self.report_metrics(bank_slot);
return Err(PohRecorderError::MaxHeightReached); return Err(PohRecorderError::MaxHeightReached);
} }
@ -504,6 +509,7 @@ impl PohRecorder {
tick_lock_contention_us: 0, tick_lock_contention_us: 0,
record_us: 0, record_us: 0,
tick_overhead_us: 0, tick_overhead_us: 0,
last_metric: Instant::now(),
}, },
receiver, receiver,
) )

View File

@ -1,6 +1,7 @@
//! The `poh_service` module implements a service that records the passing of //! The `poh_service` module implements a service that records the passing of
//! "ticks", a measure of time in the PoH stream //! "ticks", a measure of time in the PoH stream
use crate::poh_recorder::PohRecorder; use crate::poh_recorder::PohRecorder;
use solana_measure::measure::Measure;
use solana_sdk::poh_config::PohConfig; use solana_sdk::poh_config::PohConfig;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
@ -117,11 +118,34 @@ impl PohService {
let mut num_ticks = 0; let mut num_ticks = 0;
let mut num_hashes = 0; let mut num_hashes = 0;
let mut total_sleep_us = 0; let mut total_sleep_us = 0;
let mut total_lock_time_ns = 0;
let mut total_hash_time_ns = 0;
let mut total_tick_time_ns = 0;
loop { loop {
num_hashes += hashes_per_batch; num_hashes += hashes_per_batch;
if poh.lock().unwrap().hash(hashes_per_batch) { let should_tick = {
let mut lock_time = Measure::start("lock");
let mut poh_l = poh.lock().unwrap();
lock_time.stop();
total_lock_time_ns += lock_time.as_ns();
let mut hash_time = Measure::start("hash");
let r = poh_l.hash(hashes_per_batch);
hash_time.stop();
total_hash_time_ns += hash_time.as_ns();
r
};
if should_tick {
// Lock PohRecorder only for the final hash... // Lock PohRecorder only for the final hash...
poh_recorder.lock().unwrap().tick(); {
let mut lock_time = Measure::start("lock");
let mut poh_recorder_l = poh_recorder.lock().unwrap();
lock_time.stop();
total_lock_time_ns += lock_time.as_ns();
let mut tick_time = Measure::start("tick");
poh_recorder_l.tick();
tick_time.stop();
total_tick_time_ns += tick_time.as_ns();
}
num_ticks += 1; num_ticks += 1;
let elapsed_ns = now.elapsed().as_nanos() as u64; let elapsed_ns = now.elapsed().as_nanos() as u64;
// sleep is not accurate enough to get a predictable time. // sleep is not accurate enough to get a predictable time.
@ -133,18 +157,24 @@ impl PohService {
now = Instant::now(); now = Instant::now();
if last_metric.elapsed().as_millis() > 1000 { if last_metric.elapsed().as_millis() > 1000 {
let elapsed_ms = last_metric.elapsed().as_millis() as u64; let elapsed_us = last_metric.elapsed().as_micros() as u64;
let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks; let us_per_slot = (elapsed_us * ticks_per_slot) / num_ticks;
datapoint_info!( datapoint_info!(
"poh-service", "poh-service",
("ticks", num_ticks as i64, i64), ("ticks", num_ticks as i64, i64),
("hashes", num_hashes as i64, i64), ("hashes", num_hashes as i64, i64),
("elapsed_ms", ms_per_slot, i64), ("elapsed_us", us_per_slot, i64),
("total_sleep_ms", total_sleep_us / 1000, i64), ("total_sleep_us", total_sleep_us, i64),
("total_tick_time_us", total_tick_time_ns / 1000, i64),
("total_lock_time_us", total_lock_time_ns / 1000, i64),
("total_hash_time_us", total_hash_time_ns / 1000, i64),
); );
total_sleep_us = 0; total_sleep_us = 0;
num_ticks = 0; num_ticks = 0;
num_hashes = 0; num_hashes = 0;
total_tick_time_ns = 0;
total_lock_time_ns = 0;
total_hash_time_ns = 0;
last_metric = Instant::now(); last_metric = Instant::now();
} }
if poh_exit.load(Ordering::Relaxed) { if poh_exit.load(Ordering::Relaxed) {

View File

@ -20,6 +20,10 @@ impl Measure {
self.duration = duration_as_ns(&self.start.elapsed()); self.duration = duration_as_ns(&self.start.elapsed());
} }
pub fn as_ns(&self) -> u64 {
self.duration
}
pub fn as_us(&self) -> u64 { pub fn as_us(&self) -> u64 {
self.duration / 1000 self.duration / 1000
} }