add metrics for tick producer and poh_recorder (#15931)
This commit is contained in:
committed by
GitHub
parent
5460fb10a2
commit
40997d0aef
@ -80,6 +80,7 @@ pub struct PohRecorder {
|
|||||||
tick_lock_contention_us: u64,
|
tick_lock_contention_us: u64,
|
||||||
tick_overhead_us: u64,
|
tick_overhead_us: u64,
|
||||||
record_us: u64,
|
record_us: u64,
|
||||||
|
last_metric: Instant,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PohRecorder {
|
impl PohRecorder {
|
||||||
@ -262,14 +263,15 @@ impl PohRecorder {
|
|||||||
) {
|
) {
|
||||||
self.clear_bank();
|
self.clear_bank();
|
||||||
let mut cache = vec![];
|
let mut cache = vec![];
|
||||||
{
|
let poh_hash = {
|
||||||
let mut poh = self.poh.lock().unwrap();
|
let mut poh = self.poh.lock().unwrap();
|
||||||
info!(
|
|
||||||
"reset poh from: {},{},{} to: {},{}",
|
|
||||||
poh.hash, self.tick_height, self.start_slot, blockhash, start_slot
|
|
||||||
);
|
|
||||||
poh.reset(blockhash, self.poh_config.hashes_per_tick);
|
poh.reset(blockhash, self.poh_config.hashes_per_tick);
|
||||||
}
|
poh.hash
|
||||||
|
};
|
||||||
|
info!(
|
||||||
|
"reset poh from: {},{},{} to: {},{}",
|
||||||
|
poh_hash, self.tick_height, self.start_slot, blockhash, start_slot
|
||||||
|
);
|
||||||
|
|
||||||
std::mem::swap(&mut cache, &mut self.tick_cache);
|
std::mem::swap(&mut cache, &mut self.tick_cache);
|
||||||
|
|
||||||
@ -396,23 +398,26 @@ impl PohRecorder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn report_metrics(&mut self, bank_slot: Slot) {
|
fn report_metrics(&mut self, bank_slot: Slot) {
|
||||||
datapoint_info!(
|
if self.last_metric.elapsed().as_millis() > 1000 {
|
||||||
"poh_recorder",
|
datapoint_info!(
|
||||||
("slot", bank_slot, i64),
|
"poh_recorder",
|
||||||
("tick_lock_contention", self.tick_lock_contention_us, i64),
|
("slot", bank_slot, i64),
|
||||||
("record_us", self.record_us, i64),
|
("tick_lock_contention", self.tick_lock_contention_us, i64),
|
||||||
("tick_overhead", self.tick_overhead_us, i64),
|
("record_us", self.record_us, i64),
|
||||||
(
|
("tick_overhead", self.tick_overhead_us, i64),
|
||||||
"record_lock_contention",
|
(
|
||||||
self.record_lock_contention_us,
|
"record_lock_contention",
|
||||||
i64
|
self.record_lock_contention_us,
|
||||||
),
|
i64
|
||||||
);
|
),
|
||||||
|
);
|
||||||
|
|
||||||
self.tick_lock_contention_us = 0;
|
self.tick_lock_contention_us = 0;
|
||||||
self.record_us = 0;
|
self.record_us = 0;
|
||||||
self.tick_overhead_us = 0;
|
self.tick_overhead_us = 0;
|
||||||
self.record_lock_contention_us = 0;
|
self.record_lock_contention_us = 0;
|
||||||
|
self.last_metric = Instant::now();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn record(
|
pub fn record(
|
||||||
@ -424,6 +429,7 @@ impl PohRecorder {
|
|||||||
// Entries without transactions are used to track real-time passing in the ledger and
|
// Entries without transactions are used to track real-time passing in the ledger and
|
||||||
// cannot be generated by `record()`
|
// cannot be generated by `record()`
|
||||||
assert!(!transactions.is_empty(), "No transactions provided");
|
assert!(!transactions.is_empty(), "No transactions provided");
|
||||||
|
self.report_metrics(bank_slot);
|
||||||
loop {
|
loop {
|
||||||
self.flush_cache(false)?;
|
self.flush_cache(false)?;
|
||||||
|
|
||||||
@ -432,7 +438,6 @@ impl PohRecorder {
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.ok_or(PohRecorderError::MaxHeightReached)?;
|
.ok_or(PohRecorderError::MaxHeightReached)?;
|
||||||
if bank_slot != working_bank.bank.slot() {
|
if bank_slot != working_bank.bank.slot() {
|
||||||
self.report_metrics(bank_slot);
|
|
||||||
return Err(PohRecorderError::MaxHeightReached);
|
return Err(PohRecorderError::MaxHeightReached);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -504,6 +509,7 @@ impl PohRecorder {
|
|||||||
tick_lock_contention_us: 0,
|
tick_lock_contention_us: 0,
|
||||||
record_us: 0,
|
record_us: 0,
|
||||||
tick_overhead_us: 0,
|
tick_overhead_us: 0,
|
||||||
|
last_metric: Instant::now(),
|
||||||
},
|
},
|
||||||
receiver,
|
receiver,
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
//! The `poh_service` module implements a service that records the passing of
|
//! The `poh_service` module implements a service that records the passing of
|
||||||
//! "ticks", a measure of time in the PoH stream
|
//! "ticks", a measure of time in the PoH stream
|
||||||
use crate::poh_recorder::PohRecorder;
|
use crate::poh_recorder::PohRecorder;
|
||||||
|
use solana_measure::measure::Measure;
|
||||||
use solana_sdk::poh_config::PohConfig;
|
use solana_sdk::poh_config::PohConfig;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
@ -117,11 +118,34 @@ impl PohService {
|
|||||||
let mut num_ticks = 0;
|
let mut num_ticks = 0;
|
||||||
let mut num_hashes = 0;
|
let mut num_hashes = 0;
|
||||||
let mut total_sleep_us = 0;
|
let mut total_sleep_us = 0;
|
||||||
|
let mut total_lock_time_ns = 0;
|
||||||
|
let mut total_hash_time_ns = 0;
|
||||||
|
let mut total_tick_time_ns = 0;
|
||||||
loop {
|
loop {
|
||||||
num_hashes += hashes_per_batch;
|
num_hashes += hashes_per_batch;
|
||||||
if poh.lock().unwrap().hash(hashes_per_batch) {
|
let should_tick = {
|
||||||
|
let mut lock_time = Measure::start("lock");
|
||||||
|
let mut poh_l = poh.lock().unwrap();
|
||||||
|
lock_time.stop();
|
||||||
|
total_lock_time_ns += lock_time.as_ns();
|
||||||
|
let mut hash_time = Measure::start("hash");
|
||||||
|
let r = poh_l.hash(hashes_per_batch);
|
||||||
|
hash_time.stop();
|
||||||
|
total_hash_time_ns += hash_time.as_ns();
|
||||||
|
r
|
||||||
|
};
|
||||||
|
if should_tick {
|
||||||
// Lock PohRecorder only for the final hash...
|
// Lock PohRecorder only for the final hash...
|
||||||
poh_recorder.lock().unwrap().tick();
|
{
|
||||||
|
let mut lock_time = Measure::start("lock");
|
||||||
|
let mut poh_recorder_l = poh_recorder.lock().unwrap();
|
||||||
|
lock_time.stop();
|
||||||
|
total_lock_time_ns += lock_time.as_ns();
|
||||||
|
let mut tick_time = Measure::start("tick");
|
||||||
|
poh_recorder_l.tick();
|
||||||
|
tick_time.stop();
|
||||||
|
total_tick_time_ns += tick_time.as_ns();
|
||||||
|
}
|
||||||
num_ticks += 1;
|
num_ticks += 1;
|
||||||
let elapsed_ns = now.elapsed().as_nanos() as u64;
|
let elapsed_ns = now.elapsed().as_nanos() as u64;
|
||||||
// sleep is not accurate enough to get a predictable time.
|
// sleep is not accurate enough to get a predictable time.
|
||||||
@ -133,18 +157,24 @@ impl PohService {
|
|||||||
now = Instant::now();
|
now = Instant::now();
|
||||||
|
|
||||||
if last_metric.elapsed().as_millis() > 1000 {
|
if last_metric.elapsed().as_millis() > 1000 {
|
||||||
let elapsed_ms = last_metric.elapsed().as_millis() as u64;
|
let elapsed_us = last_metric.elapsed().as_micros() as u64;
|
||||||
let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks;
|
let us_per_slot = (elapsed_us * ticks_per_slot) / num_ticks;
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"poh-service",
|
"poh-service",
|
||||||
("ticks", num_ticks as i64, i64),
|
("ticks", num_ticks as i64, i64),
|
||||||
("hashes", num_hashes as i64, i64),
|
("hashes", num_hashes as i64, i64),
|
||||||
("elapsed_ms", ms_per_slot, i64),
|
("elapsed_us", us_per_slot, i64),
|
||||||
("total_sleep_ms", total_sleep_us / 1000, i64),
|
("total_sleep_us", total_sleep_us, i64),
|
||||||
|
("total_tick_time_us", total_tick_time_ns / 1000, i64),
|
||||||
|
("total_lock_time_us", total_lock_time_ns / 1000, i64),
|
||||||
|
("total_hash_time_us", total_hash_time_ns / 1000, i64),
|
||||||
);
|
);
|
||||||
total_sleep_us = 0;
|
total_sleep_us = 0;
|
||||||
num_ticks = 0;
|
num_ticks = 0;
|
||||||
num_hashes = 0;
|
num_hashes = 0;
|
||||||
|
total_tick_time_ns = 0;
|
||||||
|
total_lock_time_ns = 0;
|
||||||
|
total_hash_time_ns = 0;
|
||||||
last_metric = Instant::now();
|
last_metric = Instant::now();
|
||||||
}
|
}
|
||||||
if poh_exit.load(Ordering::Relaxed) {
|
if poh_exit.load(Ordering::Relaxed) {
|
||||||
|
@ -20,6 +20,10 @@ impl Measure {
|
|||||||
self.duration = duration_as_ns(&self.start.elapsed());
|
self.duration = duration_as_ns(&self.start.elapsed());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn as_ns(&self) -> u64 {
|
||||||
|
self.duration
|
||||||
|
}
|
||||||
|
|
||||||
pub fn as_us(&self) -> u64 {
|
pub fn as_us(&self) -> u64 {
|
||||||
self.duration / 1000
|
self.duration / 1000
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user