Add poh speed check and tick speed calibration (#14292)

This commit is contained in:
sakridge
2020-12-29 09:35:57 -08:00
committed by GitHub
parent 444ed768dc
commit 2074e407cd
5 changed files with 130 additions and 17 deletions

View File

@@ -1,7 +1,6 @@
//! The `poh_service` module implements a service that records the passing of
//! "ticks", a measure of time in the PoH stream
use crate::poh_recorder::PohRecorder;
use solana_sdk::clock::DEFAULT_TICKS_PER_SLOT;
use solana_sdk::poh_config::PohConfig;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
@@ -25,6 +24,7 @@ impl PohService {
poh_recorder: Arc<Mutex<PohRecorder>>,
poh_config: &Arc<PohConfig>,
poh_exit: &Arc<AtomicBool>,
ticks_per_slot: u64,
) -> Self {
let poh_exit_ = poh_exit.clone();
let poh_config = poh_config.clone();
@@ -49,7 +49,12 @@ impl PohService {
if let Some(cores) = core_affinity::get_core_ids() {
core_affinity::set_for_current(cores[0]);
}
Self::tick_producer(poh_recorder, &poh_exit_);
Self::tick_producer(
poh_recorder,
&poh_exit_,
poh_config.target_tick_duration.as_nanos() as u64,
ticks_per_slot,
);
}
poh_exit_.store(true, Ordering::Relaxed);
})
@@ -85,27 +90,48 @@ impl PohService {
}
}
fn tick_producer(poh_recorder: Arc<Mutex<PohRecorder>>, poh_exit: &AtomicBool) {
fn tick_producer(
poh_recorder: Arc<Mutex<PohRecorder>>,
poh_exit: &AtomicBool,
target_tick_ns: u64,
ticks_per_slot: u64,
) {
info!("starting with target ns: {}", target_tick_ns);
let poh = poh_recorder.lock().unwrap().poh.clone();
let mut now = Instant::now();
let mut last_metric = Instant::now();
let mut num_ticks = 0;
let mut num_hashes = 0;
let mut total_sleep_us = 0;
loop {
num_hashes += NUM_HASHES_PER_BATCH;
if poh.lock().unwrap().hash(NUM_HASHES_PER_BATCH) {
// Lock PohRecorder only for the final hash...
poh_recorder.lock().unwrap().tick();
num_ticks += 1;
if num_ticks >= DEFAULT_TICKS_PER_SLOT * 2 {
let elapsed_ns = now.elapsed().as_nanos() as u64;
// sleep is not accurate enough to get a predictable time.
// Kernel can not schedule the thread for a while.
while (now.elapsed().as_nanos() as u64) < target_tick_ns {
std::sync::atomic::spin_loop_hint();
}
total_sleep_us += (now.elapsed().as_nanos() as u64 - elapsed_ns) / 1000;
now = Instant::now();
if last_metric.elapsed().as_millis() > 1000 {
let elapsed_ms = last_metric.elapsed().as_millis() as u64;
let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks;
datapoint_info!(
"poh-service",
("ticks", num_ticks as i64, i64),
("hashes", num_hashes as i64, i64),
("elapsed_ms", now.elapsed().as_millis() as i64, i64),
("elapsed_ms", ms_per_slot, i64),
("total_sleep_ms", total_sleep_us / 1000, i64),
);
total_sleep_us = 0;
num_ticks = 0;
num_hashes = 0;
now = Instant::now();
last_metric = Instant::now();
}
if poh_exit.load(Ordering::Relaxed) {
break;
@@ -189,7 +215,7 @@ mod tests {
.unwrap()
};
let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit);
let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit, 0);
poh_recorder.lock().unwrap().set_working_bank(working_bank);
// get some events