PoH batch size calibration (#15717)

2021-03-05 16:01:21 -08:00
parent c5371fdc85
commit d09112fa6d
7 changed files with 137 additions and 36 deletions
--- a/core/src/poh_service.rs
+++ b/core/src/poh_service.rs
@@ -16,8 +16,8 @@ pub struct PohService {
 // * The larger this number is from 1, the speed of recording transactions will suffer due to lock
 //   contention with the PoH hashing within `tick_producer()`.
 //
-// See benches/poh.rs for some benchmarks that attempt to justify this magic number.
-pub const NUM_HASHES_PER_BATCH: u64 = 1;
+// Can use test_poh_service to calibrate this
+pub const DEFAULT_HASHES_PER_BATCH: u64 = 64;

 pub const DEFAULT_PINNED_CPU_CORE: usize = 0;

@@ -30,6 +30,7 @@ impl PohService {
        poh_exit: &Arc<AtomicBool>,
        ticks_per_slot: u64,
        pinned_cpu_core: usize,
+        hashes_per_batch: u64,
    ) -> Self {
        let poh_exit_ = poh_exit.clone();
        let poh_config = poh_config.clone();
@@ -66,6 +67,7 @@ impl PohService {
                        &poh_exit_,
                        poh_config.target_tick_duration.as_nanos() as u64 - adjustment_per_tick,
                        ticks_per_slot,
+                        hashes_per_batch,
                    );
                }
                poh_exit_.store(true, Ordering::Relaxed);
@@ -107,8 +109,8 @@ impl PohService {
        poh_exit: &AtomicBool,
        target_tick_ns: u64,
        ticks_per_slot: u64,
+        hashes_per_batch: u64,
    ) {
-        info!("starting with target ns: {}", target_tick_ns);
        let poh = poh_recorder.lock().unwrap().poh.clone();
        let mut now = Instant::now();
        let mut last_metric = Instant::now();
@@ -116,8 +118,8 @@ impl PohService {
        let mut num_hashes = 0;
        let mut total_sleep_us = 0;
        loop {
-            num_hashes += NUM_HASHES_PER_BATCH;
-            if poh.lock().unwrap().hash(NUM_HASHES_PER_BATCH) {
+            num_hashes += hashes_per_batch;
+            if poh.lock().unwrap().hash(hashes_per_batch) {
                // Lock PohRecorder only for the final hash...
                poh_recorder.lock().unwrap().tick();
                num_ticks += 1;
@@ -161,17 +163,22 @@ impl PohService {
 mod tests {
    use super::*;
    use crate::poh_recorder::WorkingBank;
+    use rand::{thread_rng, Rng};
    use solana_ledger::genesis_utils::{create_genesis_config, GenesisConfigInfo};
    use solana_ledger::leader_schedule_cache::LeaderScheduleCache;
    use solana_ledger::{blockstore::Blockstore, get_tmp_ledger_path};
+    use solana_measure::measure::Measure;
    use solana_perf::test_tx::test_tx;
    use solana_runtime::bank::Bank;
+    use solana_sdk::clock;
    use solana_sdk::hash::hash;
    use solana_sdk::pubkey::Pubkey;
+    use solana_sdk::timing;
    use std::time::Duration;

    #[test]
    fn test_poh_service() {
+        solana_logger::setup();
        let GenesisConfigInfo { genesis_config, .. } = create_genesis_config(2);
        let bank = Arc::new(Bank::new(&genesis_config));
        let prev_hash = bank.last_blockhash();
@@ -179,9 +186,13 @@ mod tests {
        {
            let blockstore = Blockstore::open(&ledger_path)
                .expect("Expected to be able to open database ledger");
+
+            let default_target_tick_duration =
+                timing::duration_as_us(&PohConfig::default().target_tick_duration);
+            let target_tick_duration = Duration::from_micros(default_target_tick_duration);
            let poh_config = Arc::new(PohConfig {
-                hashes_per_tick: Some(2),
-                target_tick_duration: Duration::from_millis(42),
+                hashes_per_tick: Some(clock::DEFAULT_HASHES_PER_TICK),
+                target_tick_duration,
                target_tick_count: None,
            });
            let (poh_recorder, entry_receiver) = PohRecorder::new(
@@ -202,6 +213,14 @@ mod tests {
                min_tick_height: bank.tick_height(),
                max_tick_height: std::u64::MAX,
            };
+            let ticks_per_slot = bank.ticks_per_slot();
+
+            // specify RUN_TIME to run in a benchmark-like mode
+            // to calibrate batch size
+            let run_time = std::env::var("RUN_TIME")
+                .map(|x| x.parse().unwrap())
+                .unwrap_or(0);
+            let is_test_run = run_time == 0;

            let entry_producer = {
                let poh_recorder = poh_recorder.clone();
@@ -210,16 +229,33 @@ mod tests {
                Builder::new()
                    .name("solana-poh-service-entry_producer".to_string())
                    .spawn(move || {
+                        let now = Instant::now();
+                        let mut total_us = 0;
+                        let mut total_times = 0;
+                        let h1 = hash(b"hello world!");
+                        let tx = test_tx();
                        loop {
                            // send some data
-                            let h1 = hash(b"hello world!");
-                            let tx = test_tx();
-                            let _ = poh_recorder
-                                .lock()
-                                .unwrap()
-                                .record(bank.slot(), h1, vec![tx]);
+                            let mut time = Measure::start("record");
+                            let _ = poh_recorder.lock().unwrap().record(
+                                bank.slot(),
+                                h1,
+                                vec![tx.clone()],
+                            );
+                            time.stop();
+                            total_us += time.as_us();
+                            total_times += 1;
+                            if is_test_run && thread_rng().gen_ratio(1, 4) {
+                                sleep(Duration::from_millis(200));
+                            }

                            if exit.load(Ordering::Relaxed) {
+                                info!(
+                                    "spent:{}ms record: {}ms entries recorded: {}",
+                                    now.elapsed().as_millis(),
+                                    total_us / 1000,
+                                    total_times,
+                                );
                                break;
                            }
                        }
@@ -227,12 +263,16 @@ mod tests {
                    .unwrap()
            };

+            let hashes_per_batch = std::env::var("HASHES_PER_BATCH")
+                .map(|x| x.parse().unwrap())
+                .unwrap_or(DEFAULT_HASHES_PER_BATCH);
            let poh_service = PohService::new(
                poh_recorder.clone(),
                &poh_config,
                &exit,
                0,
                DEFAULT_PINNED_CPU_CORE,
+                hashes_per_batch,
            );
            poh_recorder.lock().unwrap().set_working_bank(working_bank);

@@ -241,11 +281,14 @@ mod tests {
            let mut need_tick = true;
            let mut need_entry = true;
            let mut need_partial = true;
+            let mut num_ticks = 0;

-            while need_tick || need_entry || need_partial {
+            let time = Instant::now();
+            while run_time != 0 || need_tick || need_entry || need_partial {
                let (_bank, (entry, _tick_height)) = entry_receiver.recv().unwrap();

                if entry.is_tick() {
+                    num_ticks += 1;
                    assert!(
                        entry.num_hashes <= poh_config.hashes_per_tick.unwrap(),
                        "{} <= {}",
@@ -269,7 +312,35 @@ mod tests {
                    need_entry = false;
                    hashes += entry.num_hashes;
                }
+
+                if run_time != 0 {
+                    if time.elapsed().as_millis() > run_time {
+                        break;
+                    }
+                } else {
+                    assert!(
+                        time.elapsed().as_secs() < 60,
+                        "Test should not run for this long! {}s tick {} entry {} partial {}",
+                        time.elapsed().as_secs(),
+                        need_tick,
+                        need_entry,
+                        need_partial,
+                    );
+                }
            }
+            info!(
+                "target_tick_duration: {} ticks_per_slot: {}",
+                poh_config.target_tick_duration.as_nanos(),
+                ticks_per_slot
+            );
+            let elapsed = time.elapsed();
+            info!(
+                "{} ticks in {}ms {}us/tick",
+                num_ticks,
+                elapsed.as_millis(),
+                elapsed.as_micros() / num_ticks
+            );
+
            exit.store(true, Ordering::Relaxed);
            poh_service.join().unwrap();
            entry_producer.join().unwrap();