From eced3776b044f8be3a32dd6743aaa3187b305a25 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Thu, 11 Feb 2021 20:06:04 -0600 Subject: [PATCH] add metrics to hash calculation and consolidate (#15165) * add metrics to hash calculation * add metrics to hash calculation * create HashStats struct to consolidate hash timing * formatting --- runtime/src/accounts_db.rs | 190 +++++++++++++++++++++++++------------ 1 file changed, 131 insertions(+), 59 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 054c653300..9cee6f9859 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -169,6 +169,57 @@ type ShrinkCandidates = HashMap( snapshot_storages: &[SnapshotStorage], + stats: &mut HashStats, scan_func: F, ) -> Vec where F: Fn(LoadedAccount, &mut B, Slot) + Send + Sync, B: Send + Default, { + let mut time = Measure::start("flatten"); let items: Vec<_> = snapshot_storages.iter().flatten().collect(); + time.stop(); + stats.pre_scan_flatten_time_total_us += time.as_us(); // Without chunks, we end up with 1 output vec for each outer snapshot storage. // This results in too many vectors to be efficient. @@ -3670,12 +3725,14 @@ impl AccountsDB { fn flatten_hash_intermediate( data_sections_by_pubkey: Vec>, - ) -> (Vec, Measure, usize) { + stats: &mut HashStats, + ) -> Vec { let mut flatten_time = Measure::start("flatten"); let result: Vec<_> = data_sections_by_pubkey.into_iter().flatten().collect(); - let raw_len = result.len(); flatten_time.stop(); - (result, flatten_time, raw_len) + stats.flatten_time_total_us += flatten_time.as_us(); + stats.unreduced_entries = result.len(); + result } fn compare_two_hash_entries( @@ -3694,17 +3751,20 @@ impl AccountsDB { fn sort_hash_intermediate( mut data_by_pubkey: Vec, - ) -> (Vec, Measure) { + stats: &mut HashStats, + ) -> Vec { // sort each PUBKEY_DIVISION vec let mut sort_time = Measure::start("sort"); data_by_pubkey.par_sort_unstable_by(Self::compare_two_hash_entries); sort_time.stop(); - (data_by_pubkey, sort_time) + stats.sort_time_total_us += sort_time.as_us(); + data_by_pubkey } fn de_dup_and_eliminate_zeros( sorted_data_by_pubkey: Vec, - ) -> (Vec>, Measure, u64) { + stats: &mut HashStats, + ) -> (Vec>, u64) { // 1. eliminate zero lamport accounts // 2. pick the highest slot or (slot = and highest version) of each pubkey // 3. produce this output: @@ -3715,7 +3775,8 @@ impl AccountsDB { const CHUNKS: usize = 10; let (hashes, sum) = Self::de_dup_accounts_in_parallel(&sorted_data_by_pubkey, CHUNKS); zeros.stop(); - (hashes, zeros, sum) + stats.zeros_time_total_us += zeros.as_us(); + (hashes, sum) } // 1. eliminate zero lamport accounts @@ -3800,14 +3861,15 @@ impl AccountsDB { (result, sum) } - fn flatten_hashes(hashes: Vec>) -> (Vec, Measure, usize) { + fn flatten_hashes(hashes: Vec>, stats: &mut HashStats) -> Vec { // flatten vec/vec into 1d vec of hashes in order let mut flat2_time = Measure::start("flat2"); let hashes: Vec = hashes.into_iter().flatten().collect(); flat2_time.stop(); - let hash_total = hashes.len(); + stats.flatten_after_zeros_time_total_us += flat2_time.as_us(); + stats.hash_total = hashes.len(); - (hashes, flat2_time, hash_total) + hashes } // input: @@ -3816,19 +3878,17 @@ impl AccountsDB { // vec: [..] - items which fin in the containing bin, unordered within this vec // so, assumption is middle vec is bins sorted by pubkey fn rest_of_hash_calculation( - accounts: (Vec>, Measure), + data_sections_by_pubkey: Vec>, + mut stats: &mut HashStats, ) -> (Hash, u64) { - let (data_sections_by_pubkey, time_scan) = accounts; + let outer = Self::flatten_hash_intermediate(data_sections_by_pubkey, &mut stats); - let (outer, flatten_time, raw_len) = - Self::flatten_hash_intermediate(data_sections_by_pubkey); + let sorted_data_by_pubkey = Self::sort_hash_intermediate(outer, &mut stats); - let (sorted_data_by_pubkey, sort_time) = Self::sort_hash_intermediate(outer); + let (hashes, total_lamports) = + Self::de_dup_and_eliminate_zeros(sorted_data_by_pubkey, &mut stats); - let (hashes, zeros, total_lamports) = - Self::de_dup_and_eliminate_zeros(sorted_data_by_pubkey); - - let (hashes, flat2_time, hash_total) = Self::flatten_hashes(hashes); + let hashes = Self::flatten_hashes(hashes, &mut stats); let mut hash_time = Measure::start("hashes"); let (hash, _) = @@ -3836,18 +3896,8 @@ impl AccountsDB { (*t, 0) }); hash_time.stop(); - datapoint_info!( - "calculate_accounts_hash_without_index", - ("accounts_scan", time_scan.as_us(), i64), - ("eliminate_zeros", zeros.as_us(), i64), - ("hash", hash_time.as_us(), i64), - ("sort", sort_time.as_us(), i64), - ("hash_total", hash_total, i64), - ("flatten", flatten_time.as_us(), i64), - ("flatten_after_zeros", flat2_time.as_us(), i64), - ("unreduced_entries", raw_len as i64, i64), - ); - + stats.hash_time_total_us += hash_time.as_us(); + stats.log(); (hash, total_lamports) } @@ -3910,10 +3960,13 @@ impl AccountsDB { fn scan_snapshot_stores( storage: &[SnapshotStorage], simple_capitalization_enabled: bool, - ) -> (Vec>, Measure) { + mut stats: &mut HashStats, + ) -> Vec> { let mut time = Measure::start("scan all accounts"); + stats.num_snapshot_storage = storage.len(); let result: Vec> = Self::scan_account_storage_no_bank( &storage, + &mut stats, |loaded_account: LoadedAccount, accum: &mut Vec, slot: Slot| { @@ -3943,8 +3996,8 @@ impl AccountsDB { }, ); time.stop(); - - (result, time) + stats.scan_time_total_us += time.as_us(); + result } // modeled after get_accounts_delta_hash @@ -3955,9 +4008,11 @@ impl AccountsDB { thread_pool: Option<&ThreadPool>, ) -> (Hash, u64) { let scan_and_hash = || { - let result = Self::scan_snapshot_stores(storages, simple_capitalization_enabled); + let mut stats = HashStats::default(); + let result = + Self::scan_snapshot_stores(storages, simple_capitalization_enabled, &mut stats); - Self::rest_of_hash_calculation(result) + Self::rest_of_hash_calculation(result, &mut stats) }; if let Some(thread_pool) = thread_pool { thread_pool.install(scan_and_hash) @@ -5223,8 +5278,10 @@ pub mod tests { ); account_maps.push(val); - let result = - AccountsDB::rest_of_hash_calculation((vec![account_maps.clone()], Measure::start(""))); + let result = AccountsDB::rest_of_hash_calculation( + vec![account_maps.clone()], + &mut HashStats::default(), + ); let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap(); assert_eq!(result, (expected_hash, 88)); @@ -5234,8 +5291,10 @@ pub mod tests { let val = CalculateHashIntermediate::new(0, hash, 20, Slot::default(), key); account_maps.push(val); - let result = - AccountsDB::rest_of_hash_calculation((vec![account_maps.clone()], Measure::start(""))); + let result = AccountsDB::rest_of_hash_calculation( + vec![account_maps.clone()], + &mut HashStats::default(), + ); let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap(); assert_eq!(result, (expected_hash, 108)); @@ -5245,7 +5304,8 @@ pub mod tests { let val = CalculateHashIntermediate::new(0, hash, 30, Slot::default() + 1, key); account_maps.push(val); - let result = AccountsDB::rest_of_hash_calculation((vec![account_maps], Measure::start(""))); + let result = + AccountsDB::rest_of_hash_calculation(vec![account_maps], &mut HashStats::default()); let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap(); assert_eq!(result, (expected_hash, 118)); } @@ -5334,8 +5394,10 @@ pub mod tests { let result = AccountsDB::de_dup_accounts_from_stores(is_first_slice, slice); let (hashes2, lamports2) = AccountsDB::de_dup_accounts_in_parallel(slice, 1); let (hashes3, lamports3) = AccountsDB::de_dup_accounts_in_parallel(slice, 2); - let (hashes4, _, lamports4) = - AccountsDB::de_dup_and_eliminate_zeros(slice.to_vec()); + let (hashes4, lamports4) = AccountsDB::de_dup_and_eliminate_zeros( + slice.to_vec(), + &mut HashStats::default(), + ); assert_eq!( hashes2.iter().flatten().collect::>(), @@ -5414,14 +5476,19 @@ pub mod tests { .collect(); let expected = hashes.clone(); - assert_eq!(AccountsDB::flatten_hashes(vec![hashes.clone()]).0, expected); + assert_eq!( + AccountsDB::flatten_hashes(vec![hashes.clone()], &mut HashStats::default()), + expected + ); for in_first in 1..COUNT - 1 { assert_eq!( - AccountsDB::flatten_hashes(vec![ - hashes.clone()[0..in_first].to_vec(), - hashes.clone()[in_first..COUNT].to_vec() - ]) - .0, + AccountsDB::flatten_hashes( + vec![ + hashes.clone()[0..in_first].to_vec(), + hashes.clone()[in_first..COUNT].to_vec() + ], + &mut HashStats::default() + ), expected ); } @@ -5445,13 +5512,13 @@ pub mod tests { let list = vec![val.clone(), val2.clone()]; let mut list_bkup = list.clone(); list_bkup.sort_by(AccountsDB::compare_two_hash_entries); - let (list, _) = AccountsDB::sort_hash_intermediate(list); + let list = AccountsDB::sort_hash_intermediate(list, &mut HashStats::default()); assert_eq!(list, list_bkup); let list = vec![val2, val.clone()]; // reverse args let mut list_bkup = list.clone(); list_bkup.sort_by(AccountsDB::compare_two_hash_entries); - let (list, _) = AccountsDB::sort_hash_intermediate(list); + let list = AccountsDB::sort_hash_intermediate(list, &mut HashStats::default()); assert_eq!(list, list_bkup); // slot same, vers = @@ -5558,6 +5625,7 @@ pub mod tests { let calls = AtomicU64::new(0); let result = AccountsDB::scan_account_storage_no_bank( &storages, + &mut HashStats::default(), |loaded_account: LoadedAccount, accum: &mut Vec, slot: Slot| { calls.fetch_add(1, Ordering::Relaxed); assert_eq!(loaded_account.pubkey(), &pubkey); @@ -5579,15 +5647,18 @@ pub mod tests { 3, Pubkey::new_unique(), )]]; - let (result, _, len) = AccountsDB::flatten_hash_intermediate(test.clone()); + let mut stats = HashStats::default(); + let result = AccountsDB::flatten_hash_intermediate(test.clone(), &mut stats); assert_eq!(result, test[0]); - assert_eq!(len, 1); + assert_eq!(stats.unreduced_entries, 1); - let (result, _, len) = AccountsDB::flatten_hash_intermediate(vec![ - vec![CalculateHashIntermediate::default(); 0], - ]); + let mut stats = HashStats::default(); + let result = AccountsDB::flatten_hash_intermediate( + vec![vec![CalculateHashIntermediate::default(); 0]], + &mut stats, + ); assert_eq!(result.len(), 0); - assert_eq!(len, 0); + assert_eq!(stats.unreduced_entries, 0); let test = vec![ vec![ @@ -5602,10 +5673,11 @@ pub mod tests { Pubkey::new_unique(), )], ]; - let (result, _, len) = AccountsDB::flatten_hash_intermediate(test.clone()); + let mut stats = HashStats::default(); + let result = AccountsDB::flatten_hash_intermediate(test.clone(), &mut stats); let expected = test.into_iter().flatten().collect::>(); assert_eq!(result, expected); - assert_eq!(len, expected.len()); + assert_eq!(stats.unreduced_entries, expected.len()); } fn sort_hashes_and_lamports_by_pubkey(hashes: &mut Vec<(Pubkey, Hash, u64)>) {