From da9fdd785c1b828571cf51e0108d56df60fafc0a Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Thu, 15 Jul 2021 10:26:00 -0500 Subject: [PATCH] measure and speed up 2nd half of generate_index (#18503) * add metrics and speedup storage calcs in generate_index * add tests, refactor * rename map variable * rename another index variable --- runtime/src/accounts_db.rs | 200 +++++++++++++++++++++++++++++----- runtime/src/accounts_index.rs | 2 +- 2 files changed, 176 insertions(+), 26 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index c13eff7b32..35a10039f3 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -169,6 +169,9 @@ struct GenerateIndexTimings { pub min_bin_size: usize, pub max_bin_size: usize, pub total_items: usize, + pub storage_size_accounts_map_us: u64, + pub storage_size_storages_us: u64, + pub storage_size_accounts_map_flatten_us: u64, } impl GenerateIndexTimings { @@ -181,7 +184,21 @@ impl GenerateIndexTimings { ("insertion_time_us", self.insertion_time_us, i64), ("min_bin_size", self.min_bin_size as i64, i64), ("max_bin_size", self.max_bin_size as i64, i64), - ("total_items", self.total_items as i64, i64), + ( + "storage_size_accounts_map_us", + self.storage_size_accounts_map_us as i64, + i64 + ), + ( + "storage_size_storages_us", + self.storage_size_storages_us as i64, + i64 + ), + ( + "storage_size_accounts_map_flatten_us", + self.storage_size_accounts_map_flatten_us as i64, + i64 + ), ); } } @@ -5992,47 +6009,92 @@ impl AccountsDb { .accounts_index .account_maps .iter() - .map(|i| { - let len = i.read().unwrap().len(); + .map(|map_bin| { + let len = map_bin.read().unwrap().len(); min_bin_size = std::cmp::min(min_bin_size, len); max_bin_size = std::cmp::max(max_bin_size, len); len }) .sum(); - let timings = GenerateIndexTimings { + let mut timings = GenerateIndexTimings { scan_time, index_time: index_time.as_us(), insertion_time_us: insertion_time_us.load(Ordering::Relaxed), min_bin_size, max_bin_size, total_items, + ..GenerateIndexTimings::default() }; + + if pass == 0 { + // Need to add these last, otherwise older updates will be cleaned + for slot in &slots { + self.accounts_index.add_root(*slot, false); + } + + self.initialize_storage_count_and_alive_bytes(&mut timings); + } timings.report(); } + } - // Need to add these last, otherwise older updates will be cleaned - for slot in slots { - self.accounts_index.add_root(slot, false); - } - - let mut stored_sizes_and_counts = HashMap::new(); - self.accounts_index.account_maps.iter().for_each(|i| { - i.read().unwrap().values().for_each(|entry| { - entry - .slot_list - .read() - .unwrap() - .iter() - .for_each(|(_slot, account_entry)| { - let storage_entry_meta = stored_sizes_and_counts - .entry(account_entry.store_id) - .or_insert((0, 0)); - storage_entry_meta.0 += account_entry.stored_size; - storage_entry_meta.1 += 1; - }) + fn calculate_storage_count_and_alive_bytes( + &self, + timings: &mut GenerateIndexTimings, + ) -> HashMap { + // look at every account in the account index and calculate for each storage: stored_size and count + let mut storage_size_accounts_map_time = Measure::start("storage_size_accounts_map"); + let mut maps = self + .accounts_index + .account_maps + .par_iter() + .map(|bin_map| { + let mut stored_sizes_and_counts = HashMap::new(); + bin_map.read().unwrap().values().for_each(|entry| { + entry + .slot_list + .read() + .unwrap() + .iter() + .for_each(|(_slot, account_entry)| { + let storage_entry_meta = stored_sizes_and_counts + .entry(account_entry.store_id) + .or_insert((0, 0)); + storage_entry_meta.0 += account_entry.stored_size; + storage_entry_meta.1 += 1; + }) + }); + stored_sizes_and_counts }) - }); + .collect::>(); + storage_size_accounts_map_time.stop(); + timings.storage_size_accounts_map_us = storage_size_accounts_map_time.as_us(); + + // flatten/merge the HashMaps from the parallel iteration above + let mut storage_size_accounts_map_flatten_time = + Measure::start("storage_size_accounts_map_flatten_time"); + let mut stored_sizes_and_counts = maps.pop().unwrap_or_default(); + for map in maps { + for (store_id, meta) in map.into_iter() { + let storage_entry_meta = stored_sizes_and_counts.entry(store_id).or_insert((0, 0)); + storage_entry_meta.0 += meta.0; + storage_entry_meta.1 += meta.1; + } + } + storage_size_accounts_map_flatten_time.stop(); + timings.storage_size_accounts_map_flatten_us = + storage_size_accounts_map_flatten_time.as_us(); + stored_sizes_and_counts + } + + fn set_storage_count_and_alive_bytes( + &self, + stored_sizes_and_counts: HashMap, + timings: &mut GenerateIndexTimings, + ) { + // store count and size for each storage + let mut storage_size_storages_time = Measure::start("storage_size_storages"); for slot_stores in self.storage.0.iter() { for (id, store) in slot_stores.value().read().unwrap().iter() { // Should be default at this point @@ -6047,6 +6109,13 @@ impl AccountsDb { } } } + storage_size_storages_time.stop(); + timings.storage_size_storages_us = storage_size_storages_time.as_us(); + } + + fn initialize_storage_count_and_alive_bytes(&self, timings: &mut GenerateIndexTimings) { + let stored_sizes_and_counts = self.calculate_storage_count_and_alive_bytes(timings); + self.set_storage_count_and_alive_bytes(stored_sizes_and_counts, timings); } pub(crate) fn print_accounts_stats(&self, label: &str) { @@ -11763,6 +11832,87 @@ pub mod tests { assert!(!accounts.is_candidate_for_shrink(&entry)); } + #[test] + fn test_calculate_storage_count_and_alive_bytes() { + let accounts = AccountsDb::new_single(); + let shared_key = solana_sdk::pubkey::new_rand(); + let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner()); + let slot0 = 0; + accounts.store_uncached(slot0, &[(&shared_key, &account)]); + + let result = + accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default()); + assert_eq!(result.len(), 1); + for (k, v) in result.iter() { + assert_eq!((k, v), (&0, &(144, 1))); + } + } + + #[test] + fn test_calculate_storage_count_and_alive_bytes_0_accounts() { + let accounts = AccountsDb::new_single(); + let result = + accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default()); + assert!(result.is_empty()); + } + + #[test] + fn test_calculate_storage_count_and_alive_bytes_2_accounts() { + let accounts = AccountsDb::new_single(); + let keys = [ + solana_sdk::pubkey::Pubkey::new(&[0; 32]), + solana_sdk::pubkey::Pubkey::new(&[255; 32]), + ]; + // make sure accounts are in 2 different bins + assert!( + crate::accounts_index::get_bin_pubkey(&keys[0]) + != crate::accounts_index::get_bin_pubkey(&keys[1]) + ); + let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner()); + let account_big = AccountSharedData::new(1, 1000, AccountSharedData::default().owner()); + let slot0 = 0; + accounts.store_uncached(slot0, &[(&keys[0], &account)]); + accounts.store_uncached(slot0, &[(&keys[1], &account_big)]); + + let result = + accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default()); + assert_eq!(result.len(), 1); + for (k, v) in result.iter() { + assert_eq!((k, v), (&0, &(1280, 2))); + } + } + + #[test] + fn test_set_storage_count_and_alive_bytes() { + let accounts = AccountsDb::new_single(); + + // make sure we have storage 0 + let shared_key = solana_sdk::pubkey::new_rand(); + let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner()); + let slot0 = 0; + accounts.store_uncached(slot0, &[(&shared_key, &account)]); + + // fake out the store count to avoid the assert + for slot_stores in accounts.storage.0.iter() { + for (_id, store) in slot_stores.value().read().unwrap().iter() { + store.alive_bytes.store(0, Ordering::SeqCst); + } + } + + // populate based on made up hash data + let mut hashmap = HashMap::default(); + hashmap.insert(0, (2, 3)); + accounts.set_storage_count_and_alive_bytes(hashmap, &mut GenerateIndexTimings::default()); + assert_eq!(accounts.storage.0.len(), 1); + for slot_stores in accounts.storage.0.iter() { + for (id, store) in slot_stores.value().read().unwrap().iter() { + assert_eq!(id, &0); + assert_eq!(store.count_and_status.read().unwrap().0, 3); + assert_eq!(store.alive_bytes.load(Ordering::SeqCst), 2); + } + } + } + #[test] fn test_purge_alive_unrooted_slots_after_clean() { let accounts = AccountsDb::new_single(); diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index b69ce8c2ae..a75b4956bc 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -593,7 +593,7 @@ pub trait ZeroLamport { fn is_zero_lamport(&self) -> bool; } -fn get_bin_pubkey(pubkey: &Pubkey) -> usize { +pub(crate) fn get_bin_pubkey(pubkey: &Pubkey) -> usize { let byte_of_pubkey_to_bin = 0; // TODO: this should not be 0. For now it needs to be due to requests for in-order pubkeys (pubkey.as_ref()[byte_of_pubkey_to_bin] as usize) * BINS / ((u8::MAX as usize) + 1) }