measure and speed up 2nd half of generate_index (#18503)

* add metrics and speedup storage calcs in generate_index

* add tests, refactor

* rename map variable

* rename another index variable
This commit is contained in:
Jeff Washington (jwash)
2021-07-15 10:26:00 -05:00
committed by GitHub
parent 0f8bcf65af
commit da9fdd785c
2 changed files with 176 additions and 26 deletions

View File

@ -169,6 +169,9 @@ struct GenerateIndexTimings {
pub min_bin_size: usize, pub min_bin_size: usize,
pub max_bin_size: usize, pub max_bin_size: usize,
pub total_items: usize, pub total_items: usize,
pub storage_size_accounts_map_us: u64,
pub storage_size_storages_us: u64,
pub storage_size_accounts_map_flatten_us: u64,
} }
impl GenerateIndexTimings { impl GenerateIndexTimings {
@ -181,7 +184,21 @@ impl GenerateIndexTimings {
("insertion_time_us", self.insertion_time_us, i64), ("insertion_time_us", self.insertion_time_us, i64),
("min_bin_size", self.min_bin_size as i64, i64), ("min_bin_size", self.min_bin_size as i64, i64),
("max_bin_size", self.max_bin_size as i64, i64), ("max_bin_size", self.max_bin_size as i64, i64),
("total_items", self.total_items as i64, i64), (
"storage_size_accounts_map_us",
self.storage_size_accounts_map_us as i64,
i64
),
(
"storage_size_storages_us",
self.storage_size_storages_us as i64,
i64
),
(
"storage_size_accounts_map_flatten_us",
self.storage_size_accounts_map_flatten_us as i64,
i64
),
); );
} }
} }
@ -5992,47 +6009,92 @@ impl AccountsDb {
.accounts_index .accounts_index
.account_maps .account_maps
.iter() .iter()
.map(|i| { .map(|map_bin| {
let len = i.read().unwrap().len(); let len = map_bin.read().unwrap().len();
min_bin_size = std::cmp::min(min_bin_size, len); min_bin_size = std::cmp::min(min_bin_size, len);
max_bin_size = std::cmp::max(max_bin_size, len); max_bin_size = std::cmp::max(max_bin_size, len);
len len
}) })
.sum(); .sum();
let timings = GenerateIndexTimings { let mut timings = GenerateIndexTimings {
scan_time, scan_time,
index_time: index_time.as_us(), index_time: index_time.as_us(),
insertion_time_us: insertion_time_us.load(Ordering::Relaxed), insertion_time_us: insertion_time_us.load(Ordering::Relaxed),
min_bin_size, min_bin_size,
max_bin_size, max_bin_size,
total_items, total_items,
..GenerateIndexTimings::default()
}; };
if pass == 0 {
// Need to add these last, otherwise older updates will be cleaned
for slot in &slots {
self.accounts_index.add_root(*slot, false);
}
self.initialize_storage_count_and_alive_bytes(&mut timings);
}
timings.report(); timings.report();
} }
}
// Need to add these last, otherwise older updates will be cleaned fn calculate_storage_count_and_alive_bytes(
for slot in slots { &self,
self.accounts_index.add_root(slot, false); timings: &mut GenerateIndexTimings,
} ) -> HashMap<usize, (usize, usize)> {
// look at every account in the account index and calculate for each storage: stored_size and count
let mut stored_sizes_and_counts = HashMap::new(); let mut storage_size_accounts_map_time = Measure::start("storage_size_accounts_map");
self.accounts_index.account_maps.iter().for_each(|i| { let mut maps = self
i.read().unwrap().values().for_each(|entry| { .accounts_index
entry .account_maps
.slot_list .par_iter()
.read() .map(|bin_map| {
.unwrap() let mut stored_sizes_and_counts = HashMap::new();
.iter() bin_map.read().unwrap().values().for_each(|entry| {
.for_each(|(_slot, account_entry)| { entry
let storage_entry_meta = stored_sizes_and_counts .slot_list
.entry(account_entry.store_id) .read()
.or_insert((0, 0)); .unwrap()
storage_entry_meta.0 += account_entry.stored_size; .iter()
storage_entry_meta.1 += 1; .for_each(|(_slot, account_entry)| {
}) let storage_entry_meta = stored_sizes_and_counts
.entry(account_entry.store_id)
.or_insert((0, 0));
storage_entry_meta.0 += account_entry.stored_size;
storage_entry_meta.1 += 1;
})
});
stored_sizes_and_counts
}) })
}); .collect::<Vec<_>>();
storage_size_accounts_map_time.stop();
timings.storage_size_accounts_map_us = storage_size_accounts_map_time.as_us();
// flatten/merge the HashMaps from the parallel iteration above
let mut storage_size_accounts_map_flatten_time =
Measure::start("storage_size_accounts_map_flatten_time");
let mut stored_sizes_and_counts = maps.pop().unwrap_or_default();
for map in maps {
for (store_id, meta) in map.into_iter() {
let storage_entry_meta = stored_sizes_and_counts.entry(store_id).or_insert((0, 0));
storage_entry_meta.0 += meta.0;
storage_entry_meta.1 += meta.1;
}
}
storage_size_accounts_map_flatten_time.stop();
timings.storage_size_accounts_map_flatten_us =
storage_size_accounts_map_flatten_time.as_us();
stored_sizes_and_counts
}
fn set_storage_count_and_alive_bytes(
&self,
stored_sizes_and_counts: HashMap<usize, (usize, usize)>,
timings: &mut GenerateIndexTimings,
) {
// store count and size for each storage
let mut storage_size_storages_time = Measure::start("storage_size_storages");
for slot_stores in self.storage.0.iter() { for slot_stores in self.storage.0.iter() {
for (id, store) in slot_stores.value().read().unwrap().iter() { for (id, store) in slot_stores.value().read().unwrap().iter() {
// Should be default at this point // Should be default at this point
@ -6047,6 +6109,13 @@ impl AccountsDb {
} }
} }
} }
storage_size_storages_time.stop();
timings.storage_size_storages_us = storage_size_storages_time.as_us();
}
fn initialize_storage_count_and_alive_bytes(&self, timings: &mut GenerateIndexTimings) {
let stored_sizes_and_counts = self.calculate_storage_count_and_alive_bytes(timings);
self.set_storage_count_and_alive_bytes(stored_sizes_and_counts, timings);
} }
pub(crate) fn print_accounts_stats(&self, label: &str) { pub(crate) fn print_accounts_stats(&self, label: &str) {
@ -11763,6 +11832,87 @@ pub mod tests {
assert!(!accounts.is_candidate_for_shrink(&entry)); assert!(!accounts.is_candidate_for_shrink(&entry));
} }
#[test]
fn test_calculate_storage_count_and_alive_bytes() {
let accounts = AccountsDb::new_single();
let shared_key = solana_sdk::pubkey::new_rand();
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&shared_key, &account)]);
let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert_eq!(result.len(), 1);
for (k, v) in result.iter() {
assert_eq!((k, v), (&0, &(144, 1)));
}
}
#[test]
fn test_calculate_storage_count_and_alive_bytes_0_accounts() {
let accounts = AccountsDb::new_single();
let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert!(result.is_empty());
}
#[test]
fn test_calculate_storage_count_and_alive_bytes_2_accounts() {
let accounts = AccountsDb::new_single();
let keys = [
solana_sdk::pubkey::Pubkey::new(&[0; 32]),
solana_sdk::pubkey::Pubkey::new(&[255; 32]),
];
// make sure accounts are in 2 different bins
assert!(
crate::accounts_index::get_bin_pubkey(&keys[0])
!= crate::accounts_index::get_bin_pubkey(&keys[1])
);
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let account_big = AccountSharedData::new(1, 1000, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&keys[0], &account)]);
accounts.store_uncached(slot0, &[(&keys[1], &account_big)]);
let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert_eq!(result.len(), 1);
for (k, v) in result.iter() {
assert_eq!((k, v), (&0, &(1280, 2)));
}
}
#[test]
fn test_set_storage_count_and_alive_bytes() {
let accounts = AccountsDb::new_single();
// make sure we have storage 0
let shared_key = solana_sdk::pubkey::new_rand();
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&shared_key, &account)]);
// fake out the store count to avoid the assert
for slot_stores in accounts.storage.0.iter() {
for (_id, store) in slot_stores.value().read().unwrap().iter() {
store.alive_bytes.store(0, Ordering::SeqCst);
}
}
// populate based on made up hash data
let mut hashmap = HashMap::default();
hashmap.insert(0, (2, 3));
accounts.set_storage_count_and_alive_bytes(hashmap, &mut GenerateIndexTimings::default());
assert_eq!(accounts.storage.0.len(), 1);
for slot_stores in accounts.storage.0.iter() {
for (id, store) in slot_stores.value().read().unwrap().iter() {
assert_eq!(id, &0);
assert_eq!(store.count_and_status.read().unwrap().0, 3);
assert_eq!(store.alive_bytes.load(Ordering::SeqCst), 2);
}
}
}
#[test] #[test]
fn test_purge_alive_unrooted_slots_after_clean() { fn test_purge_alive_unrooted_slots_after_clean() {
let accounts = AccountsDb::new_single(); let accounts = AccountsDb::new_single();

View File

@ -593,7 +593,7 @@ pub trait ZeroLamport {
fn is_zero_lamport(&self) -> bool; fn is_zero_lamport(&self) -> bool;
} }
fn get_bin_pubkey(pubkey: &Pubkey) -> usize { pub(crate) fn get_bin_pubkey(pubkey: &Pubkey) -> usize {
let byte_of_pubkey_to_bin = 0; // TODO: this should not be 0. For now it needs to be due to requests for in-order pubkeys let byte_of_pubkey_to_bin = 0; // TODO: this should not be 0. For now it needs to be due to requests for in-order pubkeys
(pubkey.as_ref()[byte_of_pubkey_to_bin] as usize) * BINS / ((u8::MAX as usize) + 1) (pubkey.as_ref()[byte_of_pubkey_to_bin] as usize) * BINS / ((u8::MAX as usize) + 1)
} }