measure and speed up 2nd half of generate_index (#18503)
* add metrics and speedup storage calcs in generate_index
* add tests, refactor
* rename map variable
* rename another index variable
parent 0f8bcf65af
commit da9fdd785c, committed by GitHub
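The speedup comes from the new calculate_storage_count_and_alive_bytes shown below: rather than walking every index bin on a single thread into one shared HashMap, each bin is scanned in parallel into its own HashMap keyed by store id, and the per-bin maps are merged afterwards. A minimal, self-contained sketch of that pattern follows; the Bin alias, the tuple layout, and the sample data are illustrative stand-ins, not the AccountsDb types.

    use rayon::prelude::*;
    use std::collections::HashMap;

    // Illustrative stand-in for one accounts-index bin: (store_id, stored_size) per account.
    type Bin = Vec<(usize, usize)>;

    /// Accumulate (total stored bytes, alive account count) per storage id:
    /// first per bin in parallel, then merged into one map.
    fn storage_sizes_and_counts(bins: &[Bin]) -> HashMap<usize, (usize, usize)> {
        // Parallel pass: one private HashMap per bin, so there is no lock contention.
        let mut maps: Vec<HashMap<usize, (usize, usize)>> = bins
            .par_iter()
            .map(|bin| {
                let mut sizes_and_counts = HashMap::new();
                for &(store_id, stored_size) in bin {
                    let entry = sizes_and_counts.entry(store_id).or_insert((0, 0));
                    entry.0 += stored_size;
                    entry.1 += 1;
                }
                sizes_and_counts
            })
            .collect();

        // Sequential flatten/merge of the per-bin maps.
        let mut merged = maps.pop().unwrap_or_default();
        for map in maps {
            for (store_id, (size, count)) in map {
                let entry = merged.entry(store_id).or_insert((0, 0));
                entry.0 += size;
                entry.1 += count;
            }
        }
        merged
    }

    fn main() {
        let bins = vec![vec![(0, 144), (1, 300)], vec![(0, 1136)]];
        assert_eq!(storage_sizes_and_counts(&bins)[&0], (1280, 2));
    }

Keeping each bin's accumulation private during the parallel pass avoids contention on a shared map; the sequential merge then only touches one small map per bin.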
@@ -169,6 +169,9 @@ struct GenerateIndexTimings {
pub min_bin_size: usize,
pub max_bin_size: usize,
pub total_items: usize,
pub storage_size_accounts_map_us: u64,
pub storage_size_storages_us: u64,
pub storage_size_accounts_map_flatten_us: u64,
}

impl GenerateIndexTimings {

@@ -181,7 +184,21 @@ impl GenerateIndexTimings {
("insertion_time_us", self.insertion_time_us, i64),
("min_bin_size", self.min_bin_size as i64, i64),
("max_bin_size", self.max_bin_size as i64, i64),
("total_items", self.total_items as i64, i64),
(
"storage_size_accounts_map_us",
self.storage_size_accounts_map_us as i64,
i64
),
(
"storage_size_storages_us",
self.storage_size_storages_us as i64,
i64
),
(
"storage_size_accounts_map_flatten_us",
self.storage_size_accounts_map_flatten_us as i64,
i64
),
);
}
}
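The three new *_us fields are populated with the Measure stopwatch used elsewhere in this file and then emitted through the datapoint report above. A minimal sketch of that measure-then-record pattern, with a trimmed-down stand-in for GenerateIndexTimings (only the Measure calls mirror the real API):

    use solana_measure::measure::Measure;

    // Trimmed-down stand-in for GenerateIndexTimings, for illustration only.
    #[derive(Debug, Default)]
    struct Timings {
        storage_size_storages_us: u64,
    }

    fn timed_work(timings: &mut Timings) {
        let mut time = Measure::start("storage_size_storages");
        // ... the work being measured goes here ...
        time.stop();
        timings.storage_size_storages_us = time.as_us();
    }

    fn main() {
        let mut timings = Timings::default();
        timed_work(&mut timings);
        println!("{:?}", timings);
    }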
@@ -5992,47 +6009,92 @@ impl AccountsDb {
.accounts_index
.account_maps
.iter()
.map(|i| {
let len = i.read().unwrap().len();
.map(|map_bin| {
let len = map_bin.read().unwrap().len();
min_bin_size = std::cmp::min(min_bin_size, len);
max_bin_size = std::cmp::max(max_bin_size, len);
len
})
.sum();

let timings = GenerateIndexTimings {
let mut timings = GenerateIndexTimings {
scan_time,
index_time: index_time.as_us(),
insertion_time_us: insertion_time_us.load(Ordering::Relaxed),
min_bin_size,
max_bin_size,
total_items,
..GenerateIndexTimings::default()
};

if pass == 0 {
// Need to add these last, otherwise older updates will be cleaned
for slot in &slots {
self.accounts_index.add_root(*slot, false);
}

self.initialize_storage_count_and_alive_bytes(&mut timings);
}
timings.report();
}
}

// Need to add these last, otherwise older updates will be cleaned
for slot in slots {
self.accounts_index.add_root(slot, false);
}

let mut stored_sizes_and_counts = HashMap::new();
self.accounts_index.account_maps.iter().for_each(|i| {
i.read().unwrap().values().for_each(|entry| {
entry
.slot_list
.read()
.unwrap()
.iter()
.for_each(|(_slot, account_entry)| {
let storage_entry_meta = stored_sizes_and_counts
.entry(account_entry.store_id)
.or_insert((0, 0));
storage_entry_meta.0 += account_entry.stored_size;
storage_entry_meta.1 += 1;
})
fn calculate_storage_count_and_alive_bytes(
&self,
timings: &mut GenerateIndexTimings,
) -> HashMap<usize, (usize, usize)> {
// look at every account in the account index and calculate for each storage: stored_size and count
let mut storage_size_accounts_map_time = Measure::start("storage_size_accounts_map");
let mut maps = self
.accounts_index
.account_maps
.par_iter()
.map(|bin_map| {
let mut stored_sizes_and_counts = HashMap::new();
bin_map.read().unwrap().values().for_each(|entry| {
entry
.slot_list
.read()
.unwrap()
.iter()
.for_each(|(_slot, account_entry)| {
let storage_entry_meta = stored_sizes_and_counts
.entry(account_entry.store_id)
.or_insert((0, 0));
storage_entry_meta.0 += account_entry.stored_size;
storage_entry_meta.1 += 1;
})
});
stored_sizes_and_counts
})
});
.collect::<Vec<_>>();
storage_size_accounts_map_time.stop();
timings.storage_size_accounts_map_us = storage_size_accounts_map_time.as_us();

// flatten/merge the HashMaps from the parallel iteration above
let mut storage_size_accounts_map_flatten_time =
Measure::start("storage_size_accounts_map_flatten_time");
let mut stored_sizes_and_counts = maps.pop().unwrap_or_default();
for map in maps {
for (store_id, meta) in map.into_iter() {
let storage_entry_meta = stored_sizes_and_counts.entry(store_id).or_insert((0, 0));
storage_entry_meta.0 += meta.0;
storage_entry_meta.1 += meta.1;
}
}
storage_size_accounts_map_flatten_time.stop();
timings.storage_size_accounts_map_flatten_us =
storage_size_accounts_map_flatten_time.as_us();
stored_sizes_and_counts
}

fn set_storage_count_and_alive_bytes(
&self,
stored_sizes_and_counts: HashMap<usize, (usize, usize)>,
timings: &mut GenerateIndexTimings,
) {
// store count and size for each storage
let mut storage_size_storages_time = Measure::start("storage_size_storages");
for slot_stores in self.storage.0.iter() {
for (id, store) in slot_stores.value().read().unwrap().iter() {
// Should be default at this point

@@ -6047,6 +6109,13 @@ impl AccountsDb {
}
}
}
storage_size_storages_time.stop();
timings.storage_size_storages_us = storage_size_storages_time.as_us();
}

fn initialize_storage_count_and_alive_bytes(&self, timings: &mut GenerateIndexTimings) {
let stored_sizes_and_counts = self.calculate_storage_count_and_alive_bytes(timings);
self.set_storage_count_and_alive_bytes(stored_sizes_and_counts, timings);
}

pub(crate) fn print_accounts_stats(&self, label: &str) {
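Splitting the old inline code into calculate_storage_count_and_alive_bytes and set_storage_count_and_alive_bytes makes each half independently testable, which the new tests below rely on. The value tuple in the returned map is (alive bytes, alive account count) keyed by storage id, and set_storage_count_and_alive_bytes writes those two numbers back onto each store. A rough sketch of that write-back step with simplified stand-in types (Store and apply are illustrative, not the AccountsDb API):

    use std::collections::HashMap;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Illustrative stand-in for a storage entry; only the two fields
    // touched by the write-back are modeled here.
    #[derive(Default)]
    struct Store {
        count: AtomicUsize,
        alive_bytes: AtomicUsize,
    }

    // Apply a map of store_id -> (alive bytes, alive account count) to the stores.
    fn apply(stores: &HashMap<usize, Store>, sizes_and_counts: &HashMap<usize, (usize, usize)>) {
        for (id, store) in stores {
            if let Some((bytes, count)) = sizes_and_counts.get(id) {
                store.alive_bytes.store(*bytes, Ordering::SeqCst);
                store.count.store(*count, Ordering::SeqCst);
            }
        }
    }

    fn main() {
        let stores: HashMap<usize, Store> = HashMap::from([(0, Store::default())]);
        let sizes_and_counts = HashMap::from([(0, (2usize, 3usize))]);
        apply(&stores, &sizes_and_counts);
        assert_eq!(stores[&0].alive_bytes.load(Ordering::SeqCst), 2);
        assert_eq!(stores[&0].count.load(Ordering::SeqCst), 3);
    }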
@@ -11763,6 +11832,87 @@ pub mod tests {
assert!(!accounts.is_candidate_for_shrink(&entry));
}

#[test]
fn test_calculate_storage_count_and_alive_bytes() {
let accounts = AccountsDb::new_single();
let shared_key = solana_sdk::pubkey::new_rand();
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&shared_key, &account)]);

let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert_eq!(result.len(), 1);
for (k, v) in result.iter() {
assert_eq!((k, v), (&0, &(144, 1)));
}
}

#[test]
fn test_calculate_storage_count_and_alive_bytes_0_accounts() {
let accounts = AccountsDb::new_single();
let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert!(result.is_empty());
}

#[test]
fn test_calculate_storage_count_and_alive_bytes_2_accounts() {
let accounts = AccountsDb::new_single();
let keys = [
solana_sdk::pubkey::Pubkey::new(&[0; 32]),
solana_sdk::pubkey::Pubkey::new(&[255; 32]),
];
// make sure accounts are in 2 different bins
assert!(
crate::accounts_index::get_bin_pubkey(&keys[0])
!= crate::accounts_index::get_bin_pubkey(&keys[1])
);
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let account_big = AccountSharedData::new(1, 1000, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&keys[0], &account)]);
accounts.store_uncached(slot0, &[(&keys[1], &account_big)]);

let result =
accounts.calculate_storage_count_and_alive_bytes(&mut GenerateIndexTimings::default());
assert_eq!(result.len(), 1);
for (k, v) in result.iter() {
assert_eq!((k, v), (&0, &(1280, 2)));
}
}

#[test]
fn test_set_storage_count_and_alive_bytes() {
let accounts = AccountsDb::new_single();

// make sure we have storage 0
let shared_key = solana_sdk::pubkey::new_rand();
let account = AccountSharedData::new(1, 1, AccountSharedData::default().owner());
let slot0 = 0;
accounts.store_uncached(slot0, &[(&shared_key, &account)]);

// fake out the store count to avoid the assert
for slot_stores in accounts.storage.0.iter() {
for (_id, store) in slot_stores.value().read().unwrap().iter() {
store.alive_bytes.store(0, Ordering::SeqCst);
}
}

// populate based on made up hash data
let mut hashmap = HashMap::default();
hashmap.insert(0, (2, 3));
accounts.set_storage_count_and_alive_bytes(hashmap, &mut GenerateIndexTimings::default());
assert_eq!(accounts.storage.0.len(), 1);
for slot_stores in accounts.storage.0.iter() {
for (id, store) in slot_stores.value().read().unwrap().iter() {
assert_eq!(id, &0);
assert_eq!(store.count_and_status.read().unwrap().0, 3);
assert_eq!(store.alive_bytes.load(Ordering::SeqCst), 2);
}
}
}

#[test]
fn test_purge_alive_unrooted_slots_after_clean() {
let accounts = AccountsDb::new_single();
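The byte counts asserted in the tests above follow from the append-vec layout: each stored account occupies its data length plus a fixed per-account metadata overhead, rounded up to 8-byte alignment. Assuming the usual 136-byte overhead (an assumption; the constant is not part of this diff), the numbers check out:

    // Hypothetical check of the sizes asserted in the tests above,
    // assuming a 136-byte per-account metadata overhead in the append vec.
    fn stored_size(data_len: usize) -> usize {
        const ASSUMED_META_OVERHEAD: usize = 136; // assumption, not taken from this diff
        (ASSUMED_META_OVERHEAD + data_len + 7) & !7 // round up to 8-byte alignment
    }

    fn main() {
        assert_eq!(stored_size(1), 144); // the single 1-byte account in the first test
        assert_eq!(stored_size(1) + stored_size(1000), 1280); // the two-account test
    }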
@@ -593,7 +593,7 @@ pub trait ZeroLamport {
fn is_zero_lamport(&self) -> bool;
}

fn get_bin_pubkey(pubkey: &Pubkey) -> usize {
pub(crate) fn get_bin_pubkey(pubkey: &Pubkey) -> usize {
let byte_of_pubkey_to_bin = 0; // TODO: this should not be 0. For now it needs to be due to requests for in-order pubkeys
(pubkey.as_ref()[byte_of_pubkey_to_bin] as usize) * BINS / ((u8::MAX as usize) + 1)
}
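get_bin_pubkey becomes pub(crate) so the two-account test above can assert that its keys land in different bins. The calculation maps a pubkey's first byte linearly onto the bin range; a small standalone sketch with an assumed BINS of 16 (the real constant lives in accounts_index and is not shown in this diff):

    // Standalone sketch of the bin calculation in get_bin_pubkey.
    const BINS: usize = 16; // assumed example value

    fn bin_of_first_byte(first_byte: u8) -> usize {
        (first_byte as usize) * BINS / ((u8::MAX as usize) + 1)
    }

    fn main() {
        assert_eq!(bin_of_first_byte(0x00), 0); // Pubkey::new(&[0; 32])   -> first bin
        assert_eq!(bin_of_first_byte(0xff), 15); // Pubkey::new(&[255; 32]) -> last bin
        // Any BINS >= 2 keeps these two keys in different bins, which is what
        // test_calculate_storage_count_and_alive_bytes_2_accounts relies on.
    }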