From 239ab8799c40b2fc9f0081c2e593b6ef4e41e449 Mon Sep 17 00:00:00 2001 From: carllin Date: Wed, 12 May 2021 15:29:30 -0700 Subject: [PATCH] Remove bloat from secondary indexes (#17048) --- runtime/src/accounts_db.rs | 26 +-- runtime/src/accounts_index.rs | 352 ++++++++++++--------------------- runtime/src/secondary_index.rs | 341 ++++++++++++-------------------- 3 files changed, 259 insertions(+), 460 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 41a1343694..b07b40e4f4 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -1335,7 +1335,6 @@ impl AccountsDb { &pubkey, &mut reclaims, max_clean_root, - &self.account_indexes, ); } reclaims @@ -1476,12 +1475,9 @@ impl AccountsDb { let mut dead_keys = Vec::new(); for (pubkey, slots_set) in pubkey_to_slot_set { - let is_empty = self.accounts_index.purge_exact( - &pubkey, - slots_set, - &mut reclaims, - &self.account_indexes, - ); + let is_empty = self + .accounts_index + .purge_exact(&pubkey, slots_set, &mut reclaims); if is_empty { dead_keys.push(pubkey); } @@ -3264,22 +3260,14 @@ impl AccountsDb { match scan_result { ScanStorageResult::Cached(cached_keys) => { for pubkey in cached_keys.iter() { - self.accounts_index.purge_exact( - pubkey, - &purge_slot, - &mut reclaims, - &self.account_indexes, - ); + self.accounts_index + .purge_exact(pubkey, &purge_slot, &mut reclaims); } } ScanStorageResult::Stored(stored_keys) => { for set_ref in stored_keys.iter() { - self.accounts_index.purge_exact( - set_ref.key(), - &purge_slot, - &mut reclaims, - &self.account_indexes, - ); + self.accounts_index + .purge_exact(set_ref.key(), &purge_slot, &mut reclaims); } } } diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index 633e0aac77..2b29e374be 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -492,7 +492,7 @@ pub trait ZeroLamport { fn is_zero_lamport(&self) -> bool; } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct AccountsIndex { pub account_maps: RwLock>>, program_id_index: SecondaryIndex, @@ -503,6 +503,26 @@ pub struct AccountsIndex { zero_lamport_pubkeys: DashSet, } +impl Default for AccountsIndex { + fn default() -> Self { + Self { + account_maps: RwLock::>>::default(), + program_id_index: SecondaryIndex::::new( + "program_id_index_stats", + ), + spl_token_mint_index: SecondaryIndex::::new( + "spl_token_mint_index_stats", + ), + spl_token_owner_index: SecondaryIndex::::new( + "spl_token_owner_index_stats", + ), + roots_tracker: RwLock::::default(), + ongoing_scan_roots: RwLock::>::default(), + zero_lamport_pubkeys: DashSet::::default(), + } + } +} + impl AccountsIndex { fn iter(&self, range: Option) -> AccountsIndexIterator where @@ -858,15 +878,10 @@ impl AccountsIndex { if index_entry.get().slot_list.read().unwrap().is_empty() { index_entry.remove(); - // Note passing `None` to remove all the entries for this key - // is only safe because we have the lock for this key's entry - // in the AccountsIndex, so no other thread is also updating - // the index - self.purge_secondary_indexes_by_inner_key( - key, - None::<&Slot>, - account_indexes, - ); + // Note it's only safe to remove all the entries for this key + // because we have the lock for this key's entry in the AccountsIndex, + // so no other thread is also updating the index + self.purge_secondary_indexes_by_inner_key(key, account_indexes); } } } @@ -954,28 +969,23 @@ impl AccountsIndex { pubkey: &Pubkey, slots_to_purge: &'a C, reclaims: &mut SlotList, - account_indexes: &AccountSecondaryIndexes, ) -> bool where C: Contains<'a, Slot>, { - let is_empty = { - let mut write_account_map_entry = self.get_account_write_entry(pubkey).unwrap(); - write_account_map_entry.slot_list_mut(|slot_list| { - slot_list.retain(|(slot, item)| { - let should_purge = slots_to_purge.contains(&slot); - if should_purge { - reclaims.push((*slot, item.clone())); - false - } else { - true - } - }); - slot_list.is_empty() - }) - }; - self.purge_secondary_indexes_by_inner_key(pubkey, Some(slots_to_purge), account_indexes); - is_empty + let mut write_account_map_entry = self.get_account_write_entry(pubkey).unwrap(); + write_account_map_entry.slot_list_mut(|slot_list| { + slot_list.retain(|(slot, item)| { + let should_purge = slots_to_purge.contains(&slot); + if should_purge { + reclaims.push((*slot, item.clone())); + false + } else { + true + } + }); + slot_list.is_empty() + }) } pub fn min_ongoing_scan_root(&self) -> Option { @@ -1079,7 +1089,6 @@ impl AccountsIndex { fn update_secondary_indexes( &self, pubkey: &Pubkey, - slot: Slot, account_owner: &Pubkey, account_data: &[u8], account_indexes: &AccountSecondaryIndexes, @@ -1091,7 +1100,7 @@ impl AccountsIndex { if account_indexes.contains(&AccountIndex::ProgramId) && account_indexes.include_key(account_owner) { - self.program_id_index.insert(account_owner, pubkey, slot); + self.program_id_index.insert(account_owner, pubkey); } // Note because of the below check below on the account data length, when an // account hits zero lamports and is reset to AccountSharedData::Default, then we skip @@ -1115,7 +1124,7 @@ impl AccountsIndex { ..SPL_TOKEN_ACCOUNT_OWNER_OFFSET + PUBKEY_BYTES], ); if account_indexes.include_key(&owner_key) { - self.spl_token_owner_index.insert(&owner_key, pubkey, slot); + self.spl_token_owner_index.insert(&owner_key, pubkey); } } @@ -1125,7 +1134,7 @@ impl AccountsIndex { ..SPL_TOKEN_ACCOUNT_MINT_OFFSET + PUBKEY_BYTES], ); if account_indexes.include_key(&mint_key) { - self.spl_token_mint_index.insert(&mint_key, pubkey, slot); + self.spl_token_mint_index.insert(&mint_key, pubkey); } } } @@ -1151,7 +1160,7 @@ impl AccountsIndex { } w_account_entry.update(slot, account_info, reclaims); } - self.update_secondary_indexes(pubkey, slot, account_owner, account_data, account_indexes); + self.update_secondary_indexes(pubkey, account_owner, account_data, account_indexes); } // Updates the given pubkey at the given slot with the new account information. @@ -1187,7 +1196,7 @@ impl AccountsIndex { w_account_entry.update(slot, account_info, reclaims); is_newly_inserted }; - self.update_secondary_indexes(pubkey, slot, account_owner, account_data, account_indexes); + self.update_secondary_indexes(pubkey, account_owner, account_data, account_indexes); is_newly_inserted } @@ -1213,37 +1222,29 @@ impl AccountsIndex { } } - fn purge_secondary_indexes_by_inner_key<'a, C>( + fn purge_secondary_indexes_by_inner_key<'a>( &'a self, inner_key: &Pubkey, - slots_to_remove: Option<&'a C>, account_indexes: &AccountSecondaryIndexes, - ) where - C: Contains<'a, Slot>, - { + ) { if account_indexes.contains(&AccountIndex::ProgramId) { - self.program_id_index - .remove_by_inner_key(inner_key, slots_to_remove); + self.program_id_index.remove_by_inner_key(inner_key); } if account_indexes.contains(&AccountIndex::SplTokenOwner) { - self.spl_token_owner_index - .remove_by_inner_key(inner_key, slots_to_remove); + self.spl_token_owner_index.remove_by_inner_key(inner_key); } if account_indexes.contains(&AccountIndex::SplTokenMint) { - self.spl_token_mint_index - .remove_by_inner_key(inner_key, slots_to_remove); + self.spl_token_mint_index.remove_by_inner_key(inner_key); } } fn purge_older_root_entries( &self, - pubkey: &Pubkey, slot_list: &mut SlotList, reclaims: &mut SlotList, max_clean_root: Option, - account_indexes: &AccountSecondaryIndexes, ) { let roots_tracker = &self.roots_tracker.read().unwrap(); let newest_root_in_slot_list = @@ -1261,8 +1262,6 @@ impl AccountsIndex { } !should_purge }); - - self.purge_secondary_indexes_by_inner_key(pubkey, Some(&purged_slots), account_indexes); } pub fn clean_rooted_entries( @@ -1270,18 +1269,11 @@ impl AccountsIndex { pubkey: &Pubkey, reclaims: &mut SlotList, max_clean_root: Option, - account_indexes: &AccountSecondaryIndexes, ) { let mut is_slot_list_empty = false; if let Some(mut locked_entry) = self.get_account_write_entry(pubkey) { locked_entry.slot_list_mut(|slot_list| { - self.purge_older_root_entries( - pubkey, - slot_list, - reclaims, - max_clean_root, - account_indexes, - ); + self.purge_older_root_entries(slot_list, reclaims, max_clean_root); is_slot_list_empty = slot_list.is_empty(); }); } @@ -2755,7 +2747,7 @@ pub mod tests { secondary_index: &SecondaryIndex, key_start: usize, key_end: usize, - account_index: &AccountSecondaryIndexes, + secondary_indexes: &AccountSecondaryIndexes, ) { // No roots, should be no reclaims let slots = vec![1, 2, 5, 9]; @@ -2773,7 +2765,7 @@ pub mod tests { // Make sure these accounts are added to secondary index &inline_spl_token_v2_0::id(), &account_data, - account_index, + secondary_indexes, true, &mut vec![], ); @@ -2793,43 +2785,43 @@ pub mod tests { .read() .unwrap() .len(), - slots.len() + 1 ); index.purge_exact( &account_key, &slots.into_iter().collect::>(), &mut vec![], - account_index, ); + index.handle_dead_keys(&[&account_key], secondary_indexes); assert!(secondary_index.index.is_empty()); assert!(secondary_index.reverse_index.is_empty()); } #[test] fn test_purge_exact_dashmap_secondary_index() { - let (key_start, key_end, account_index) = create_dashmap_secondary_index_state(); + let (key_start, key_end, secondary_indexes) = create_dashmap_secondary_index_state(); let index = AccountsIndex::::default(); run_test_purge_exact_secondary_index( &index, &index.spl_token_mint_index, key_start, key_end, - &account_index, + &secondary_indexes, ); } #[test] fn test_purge_exact_rwlock_secondary_index() { - let (key_start, key_end, account_index) = create_rwlock_secondary_index_state(); + let (key_start, key_end, secondary_indexes) = create_rwlock_secondary_index_state(); let index = AccountsIndex::::default(); run_test_purge_exact_secondary_index( &index, &index.spl_token_owner_index, key_start, key_end, - &account_index, + &secondary_indexes, ); } @@ -2839,13 +2831,7 @@ pub mod tests { let index = AccountsIndex::::default(); let mut slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; let mut reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - None, - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, None); assert!(reclaims.is_empty()); assert_eq!(slot_list, vec![(1, true), (2, true), (5, true), (9, true)]); @@ -2855,13 +2841,7 @@ pub mod tests { // Note 2 is not a root index.add_root(5, false); reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - None, - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, None); assert_eq!(reclaims, vec![(1, true), (2, true)]); assert_eq!(slot_list, vec![(5, true), (9, true)]); @@ -2869,13 +2849,7 @@ pub mod tests { slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; index.add_root(6, false); reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - None, - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, None); assert_eq!(reclaims, vec![(1, true), (2, true)]); assert_eq!(slot_list, vec![(5, true), (9, true)]); @@ -2883,26 +2857,14 @@ pub mod tests { // outcome slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - Some(6), - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, Some(6)); assert_eq!(reclaims, vec![(1, true), (2, true)]); assert_eq!(slot_list, vec![(5, true), (9, true)]); // Pass a max root, earlier slots should be reclaimed slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - Some(5), - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, Some(5)); assert_eq!(reclaims, vec![(1, true), (2, true)]); assert_eq!(slot_list, vec![(5, true), (9, true)]); @@ -2910,13 +2872,7 @@ pub mod tests { // so nothing will be purged slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - Some(2), - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, Some(2)); assert!(reclaims.is_empty()); assert_eq!(slot_list, vec![(1, true), (2, true), (5, true), (9, true)]); @@ -2924,13 +2880,7 @@ pub mod tests { // so nothing will be purged slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - Some(1), - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, Some(1)); assert!(reclaims.is_empty()); assert_eq!(slot_list, vec![(1, true), (2, true), (5, true), (9, true)]); @@ -2938,41 +2888,32 @@ pub mod tests { // some of the roots in the list, shouldn't return those smaller roots slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; reclaims = vec![]; - index.purge_older_root_entries( - &Pubkey::default(), - &mut slot_list, - &mut reclaims, - Some(7), - &AccountSecondaryIndexes::default(), - ); + index.purge_older_root_entries(&mut slot_list, &mut reclaims, Some(7)); assert_eq!(reclaims, vec![(1, true), (2, true)]); assert_eq!(slot_list, vec![(5, true), (9, true)]); } - fn check_secondary_index_unique( + fn check_secondary_index_mapping_correct( secondary_index: &SecondaryIndex, - slot: Slot, - key: &Pubkey, + secondary_index_keys: &[Pubkey], account_key: &Pubkey, ) where SecondaryIndexEntryType: SecondaryIndexEntry + Default + Sync + Send, { // Check secondary index has unique mapping from secondary index key // to the account key and slot - assert_eq!(secondary_index.index.len(), 1); - let inner_key_map = secondary_index.index.get(key).unwrap(); - assert_eq!(inner_key_map.len(), 1); - inner_key_map - .value() - .get(account_key, &|slots_map: Option<&RwLock>>| { - let slots_map = slots_map.unwrap(); - assert_eq!(slots_map.read().unwrap().len(), 1); - assert!(slots_map.read().unwrap().contains(&slot)); - }); - - // Check reverse index is unique - let slots_map = secondary_index.reverse_index.get(account_key).unwrap(); - assert_eq!(slots_map.value().read().unwrap().get(&slot).unwrap(), key); + for secondary_index_key in secondary_index_keys { + assert_eq!(secondary_index.index.len(), secondary_index_keys.len()); + let account_key_map = secondary_index.get(secondary_index_key); + assert_eq!(account_key_map.len(), 1); + assert_eq!(account_key_map, vec![*account_key]); + } + // Check reverse index contains all of the `secondary_index_keys` + let secondary_index_key_map = secondary_index.reverse_index.get(account_key).unwrap(); + assert_eq!( + &*secondary_index_key_map.value().read().unwrap(), + secondary_index_keys + ); } fn run_test_secondary_indexes< @@ -2982,12 +2923,11 @@ pub mod tests { secondary_index: &SecondaryIndex, key_start: usize, key_end: usize, - account_index: &AccountSecondaryIndexes, + secondary_indexes: &AccountSecondaryIndexes, ) { - let mut account_index = account_index.clone(); + let mut secondary_indexes = secondary_indexes.clone(); let account_key = Pubkey::new_unique(); let index_key = Pubkey::new_unique(); - let slot = 1; let mut account_data = vec![0; inline_spl_token_v2_0::state::Account::get_packed_len()]; account_data[key_start..key_end].clone_from_slice(&(index_key.to_bytes())); @@ -2997,7 +2937,7 @@ pub mod tests { &account_key, &Pubkey::default(), &account_data, - &account_index, + &secondary_indexes, true, &mut vec![], ); @@ -3010,66 +2950,31 @@ pub mod tests { &account_key, &inline_spl_token_v2_0::id(), &account_data[1..], - &account_index, + &secondary_indexes, true, &mut vec![], ); assert!(secondary_index.index.is_empty()); assert!(secondary_index.reverse_index.is_empty()); - // excluded key - account_index.keys = Some(AccountSecondaryIndexesIncludeExclude { - keys: [index_key].iter().cloned().collect::>(), - exclude: true, - }); - index.upsert( - 0, - &account_key, - &inline_spl_token_v2_0::id(), - &account_data[1..], - &account_index, - true, - &mut vec![], - ); - assert!(secondary_index.index.is_empty()); - assert!(secondary_index.reverse_index.is_empty()); - - // not-included key - account_index.keys = Some(AccountSecondaryIndexesIncludeExclude { - keys: [account_key].iter().cloned().collect::>(), - exclude: false, - }); - index.upsert( - 0, - &account_key, - &inline_spl_token_v2_0::id(), - &account_data[1..], - &account_index, - true, - &mut vec![], - ); - assert!(secondary_index.index.is_empty()); - assert!(secondary_index.reverse_index.is_empty()); - - account_index.keys = None; + secondary_indexes.keys = None; // Just right. Inserting the same index multiple times should be ok for _ in 0..2 { index.update_secondary_indexes( &account_key, - slot, &inline_spl_token_v2_0::id(), &account_data, - &account_index, + &secondary_indexes, ); - check_secondary_index_unique(secondary_index, slot, &index_key, &account_key); + check_secondary_index_mapping_correct(secondary_index, &[index_key], &account_key); } // included assert!(!secondary_index.index.is_empty()); assert!(!secondary_index.reverse_index.is_empty()); - account_index.keys = Some(AccountSecondaryIndexesIncludeExclude { + secondary_indexes.keys = Some(AccountSecondaryIndexesIncludeExclude { keys: [index_key].iter().cloned().collect::>(), exclude: false, }); @@ -3077,17 +2982,16 @@ pub mod tests { secondary_index.reverse_index.clear(); index.update_secondary_indexes( &account_key, - slot, &inline_spl_token_v2_0::id(), &account_data, - &account_index, + &secondary_indexes, ); assert!(!secondary_index.index.is_empty()); assert!(!secondary_index.reverse_index.is_empty()); - check_secondary_index_unique(secondary_index, slot, &index_key, &account_key); + check_secondary_index_mapping_correct(secondary_index, &[index_key], &account_key); // not-excluded - account_index.keys = Some(AccountSecondaryIndexesIncludeExclude { + secondary_indexes.keys = Some(AccountSecondaryIndexesIncludeExclude { keys: [].iter().cloned().collect::>(), exclude: true, }); @@ -3095,16 +2999,15 @@ pub mod tests { secondary_index.reverse_index.clear(); index.update_secondary_indexes( &account_key, - slot, &inline_spl_token_v2_0::id(), &account_data, - &account_index, + &secondary_indexes, ); assert!(!secondary_index.index.is_empty()); assert!(!secondary_index.reverse_index.is_empty()); - check_secondary_index_unique(secondary_index, slot, &index_key, &account_key); + check_secondary_index_mapping_correct(secondary_index, &[index_key], &account_key); - account_index.keys = None; + secondary_indexes.keys = None; index .get_account_write_entry(&account_key) @@ -3112,34 +3015,34 @@ pub mod tests { .slot_list_mut(|slot_list| slot_list.clear()); // Everything should be deleted - index.handle_dead_keys(&[&account_key], &account_index); + index.handle_dead_keys(&[&account_key], &secondary_indexes); assert!(secondary_index.index.is_empty()); assert!(secondary_index.reverse_index.is_empty()); } #[test] fn test_dashmap_secondary_index() { - let (key_start, key_end, account_index) = create_dashmap_secondary_index_state(); + let (key_start, key_end, secondary_indexes) = create_dashmap_secondary_index_state(); let index = AccountsIndex::::default(); run_test_secondary_indexes( &index, &index.spl_token_mint_index, key_start, key_end, - &account_index, + &secondary_indexes, ); } #[test] fn test_rwlock_secondary_index() { - let (key_start, key_end, account_index) = create_rwlock_secondary_index_state(); + let (key_start, key_end, secondary_indexes) = create_rwlock_secondary_index_state(); let index = AccountsIndex::::default(); run_test_secondary_indexes( &index, &index.spl_token_owner_index, key_start, key_end, - &account_index, + &secondary_indexes, ); } @@ -3150,7 +3053,7 @@ pub mod tests { secondary_index: &SecondaryIndex, index_key_start: usize, index_key_end: usize, - account_index: &AccountSecondaryIndexes, + secondary_indexes: &AccountSecondaryIndexes, ) { let account_key = Pubkey::new_unique(); let secondary_key1 = Pubkey::new_unique(); @@ -3169,61 +3072,66 @@ pub mod tests { &account_key, &inline_spl_token_v2_0::id(), &account_data1, - account_index, + secondary_indexes, true, &mut vec![], ); - // Now write a different mint index + // Now write a different mint index for the same account index.upsert( slot, &account_key, &inline_spl_token_v2_0::id(), &account_data2, - account_index, + secondary_indexes, true, &mut vec![], ); - // Check correctness - check_secondary_index_unique(&secondary_index, slot, &secondary_key2, &account_key); - assert!(secondary_index.get(&secondary_key1).is_empty()); - assert_eq!(secondary_index.get(&secondary_key2), vec![account_key]); + // Both pubkeys will now be present in the index + check_secondary_index_mapping_correct( + &secondary_index, + &[secondary_key1, secondary_key2], + &account_key, + ); - // If another fork reintroduces secondary_key1, then it should be re-added to the - // index - let fork = slot + 1; + // If a later slot also introduces secondary_key1, then it should still exist in the index + let later_slot = slot + 1; index.upsert( - fork, + later_slot, &account_key, &inline_spl_token_v2_0::id(), &account_data1, - account_index, + secondary_indexes, true, &mut vec![], ); assert_eq!(secondary_index.get(&secondary_key1), vec![account_key]); - // If we set a root at fork, and clean, then the secondary_key1 should no longer - // be findable - index.add_root(fork, false); + // If we set a root at `later_slot`, and clean, then even though the account with secondary_key1 + // was outdated by the update in the later slot, the primary account key is still alive, + // so both secondary keys will still be kept alive. + index.add_root(later_slot, false); index .get_account_write_entry(&account_key) .unwrap() .slot_list_mut(|slot_list| { - index.purge_older_root_entries( - &account_key, - slot_list, - &mut vec![], - None, - account_index, - ) + index.purge_older_root_entries(slot_list, &mut vec![], None) }); - assert!(secondary_index.get(&secondary_key2).is_empty()); - assert_eq!(secondary_index.get(&secondary_key1), vec![account_key]); - // Check correctness - check_secondary_index_unique(secondary_index, fork, &secondary_key1, &account_key); + check_secondary_index_mapping_correct( + secondary_index, + &[secondary_key1, secondary_key2], + &account_key, + ); + + // Removing the remaining entry for this pubkey in the index should mark the + // pubkey as dead and finally remove all the secondary indexes + let mut reclaims = vec![]; + index.purge_exact(&account_key, &later_slot, &mut reclaims); + index.handle_dead_keys(&[&account_key], secondary_indexes); + assert!(secondary_index.index.is_empty()); + assert!(secondary_index.reverse_index.is_empty()); } #[test] diff --git a/runtime/src/secondary_index.rs b/runtime/src/secondary_index.rs index ff50190582..c6d45961b4 100644 --- a/runtime/src/secondary_index.rs +++ b/runtime/src/secondary_index.rs @@ -1,157 +1,121 @@ -use crate::contains::Contains; use dashmap::{mapref::entry::Entry::Occupied, DashMap}; -use log::*; -use solana_sdk::{clock::Slot, pubkey::Pubkey}; +use solana_sdk::pubkey::Pubkey; use std::{ - borrow::Borrow, - collections::{hash_map, HashMap, HashSet}, + collections::HashSet, fmt::Debug, - sync::{Arc, RwLock}, + sync::{ + atomic::{AtomicU64, Ordering}, + RwLock, + }, }; -pub type SecondaryReverseIndexEntry = RwLock>; +// The only cases where an inner key should map to a different outer key is +// if the key had different account data for the indexed key across different +// slots. As this is rare, it should be ok to use a Vec here over a HashSet, even +// though we are running some key existence checks. +pub type SecondaryReverseIndexEntry = RwLock>; pub trait SecondaryIndexEntry: Debug { - fn get_or_create(&self, key: &Pubkey, f: &dyn Fn(&RwLock>)); - fn get(&self, key: &Pubkey, f: &dyn Fn(Option<&RwLock>>) -> T) -> T; - fn remove_key_if_empty(&self, key: &Pubkey); + fn insert_if_not_exists(&self, key: &Pubkey, inner_keys_count: &AtomicU64); + // Removes a value from the set. Returns whether the value was present in the set. + fn remove_inner_key(&self, key: &Pubkey) -> bool; fn is_empty(&self) -> bool; fn keys(&self) -> Vec; fn len(&self) -> usize; } +#[derive(Debug, Default)] +pub struct SecondaryIndexStats { + last_report: AtomicU64, + num_inner_keys: AtomicU64, +} + #[derive(Debug, Default)] pub struct DashMapSecondaryIndexEntry { - pubkey_to_slot_set: DashMap>>, + account_keys: DashMap, } impl SecondaryIndexEntry for DashMapSecondaryIndexEntry { - fn get_or_create(&self, key: &Pubkey, f: &dyn Fn(&RwLock>)) { - let slot_set = self.pubkey_to_slot_set.get(key).unwrap_or_else(|| { - self.pubkey_to_slot_set - .entry(*key) - .or_insert(RwLock::new(HashSet::new())) - .downgrade() - }); - - f(&slot_set) - } - - fn get(&self, key: &Pubkey, f: &dyn Fn(Option<&RwLock>>) -> T) -> T { - let slot_set = self.pubkey_to_slot_set.get(key); - - f(slot_set.as_ref().map(|entry_ref| entry_ref.value())) - } - - fn remove_key_if_empty(&self, key: &Pubkey) { - if let Occupied(key_entry) = self.pubkey_to_slot_set.entry(*key) { - // Delete the `key` if the slot set is empty - let slot_set = key_entry.get(); - - // Write lock on `key_entry` above through the `entry` - // means nobody else has access to this lock at this time, - // so this check for empty -> remove() is atomic - if slot_set.read().unwrap().is_empty() { - key_entry.remove(); - } + fn insert_if_not_exists(&self, key: &Pubkey, inner_keys_count: &AtomicU64) { + if self.account_keys.get(key).is_none() { + self.account_keys.entry(*key).or_insert_with(|| { + inner_keys_count.fetch_add(1, Ordering::Relaxed); + }); } } + fn remove_inner_key(&self, key: &Pubkey) -> bool { + self.account_keys.remove(key).is_some() + } + fn is_empty(&self) -> bool { - self.pubkey_to_slot_set.is_empty() + self.account_keys.is_empty() } fn keys(&self) -> Vec { - self.pubkey_to_slot_set + self.account_keys .iter() .map(|entry_ref| *entry_ref.key()) .collect() } fn len(&self) -> usize { - self.pubkey_to_slot_set.len() + self.account_keys.len() } } #[derive(Debug, Default)] pub struct RwLockSecondaryIndexEntry { - pubkey_to_slot_set: RwLock>>>>, + account_keys: RwLock>, } impl SecondaryIndexEntry for RwLockSecondaryIndexEntry { - fn get_or_create(&self, key: &Pubkey, f: &dyn Fn(&RwLock>)) { - let slot_set = self.pubkey_to_slot_set.read().unwrap().get(key).cloned(); - - let slot_set = { - if let Some(slot_set) = slot_set { - slot_set - } else { - self.pubkey_to_slot_set - .write() - .unwrap() - .entry(*key) - .or_insert_with(|| Arc::new(RwLock::new(HashSet::new()))) - .clone() - } + fn insert_if_not_exists(&self, key: &Pubkey, inner_keys_count: &AtomicU64) { + let exists = self.account_keys.read().unwrap().contains(key); + if !exists { + let mut w_account_keys = self.account_keys.write().unwrap(); + w_account_keys.insert(*key); + inner_keys_count.fetch_add(1, Ordering::Relaxed); }; - - f(&slot_set) } - fn get(&self, key: &Pubkey, f: &dyn Fn(Option<&RwLock>>) -> T) -> T { - let slot_set = self.pubkey_to_slot_set.read().unwrap().get(key).cloned(); - f(slot_set.as_deref()) - } - - fn remove_key_if_empty(&self, key: &Pubkey) { - if let hash_map::Entry::Occupied(key_entry) = - self.pubkey_to_slot_set.write().unwrap().entry(*key) - { - // Delete the `key` if the slot set is empty - let slot_set = key_entry.get(); - - // Write lock on `key_entry` above through the `entry` - // means nobody else has access to this lock at this time, - // so this check for empty -> remove() is atomic - if slot_set.read().unwrap().is_empty() { - key_entry.remove(); - } - } + fn remove_inner_key(&self, key: &Pubkey) -> bool { + self.account_keys.write().unwrap().remove(key) } fn is_empty(&self) -> bool { - self.pubkey_to_slot_set.read().unwrap().is_empty() + self.account_keys.read().unwrap().is_empty() } fn keys(&self) -> Vec { - self.pubkey_to_slot_set - .read() - .unwrap() - .keys() - .cloned() - .collect() + self.account_keys.read().unwrap().iter().cloned().collect() } fn len(&self) -> usize { - self.pubkey_to_slot_set.read().unwrap().len() + self.account_keys.read().unwrap().len() } } #[derive(Debug, Default)] pub struct SecondaryIndex { + metrics_name: &'static str, // Map from index keys to index values pub index: DashMap, - // Map from index values back to index keys, used for cleanup. - // Alternative is to store Option in each AccountInfo in the - // AccountsIndex if something is an SPL account with a mint, but then - // every AccountInfo would have to allocate `Option` pub reverse_index: DashMap, + stats: SecondaryIndexStats, } impl SecondaryIndex { - pub fn insert(&self, key: &Pubkey, inner_key: &Pubkey, slot: Slot) { + pub fn new(metrics_name: &'static str) -> Self { + Self { + metrics_name, + ..Self::default() + } + } + + pub fn insert(&self, key: &Pubkey, inner_key: &Pubkey) { { let pubkeys_map = self.index.get(key).unwrap_or_else(|| { self.index @@ -160,92 +124,70 @@ impl .downgrade() }); - pubkeys_map.get_or_create(inner_key, &|slots_set: &RwLock>| { - let contains_key = slots_set.read().unwrap().contains(&slot); - if !contains_key { - slots_set.write().unwrap().insert(slot); - } - }); + pubkeys_map.insert_if_not_exists(inner_key, &self.stats.num_inner_keys); } - let prev_key = { - let slots_map = self.reverse_index.get(inner_key).unwrap_or_else(|| { - self.reverse_index - .entry(*inner_key) - .or_insert(RwLock::new(HashMap::new())) - .downgrade() - }); - let should_insert = { - // Most of the time, key should already exist and match - // the one in the update - if let Some(existing_key) = slots_map.read().unwrap().get(&slot) { - existing_key != key - } else { - // If there is no key yet, then insert - true - } - }; - if should_insert { - slots_map.write().unwrap().insert(slot, *key) - } else { - None - } - }; + let outer_keys = self.reverse_index.get(inner_key).unwrap_or_else(|| { + self.reverse_index + .entry(*inner_key) + .or_insert(RwLock::new(Vec::with_capacity(1))) + .downgrade() + }); - if let Some(prev_key) = prev_key { - // If the inner key was moved to a different primary key, remove - // the previous index entry. - - // Check is necessary because another thread's writes could feasibly be - // interleaved between `should_insert = { ... slots_map.get(...) ... }` and - // `prev_key = { ... slots_map.insert(...) ... }` - // Currently this isn't possible due to current AccountsIndex's (pubkey, slot)-per-thread - // exclusive-locking, but check is here for future-proofing a more relaxed implementation - if prev_key != *key { - self.remove_index_entries(&prev_key, inner_key, &[slot]); + let should_insert = !outer_keys.read().unwrap().contains(&key); + if should_insert { + let mut w_outer_keys = outer_keys.write().unwrap(); + if !w_outer_keys.contains(&key) { + w_outer_keys.push(*key); } } + + let now = solana_sdk::timing::timestamp(); + let last = self.stats.last_report.load(Ordering::Relaxed); + let should_report = now.saturating_sub(last) > 1000 + && self.stats.last_report.compare_exchange( + last, + now, + Ordering::Relaxed, + Ordering::Relaxed, + ) == Ok(last); + + if should_report { + datapoint_info!( + self.metrics_name, + ("num_secondary_keys", self.index.len() as i64, i64), + ( + "num_inner_keys", + self.stats.num_inner_keys.load(Ordering::Relaxed) as i64, + i64 + ), + ( + "num_reverse_index_keys", + self.reverse_index.len() as i64, + i64 + ), + ); + } } - pub fn remove_index_entries(&self, key: &Pubkey, inner_key: &Pubkey, slots: &[Slot]) { - let is_key_empty = if let Some(inner_key_map) = self.index.get(&key) { - // Delete the slot from the slot set - let is_inner_key_empty = - inner_key_map.get(&inner_key, &|slot_set: Option<&RwLock>>| { - if let Some(slot_set) = slot_set { - let mut w_slot_set = slot_set.write().unwrap(); - for slot in slots.iter() { - let is_present = w_slot_set.remove(slot); - if !is_present { - warn!("Reverse index is missing previous entry for key {}, inner_key: {}, slot: {}", - key, inner_key, slot); - } - } - w_slot_set.is_empty() - } else { - false - } - }); - - // Check if `key` is empty - if is_inner_key_empty { - // Write lock on `inner_key_entry` above through the `entry` - // means nobody else has access to this lock at this time, - // so this check for empty -> remove() is atomic - inner_key_map.remove_key_if_empty(inner_key); - inner_key_map.is_empty() - } else { - false - } - } else { - false + // Only safe to call from `remove_by_inner_key()` due to asserts + fn remove_index_entries(&self, outer_key: &Pubkey, removed_inner_key: &Pubkey) { + let is_outer_key_empty = { + let inner_key_map = self + .index + .get_mut(&outer_key) + .expect("If we're removing a key, then it must have an entry in the map"); + // If we deleted a pubkey from the reverse_index, then the corresponding entry + // better exist in this index as well or the two indexes are out of sync! + assert!(inner_key_map.value().remove_inner_key(&removed_inner_key)); + inner_key_map.is_empty() }; // Delete the `key` if the set of inner keys is empty - if is_key_empty { + if is_outer_key_empty { // Other threads may have interleaved writes to this `key`, // so double-check again for its emptiness - if let Occupied(key_entry) = self.index.entry(*key) { + if let Occupied(key_entry) = self.index.entry(*outer_key) { if key_entry.get().is_empty() { key_entry.remove(); } @@ -253,70 +195,31 @@ impl } } - // Specifying `slots_to_remove` == Some will only remove keys for those specific slots - // found for the `inner_key` in the reverse index. Otherwise, passing `None` - // will remove all keys that are found for the `inner_key` in the reverse index. - - // Note passing `None` is dangerous unless you're sure there's no other competing threads - // writing updates to the index for this Pubkey at the same time! - pub fn remove_by_inner_key<'a, C>(&'a self, inner_key: &Pubkey, slots_to_remove: Option<&'a C>) - where - C: Contains<'a, Slot>, - { + pub fn remove_by_inner_key(&self, inner_key: &Pubkey) { // Save off which keys in `self.index` had slots removed so we can remove them // after we purge the reverse index - let mut key_to_removed_slots: HashMap> = HashMap::new(); + let mut removed_outer_keys: HashSet = HashSet::new(); // Check if the entry for `inner_key` in the reverse index is empty // and can be removed - let needs_remove = { - if let Some(slots_to_remove) = slots_to_remove { - self.reverse_index - .get(inner_key) - .map(|slots_map| { - // Ideally we use a concurrent map here as well to prevent clean - // from blocking writes, but memory usage of DashMap is high - let mut w_slots_map = slots_map.value().write().unwrap(); - for slot in slots_to_remove.contains_iter() { - if let Some(removed_key) = w_slots_map.remove(slot.borrow()) { - key_to_removed_slots - .entry(removed_key) - .or_default() - .push(*slot.borrow()); - } - } - w_slots_map.is_empty() - }) - .unwrap_or(false) - } else { - if let Some((_, removed_slot_map)) = self.reverse_index.remove(inner_key) { - for (slot, removed_key) in removed_slot_map.into_inner().unwrap().into_iter() { - key_to_removed_slots - .entry(removed_key) - .or_default() - .push(slot); - } - } - // We just removed the key, no need to remove it again - false - } - }; - - if needs_remove { - // Other threads may have interleaved writes to this `inner_key`, between - // releasing the `self.reverse_index.get(inner_key)` lock and now, - // so double-check again for emptiness - if let Occupied(slot_map) = self.reverse_index.entry(*inner_key) { - if slot_map.get().read().unwrap().is_empty() { - slot_map.remove(); - } + if let Some((_, outer_keys_set)) = self.reverse_index.remove(inner_key) { + for removed_outer_key in outer_keys_set.into_inner().unwrap().into_iter() { + removed_outer_keys.insert(removed_outer_key); } } // Remove this value from those keys - for (key, slots) in key_to_removed_slots { - self.remove_index_entries(&key, inner_key, &slots); + for outer_key in &removed_outer_keys { + self.remove_index_entries(outer_key, inner_key); } + + // Safe to `fetch_sub()` here because a dead key cannot be removed more than once, + // and the `num_inner_keys` must have been incremented by exactly removed_outer_keys.len() + // in previous unique insertions of `inner_key` into `self.index` for each key + // in `removed_outer_keys` + self.stats + .num_inner_keys + .fetch_sub(removed_outer_keys.len() as u64, Ordering::Relaxed); } pub fn get(&self, key: &Pubkey) -> Vec {