From 6dfad0652fcd32456160740af429b74a4ea32d7b Mon Sep 17 00:00:00 2001 From: carllin Date: Mon, 11 Jan 2021 17:00:23 -0800 Subject: [PATCH] Cache account stores, flush from AccountsBackgroundService (#13140) --- accounts-bench/src/main.rs | 11 +- core/src/tvu.rs | 2 + core/src/validator.rs | 4 + core/tests/snapshots.rs | 4 +- ledger-tool/src/main.rs | 1 + ledger/src/bank_forks_utils.rs | 1 + ledger/src/blockstore_processor.rs | 5 + run.sh | 1 + runtime/benches/accounts.rs | 43 +- runtime/benches/accounts_index.rs | 2 +- runtime/benches/append_vec.rs | 1 + runtime/src/accounts.rs | 119 +- runtime/src/accounts_background_service.rs | 64 +- runtime/src/accounts_cache.rs | 254 +++ runtime/src/accounts_db.rs | 2255 +++++++++++++++---- runtime/src/accounts_index.rs | 160 +- runtime/src/append_vec.rs | 47 +- runtime/src/bank.rs | 416 ++-- runtime/src/lib.rs | 1 + runtime/src/serde_snapshot.rs | 19 +- runtime/src/serde_snapshot/tests.rs | 7 +- runtime/src/snapshot_utils.rs | 6 + runtime/src/system_instruction_processor.rs | 2 + runtime/tests/accounts.rs | 6 +- validator/src/main.rs | 6 + 25 files changed, 2604 insertions(+), 833 deletions(-) create mode 100644 runtime/src/accounts_cache.rs diff --git a/accounts-bench/src/main.rs b/accounts-bench/src/main.rs index 6bfbea7a30..83f1669bf9 100644 --- a/accounts-bench/src/main.rs +++ b/accounts-bench/src/main.rs @@ -2,13 +2,11 @@ use clap::{crate_description, crate_name, value_t, App, Arg}; use rayon::prelude::*; use solana_measure::measure::Measure; use solana_runtime::{ - accounts::{create_test_accounts, update_accounts, Accounts}, + accounts::{create_test_accounts, update_accounts_bench, Accounts}, accounts_index::Ancestors, }; use solana_sdk::{genesis_config::ClusterType, pubkey::Pubkey}; -use std::env; -use std::fs; -use std::path::PathBuf; +use std::{collections::HashSet, env, fs, path::PathBuf}; fn main() { solana_logger::setup(); @@ -56,7 +54,8 @@ fn main() { if fs::remove_dir_all(path.clone()).is_err() { println!("Warning: Couldn't remove {:?}", path); } - let accounts = Accounts::new(vec![path], &ClusterType::Testnet); + let accounts = + Accounts::new_with_config(vec![path], &ClusterType::Testnet, HashSet::new(), false); println!("Creating {} accounts", num_accounts); let mut create_time = Measure::start("create accounts"); let pubkeys: Vec<_> = (0..num_slots) @@ -92,7 +91,7 @@ fn main() { time.stop(); println!("{}", time); for slot in 0..num_slots { - update_accounts(&accounts, &pubkeys, ((x + 1) * num_slots + slot) as u64); + update_accounts_bench(&accounts, &pubkeys, ((x + 1) * num_slots + slot) as u64); accounts.add_root((x * num_slots + slot) as u64); } } else { diff --git a/core/src/tvu.rs b/core/src/tvu.rs index 3c827ec418..55a5dcd244 100644 --- a/core/src/tvu.rs +++ b/core/src/tvu.rs @@ -78,6 +78,7 @@ pub struct TvuConfig { pub trusted_validators: Option>, pub repair_validators: Option>, pub accounts_hash_fault_injection_slots: u64, + pub accounts_db_caching_enabled: bool, } impl Tvu { @@ -272,6 +273,7 @@ impl Tvu { bank_forks.clone(), &exit, accounts_background_request_handler, + tvu_config.accounts_db_caching_enabled, ); Tvu { diff --git a/core/src/validator.rs b/core/src/validator.rs index 0364b8ac62..df92965295 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -119,6 +119,7 @@ pub struct ValidatorConfig { pub no_poh_speed_test: bool, pub poh_pinned_cpu_core: usize, pub account_indexes: HashSet, + pub accounts_db_caching_enabled: bool, } impl Default for ValidatorConfig { @@ -164,6 +165,7 @@ impl Default for 
ValidatorConfig { no_poh_speed_test: true, poh_pinned_cpu_core: poh_service::DEFAULT_PINNED_CPU_CORE, account_indexes: HashSet::new(), + accounts_db_caching_enabled: false, } } } @@ -629,6 +631,7 @@ impl Validator { trusted_validators: config.trusted_validators.clone(), repair_validators: config.repair_validators.clone(), accounts_hash_fault_injection_slots: config.accounts_hash_fault_injection_slots, + accounts_db_caching_enabled: config.accounts_db_caching_enabled, }, ); @@ -960,6 +963,7 @@ fn new_banks_from_ledger( frozen_accounts: config.frozen_accounts.clone(), debug_keys: config.debug_keys.clone(), account_indexes: config.account_indexes.clone(), + accounts_db_caching_enabled: config.accounts_db_caching_enabled, ..blockstore_processor::ProcessOptions::default() }; diff --git a/core/tests/snapshots.rs b/core/tests/snapshots.rs index 0d1cab1d16..1f36ecf27e 100644 --- a/core/tests/snapshots.rs +++ b/core/tests/snapshots.rs @@ -105,6 +105,7 @@ mod tests { None, None, HashSet::new(), + false, ); bank0.freeze(); let mut bank_forks = BankForks::new(bank0); @@ -161,6 +162,7 @@ mod tests { None, None, HashSet::new(), + false, ) .unwrap(); @@ -216,7 +218,7 @@ mod tests { if slot % set_root_interval == 0 || slot == last_slot - 1 { // set_root should send a snapshot request bank_forks.set_root(bank.slot(), &request_sender, None); - snapshot_request_handler.handle_snapshot_requests(); + snapshot_request_handler.handle_snapshot_requests(false); } } diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index bce6715106..659482a17c 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -1926,6 +1926,7 @@ fn main() { ); assert!(bank.is_complete()); bank.squash(); + bank.force_flush_accounts_cache(); bank.clean_accounts(true); bank.update_accounts_hash(); if rehash { diff --git a/ledger/src/bank_forks_utils.rs b/ledger/src/bank_forks_utils.rs index 74bc4899cc..de2d6f24c2 100644 --- a/ledger/src/bank_forks_utils.rs +++ b/ledger/src/bank_forks_utils.rs @@ -69,6 +69,7 @@ pub fn load( process_options.debug_keys.clone(), Some(&crate::builtins::get(process_options.bpf_jit)), process_options.account_indexes.clone(), + process_options.accounts_db_caching_enabled, ) .expect("Load from snapshot failed"); if let Some(shrink_paths) = shrink_paths { diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index 557bf1c139..59eb5e9bb7 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -346,6 +346,7 @@ pub struct ProcessOptions { pub frozen_accounts: Vec, pub debug_keys: Option>>, pub account_indexes: HashSet, + pub accounts_db_caching_enabled: bool, } pub fn process_blockstore( @@ -371,6 +372,7 @@ pub fn process_blockstore( opts.debug_keys.clone(), Some(&crate::builtins::get(opts.bpf_jit)), opts.account_indexes.clone(), + opts.accounts_db_caching_enabled, ); let bank0 = Arc::new(bank0); info!("processing ledger for slot 0..."); @@ -929,6 +931,8 @@ fn load_frozen_forks( new_root_bank.squash(); if last_free.elapsed() > Duration::from_secs(10) { + // Must be called after `squash()`, so that AccountsDb knows what + // the roots are for the cache flushing in exhaustively_free_unused_resource(). 
// This could take few secs; so update last_free later new_root_bank.exhaustively_free_unused_resource(); last_free = Instant::now(); @@ -2901,6 +2905,7 @@ pub mod tests { None, None, HashSet::new(), + false, ); *bank.epoch_schedule() } diff --git a/run.sh b/run.sh index 7833a84565..613a1f4f71 100755 --- a/run.sh +++ b/run.sh @@ -105,6 +105,7 @@ args=( --init-complete-file "$dataDir"/init-completed --snapshot-compression none --require-tower + --accounts-db-caching-enabled ) # shellcheck disable=SC2086 solana-validator "${args[@]}" $SOLANA_RUN_SH_VALIDATOR_ARGS & diff --git a/runtime/benches/accounts.rs b/runtime/benches/accounts.rs index 0c95a46304..217d2f669e 100644 --- a/runtime/benches/accounts.rs +++ b/runtime/benches/accounts.rs @@ -50,6 +50,7 @@ fn test_accounts_create(bencher: &mut Bencher) { None, None, HashSet::new(), + false, ); bencher.iter(|| { let mut pubkeys: Vec = vec![]; @@ -61,35 +62,39 @@ fn test_accounts_create(bencher: &mut Bencher) { fn test_accounts_squash(bencher: &mut Bencher) { let (mut genesis_config, _) = create_genesis_config(100_000); genesis_config.rent.burn_percent = 100; // Avoid triggering an assert in Bank::distribute_rent_to_validators() - let bank1 = Arc::new(Bank::new_with_paths( + let mut prev_bank = Arc::new(Bank::new_with_paths( &genesis_config, vec![PathBuf::from("bench_a1")], &[], None, None, HashSet::new(), + false, )); let mut pubkeys: Vec = vec![]; - deposit_many(&bank1, &mut pubkeys, 250_000); - bank1.freeze(); + deposit_many(&prev_bank, &mut pubkeys, 250_000); + prev_bank.freeze(); // Measures the performance of the squash operation. // This mainly consists of the freeze operation which calculates the // merkle hash of the account state and distribution of fees and rent let mut slot = 1u64; bencher.iter(|| { - let bank2 = Arc::new(Bank::new_from_parent(&bank1, &Pubkey::default(), slot)); - bank2.deposit(&pubkeys[0], 1); - bank2.squash(); + let next_bank = Arc::new(Bank::new_from_parent(&prev_bank, &Pubkey::default(), slot)); + next_bank.deposit(&pubkeys[0], 1); + next_bank.squash(); slot += 1; + prev_bank = next_bank; }); } #[bench] fn test_accounts_hash_bank_hash(bencher: &mut Bencher) { - let accounts = Accounts::new( + let accounts = Accounts::new_with_config( vec![PathBuf::from("bench_accounts_hash_internal")], &ClusterType::Development, + HashSet::new(), + false, ); let mut pubkeys: Vec = vec![]; let num_accounts = 60_000; @@ -107,9 +112,11 @@ fn test_accounts_hash_bank_hash(bencher: &mut Bencher) { #[bench] fn test_update_accounts_hash(bencher: &mut Bencher) { solana_logger::setup(); - let accounts = Accounts::new( + let accounts = Accounts::new_with_config( vec![PathBuf::from("update_accounts_hash")], &ClusterType::Development, + HashSet::new(), + false, ); let mut pubkeys: Vec = vec![]; create_test_accounts(&accounts, &mut pubkeys, 50_000, 0); @@ -124,9 +131,11 @@ fn test_update_accounts_hash(bencher: &mut Bencher) { #[bench] fn test_accounts_delta_hash(bencher: &mut Bencher) { solana_logger::setup(); - let accounts = Accounts::new( + let accounts = Accounts::new_with_config( vec![PathBuf::from("accounts_delta_hash")], &ClusterType::Development, + HashSet::new(), + false, ); let mut pubkeys: Vec = vec![]; create_test_accounts(&accounts, &mut pubkeys, 100_000, 0); @@ -138,17 +147,19 @@ fn test_accounts_delta_hash(bencher: &mut Bencher) { #[bench] fn bench_delete_dependencies(bencher: &mut Bencher) { solana_logger::setup(); - let accounts = Accounts::new( + let accounts = Accounts::new_with_config( 
vec![PathBuf::from("accounts_delete_deps")], &ClusterType::Development, + HashSet::new(), + false, ); let mut old_pubkey = Pubkey::default(); let zero_account = Account::new(0, 0, &Account::default().owner); for i in 0..1000 { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new((i + 1) as u64, 0, &Account::default().owner); - accounts.store_slow(i, &pubkey, &account); - accounts.store_slow(i, &old_pubkey, &zero_account); + accounts.store_slow_uncached(i, &pubkey, &account); + accounts.store_slow_uncached(i, &old_pubkey, &zero_account); old_pubkey = pubkey; accounts.add_root(i); } @@ -165,12 +176,14 @@ fn store_accounts_with_possible_contention( F: Fn(&Accounts, &[Pubkey]) + Send + Copy, { let num_readers = 5; - let accounts = Arc::new(Accounts::new( + let accounts = Arc::new(Accounts::new_with_config( vec![ PathBuf::from(std::env::var("FARF_DIR").unwrap_or_else(|_| "farf".to_string())) .join(bench_name), ], &ClusterType::Development, + HashSet::new(), + false, )); let num_keys = 1000; let slot = 0; @@ -180,7 +193,7 @@ fn store_accounts_with_possible_contention( .map(|_| { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, 0, &Account::default().owner); - accounts.store_slow(slot, &pubkey, &account); + accounts.store_slow_uncached(slot, &pubkey, &account); pubkey }) .collect(), @@ -206,7 +219,7 @@ fn store_accounts_with_possible_contention( // Write to a different slot than the one being read from. Because // there's a new account pubkey being written to every time, will // compete for the accounts index lock on every store - accounts.store_slow(slot + 1, &solana_sdk::pubkey::new_rand(), &account); + accounts.store_slow_uncached(slot + 1, &solana_sdk::pubkey::new_rand(), &account); } }) } diff --git a/runtime/benches/accounts_index.rs b/runtime/benches/accounts_index.rs index feb35e1bdb..7e8b4840a6 100644 --- a/runtime/benches/accounts_index.rs +++ b/runtime/benches/accounts_index.rs @@ -47,7 +47,7 @@ fn bench_accounts_index(bencher: &mut Bencher) { ); reclaims.clear(); } - index.add_root(root); + index.add_root(root, false); root += 1; fork += 1; }); diff --git a/runtime/benches/append_vec.rs b/runtime/benches/append_vec.rs index 5eba3a827c..1891f96fbf 100644 --- a/runtime/benches/append_vec.rs +++ b/runtime/benches/append_vec.rs @@ -48,6 +48,7 @@ fn append_vec_sequential_read(bencher: &mut Bencher) { let mut indexes = add_test_accounts(&vec, size); bencher.iter(|| { let (sample, pos) = indexes.pop().unwrap(); + println!("reading pos {} {}", sample, pos); let (account, _next) = vec.get_account(pos).unwrap(); let (_meta, test) = create_test_account(sample); assert_eq!(account.data, test.data.as_slice()); diff --git a/runtime/src/accounts.rs b/runtime/src/accounts.rs index 8c8cd6f292..4f1a2a1828 100644 --- a/runtime/src/accounts.rs +++ b/runtime/src/accounts.rs @@ -1,7 +1,6 @@ use crate::{ - accounts_db::{AccountsDB, AppendVecId, BankHashInfo, ErrorCounters}, + accounts_db::{AccountsDB, AppendVecId, BankHashInfo, ErrorCounters, LoadedAccount}, accounts_index::{AccountIndex, Ancestors, IndexKey}, - append_vec::StoredAccount, bank::{ NonceRollbackFull, NonceRollbackInfo, TransactionCheckResult, TransactionExecutionResult, }, @@ -82,19 +81,21 @@ pub enum AccountAddressFilter { impl Accounts { pub fn new(paths: Vec, cluster_type: &ClusterType) -> Self { - Self::new_with_indexes(paths, cluster_type, HashSet::new()) + Self::new_with_config(paths, cluster_type, HashSet::new(), false) } - pub fn new_with_indexes( + pub fn new_with_config( paths: Vec, 
cluster_type: &ClusterType, account_indexes: HashSet, + caching_enabled: bool, ) -> Self { Self { - accounts_db: Arc::new(AccountsDB::new_with_indexes( + accounts_db: Arc::new(AccountsDB::new_with_config( paths, cluster_type, account_indexes, + caching_enabled, )), account_locks: Mutex::new(HashSet::new()), readonly_locks: Arc::new(RwLock::new(Some(HashMap::new()))), @@ -447,24 +448,20 @@ impl Accounts { } /// scans underlying accounts_db for this delta (slot) with a map function - /// from StoredAccount to B + /// from LoadedAccount to B /// returns only the latest/current version of B for this slot - fn scan_slot(&self, slot: Slot, func: F) -> Vec + pub fn scan_slot(&self, slot: Slot, func: F) -> Vec where - F: Fn(&StoredAccount) -> Option + Send + Sync, + F: Fn(LoadedAccount) -> Option + Send + Sync, B: Send + Default, { let accumulator: Vec> = self.accounts_db.scan_account_storage( slot, - |stored_account: &StoredAccount, - _id: AppendVecId, - accum: &mut Vec<(Pubkey, u64, B)>| { - if let Some(val) = func(&stored_account) { - accum.push(( - stored_account.meta.pubkey, - std::u64::MAX - stored_account.meta.write_version, - val, - )); + |loaded_account: LoadedAccount, _id: AppendVecId, accum: &mut Vec<(Pubkey, u64, B)>| { + let pubkey = *loaded_account.pubkey(); + let write_version = loaded_account.write_version(); + if let Some(val) = func(loaded_account) { + accum.push((pubkey, std::u64::MAX - write_version, val)); } }, ); @@ -488,11 +485,11 @@ impl Accounts { self.scan_slot(slot, |stored_account| { let hit = match program_id { None => true, - Some(program_id) => stored_account.account_meta.owner == *program_id, + Some(program_id) => stored_account.owner() == program_id, }; if hit { - Some((stored_account.meta.pubkey, stored_account.clone_account())) + Some((*stored_account.pubkey(), stored_account.account())) } else { None } @@ -675,9 +672,15 @@ impl Accounts { ) } - /// Slow because lock is held for 1 operation instead of many - pub fn store_slow(&self, slot: Slot, pubkey: &Pubkey, account: &Account) { - self.accounts_db.store(slot, &[(pubkey, account)]); + /// Slow because lock is held for 1 operation instead of many. + /// WARNING: This noncached version is only to be used for tests/benchmarking + /// as bypassing the cache in general is not supported + pub fn store_slow_uncached(&self, slot: Slot, pubkey: &Pubkey, account: &Account) { + self.accounts_db.store_uncached(slot, &[(pubkey, account)]); + } + + pub fn store_slow_cached(&self, slot: Slot, pubkey: &Pubkey, account: &Account) { + self.accounts_db.store_cached(slot, &[(pubkey, account)]); } fn is_locked_readonly(&self, key: &Pubkey) -> bool { @@ -846,7 +849,7 @@ impl Accounts { /// Store the accounts into the DB // allow(clippy) needed for various gating flags #[allow(clippy::too_many_arguments)] - pub fn store_accounts( + pub fn store_cached( &self, slot: Slot, txs: &[Transaction], @@ -868,7 +871,7 @@ impl Accounts { fix_recent_blockhashes_sysvar_delay, rent_fix_enabled, ); - self.accounts_db.store(slot, &accounts_to_store); + self.accounts_db.store_cached(slot, &accounts_to_store); } /// Purge a slot if it is not a root @@ -876,6 +879,7 @@ impl Accounts { pub fn purge_slot(&self, slot: Slot) { self.accounts_db.purge_slot(slot); } + /// Add a slot to root. 
Root slots cannot be purged pub fn add_root(&self, slot: Slot) { self.accounts_db.add_root(slot) @@ -1024,16 +1028,18 @@ pub fn create_test_accounts( for t in 0..num { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new((t + 1) as u64, 0, &Account::default().owner); - accounts.store_slow(slot, &pubkey, &account); + accounts.store_slow_uncached(slot, &pubkey, &account); pubkeys.push(pubkey); } } -pub fn update_accounts(accounts: &Accounts, pubkeys: &[Pubkey], slot: u64) { +// Only used by bench, not safe to call otherwise accounts can conflict with the +// accounts cache! +pub fn update_accounts_bench(accounts: &Accounts, pubkeys: &[Pubkey], slot: u64) { for pubkey in pubkeys { let amount = thread_rng().gen_range(0, 10); let account = Account::new(amount, 0, &Account::default().owner); - accounts.store_slow(slot, &pubkey, &account); + accounts.store_slow_uncached(slot, &pubkey, &account); } } @@ -1070,9 +1076,10 @@ mod tests { ) -> Vec { let mut hash_queue = BlockhashQueue::new(100); hash_queue.register_hash(&tx.message().recent_blockhash, &fee_calculator); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); for ka in ka.iter() { - accounts.store_slow(0, &ka.0, &ka.1); + accounts.store_slow_uncached(0, &ka.0, &ka.1); } let ancestors = vec![(0, 0)].into_iter().collect(); @@ -1619,18 +1626,19 @@ mod tests { #[test] fn test_load_by_program_slot() { - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); // Load accounts owned by various programs into AccountsDB let pubkey0 = solana_sdk::pubkey::new_rand(); let account0 = Account::new(1, 0, &Pubkey::new(&[2; 32])); - accounts.store_slow(0, &pubkey0, &account0); + accounts.store_slow_uncached(0, &pubkey0, &account0); let pubkey1 = solana_sdk::pubkey::new_rand(); let account1 = Account::new(1, 0, &Pubkey::new(&[2; 32])); - accounts.store_slow(0, &pubkey1, &account1); + accounts.store_slow_uncached(0, &pubkey1, &account1); let pubkey2 = solana_sdk::pubkey::new_rand(); let account2 = Account::new(1, 0, &Pubkey::new(&[3; 32])); - accounts.store_slow(0, &pubkey2, &account2); + accounts.store_slow_uncached(0, &pubkey2, &account2); let loaded = accounts.load_by_program_slot(0, Some(&Pubkey::new(&[2; 32]))); assert_eq!(loaded.len(), 2); @@ -1642,7 +1650,8 @@ mod tests { #[test] fn test_accounts_account_not_found() { - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); let mut error_counters = ErrorCounters::default(); let ancestors = vec![(0, 0)].into_iter().collect(); @@ -1660,7 +1669,8 @@ mod tests { #[test] #[should_panic] fn test_accounts_empty_bank_hash() { - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); accounts.bank_hash_at(1); } @@ -1676,11 +1686,12 @@ mod tests { let account2 = Account::new(3, 0, &Pubkey::default()); let account3 = Account::new(4, 0, &Pubkey::default()); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); - accounts.store_slow(0, &keypair0.pubkey(), &account0); - accounts.store_slow(0, &keypair1.pubkey(), &account1); - accounts.store_slow(0, &keypair2.pubkey(), &account2); - 
accounts.store_slow(0, &keypair3.pubkey(), &account3); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); + accounts.store_slow_uncached(0, &keypair0.pubkey(), &account0); + accounts.store_slow_uncached(0, &keypair1.pubkey(), &account1); + accounts.store_slow_uncached(0, &keypair2.pubkey(), &account2); + accounts.store_slow_uncached(0, &keypair3.pubkey(), &account3); let instructions = vec![CompiledInstruction::new(2, &(), vec![0, 1])]; let message = Message::new_with_compiled_instructions( @@ -1788,10 +1799,11 @@ mod tests { let account1 = Account::new(2, 0, &Pubkey::default()); let account2 = Account::new(3, 0, &Pubkey::default()); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); - accounts.store_slow(0, &keypair0.pubkey(), &account0); - accounts.store_slow(0, &keypair1.pubkey(), &account1); - accounts.store_slow(0, &keypair2.pubkey(), &account2); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); + accounts.store_slow_uncached(0, &keypair0.pubkey(), &account0); + accounts.store_slow_uncached(0, &keypair1.pubkey(), &account1); + accounts.store_slow_uncached(0, &keypair2.pubkey(), &account2); let accounts_arc = Arc::new(accounts); @@ -1917,7 +1929,8 @@ mod tests { let mut loaded = vec![loaded0, loaded1]; - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); { let mut readonly_locks = accounts.readonly_locks.write().unwrap(); let readonly_locks = readonly_locks.as_mut().unwrap(); @@ -1969,15 +1982,16 @@ mod tests { #[test] fn huge_clean() { solana_logger::setup(); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); let mut old_pubkey = Pubkey::default(); let zero_account = Account::new(0, 0, &Account::default().owner); info!("storing.."); for i in 0..2_000 { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new((i + 1) as u64, 0, &Account::default().owner); - accounts.store_slow(i, &pubkey, &account); - accounts.store_slow(i, &old_pubkey, &zero_account); + accounts.store_slow_uncached(i, &pubkey, &account); + accounts.store_slow_uncached(i, &old_pubkey, &zero_account); old_pubkey = pubkey; accounts.add_root(i); if i % 1_000 == 0 { @@ -2011,7 +2025,8 @@ mod tests { #[test] fn test_instructions() { solana_logger::setup(); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); let instructions_key = solana_sdk::sysvar::instructions::id(); let keypair = Keypair::new(); @@ -2291,7 +2306,8 @@ mod tests { let mut loaded = vec![loaded]; let next_blockhash = Hash::new_unique(); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); let collected_accounts = accounts.collect_accounts_to_store( &txs, None, @@ -2401,7 +2417,8 @@ mod tests { let mut loaded = vec![loaded]; let next_blockhash = Hash::new_unique(); - let accounts = Accounts::new(Vec::new(), &ClusterType::Development); + let accounts = + Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false); let collected_accounts = accounts.collect_accounts_to_store( &txs, None, 
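The accounts.rs changes above split the old store_slow() into explicit cached and uncached variants and thread a caching_enabled flag through the new constructor. A minimal sketch of how a test might drive both paths (the slot, lamport values, and pubkeys are illustrative; the API names come from this patch):

```rust
use solana_runtime::accounts::Accounts;
use solana_sdk::{account::Account, genesis_config::ClusterType, pubkey::Pubkey};
use std::collections::HashSet;

fn demo_store_paths() {
    // Empty `paths` makes AccountsDB create temporary directories.
    let accounts =
        Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), true);

    // Cached path: the account lands in the new AccountsCache and is only
    // written to an AppendVec when the slot is flushed.
    accounts.store_slow_cached(0, &Pubkey::new_unique(), &Account::new(1, 0, &Pubkey::default()));

    // Uncached path: bypasses the cache entirely; per the doc comment above,
    // intended only for tests and benchmarks.
    accounts.store_slow_uncached(0, &Pubkey::new_unique(), &Account::new(1, 0, &Pubkey::default()));
}
```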
diff --git a/runtime/src/accounts_background_service.rs b/runtime/src/accounts_background_service.rs index 384eba1b93..18386e2a89 100644 --- a/runtime/src/accounts_background_service.rs +++ b/runtime/src/accounts_background_service.rs @@ -77,7 +77,7 @@ pub struct SnapshotRequestHandler { impl SnapshotRequestHandler { // Returns the latest requested snapshot slot, if one exists - pub fn handle_snapshot_requests(&self) -> Option { + pub fn handle_snapshot_requests(&self, accounts_db_caching_enabled: bool) -> Option { self.snapshot_request_receiver .try_iter() .last() @@ -92,9 +92,19 @@ impl SnapshotRequestHandler { hash_time.stop(); let mut shrink_time = Measure::start("shrink_time"); - snapshot_root_bank.process_stale_slot_with_budget(0, SHRUNKEN_ACCOUNT_PER_INTERVAL); + if !accounts_db_caching_enabled { + snapshot_root_bank + .process_stale_slot_with_budget(0, SHRUNKEN_ACCOUNT_PER_INTERVAL); + } shrink_time.stop(); + let mut flush_accounts_cache_time = Measure::start("flush_accounts_cache_time"); + if accounts_db_caching_enabled { + // Force flush all the roots from the cache so that the snapshot can be taken. + snapshot_root_bank.force_flush_accounts_cache(); + } + flush_accounts_cache_time.stop(); + let mut clean_time = Measure::start("clean_time"); // Don't clean the slot we're snapshotting because it may have zero-lamport // accounts that were included in the bank delta hash when the bank was frozen, @@ -103,6 +113,12 @@ impl SnapshotRequestHandler { snapshot_root_bank.clean_accounts(true); clean_time.stop(); + if accounts_db_caching_enabled { + shrink_time = Measure::start("shrink_time"); + snapshot_root_bank.shrink_candidate_slots(); + shrink_time.stop(); + } + // Generate an accounts package let mut snapshot_time = Measure::start("snapshot_time"); let r = snapshot_utils::snapshot_bank( @@ -130,6 +146,12 @@ impl SnapshotRequestHandler { datapoint_info!( "handle_snapshot_requests-timing", + ("hash_time", hash_time.as_us(), i64), + ( + "flush_accounts_cache_time", + flush_accounts_cache_time.as_us(), + i64 + ), ("shrink_time", shrink_time.as_us(), i64), ("clean_time", clean_time.as_us(), i64), ("snapshot_time", snapshot_time.as_us(), i64), @@ -138,7 +160,6 @@ impl SnapshotRequestHandler { purge_old_snapshots_time.as_us(), i64 ), - ("hash_time", hash_time.as_us(), i64), ); snapshot_root_bank.block_height() }) @@ -180,11 +201,11 @@ pub struct ABSRequestHandler { impl ABSRequestHandler { // Returns the latest requested snapshot block height, if one exists - pub fn handle_snapshot_requests(&self) -> Option { + pub fn handle_snapshot_requests(&self, accounts_db_caching_enabled: bool) -> Option { self.snapshot_request_handler .as_ref() .and_then(|snapshot_request_handler| { - snapshot_request_handler.handle_snapshot_requests() + snapshot_request_handler.handle_snapshot_requests(accounts_db_caching_enabled) }) } @@ -208,6 +229,7 @@ impl AccountsBackgroundService { bank_forks: Arc>, exit: &Arc, request_handler: ABSRequestHandler, + accounts_db_caching_enabled: bool, ) -> Self { info!("AccountsBackgroundService active"); let exit = exit.clone(); @@ -250,26 +272,36 @@ impl AccountsBackgroundService { // request for `N` to the snapshot request channel before setting a root `R > N`, and // snapshot_request_handler.handle_requests() will always look for the latest // available snapshot in the channel. 
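When caching is enabled, the ordering this handler enforces matters: the cache must be flushed before cleaning and shrinking, so the AppendVecs backing the snapshot are complete. A condensed sketch of the sequence implemented above, under the assumption that `bank` stands in for `snapshot_root_bank` (error handling and metrics omitted):

```rust
fn snapshot_sequence_with_cache(bank: &solana_runtime::bank::Bank) {
    // (The bank hash computation measured as `hash_time` above happens first.)
    // 1. Push every rooted slot out of the write cache into AppendVecs.
    bank.force_flush_accounts_cache();
    // 2. Clean, skipping the snapshot slot itself: it may contain zero-lamport
    //    accounts that were part of the bank's delta hash when it was frozen.
    bank.clean_accounts(true);
    // 3. Shrink the per-slot candidates accumulated while cleaning/purging.
    bank.shrink_candidate_slots();
    // 4. snapshot_utils::snapshot_bank(...) then packages the accounts.
}
```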
-            let snapshot_block_height = request_handler.handle_snapshot_requests();
+            let snapshot_block_height =
+                request_handler.handle_snapshot_requests(accounts_db_caching_enabled);
+            if accounts_db_caching_enabled {
+                bank.flush_accounts_cache_if_needed();
+            }
             if let Some(snapshot_block_height) = snapshot_block_height {
                 // Safe, see proof above
                 assert!(last_cleaned_block_height <= snapshot_block_height);
                 last_cleaned_block_height = snapshot_block_height;
             } else {
-                // under sustained writes, shrink can lag behind so cap to
-                // SHRUNKEN_ACCOUNT_PER_INTERVAL (which is based on INTERVAL_MS,
-                // which in turn roughly asscociated block time)
-                consumed_budget = bank
-                    .process_stale_slot_with_budget(
-                        consumed_budget,
-                        SHRUNKEN_ACCOUNT_PER_INTERVAL,
-                    )
-                    .min(SHRUNKEN_ACCOUNT_PER_INTERVAL);
-
+                if accounts_db_caching_enabled {
+                    bank.shrink_candidate_slots();
+                } else {
+                    // under sustained writes, shrink can lag behind so cap to
+                    // SHRUNKEN_ACCOUNT_PER_INTERVAL (which is based on INTERVAL_MS,
+                    // which in turn is roughly associated with block time)
+                    consumed_budget = bank
+                        .process_stale_slot_with_budget(
+                            consumed_budget,
+                            SHRUNKEN_ACCOUNT_PER_INTERVAL,
+                        )
+                        .min(SHRUNKEN_ACCOUNT_PER_INTERVAL);
+                }
                 if bank.block_height() - last_cleaned_block_height
                     > (CLEAN_INTERVAL_BLOCKS + thread_rng().gen_range(0, 10))
                 {
+                    if accounts_db_caching_enabled {
+                        bank.force_flush_accounts_cache();
+                    }
                     bank.clean_accounts(true);
                     last_cleaned_block_height = bank.block_height();
                 }
diff --git a/runtime/src/accounts_cache.rs b/runtime/src/accounts_cache.rs
new file mode 100644
index 0000000000..daac6d8cb1
--- /dev/null
+++ b/runtime/src/accounts_cache.rs
@@ -0,0 +1,254 @@
+use dashmap::DashMap;
+use solana_sdk::{account::Account, clock::Slot, hash::Hash, pubkey::Pubkey};
+use std::{
+    collections::HashSet,
+    ops::Deref,
+    sync::{
+        atomic::{AtomicBool, AtomicU64, Ordering},
+        Arc, RwLock,
+    },
+};
+
+pub type SlotCache = Arc<SlotCacheInner>;
+
+#[derive(Default, Debug)]
+pub struct SlotCacheInner {
+    cache: DashMap<Pubkey, CachedAccount>,
+    same_account_writes: AtomicU64,
+    same_account_writes_size: AtomicU64,
+    unique_account_writes_size: AtomicU64,
+    is_frozen: AtomicBool,
+}
+
+impl SlotCacheInner {
+    pub fn report_slot_store_metrics(&self) {
+        datapoint_info!(
+            "slot_repeated_writes",
+            (
+                "same_account_writes",
+                self.same_account_writes.load(Ordering::Relaxed),
+                i64
+            ),
+            (
+                "same_account_writes_size",
+                self.same_account_writes_size.load(Ordering::Relaxed),
+                i64
+            ),
+            (
+                "unique_account_writes_size",
+                self.unique_account_writes_size.load(Ordering::Relaxed),
+                i64
+            )
+        );
+    }
+
+    pub fn insert(&self, pubkey: &Pubkey, account: Account, hash: Hash) {
+        if self.cache.contains_key(pubkey) {
+            self.same_account_writes.fetch_add(1, Ordering::Relaxed);
+            self.same_account_writes_size
+                .fetch_add(account.data.len() as u64, Ordering::Relaxed);
+        } else {
+            self.unique_account_writes_size
+                .fetch_add(account.data.len() as u64, Ordering::Relaxed);
+        }
+        self.cache.insert(*pubkey, CachedAccount { account, hash });
+    }
+
+    pub fn get_cloned(&self, pubkey: &Pubkey) -> Option<CachedAccount> {
+        self.cache
+            .get(pubkey)
+            // 1) Maybe can eventually use a Cow to avoid a clone on every read
+            // 2) Popping is only safe if it's guaranteed that only replay/banking threads
+            // are reading from the AccountsDb
+            .map(|account_ref| account_ref.value().clone())
+    }
+
+    pub fn mark_slot_frozen(&self) {
+        self.is_frozen.store(true, Ordering::SeqCst);
+    }
+
+    pub fn is_frozen(&self) -> bool {
+        self.is_frozen.load(Ordering::SeqCst)
+    }
+}
+
+impl Deref for SlotCacheInner {
+    type Target = DashMap<Pubkey, CachedAccount>;
+    fn deref(&self) -> &Self::Target {
+        &self.cache
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct CachedAccount {
+    pub account: Account,
+    pub hash: Hash,
+}
+
+#[derive(Debug, Default)]
+pub struct AccountsCache {
+    cache: DashMap<Slot, SlotCache>,
+    // Queue of potentially unflushed roots. A slot in this set may already have
+    // been flushed if random eviction kicked in while the cache was too large.
+    maybe_unflushed_roots: RwLock<HashSet<Slot>>,
+    max_flushed_root: AtomicU64,
+}
+
+impl AccountsCache {
+    pub fn report_size(&self) {
+        let total_unique_writes_size: u64 = self
+            .cache
+            .iter()
+            .map(|item| {
+                let slot_cache = item.value();
+                slot_cache
+                    .unique_account_writes_size
+                    .load(Ordering::Relaxed)
+            })
+            .sum();
+        datapoint_info!(
+            "accounts_cache_size",
+            (
+                "num_roots",
+                self.maybe_unflushed_roots.read().unwrap().len(),
+                i64
+            ),
+            ("num_slots", self.cache.len(), i64),
+            ("total_unique_writes_size", total_unique_writes_size, i64),
+        );
+    }
+
+    pub fn store(&self, slot: Slot, pubkey: &Pubkey, account: Account, hash: Hash) {
+        let slot_cache = self.slot_cache(slot).unwrap_or_else(||
+            // DashMap entry.or_insert() returns a RefMut, essentially a write lock,
+            // which is dropped after this block ends, minimizing time held by the lock.
+            // However, we still want to persist the reference to the `SlotCache` behind
+            // the lock, hence we clone it out (`SlotCache` is an Arc, so it is cheap to clone).
+            self
+                .cache
+                .entry(slot)
+                .or_insert(Arc::new(SlotCacheInner::default()))
+                .clone());

+        slot_cache.insert(pubkey, account, hash);
+    }
+
+    pub fn load(&self, slot: Slot, pubkey: &Pubkey) -> Option<CachedAccount> {
+        self.slot_cache(slot)
+            .and_then(|slot_cache| slot_cache.get_cloned(pubkey))
+    }
+
+    pub fn remove_slot(&self, slot: Slot) -> Option<SlotCache> {
+        self.cache.remove(&slot).map(|(_, slot_cache)| slot_cache)
+    }
+
+    pub fn slot_cache(&self, slot: Slot) -> Option<SlotCache> {
+        self.cache.get(&slot).map(|result| result.value().clone())
+    }
+
+    pub fn add_root(&self, root: Slot) {
+        self.maybe_unflushed_roots.write().unwrap().insert(root);
+    }
+
+    pub fn clear_roots(&self) -> HashSet<Slot> {
+        std::mem::replace(
+            &mut self.maybe_unflushed_roots.write().unwrap(),
+            HashSet::new(),
+        )
+    }
+
+    // Removes slots less than or equal to `max_root`. Only safe to pass in a rooted slot,
+    // otherwise the slot removed could still be undergoing replay!
+    pub fn remove_slots_le(&self, max_root: Slot) -> Vec<(Slot, SlotCache)> {
+        let mut removed_slots = vec![];
+        self.cache.retain(|slot, slot_cache| {
+            let should_remove = *slot <= max_root;
+            if should_remove {
+                removed_slots.push((*slot, slot_cache.clone()))
+            }
+            !should_remove
+        });
+        removed_slots
+    }
+
+    pub fn find_older_frozen_slots(&self, num_to_retain: usize) -> Vec<Slot> {
+        if self.cache.len() > num_to_retain {
+            let mut slots: Vec<_> = self
+                .cache
+                .iter()
+                .filter_map(|item| {
+                    let (slot, slot_cache) = item.pair();
+                    if slot_cache.is_frozen() {
+                        Some(*slot)
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+            slots.sort_unstable();
+            slots.truncate(slots.len().saturating_sub(num_to_retain));
+            slots
+        } else {
+            vec![]
+        }
+    }
+
+    pub fn num_slots(&self) -> usize {
+        self.cache.len()
+    }
+
+    pub fn fetch_max_flush_root(&self) -> Slot {
+        self.max_flushed_root.load(Ordering::Relaxed)
+    }
+
+    pub fn set_max_flush_root(&self, root: Slot) {
+        self.max_flushed_root.fetch_max(root, Ordering::Relaxed);
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+
+    #[test]
+    fn test_remove_slots_le() {
+        let cache = AccountsCache::default();
+        // Cache is empty, should return nothing
+        assert!(cache.remove_slots_le(1).is_empty());
+        let inserted_slot = 0;
+        cache.store(
+            inserted_slot,
+            &Pubkey::new_unique(),
+            Account::new(1, 0, &Pubkey::default()),
+            Hash::default(),
+        );
+        // Removing all slots <= 0 should return the one slot inserted at slot 0
+        let removed = cache.remove_slots_le(0);
+        assert_eq!(removed.len(), 1);
+        assert_eq!(removed[0].0, inserted_slot);
+    }
+
+    #[test]
+    fn test_find_older_frozen_slots() {
+        let cache = AccountsCache::default();
+        // Cache is empty, should return nothing
+        assert!(cache.find_older_frozen_slots(0).is_empty());
+        let inserted_slot = 0;
+        cache.store(
+            inserted_slot,
+            &Pubkey::new_unique(),
+            Account::new(1, 0, &Pubkey::default()),
+            Hash::default(),
+        );
+
+        // If the cache is told to retain one slot, it should return nothing, because
+        // there's only one cached slot
+        assert!(cache.find_older_frozen_slots(1).is_empty());
+        // If the cache is told to retain zero slots, it should still return nothing,
+        // because there are no frozen slots yet
+        assert!(cache.find_older_frozen_slots(0).is_empty());
+        cache.slot_cache(inserted_slot).unwrap().mark_slot_frozen();
+        // If the cache is told to retain zero slots, it should now return the one
+        // frozen slot
+        assert_eq!(cache.find_older_frozen_slots(0), vec![inserted_slot]);
+    }
+}
diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index 6909744290..a678a8219f 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -19,14 +19,17 @@
 //! commit for each slot entry would be indexed.
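Before moving into the accounts_db.rs changes: the new cache's lifecycle (store, freeze, root, flush/evict) can be seen end-to-end in a few lines. A sketch built only from the accounts_cache.rs API above, assuming the module is exported from the crate root (the one-line lib.rs change in the diffstat suggests it is):

```rust
use solana_runtime::accounts_cache::AccountsCache;
use solana_sdk::{account::Account, hash::Hash, pubkey::Pubkey};

fn cache_lifecycle() {
    let cache = AccountsCache::default();
    let (slot, pubkey) = (0, Pubkey::new_unique());

    // Replay writes into the per-slot cache instead of an AppendVec.
    cache.store(slot, &pubkey, Account::new(1, 0, &Pubkey::default()), Hash::default());
    assert!(cache.load(slot, &pubkey).is_some());

    // Freezing the slot makes it eligible for size-based eviction.
    cache.slot_cache(slot).unwrap().mark_slot_frozen();
    assert_eq!(cache.find_older_frozen_slots(0), vec![slot]);

    // Rooting queues the slot for flush; after flushing, the slot's cache
    // entry is drained with remove_slots_le().
    cache.add_root(slot);
    assert_eq!(cache.clear_roots().len(), 1);
    assert_eq!(cache.remove_slots_le(slot).len(), 1);
    cache.set_max_flush_root(slot);
    assert_eq!(cache.fetch_max_flush_root(), slot);
}
```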
 use crate::{
-    accounts_index::{AccountIndex, AccountsIndex, Ancestors, IndexKey, SlotList, SlotSlice},
-    append_vec::{AppendVec, StoredAccount, StoredMeta},
+    accounts_cache::{AccountsCache, CachedAccount, SlotCache},
+    accounts_index::{
+        AccountIndex, AccountsIndex, Ancestors, IndexKey, IsCached, SlotList, SlotSlice,
+    },
+    append_vec::{AppendVec, StoredAccountMeta, StoredMeta},
 };
 use blake3::traits::digest::Digest;
 use dashmap::DashMap;
 use lazy_static::lazy_static;
 use log::*;
-use rand::{thread_rng, Rng};
+use rand::{prelude::SliceRandom, thread_rng, Rng};
 use rayon::{prelude::*, ThreadPool};
 use serde::{Deserialize, Serialize};
 use solana_measure::measure::Measure;
@@ -38,11 +41,12 @@ use solana_sdk::{
     hash::{Hash, Hasher},
     pubkey::Pubkey,
 };
-use std::convert::TryFrom;
+use solana_vote_program::vote_state::MAX_LOCKOUT_HISTORY;
 use std::{
+    borrow::Cow,
     boxed::Box,
     collections::{BTreeMap, HashMap, HashSet},
-    convert::TryInto,
+    convert::{TryFrom, TryInto},
     io::{Error as IOError, Result as IOResult},
     ops::RangeBounds,
     path::{Path, PathBuf},
@@ -53,11 +57,34 @@ use std::{
 use tempfile::TempDir;
 
 const PAGE_SIZE: u64 = 4 * 1024;
+const MAX_RECYCLE_STORES: usize = 1000;
+const STORE_META_OVERHEAD: usize = 256;
+const MAX_CACHE_SLOTS: usize = 200;
+const FLUSH_CACHE_RANDOM_THRESHOLD: usize = MAX_LOCKOUT_HISTORY;
+
 pub const DEFAULT_FILE_SIZE: u64 = PAGE_SIZE * 1024;
 pub const DEFAULT_NUM_THREADS: u32 = 8;
 pub const DEFAULT_NUM_DIRS: u32 = 4;
-const MAX_RECYCLE_STORES: usize = 1000;
-const STORE_META_OVERHEAD: usize = 256;
+pub const SHRINK_RATIO: f64 = 0.80;
+
+// A specially reserved storage id just for entries in the cache, so that
+// operations that take a storage entry can maintain a common interface
+// when interacting with cached accounts. This id is "virtual" in that it
+// doesn't refer to an actual storage entry.
+const CACHE_VIRTUAL_STORAGE_ID: usize = AppendVecId::MAX;
+
+// A specially reserved write version (identifier for ordering writes in an AppendVec)
+// for entries in the cache, so that operations that take a storage entry can maintain
+// a common interface when interacting with cached accounts. This version is "virtual" in
+// that it doesn't actually map to an entry in an AppendVec.
+const CACHE_VIRTUAL_WRITE_VERSION: u64 = 0;
+
+// A specially reserved offset (represents an offset into an AppendVec)
+// for entries in the cache, so that operations that take a storage entry can maintain
+// a common interface when interacting with cached accounts. This offset is "virtual" in
+// that it doesn't actually map to an entry in an AppendVec.
+const CACHE_VIRTUAL_OFFSET: usize = 0;
+const CACHE_VIRTUAL_STORED_SIZE: usize = 0;
 
 lazy_static! {
     // FROZEN_ACCOUNT_PANIC is used to signal local_cluster that an AccountsDB panic has occurred,
@@ -91,10 +118,20 @@ pub struct AccountInfo {
     /// offset into the storage
     offset: usize,
+    /// needed to track shrink candidacy in bytes. Used to update the number
+    /// of alive bytes in an AppendVec as newer slots purge outdated entries
+    stored_size: usize,
+
     /// lamports in the account used when squashing kept for optimization
     /// purposes to remove accounts with zero balance.
lamports: u64, } +impl IsCached for AccountInfo { + fn is_cached(&self) -> bool { + self.store_id == CACHE_VIRTUAL_STORAGE_ID + } +} + /// An offset into the AccountsDB::storage vector pub type AppendVecId = usize; pub type SnapshotStorage = Vec>; @@ -107,6 +144,7 @@ type AccountSlots = HashMap>; type AppendVecOffsets = HashMap>; type ReclaimResult = (AccountSlots, AppendVecOffsets); type StorageFinder<'a> = Box Arc + 'a>; +type ShrinkCandidates = HashMap>>; trait Versioned { fn version(&self) -> u64; @@ -124,6 +162,104 @@ impl Versioned for (u64, AccountInfo) { } } +pub enum LoadedAccountAccessor<'a> { + Stored(Option<(Arc, usize)>), + Cached((&'a AccountsCache, Slot, &'a Pubkey)), +} + +impl<'a> LoadedAccountAccessor<'a> { + fn get_loaded_account(&self) -> Option { + match self { + LoadedAccountAccessor::Stored(storage_entry) => { + // May not be present if slot was cleaned up in between + storage_entry.as_ref().and_then(|(storage_entry, offset)| { + storage_entry + .get_stored_account_meta(*offset) + .map(LoadedAccount::Stored) + }) + } + LoadedAccountAccessor::Cached((cache, slot, pubkey)) => { + // May not be present if slot was cleaned up in between + cache.load(*slot, pubkey).map(|cached_account| { + LoadedAccount::Cached((**pubkey, Cow::Owned(cached_account))) + }) + } + } + } +} + +pub enum LoadedAccount<'a> { + Stored(StoredAccountMeta<'a>), + Cached((Pubkey, Cow<'a, CachedAccount>)), +} + +impl<'a> LoadedAccount<'a> { + pub fn owner(&self) -> &Pubkey { + match self { + LoadedAccount::Stored(stored_account_meta) => &stored_account_meta.account_meta.owner, + LoadedAccount::Cached((_, cached_account)) => &cached_account.account.owner, + } + } + + pub fn executable(&self) -> bool { + match self { + LoadedAccount::Stored(stored_account_meta) => { + stored_account_meta.account_meta.executable + } + LoadedAccount::Cached((_, cached_account)) => cached_account.account.executable, + } + } + + pub fn loaded_hash(&self) -> &Hash { + match self { + LoadedAccount::Stored(stored_account_meta) => &stored_account_meta.hash, + LoadedAccount::Cached((_, cached_account)) => &cached_account.hash, + } + } + + pub fn pubkey(&self) -> &Pubkey { + match self { + LoadedAccount::Stored(stored_account_meta) => &stored_account_meta.meta.pubkey, + LoadedAccount::Cached((pubkey, _)) => &pubkey, + } + } + + pub fn write_version(&self) -> u64 { + match self { + LoadedAccount::Stored(stored_account_meta) => stored_account_meta.meta.write_version, + LoadedAccount::Cached(_) => CACHE_VIRTUAL_WRITE_VERSION, + } + } + + pub fn compute_hash(&self, slot: Slot, cluster_type: &ClusterType, pubkey: &Pubkey) -> Hash { + match self { + LoadedAccount::Stored(stored_account_meta) => { + AccountsDB::hash_stored_account(slot, &stored_account_meta, cluster_type) + } + LoadedAccount::Cached((_, cached_account)) => { + AccountsDB::hash_account(slot, &cached_account.account, pubkey, cluster_type) + } + } + } + + pub fn stored_size(&self) -> usize { + match self { + LoadedAccount::Stored(stored_account_meta) => stored_account_meta.stored_size, + LoadedAccount::Cached(_) => CACHE_VIRTUAL_STORED_SIZE, + } + } + + pub fn account(self) -> Account { + match self { + LoadedAccount::Stored(stored_account_meta) => stored_account_meta.clone_account(), + LoadedAccount::Cached((_, cached_account)) => match cached_account { + Cow::Owned(cached_account) => cached_account.account, + Cow::Borrowed(cached_account) => cached_account.account.clone(), + }, + } + } +} + #[derive(Clone, Default, Debug)] pub struct AccountStorage(pub DashMap); @@ 
-196,6 +332,8 @@ pub struct AccountStorageEntry { /// This is used as a rough estimate for slot shrinking. As such a relaxed /// use case, this value ARE NOT strictly synchronized with count_and_status! approx_store_count: AtomicUsize, + + alive_bytes: AtomicUsize, } impl AccountStorageEntry { @@ -210,6 +348,7 @@ impl AccountStorageEntry { accounts, count_and_status: RwLock::new((0, AccountStorageStatus::Available)), approx_store_count: AtomicUsize::new(0), + alive_bytes: AtomicUsize::new(0), } } @@ -220,6 +359,7 @@ impl AccountStorageEntry { accounts: AppendVec::new_empty_map(accounts_current_len), count_and_status: RwLock::new((0, AccountStorageStatus::Available)), approx_store_count: AtomicUsize::new(0), + alive_bytes: AtomicUsize::new(0), } } @@ -251,6 +391,7 @@ impl AccountStorageEntry { self.slot.store(slot, Ordering::Release); self.id.store(id, Ordering::Relaxed); self.approx_store_count.store(0, Ordering::Relaxed); + self.alive_bytes.store(0, Ordering::Relaxed); } pub fn status(&self) -> AccountStorageStatus { @@ -265,6 +406,10 @@ impl AccountStorageEntry { self.approx_store_count.load(Ordering::Relaxed) } + pub fn alive_bytes(&self) -> usize { + self.alive_bytes.load(Ordering::SeqCst) + } + pub fn written_bytes(&self) -> u64 { self.accounts.len() as u64 } @@ -289,19 +434,15 @@ impl AccountStorageEntry { self.accounts.flush() } - fn get_account(&self, account_info: &AccountInfo) -> Option { - Some( - self.accounts - .get_account(account_info.offset)? - .0 - .clone_account(), - ) + fn get_stored_account_meta(&self, offset: usize) -> Option { + Some(self.accounts.get_account(offset)?.0) } - fn add_account(&self) { + fn add_account(&self, num_bytes: usize) { let mut count_and_status = self.count_and_status.write().unwrap(); *count_and_status = (count_and_status.0 + 1, count_and_status.1); self.approx_store_count.fetch_add(1, Ordering::Relaxed); + self.alive_bytes.fetch_add(num_bytes, Ordering::SeqCst); } fn try_available(&self) -> bool { @@ -316,7 +457,7 @@ impl AccountStorageEntry { } } - fn remove_account(&self) -> usize { + fn remove_account(&self, num_bytes: usize) -> usize { let mut count_and_status = self.count_and_status.write().unwrap(); let (mut count, mut status) = *count_and_status; @@ -345,6 +486,7 @@ impl AccountStorageEntry { self.append_vec_id(), ); + self.alive_bytes.fetch_sub(num_bytes, Ordering::SeqCst); count -= 1; *count_and_status = (count, status); count @@ -431,11 +573,14 @@ pub struct AccountsDB { pub storage: AccountStorage, + pub accounts_cache: AccountsCache, + recycle_stores: RwLock>>, /// distribute the accounts across storage lists pub next_id: AtomicUsize, - pub shrink_candidate_slots: Mutex>, + pub shrink_candidate_slots: Mutex, + pub shrink_candidate_slots_v1: Mutex>, pub(crate) write_version: AtomicU64, @@ -469,6 +614,8 @@ pub struct AccountsDB { pub cluster_type: Option, pub account_indexes: HashSet, + + pub caching_enabled: bool, } #[derive(Debug, Default)] @@ -512,7 +659,7 @@ impl solana_frozen_abi::abi_example::AbiExample for AccountsDB { let some_data_len = 5; let some_slot: Slot = 0; let account = Account::new(1, some_data_len, &key); - accounts_db.store(some_slot, &[(&key, &account)]); + accounts_db.store_uncached(some_slot, &[(&key, &account)]); accounts_db.add_root(0); accounts_db @@ -527,10 +674,12 @@ impl Default for AccountsDB { bank_hashes.insert(0, BankHashInfo::default()); AccountsDB { accounts_index: AccountsIndex::default(), - storage: AccountStorage(DashMap::new()), + storage: AccountStorage::default(), + accounts_cache: 
AccountsCache::default(), recycle_stores: RwLock::new(Vec::new()), next_id: AtomicUsize::new(0), - shrink_candidate_slots: Mutex::new(Vec::new()), + shrink_candidate_slots_v1: Mutex::new(Vec::new()), + shrink_candidate_slots: Mutex::new(HashMap::new()), write_version: AtomicU64::new(0), paths: vec![], shrink_paths: RwLock::new(None), @@ -548,19 +697,21 @@ impl Default for AccountsDB { stats: AccountsStats::default(), cluster_type: None, account_indexes: HashSet::new(), + caching_enabled: false, } } } impl AccountsDB { pub fn new(paths: Vec, cluster_type: &ClusterType) -> Self { - AccountsDB::new_with_indexes(paths, cluster_type, HashSet::new()) + AccountsDB::new_with_config(paths, cluster_type, HashSet::new(), false) } - pub fn new_with_indexes( + pub fn new_with_config( paths: Vec, cluster_type: &ClusterType, account_indexes: HashSet, + caching_enabled: bool, ) -> Self { let new = if !paths.is_empty() { Self { @@ -568,6 +719,7 @@ impl AccountsDB { temp_paths: None, cluster_type: Some(*cluster_type), account_indexes, + caching_enabled, ..Self::default() } } else { @@ -579,6 +731,7 @@ impl AccountsDB { temp_paths: Some(temp_dirs), cluster_type: Some(*cluster_type), account_indexes, + caching_enabled, ..Self::default() } }; @@ -668,18 +821,8 @@ impl AccountsDB { reclaim_result } - fn do_reset_uncleaned_roots( - &self, - candidates: &mut MutexGuard>, - max_clean_root: Option, - ) { - let previous_roots = self.accounts_index.reset_uncleaned_roots(max_clean_root); - candidates.extend(previous_roots); - } - - #[cfg(test)] - fn reset_uncleaned_roots(&self) { - self.do_reset_uncleaned_roots(&mut self.shrink_candidate_slots.lock().unwrap(), None); + fn do_reset_uncleaned_roots(&self, max_clean_root: Option) { + self.accounts_index.reset_uncleaned_roots(max_clean_root); } fn calc_delete_dependencies( @@ -753,21 +896,25 @@ impl AccountsDB { fn purge_keys_exact( &self, pubkey_to_slot_set: Vec<(Pubkey, HashSet)>, - ) -> (Vec<(u64, AccountInfo)>, Vec) { + ) -> Vec<(u64, AccountInfo)> { let mut reclaims = Vec::new(); let mut dead_keys = Vec::new(); for (pubkey, slots_set) in pubkey_to_slot_set { - let (new_reclaims, is_empty) = - self.accounts_index - .purge_exact(&pubkey, slots_set, &self.account_indexes); + let is_empty = self.accounts_index.purge_exact( + &pubkey, + &slots_set, + &mut reclaims, + &self.account_indexes, + ); if is_empty { dead_keys.push(pubkey); } - reclaims.extend(new_reclaims); } - (reclaims, dead_keys) + self.accounts_index + .handle_dead_keys(&dead_keys, &self.account_indexes); + reclaims } // Purge zero lamport accounts and older rooted account states as garbage @@ -786,8 +933,8 @@ impl AccountsDB { Some(std::cmp::min(min_scan_root, max_clean_root)) } }; - let mut candidates = self.shrink_candidate_slots.lock().unwrap(); + let mut candidates_v1 = self.shrink_candidate_slots_v1.lock().unwrap(); self.report_store_stats(); let mut accounts_scan = Measure::start("accounts_scan"); @@ -852,7 +999,12 @@ impl AccountsDB { let mut clean_old_rooted = Measure::start("clean_old_roots"); let (purged_account_slots, removed_accounts) = self.clean_old_rooted_accounts(purges_in_root, max_clean_root); - self.do_reset_uncleaned_roots(&mut candidates, max_clean_root); + + if self.caching_enabled { + self.do_reset_uncleaned_roots(max_clean_root); + } else { + self.do_reset_uncleaned_roots_v1(&mut candidates_v1, max_clean_root); + } clean_old_rooted.stop(); let mut store_counts_time = Measure::start("store_counts"); @@ -931,17 +1083,14 @@ impl AccountsDB { }) .collect(); - let (reclaims, dead_keys) = 
self.purge_keys_exact(pubkey_to_slot_set); - - self.accounts_index - .handle_dead_keys(&dead_keys, &self.account_indexes); - + let reclaims = self.purge_keys_exact(pubkey_to_slot_set); self.handle_reclaims(&reclaims, None, false, None); reclaims_time.stop(); datapoint_info!( "clean_accounts", ("accounts_scan", accounts_scan.as_us() as i64, i64), + ("clean_old_rooted", clean_old_rooted.as_us() as i64, i64), ("store_counts", store_counts_time.as_us() as i64, i64), ("purge_filter", purge_filter.as_us() as i64, i64), ("calc_deps", calc_deps_time.as_us() as i64, i64), @@ -967,6 +1116,8 @@ impl AccountsDB { /// `process_dead_slots`. For instance, on store, no slots should be cleaned up, /// but during the background clean accounts purges accounts from old rooted slots, /// so outdated slots may be removed. + /// * `reclaim_result` - Information about accounts that were removed from storage, does + /// not include accounts that were removed from the cache fn handle_reclaims( &self, reclaims: SlotSlice, @@ -1006,59 +1157,270 @@ impl AccountsDB { if dead_slots.is_empty() { return; } - let mut clean_dead_slots = Measure::start("reclaims::purge_slots"); - self.clean_dead_slots(&dead_slots, purged_account_slots); + let mut clean_dead_slots = Measure::start("reclaims::clean_dead_slots"); + self.clean_stored_dead_slots(&dead_slots, purged_account_slots); clean_dead_slots.stop(); - let mut purge_slots = Measure::start("reclaims::purge_slots"); - self.purge_slots(&dead_slots); - purge_slots.stop(); + let mut purge_removed_slots = Measure::start("reclaims::purge_removed_slots"); + self.purge_removed_slots_from_store(&dead_slots); + purge_removed_slots.stop(); + + // If the slot is dead, remove the need to shrink the storages as + // the storage entries will be purged. 
+ for slot in dead_slots { + self.shrink_candidate_slots.lock().unwrap().remove(slot); + } debug!( "process_dead_slots({}): {} {} {:?}", dead_slots.len(), clean_dead_slots, - purge_slots, + purge_removed_slots, dead_slots, ); } - fn do_shrink_stale_slot(&self, slot: Slot) -> usize { - self.do_shrink_slot(slot, false) - } - - fn do_shrink_slot_forced(&self, slot: Slot) { - self.do_shrink_slot(slot, true); - } - - fn shrink_stale_slot(&self, candidates: &mut MutexGuard>) -> usize { - let mut shrunken_account_total = 0; - let mut shrunk_slot_count = 0; - let start = Instant::now(); - let num_roots = self.accounts_index.num_roots(); - loop { - if let Some(slot) = self.do_next_shrink_slot(candidates) { - shrunken_account_total += self.do_shrink_stale_slot(slot); - } else { - return 0; + fn do_shrink_slot_stores<'a, I>(&'a self, slot: Slot, stores: I) + where + I: Iterator>, + { + debug!("do_shrink_slot_stores: slot: {}", slot); + let mut stored_accounts = vec![]; + for store in stores { + let mut start = 0; + while let Some((account, next)) = store.accounts.get_account(start) { + stored_accounts.push(( + account.meta.pubkey, + account.clone_account(), + *account.hash, + next - start, + (store.append_vec_id(), account.offset), + account.meta.write_version, + )); + start = next; } - if start.elapsed().as_millis() > 100 || shrunk_slot_count > num_roots / 10 { - debug!( - "do_shrink_stale_slot: {} {} {}us", - shrunk_slot_count, - candidates.len(), - start.elapsed().as_micros() - ); - break; - } - shrunk_slot_count += 1; } - shrunken_account_total + + let mut index_read_elapsed = Measure::start("index_read_elapsed"); + let alive_accounts: Vec<_> = { + stored_accounts + .iter() + .filter( + |( + pubkey, + _account, + _account_hash, + _storage_size, + (store_id, offset), + _write_version, + )| { + if let Some((locked_entry, _)) = self.accounts_index.get(pubkey, None, None) + { + locked_entry + .slot_list() + .iter() + .any(|(_slot, i)| i.store_id == *store_id && i.offset == *offset) + } else { + false + } + }, + ) + .collect() + }; + index_read_elapsed.stop(); + + let alive_total: u64 = alive_accounts + .iter() + .map( + |(_pubkey, _account, _account_hash, account_size, _location, _write_version)| { + *account_size as u64 + }, + ) + .sum(); + let aligned_total: u64 = self.page_align(alive_total); + + let total_starting_accounts = stored_accounts.len(); + let total_accounts_after_shrink = alive_accounts.len(); + debug!( + "shrinking: slot: {}, total_starting_accounts: {} => total_accounts_after_shrink: {} ({} bytes; aligned to: {})", + slot, + total_starting_accounts, + total_accounts_after_shrink, + alive_total, + aligned_total + ); + + let mut rewrite_elapsed = Measure::start("rewrite_elapsed"); + let mut dead_storages = vec![]; + let mut find_alive_elapsed = 0; + let mut create_and_insert_store_elapsed = 0; + let mut write_storage_elapsed = 0; + let mut store_accounts_timing = StoreAccountsTiming::default(); + if aligned_total > 0 { + let mut start = Measure::start("find_alive_elapsed"); + let mut accounts = Vec::with_capacity(alive_accounts.len()); + let mut hashes = Vec::with_capacity(alive_accounts.len()); + let mut write_versions = Vec::with_capacity(alive_accounts.len()); + + for (pubkey, account, account_hash, _size, _location, write_version) in &alive_accounts + { + accounts.push((pubkey, account)); + hashes.push(*account_hash); + write_versions.push(*write_version); + } + start.stop(); + find_alive_elapsed = start.as_us(); + + let mut start = 
Measure::start("create_and_insert_store_elapsed"); + let shrunken_store = if let Some(new_store) = + self.try_recycle_and_insert_store(slot, aligned_total, aligned_total + 1024) + { + new_store + } else { + let maybe_shrink_paths = self.shrink_paths.read().unwrap(); + if let Some(ref shrink_paths) = *maybe_shrink_paths { + self.create_and_insert_store_with_paths( + slot, + aligned_total, + "shrink-w-path", + shrink_paths, + ) + } else { + self.create_and_insert_store(slot, aligned_total, "shrink") + } + }; + start.stop(); + create_and_insert_store_elapsed = start.as_us(); + + // here, we're writing back alive_accounts. That should be an atomic operation + // without use of rather wide locks in this whole function, because we're + // mutating rooted slots; There should be no writers to them. + store_accounts_timing = self.store_accounts_custom( + slot, + &accounts, + &hashes, + Some(Box::new(move |_, _| shrunken_store.clone())), + Some(Box::new(write_versions.into_iter())), + false, + ); + + // `store_accounts_custom()` above may have purged accounts from some + // other storage entries (the ones that were just overwritten by this + // new storage entry). This means some of those stores might have caused + // this slot to be readded to `self.shrink_candidate_slots`, so delete + // those here + self.shrink_candidate_slots.lock().unwrap().remove(&slot); + + // Purge old, overwritten storage entries + let mut start = Measure::start("write_storage_elapsed"); + if let Some(slot_stores) = self.storage.get_slot_stores(slot) { + slot_stores.write().unwrap().retain(|_key, store| { + if store.count() == 0 { + dead_storages.push(store.clone()); + } + store.count() > 0 + }); + } + start.stop(); + write_storage_elapsed = start.as_us(); + } + rewrite_elapsed.stop(); + + let mut recycle_stores_write_time = Measure::start("recycle_stores_write_time"); + let mut recycle_stores = self.recycle_stores.write().unwrap(); + recycle_stores_write_time.stop(); + + let mut drop_storage_entries_elapsed = Measure::start("drop_storage_entries_elapsed"); + if recycle_stores.len() < MAX_RECYCLE_STORES { + recycle_stores.extend(dead_storages); + drop(recycle_stores); + } else { + self.stats + .dropped_stores + .fetch_add(recycle_stores.len() as u64, Ordering::Relaxed); + drop(recycle_stores); + drop(dead_storages); + } + drop_storage_entries_elapsed.stop(); + + datapoint_info!( + "do_shrink_slot_stores_time", + ("index_read_elapsed", index_read_elapsed.as_us(), i64), + ("find_alive_elapsed", find_alive_elapsed, i64), + ( + "create_and_insert_store_elapsed", + create_and_insert_store_elapsed, + i64 + ), + ( + "store_accounts_elapsed", + store_accounts_timing.store_accounts_elapsed, + i64 + ), + ( + "update_index_elapsed", + store_accounts_timing.update_index_elapsed, + i64 + ), + ( + "handle_reclaims_elapsed", + store_accounts_timing.handle_reclaims_elapsed, + i64 + ), + ("write_storage_elapsed", write_storage_elapsed, i64), + ("rewrite_elapsed", rewrite_elapsed.as_us(), i64), + ( + "drop_storage_entries_elapsed", + drop_storage_entries_elapsed.as_us(), + i64 + ), + ( + "recycle_stores_write_time", + recycle_stores_write_time.as_us(), + i64 + ), + ("total_starting_accounts", total_starting_accounts, i64), + ( + "total_accounts_after_shrink", + total_accounts_after_shrink, + i64 + ) + ); } // Reads all accounts in given slot's AppendVecs and filter only to alive, // then create a minimum AppendVec filled with the alive. 
-    fn do_shrink_slot(&self, slot: Slot, forced: bool) -> usize {
+    fn shrink_slot_forced(&self, slot: Slot) -> usize {
+        debug!("shrink_slot_forced: slot: {}", slot);
+
+        if let Some(stores_lock) = self.storage.get_slot_stores(slot) {
+            let stores: Vec<Arc<AccountStorageEntry>> =
+                stores_lock.read().unwrap().values().cloned().collect();
+            let mut alive_count = 0;
+            let mut stored_count = 0;
+            for store in &stores {
+                alive_count += store.count();
+                stored_count += store.approx_stored_count();
+            }
+            if alive_count == stored_count && stores.len() == 1 {
+                trace!(
+                    "shrink_slot_forced ({}): not able to shrink at all: alive/stored: {} / {}",
+                    slot,
+                    alive_count,
+                    stored_count,
+                );
+                return 0;
+            }
+            self.do_shrink_slot_stores(slot, stores.iter());
+            alive_count
+        } else {
+            0
+        }
+    }
+
+    // Reads all accounts in the given slot's AppendVecs, filters to only the
+    // alive accounts, then creates a minimum AppendVec filled with those alive
+    // accounts.
+    fn do_shrink_slot_v1(&self, slot: Slot, forced: bool) -> usize {
         trace!("shrink_stale_slot: slot: {}", slot);
         let mut stored_accounts = vec![];
@@ -1214,6 +1576,7 @@ impl AccountsDB {
             &hashes,
             Some(Box::new(move |_, _| shrunken_store.clone())),
             Some(Box::new(write_versions.into_iter())),
+            false,
         );
 
         let mut start = Measure::start("write_storage_elapsed");
@@ -1288,8 +1651,55 @@ impl AccountsDB {
         alive_accounts.len()
     }
 
+    fn do_reset_uncleaned_roots_v1(
+        &self,
+        candidates: &mut MutexGuard<Vec<Slot>>,
+        max_clean_root: Option<Slot>,
+    ) {
+        let previous_roots = self.accounts_index.reset_uncleaned_roots(max_clean_root);
+        candidates.extend(previous_roots);
+    }
+
+    #[cfg(test)]
+    fn reset_uncleaned_roots_v1(&self) {
+        self.do_reset_uncleaned_roots_v1(&mut self.shrink_candidate_slots_v1.lock().unwrap(), None);
+    }
+
+    fn do_shrink_stale_slot_v1(&self, slot: Slot) -> usize {
+        self.do_shrink_slot_v1(slot, false)
+    }
+
+    fn do_shrink_slot_forced_v1(&self, slot: Slot) {
+        self.do_shrink_slot_v1(slot, true);
+    }
+
+    fn shrink_stale_slot_v1(&self, candidates: &mut MutexGuard<Vec<Slot>>) -> usize {
+        let mut shrunken_account_total = 0;
+        let mut shrunk_slot_count = 0;
+        let start = Instant::now();
+        let num_roots = self.accounts_index.num_roots();
+        loop {
+            if let Some(slot) = self.do_next_shrink_slot_v1(candidates) {
+                shrunken_account_total += self.do_shrink_stale_slot_v1(slot);
+            } else {
+                return 0;
+            }
+            if start.elapsed().as_millis() > 100 || shrunk_slot_count > num_roots / 10 {
+                debug!(
+                    "do_shrink_stale_slot_v1: {} {} {}us",
+                    shrunk_slot_count,
+                    candidates.len(),
+                    start.elapsed().as_micros()
+                );
+                break;
+            }
+            shrunk_slot_count += 1;
+        }
+        shrunken_account_total
+    }
+
     // Infinitely returns rooted roots in cyclic order
-    fn do_next_shrink_slot(&self, candidates: &mut MutexGuard<Vec<Slot>>) -> Option<Slot> {
+    fn do_next_shrink_slot_v1(&self, candidates: &mut MutexGuard<Vec<Slot>>) -> Option<Slot> {
         // At this point, a lock (= candidates) is ensured to be held to keep
         // do_reset_uncleaned_roots() (in clean_accounts()) from updating candidates.
         // Also, candidates in the lock may be swapped here if it's empty.
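+        // Illustrative behavior (a sketch, not added by this patch): with rooted
+        // slots {5, 7, 9}, successive calls yield 5, 7, 9, 5, 7, 9, ..., with
+        // `candidates` refilled from all_root_slots_in_index() whenever it drains.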
@@ -1308,22 +1718,14 @@ impl AccountsDB {
     }
 
     #[cfg(test)]
-    fn next_shrink_slot(&self) -> Option<Slot> {
-        let mut candidates = self.shrink_candidate_slots.lock().unwrap();
-        self.do_next_shrink_slot(&mut candidates)
+    fn next_shrink_slot_v1(&self) -> Option<Slot> {
+        let mut candidates = self.shrink_candidate_slots_v1.lock().unwrap();
+        self.do_next_shrink_slot_v1(&mut candidates)
     }
 
-    fn all_root_slots_in_index(&self) -> Vec<Slot> {
-        self.accounts_index.all_roots()
-    }
-
-    fn all_slots_in_storage(&self) -> Vec<Slot> {
-        self.storage.all_slots()
-    }
-
-    pub fn process_stale_slot(&self) -> usize {
+    pub fn process_stale_slot_v1(&self) -> usize {
         let mut measure = Measure::start("stale_slot_shrink-ms");
-        let candidates = self.shrink_candidate_slots.try_lock();
+        let candidates = self.shrink_candidate_slots_v1.try_lock();
         if candidates.is_err() {
             // skip and return immediately if locked by clean_accounts()
             // the calling background thread will just retry later.
@@ -1333,7 +1735,7 @@ impl AccountsDB {
         // with clean_accounts().
         let mut candidates = candidates.unwrap();
 
-        let count = self.shrink_stale_slot(&mut candidates);
+        let count = self.shrink_stale_slot_v1(&mut candidates);
         measure.stop();
         inc_new_counter_info!("stale_slot_shrink-ms", measure.as_ms() as usize);
 
@@ -1341,15 +1743,42 @@ impl AccountsDB {
     }
 
     #[cfg(test)]
-    fn shrink_all_stale_slots(&self) {
+    fn shrink_all_stale_slots_v1(&self) {
         for slot in self.all_slots_in_storage() {
-            self.do_shrink_stale_slot(slot);
+            self.do_shrink_stale_slot_v1(slot);
         }
     }
 
+    fn all_slots_in_storage(&self) -> Vec<Slot> {
+        self.storage.all_slots()
+    }
+
+    fn all_root_slots_in_index(&self) -> Vec<Slot> {
+        self.accounts_index.all_roots()
+    }
+
+    pub fn shrink_candidate_slots(&self) -> usize {
+        let shrink_slots = std::mem::replace(
+            &mut *self.shrink_candidate_slots.lock().unwrap(),
+            HashMap::new(),
+        );
+        let num_candidates = shrink_slots.len();
+        for (slot, slot_shrink_candidates) in shrink_slots {
+            let mut measure = Measure::start("shrink_candidate_slots-ms");
+            self.do_shrink_slot_stores(slot, slot_shrink_candidates.values());
+            measure.stop();
+            inc_new_counter_info!("shrink_candidate_slots-ms", measure.as_ms() as usize);
+        }
+        num_candidates
+    }
+
     pub fn shrink_all_slots(&self) {
         for slot in self.all_slots_in_storage() {
-            self.do_shrink_slot_forced(slot);
+            if self.caching_enabled {
+                self.shrink_slot_forced(slot);
+            } else {
+                self.do_shrink_slot_forced_v1(slot);
+            }
         }
     }
 
@@ -1362,8 +1791,14 @@ impl AccountsDB {
         self.accounts_index
             .scan_accounts(ancestors, |pubkey, (account_info, slot)| {
                 let account_slot = self
-                    .get_account_from_storage(slot, account_info)
-                    .map(|account| (pubkey, account, slot));
+                    .get_account_accessor_from_cache_or_storage(
+                        slot,
+                        pubkey,
+                        account_info.store_id,
+                        account_info.offset,
+                    )
+                    .get_loaded_account()
+                    .map(|loaded_account| (pubkey, loaded_account.account(), slot));
                 scan_func(&mut collector, account_slot)
             });
         collector
@@ -1378,8 +1813,14 @@ impl AccountsDB {
         self.accounts_index
             .unchecked_scan_accounts(ancestors, |pubkey, (account_info, slot)| {
                 let account_slot = self
-                    .get_account_from_storage(slot, account_info)
-                    .map(|account| (pubkey, account, slot));
+                    .get_account_accessor_from_cache_or_storage(
+                        slot,
+                        pubkey,
+                        account_info.store_id,
+                        account_info.offset,
+                    )
+                    .get_loaded_account()
+                    .map(|loaded_account| (pubkey, loaded_account.account(), slot));
                 scan_func(&mut collector, account_slot)
             });
         collector
@@ -1397,8 +1838,14 @@ impl AccountsDB {
             range,
             |pubkey, (account_info, slot)| {
                 let account_slot = self
-                    .get_account_from_storage(slot, account_info)
-                    .map(|account| (pubkey, account, slot));
+                    .get_account_accessor_from_cache_or_storage(
+                        slot,
+                        pubkey,
+                        account_info.store_id,
+                        account_info.offset,
+                    )
+                    .get_loaded_account()
+                    .map(|loaded_account| (pubkey, loaded_account.account(), slot));
                 scan_func(&mut collector, account_slot)
             },
         );
@@ -1421,8 +1868,14 @@ impl AccountsDB {
             index_key,
             |pubkey, (account_info, slot)| {
                 let account_slot = self
-                    .get_account_from_storage(slot, account_info)
-                    .map(|account| (pubkey, account, slot));
+                    .get_account_accessor_from_cache_or_storage(
+                        slot,
+                        pubkey,
+                        account_info.store_id,
+                        account_info.offset,
+                    )
+                    .get_loaded_account()
+                    .map(|loaded_account| (pubkey, loaded_account.account(), slot));
                 scan_func(&mut collector, account_slot)
             },
         );
@@ -1432,7 +1885,7 @@ impl AccountsDB {
 
     /// Scan a specific slot through all the account storage in parallel
     pub fn scan_account_storage<F, B>(&self, slot: Slot, scan_func: F) -> Vec<B>
     where
-        F: Fn(&StoredAccount, AppendVecId, &mut B) + Send + Sync,
+        F: Fn(LoadedAccount, AppendVecId, &mut B) + Send + Sync,
         B: Send + Default,
     {
         self.scan_account_storage_inner(slot, scan_func)
     }
 
     fn scan_account_storage_inner<F, B>(&self, slot: Slot, scan_func: F) -> Vec<B>
     where
-        F: Fn(&StoredAccount, AppendVecId, &mut B) + Send + Sync,
+        F: Fn(LoadedAccount, AppendVecId, &mut B) + Send + Sync,
         B: Send + Default,
     {
-        let storage_maps: Vec<Arc<AccountStorageEntry>> = self
-            .storage
-            .get_slot_stores(slot)
-            .map(|res| res.read().unwrap().values().cloned().collect())
-            .unwrap_or_default();
-        self.thread_pool.install(|| {
-            storage_maps
-                .into_par_iter()
-                .map(|storage| {
-                    let accounts = storage.accounts.accounts(0);
-                    let mut retval = B::default();
-                    accounts.into_iter().for_each(|stored_account| {
-                        scan_func(&stored_account, storage.append_vec_id(), &mut retval)
-                    });
-                    retval
-                })
-                .collect()
-        })
+        if let Some(slot_cache) = self.accounts_cache.slot_cache(slot) {
+            // If we see the slot in the cache, then all the account information
+            // is in this cached slot
+            let mut retval = B::default();
+            for cached_account in slot_cache.iter() {
+                scan_func(
+                    LoadedAccount::Cached((
+                        *cached_account.key(),
+                        Cow::Borrowed(cached_account.value()),
+                    )),
+                    CACHE_VIRTUAL_STORAGE_ID,
+                    &mut retval,
+                );
+            }
+            vec![retval]
+        } else {
+            // If the slot is not in the cache, then all the account information must have
+            // been flushed. This is guaranteed because we only remove the rooted slot from
+            // the cache *after* we've finished flushing in `flush_slot_cache`.
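+            // Illustrative ordering (paraphrasing `flush_slot_cache` later in this
+            // patch; a sketch, not new behavior):
+            //   1. cached accounts for the slot are written into a new AppendVec
+            //   2. the index is updated to point at that storage
+            //   3. only then is the slot removed from the cache
+            // so a reader that misses the cache here can rely on storage being complete.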
+            let storage_maps: Vec<Arc<AccountStorageEntry>> = self
+                .storage
+                .get_slot_stores(slot)
+                .map(|res| res.read().unwrap().values().cloned().collect())
+                .unwrap_or_default();
+            self.thread_pool.install(|| {
+                storage_maps
+                    .into_par_iter()
+                    .map(|storage| {
+                        let accounts = storage.accounts.accounts(0);
+                        let mut retval = B::default();
+                        accounts.into_iter().for_each(|stored_account| {
+                            scan_func(
+                                LoadedAccount::Stored(stored_account),
+                                storage.append_vec_id(),
+                                &mut retval,
+                            )
+                        });
+                        retval
+                    })
+                    .collect()
+            })
+        }
     }
 
     pub fn set_hash(&self, slot: Slot, parent_slot: Slot) {
@@ -1482,8 +1959,17 @@ impl AccountsDB {
     }
 
     pub fn load(&self, ancestors: &Ancestors, pubkey: &Pubkey) -> Option<(Account, Slot)> {
+        self.do_load(ancestors, pubkey, None)
+    }
+
+    fn do_load(
+        &self,
+        ancestors: &Ancestors,
+        pubkey: &Pubkey,
+        max_root: Option<Slot>,
+    ) -> Option<(Account, Slot)> {
         let (slot, store_id, offset) = {
-            let (lock, index) = self.accounts_index.get(pubkey, Some(ancestors), None)?;
+            let (lock, index) = self.accounts_index.get(pubkey, Some(ancestors), max_root)?;
             let slot_list = lock.slot_list();
             let (
                 slot,
@@ -1496,14 +1982,24 @@ impl AccountsDB {
         };
 
         //TODO: thread this as a ref
+        self.get_account_accessor_from_cache_or_storage(slot, pubkey, store_id, offset)
+            .get_loaded_account()
+            .map(|loaded_account| (loaded_account.account(), slot))
+    }
+
+    #[cfg(test)]
+    pub fn alive_account_count_in_slot(&self, slot: Slot) -> usize {
         self.storage
-            .get_account_storage_entry(slot, store_id)
-            .and_then(|store| {
-                store
-                    .accounts
-                    .get_account(offset)
-                    .map(|account| (account.0.clone_account(), slot))
+            .get_slot_stores(slot)
+            .map(|storages| {
+                storages
+                    .read()
+                    .unwrap()
+                    .values()
+                    .map(|s| s.count_and_status.read().unwrap().0)
+                    .sum()
             })
+            .unwrap_or(0)
     }
 
     pub fn load_account_hash(&self, ancestors: &Ancestors, pubkey: &Pubkey) -> Hash {
@@ -1523,24 +2019,36 @@ impl AccountsDB {
             // lock released here
         };
 
-        let entry = self
-            .storage
-            .get_account_storage_entry(slot, store_id)
-            .unwrap();
-        let account = entry.accounts.get_account(offset);
-        *account.as_ref().unwrap().0.hash
+        self.get_account_accessor_from_cache_or_storage(slot, pubkey, store_id, offset)
+            .get_loaded_account()
+            .map(|loaded_account| *loaded_account.loaded_hash())
+            .unwrap()
     }
 
     pub fn load_slow(&self, ancestors: &Ancestors, pubkey: &Pubkey) -> Option<(Account, Slot)> {
         self.load(ancestors, pubkey)
     }
 
-    fn get_account_from_storage(&self, slot: Slot, account_info: &AccountInfo) -> Option<Account> {
-        let account_storage_entry = self
-            .storage
-            .get_account_storage_entry(slot, account_info.store_id);
-        account_storage_entry
-            .and_then(|account_storage_entry| account_storage_entry.get_account(account_info))
+    // Only safe to use the `get_account_accessor_from_cache_or_storage() -> get_loaded_account()`
+    // pattern if you're holding the AccountsIndex lock for `pubkey`; otherwise, a cache
+    // flush could happen between `get_account_accessor_from_cache_or_storage()` and
+    // `get_loaded_account()`, and the `LoadedAccountAccessor::Cached((&self.accounts_cache, slot, pubkey))`
+    // returned here won't be able to find a slot cache entry for that `slot`.
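+    // Illustrative sketch of the safe pattern described above (hypothetical
+    // caller, not code added by this patch): keep the index entry locked across
+    // both calls:
+    //
+    //     let (locked_entry, index) = self.accounts_index.get(pubkey, Some(ancestors), None)?;
+    //     let (slot, info) = locked_entry.slot_list()[index];
+    //     let loaded = self
+    //         .get_account_accessor_from_cache_or_storage(slot, pubkey, info.store_id, info.offset)
+    //         .get_loaded_account();
+    //     // `locked_entry` is still alive here, so a cache flush cannot have
+    //     // removed `slot`'s cache entry out from under `loaded`.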
+    fn get_account_accessor_from_cache_or_storage<'a>(
+        &'a self,
+        slot: Slot,
+        pubkey: &'a Pubkey,
+        store_id: usize,
+        offset: usize,
+    ) -> LoadedAccountAccessor<'a> {
+        if store_id == CACHE_VIRTUAL_STORAGE_ID {
+            LoadedAccountAccessor::Cached((&self.accounts_cache, slot, pubkey))
+        } else {
+            let account_storage_entry = self.storage.get_account_storage_entry(slot, store_id);
+            LoadedAccountAccessor::Stored(
+                account_storage_entry.map(|account_storage_entry| (account_storage_entry, offset)),
+            )
+        }
     }
 
     fn try_recycle_and_insert_store(
         &self,
@@ -1713,6 +2221,10 @@ impl AccountsDB {
             self.page_align(size),
         ));
 
+        if store.append_vec_id() == CACHE_VIRTUAL_STORAGE_ID {
+            panic!("We've run out of storage ids!");
+        }
+
         debug!(
             "creating store: {} slot: {} len: {} size: {} from: {} path: {:?}",
             store.append_vec_id(),
@@ -1761,10 +2273,11 @@ impl AccountsDB {
             .or_insert(Arc::new(RwLock::new(HashMap::new())))
             .clone());
 
-        slot_storages
+        assert!(slot_storages
             .write()
             .unwrap()
-            .insert(store.append_vec_id(), store);
+            .insert(store.append_vec_id(), store)
+            .is_none());
     }
 
     pub fn purge_slot(&self, slot: Slot) {
@@ -1801,18 +2314,24 @@ impl AccountsDB {
         recycle_stores_write_time.as_us()
     }
 
-    fn purge_slots(&self, slots: &HashSet<Slot>) {
-        //add_root should be called first
-        let non_roots: Vec<_> = slots
-            .iter()
-            .filter(|slot| !self.accounts_index.is_root(**slot))
-            .collect();
+    /// # Arguments
+    /// * `removed_slots` - Slots that were previously rooted but just removed
+    fn purge_removed_slots_from_store(&self, removed_slots: &HashSet<Slot>) {
+        // Check that all slots in `removed_slots` are no longer rooted
+        let mut safety_checks_elapsed = Measure::start("safety_checks_elapsed");
+        for slot in removed_slots.iter() {
+            assert!(!self.accounts_index.is_root(*slot))
+        }
+        safety_checks_elapsed.stop();
+
+        // Purge the storage entries of the removed slots
+        let mut remove_storages_elapsed = Measure::start("remove_storages_elapsed");
         let mut all_removed_slot_storages = vec![];
         let mut total_removed_storage_entries = 0;
         let mut total_removed_bytes = 0;
-
-        let mut remove_storages_elapsed = Measure::start("remove_storages_elapsed");
-        for slot in non_roots {
+        for slot in removed_slots {
+            // The removed slot must already have been flushed from the cache
+            assert!(self.accounts_cache.slot_cache(*slot).is_none());
             if let Some((_, slot_removed_storages)) = self.storage.0.remove(&slot) {
                 {
                     let r_slot_removed_storages = slot_removed_storages.read().unwrap();
@@ -1838,6 +2357,107 @@ impl AccountsDB {
         drop(all_removed_slot_storages);
         drop_storage_entries_elapsed.stop();
 
+        datapoint_info!(
+            "purge_slots_time",
+            ("safety_checks_elapsed", safety_checks_elapsed.as_us(), i64),
+            (
+                "remove_storages_elapsed",
+                remove_storages_elapsed.as_us(),
+                i64
+            ),
+            (
+                "drop_storage_entries_elapsed",
+                drop_storage_entries_elapsed.as_us(),
+                i64
+            ),
+            ("num_slots_removed", num_slots_removed, i64),
+            (
+                "total_removed_storage_entries",
+                total_removed_storage_entries,
+                i64
+            ),
+            ("total_removed_bytes", total_removed_bytes, i64),
+            (
+                "recycle_stores_write_elapsed",
+                recycle_stores_write_time,
+                i64
+            ),
+        );
+    }
+
+    fn purge_slot_cache_keys(&self, dead_slot: Slot, slot_cache: SlotCache) {
+        // Slot purged from cache should not exist in the backing store
+        assert!(self.storage.get_slot_stores(dead_slot).is_none());
+        let dead_slots: HashSet<Slot> = vec![dead_slot].into_iter().collect();
+        let mut purged_slot_pubkeys: HashSet<(Slot, Pubkey)> = HashSet::new();
+        let pubkey_to_slot_set: Vec<(Pubkey, HashSet<Slot>)> = slot_cache
+            .iter()
+            .map(|account| {
+                purged_slot_pubkeys.insert((dead_slot, *account.key()));
+                (*account.key(), dead_slots.clone())
+            })
+            .collect();
+        let num_purged_keys = pubkey_to_slot_set.len();
+        let reclaims = self.purge_keys_exact(pubkey_to_slot_set);
+        assert_eq!(reclaims.len(), num_purged_keys);
+        self.finalize_dead_slot_removal(&dead_slots, purged_slot_pubkeys, None);
+    }
+
+    fn purge_slots(&self, slots: &HashSet<Slot>) {
+        //add_root should be called first
+        let non_roots: Vec<_> = slots
+            .iter()
+            .filter(|slot| !self.accounts_index.is_root(**slot))
+            .collect();
+        let mut all_removed_slot_storages = vec![];
+        let mut total_removed_storage_entries = 0;
+        let mut total_removed_bytes = 0;
+
+        let mut remove_storages_elapsed = Measure::start("remove_storages_elapsed");
+
+        for remove_slot in non_roots {
+            if let Some(slot_cache) = self.accounts_cache.remove_slot(*remove_slot) {
+                // If the slot is still in the cache, remove it from the cache and
+                // purge its keys from the accounts index
+                self.purge_slot_cache_keys(*remove_slot, slot_cache);
+            } else if let Some((_, slot_removed_storages)) = self.storage.0.remove(&remove_slot) {
+                // Because AccountsBackgroundService synchronously flushes from the accounts cache
+                // and handles all Bank::drop() (the cleanup function that leads to this
+                // function call), we don't need to worry about an overlapping cache flush
+                // with this function call. This means, if we get into this case, we can be
+                // confident that the entire state for this slot has been flushed to the storage
+                // already.
+
+                // Note this only cleans up the storage entries. The accounts index cleaning
+                // (removing from the slot list, decrementing the account ref count), is handled in
+                // clean_accounts() -> purge_older_root_entries()
+                {
+                    let r_slot_removed_storages = slot_removed_storages.read().unwrap();
+                    total_removed_storage_entries += r_slot_removed_storages.len();
+                    total_removed_bytes += r_slot_removed_storages
+                        .values()
+                        .map(|i| i.accounts.capacity())
+                        .sum::<u64>();
+                }
+                all_removed_slot_storages.push(slot_removed_storages.clone());
+            }
+            // It should not be possible for a slot to be in neither the cache nor
+            // storage. Even in a slot with all ticks, `Bank::new_from_parent()`
+            // immediately stores some sysvars on bank creation.
+        }
+        remove_storages_elapsed.stop();
+
+        let num_slots_removed = all_removed_slot_storages.len();
+
+        let recycle_stores_write_time =
+            self.recycle_slot_stores(total_removed_storage_entries, &all_removed_slot_storages);
+
+        let mut drop_storage_entries_elapsed = Measure::start("drop_storage_entries_elapsed");
+        // Backing mmaps for removed storage entries are explicitly dropped here
+        // outside of any locks
+        drop(all_removed_slot_storages);
+        drop_storage_entries_elapsed.stop();
+
         datapoint_info!(
             "purge_slots_time",
             (
@@ -1865,26 +2485,45 @@ impl AccountsDB {
         );
     }
 
+    // TODO: This is currently:
+    // 1. Unsafe with scan because it can remove a slot in the middle
+    // of a scan.
+    // 2. Doesn't handle cache flushes that happen during the slot deletion (see comment below).
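+    // Illustrative race for (2) above (a sketch of the hazard, not a fix): if
+    // `flush_slot_cache(S)` is in flight when this function pulls S out of the
+    // cache, accounts already appended by the flush survive in storage, leaving
+    // a partial version of S behind that the index purge below never sees.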
     pub fn remove_unrooted_slot(&self, remove_slot: Slot) {
         if self.accounts_index.is_root(remove_slot) {
             panic!("Trying to remove accounts for rooted slot {}", remove_slot);
         }
 
+        if let Some(slot_cache) = self.accounts_cache.remove_slot(remove_slot) {
+            // If the slot is still in the cache, remove it from the cache
+            self.purge_slot_cache_keys(remove_slot, slot_cache);
+        }
+
+        // TODO: Handle if the slot was flushed to storage while we were removing the cached
+        // slot above, i.e. it's possible the storage contains a partial version of the current
+        // slot. One way to handle this is to augment slots to contain a "version". That way,
+        // 1) We clean older versions via the natural clean() pipeline
+        // without having to call this function out of band.
+        // 2) This deletion doesn't have to block on scan
+        // Reads will then always read the latest version of a slot. Scans will also know
+        // which version their parents are, because banks will also be augmented with this
+        // version, which handles cases where a deletion of one version happens in the middle
+        // of the scan.
         let pubkey_sets: Vec<HashSet<Pubkey>> = self.scan_account_storage(
             remove_slot,
-            |stored_account: &StoredAccount, _, accum: &mut HashSet<Pubkey>| {
-                accum.insert(stored_account.meta.pubkey);
+            |loaded_account: LoadedAccount, _, accum: &mut HashSet<Pubkey>| {
+                accum.insert(*loaded_account.pubkey());
             },
         );
 
         // Purge this slot from the accounts index
+        let purge_slot: HashSet<Slot> = vec![remove_slot].into_iter().collect();
         let mut reclaims = vec![];
         {
             let pubkeys = pubkey_sets.iter().flatten();
             for pubkey in pubkeys {
-                self.accounts_index.clean_unrooted_entries_by_slot(
-                    remove_slot,
+                self.accounts_index.purge_exact(
                     pubkey,
+                    &purge_slot,
                     &mut reclaims,
                     &self.account_indexes,
                 );
@@ -1909,7 +2548,7 @@ impl AccountsDB {
 
     pub fn hash_stored_account(
         slot: Slot,
-        account: &StoredAccount,
+        account: &StoredAccountMeta,
         cluster_type: &ClusterType,
     ) -> Hash {
         let include_owner = Self::include_owner(cluster_type, slot);
@@ -2080,58 +2719,39 @@ impl AccountsDB {
             .fetch_add(count as u64, Ordering::Relaxed)
     }
 
-    fn store_accounts_to<
-        F: FnMut(Slot, usize) -> Arc<AccountStorageEntry>,
-        P: Iterator<Item = u64>,
-    >(
+    fn write_accounts_to_storage<F: FnMut(Slot, usize) -> Arc<AccountStorageEntry>>(
         &self,
         slot: Slot,
-        accounts: &[(&Pubkey, &Account)],
         hashes: &[Hash],
         mut storage_finder: F,
-        mut write_version_producer: P,
+        accounts_and_meta_to_store: &[(StoredMeta, &Account)],
     ) -> Vec<AccountInfo> {
-        let default_account = Account::default();
-        let with_meta: Vec<(StoredMeta, &Account)> = accounts
-            .iter()
-            .map(|(pubkey, account)| {
-                let account = if account.lamports == 0 {
-                    &default_account
-                } else {
-                    *account
-                };
-                let data_len = account.data.len() as u64;
-
-                let meta = StoredMeta {
-                    write_version: write_version_producer.next().unwrap(),
-                    pubkey: **pubkey,
-                    data_len,
-                };
-                (meta, account)
-            })
-            .collect();
-        let mut infos: Vec<AccountInfo> = Vec::with_capacity(with_meta.len());
+        assert_eq!(hashes.len(), accounts_and_meta_to_store.len());
+        let mut infos: Vec<AccountInfo> = Vec::with_capacity(accounts_and_meta_to_store.len());
         let mut total_append_accounts_us = 0;
         let mut total_storage_find_us = 0;
-        while infos.len() < with_meta.len() {
+        while infos.len() < accounts_and_meta_to_store.len() {
             let mut storage_find = Measure::start("storage_finder");
             let storage = storage_finder(
                 slot,
-                with_meta[infos.len()].1.data.len() + STORE_META_OVERHEAD,
+                accounts_and_meta_to_store[infos.len()].1.data.len() + STORE_META_OVERHEAD,
             );
             storage_find.stop();
             total_storage_find_us += storage_find.as_us();
             let mut append_accounts = Measure::start("append_accounts");
-            let rvs = storage
-                .accounts
-                .append_accounts(&with_meta[infos.len()..], &hashes[infos.len()..]);
+            let rvs = storage.accounts.append_accounts(
+                &accounts_and_meta_to_store[infos.len()..],
+                &hashes[infos.len()..],
+            );
+            assert!(!rvs.is_empty());
             append_accounts.stop();
             total_append_accounts_us += append_accounts.as_us();
-            if rvs.is_empty() {
+            if rvs.len() == 1 {
                 storage.set_status(AccountStorageStatus::Full);
 
                 // See if an account overflows the append vecs in the slot.
-                let data_len = (with_meta[infos.len()].1.data.len() + STORE_META_OVERHEAD) as u64;
+                let data_len = (accounts_and_meta_to_store[infos.len()].1.data.len()
+                    + STORE_META_OVERHEAD) as u64;
                 if !self.has_space_available(slot, data_len) {
                     let special_store_size = std::cmp::max(data_len * 2, self.file_size);
                     if self
@@ -2150,27 +2770,233 @@ impl AccountsDB {
                 }
                 continue;
             }
-            for (offset, (_, account)) in rvs.iter().zip(&with_meta[infos.len()..]) {
-                storage.add_account();
+
+            for (offsets, (_, account)) in rvs
+                .windows(2)
+                .zip(&accounts_and_meta_to_store[infos.len()..])
+            {
+                let stored_size = offsets[1] - offsets[0];
+                storage.add_account(stored_size);
                 infos.push(AccountInfo {
                     store_id: storage.append_vec_id(),
-                    offset: *offset,
+                    offset: offsets[0],
+                    stored_size,
                     lamports: account.lamports,
                 });
             }
             // restore the state to available
             storage.set_status(AccountStorageStatus::Available);
         }
+
         self.stats
             .store_append_accounts
             .fetch_add(total_append_accounts_us, Ordering::Relaxed);
         self.stats
             .store_find_store
            .fetch_add(total_storage_find_us, Ordering::Relaxed);
-
         infos
     }
 
+    pub fn mark_slot_frozen(&self, slot: Slot) {
+        if let Some(slot_cache) = self.accounts_cache.slot_cache(slot) {
+            slot_cache.mark_slot_frozen();
+            slot_cache.report_slot_store_metrics();
+        }
+        self.accounts_cache.report_size();
+    }
+
+    // Force flush the cached roots, flush any unrooted frozen slots as well if there are
+    // > MAX_CACHE_SLOTS of them.
+    pub fn force_flush_accounts_cache(&self) {
+        self.flush_accounts_cache(true);
+    }
+
+    pub fn flush_accounts_cache_if_needed(&self) {
+        self.flush_accounts_cache(false);
+    }
+
+    fn flush_accounts_cache(&self, force_flush: bool) {
+        if !force_flush && self.accounts_cache.num_slots() <= MAX_CACHE_SLOTS {
+            return;
+        }
+
+        // Flush all roots
+        let mut flush_roots_elapsed = Measure::start("flush_roots_elapsed");
+        let cached_roots = self.accounts_cache.clear_roots();
+        for root in &cached_roots {
+            self.flush_slot_cache(*root);
+            self.accounts_cache.set_max_flush_root(*root);
+        }
+
+        // Only add to the uncleaned roots set *after* we've flushed the previous roots,
+        // so that clean will actually be able to clean the slots.
+        self.accounts_index.add_uncleaned_roots(cached_roots);
+        flush_roots_elapsed.stop();
+
+        // Note we don't purge unrooted slots here because there may be ongoing
+        // scans/references for those slots; let the Bank::drop() implementation
+        // do that cleanup on dead banks instead
+
+        // If there are > MAX_CACHE_SLOTS, then flush the excess ones to storage
+        let old_slots = self.accounts_cache.find_older_frozen_slots(MAX_CACHE_SLOTS);
+        let total_excess_slot_count = old_slots.len();
+        let mut unflushable_unrooted_slot_count = 0;
+        let max_flushed_root = self.accounts_cache.fetch_max_flush_root();
+        for old_slot in old_slots {
+            // Don't flush slots that are known to be unrooted
+            if old_slot > max_flushed_root {
+                self.flush_slot_cache(old_slot);
+            } else {
+                unflushable_unrooted_slot_count += 1;
+            }
+        }
+
+        datapoint_info!(
+            "accounts_db-cache-limit-slots",
+            ("total_excess_slot_count", total_excess_slot_count, i64),
+            (
+                "unflushable_unrooted_slot_count",
+                unflushable_unrooted_slot_count,
+                i64
+            ),
+        );
+
+        // Flush a random slot out after every force flush to catch any inconsistencies
+        // between cache and written state (i.e. should cause a hash mismatch between validators
+        // that flush and don't flush if such a bug exists).
+        let num_slots_remaining = self.accounts_cache.num_slots();
+        if force_flush && num_slots_remaining >= FLUSH_CACHE_RANDOM_THRESHOLD {
+            // Don't flush slots that are known to be unrooted
+            let mut frozen_slots = self.accounts_cache.find_older_frozen_slots(0);
+            frozen_slots.retain(|s| *s > max_flushed_root);
+            // Remove a random index 0 <= i < `frozen_slots.len()`
+            let rand_slot = frozen_slots.choose(&mut thread_rng());
+            if let Some(rand_slot) = rand_slot {
+                info!(
+                    "Flushing random slot: {}, num_remaining: {}",
+                    *rand_slot, num_slots_remaining
+                );
+                self.flush_slot_cache(*rand_slot);
+            }
+        }
+
+        inc_new_counter_info!("flush_roots_elapsed", flush_roots_elapsed.as_us() as usize);
+    }
+
+    fn flush_slot_cache(&self, slot: Slot) {
+        info!("flush_slot_cache slot: {}", slot);
+        if let Some(slot_cache) = self.accounts_cache.slot_cache(slot) {
+            let iter_items: Vec<_> = slot_cache.iter().collect();
+            let mut total_size = 0;
+            let (accounts, hashes): (Vec<(&Pubkey, &Account)>, Vec<Hash>) = iter_items
+                .iter()
+                .map(|iter_item| {
+                    let key = iter_item.key();
+                    let account = &iter_item.value().account;
+                    let hash = iter_item.value().hash;
+                    total_size += (account.data.len() + STORE_META_OVERHEAD) as u64;
+                    ((key, account), hash)
+                })
+                .unzip();
+            let aligned_total_size = self.page_align(total_size);
+
+            // This ensures that all updates are written to an AppendVec before any
+            // updates to the index happen, so anybody that sees a real entry in the
+            // index will be able to find the account in storage
+            let flushed_store =
+                self.create_and_insert_store(slot, aligned_total_size, "flush_slot_cache");
+            self.store_accounts_custom(
+                slot,
+                &accounts,
+                &hashes,
+                Some(Box::new(move |_, _| flushed_store.clone())),
+                None,
+                false,
+            );
+            // If the above sizing function is correct, just one AppendVec is enough to hold
+            // all the data for the slot
+            assert_eq!(
+                self.storage
+                    .get_slot_stores(slot)
+                    .unwrap()
+                    .read()
+                    .unwrap()
+                    .len(),
+                1
+            );
+
+            // Remove this slot from the cache, which to AccountsDb readers should
+            // look like an atomic switch from the cache to storage
+            assert!(self.accounts_cache.remove_slot(slot).is_some());
+        }
+    }
+
+    fn write_accounts_to_cache(
+        &self,
+        slot: Slot,
+        hashes: &[Hash],
+        accounts_and_meta_to_store: &[(StoredMeta, &Account)],
+    ) -> Vec<AccountInfo> {
+        assert_eq!(hashes.len(), accounts_and_meta_to_store.len());
+        accounts_and_meta_to_store
+            .iter()
+            .zip(hashes)
+            .map(|((meta, account), hash)| {
+                self.accounts_cache
+                    .store(slot, &meta.pubkey, (**account).clone(), *hash);
+                AccountInfo {
+                    store_id: CACHE_VIRTUAL_STORAGE_ID,
+                    offset: CACHE_VIRTUAL_OFFSET,
+                    stored_size: CACHE_VIRTUAL_STORED_SIZE,
+                    lamports: account.lamports,
+                }
+            })
+            .collect()
+    }
+
+    fn store_accounts_to<
+        F: FnMut(Slot, usize) -> Arc<AccountStorageEntry>,
+        P: Iterator<Item = u64>,
+    >(
+        &self,
+        slot: Slot,
+        accounts: &[(&Pubkey, &Account)],
+        hashes: &[Hash],
+        storage_finder: F,
+        mut write_version_producer: P,
+        is_cached_store: bool,
+    ) -> Vec<AccountInfo> {
+        let default_account = Account::default();
+        let accounts_and_meta_to_store: Vec<(StoredMeta, &Account)> = accounts
+            .iter()
+            .map(|(pubkey, account)| {
+                let account = if account.lamports == 0 {
+                    &default_account
+                } else {
+                    *account
+                };
+                let data_len = account.data.len() as u64;
+                let meta = StoredMeta {
+                    write_version: write_version_producer.next().unwrap(),
+                    pubkey: **pubkey,
+                    data_len,
+                };
+                (meta, account)
+            })
+            .collect();
+
+        if self.caching_enabled && is_cached_store {
+            self.write_accounts_to_cache(slot, hashes, &accounts_and_meta_to_store)
+        } else {
+            self.write_accounts_to_storage(
+                slot,
+                hashes,
+                storage_finder,
+                &accounts_and_meta_to_store,
+            )
+        }
+    }
+
     fn report_store_stats(&self) {
         let mut total_count = 0;
         let mut min = std::usize::MAX;
@@ -2384,34 +3210,38 @@ impl AccountsDB {
                         {
                             let (slot, account_info) = &lock.slot_list()[index];
                             if account_info.lamports != 0 {
-                                self.storage
-                                    .get_account_storage_entry(*slot, account_info.store_id)
-                                    .and_then(|store| {
-                                        let account =
-                                            store.accounts.get_account(account_info.offset)?.0;
-                                        let balance = Self::account_balance_for_capitalization(
-                                            account_info.lamports,
-                                            &account.account_meta.owner,
-                                            account.account_meta.executable,
-                                            simple_capitalization_enabled,
+                                self.get_account_accessor_from_cache_or_storage(
+                                    *slot,
+                                    pubkey,
+                                    account_info.store_id,
+                                    account_info.offset,
+                                )
+                                .get_loaded_account()
+                                .and_then(|loaded_account| {
+                                    let loaded_hash = loaded_account.loaded_hash();
+                                    let balance = Self::account_balance_for_capitalization(
+                                        account_info.lamports,
+                                        loaded_account.owner(),
+                                        loaded_account.executable(),
+                                        simple_capitalization_enabled,
+                                    );
+
+                                    if check_hash {
+                                        let computed_hash = loaded_account.compute_hash(
+                                            *slot,
+                                            &self.cluster_type.expect(
+                                                "Cluster type must be set at initialization",
+                                            ),
+                                            pubkey,
                                         );
-
-                                        if check_hash {
-                                            let hash = Self::hash_stored_account(
-                                                *slot,
-                                                &account,
-                                                &self.cluster_type.expect(
-                                                    "Cluster type must be set at initialization",
-                                                ),
-                                            );
-                                            if hash != *account.hash {
-                                                mismatch_found.fetch_add(1, Ordering::Relaxed);
-                                                return None;
-                                            }
+                                        if computed_hash != *loaded_hash {
+                                            mismatch_found.fetch_add(1, Ordering::Relaxed);
+                                            return None;
                                         }
+                                    }
 
-                                        Some((*pubkey, *account.hash, balance))
-                                    })
+                                    Some((*pubkey, *loaded_hash, balance))
+                                })
                             } else {
                                 None
                             }
@@ -2507,12 +3337,15 @@ impl AccountsDB {
         let mut scan = Measure::start("scan");
         let mut accumulator: Vec<HashMap<Pubkey, (u64, Hash)>> = self.scan_account_storage(
             slot,
-            |stored_account: &StoredAccount,
+            |loaded_account: LoadedAccount,
              _store_id: AppendVecId,
              accum: &mut HashMap<Pubkey, (u64, Hash)>| {
                 accum.insert(
-                    stored_account.meta.pubkey,
-                    (stored_account.meta.write_version, *stored_account.hash),
+                    *loaded_account.pubkey(),
+                    (
+                        loaded_account.write_version(),
+                        *loaded_account.loaded_hash(),
+                    ),
                 );
             },
         );
@@ -2523,6 +3356,7 @@
             AccountsDB::merge(&mut account_maps, &maps);
         }
         merge.stop();
+
         let mut accumulate = Measure::start("accumulate");
         let hashes: Vec<_> = account_maps
             .into_iter()
@@ -2572,7 +3406,10 @@ impl AccountsDB {
         mut reclaimed_offsets: Option<&mut AppendVecOffsets>,
     ) -> HashSet<Slot> {
         let mut dead_slots = HashSet::new();
+        let mut new_shrink_candidates: ShrinkCandidates = HashMap::new();
         for (slot, account_info) in reclaims {
+            // No cached accounts should make it here
+            assert_ne!(account_info.store_id, CACHE_VIRTUAL_STORAGE_ID);
             if let Some(ref mut reclaimed_offsets) = reclaimed_offsets {
                 reclaimed_offsets
                     .entry(account_info.store_id)
@@ -2591,9 +3428,38 @@ impl AccountsDB {
                     "AccountDB::accounts_index corrupted. Storage pointed to: {}, expected: {}, should only point to one slot",
                     store.slot(), *slot
                 );
-                let count = store.remove_account();
+                let count = store.remove_account(account_info.stored_size);
                 if count == 0 {
                     dead_slots.insert(*slot);
+                } else if self.caching_enabled
+                    && (self.page_align(store.alive_bytes() as u64) as f64
+                        / store.total_bytes() as f64)
+                        < SHRINK_RATIO
+                {
+                    // Checking that this single storage entry is ready for shrinking
+                    // should be a sufficient indication that the slot is ready to be
+                    // shrunk, because slots should only have one storage entry, namely
+                    // the one that was created by `flush_slot_cache()`.
+                    {
+                        new_shrink_candidates
+                            .entry(*slot)
+                            .or_default()
+                            .insert(store.append_vec_id(), store);
+                    }
+                }
+            }
+        }
+
+        if self.caching_enabled {
+            {
+                let mut shrink_candidate_slots = self.shrink_candidate_slots.lock().unwrap();
+                for (slot, slot_shrink_candidates) in new_shrink_candidates {
+                    for (store_id, store) in slot_shrink_candidates {
+                        shrink_candidate_slots
+                            .entry(slot)
+                            .or_default()
+                            .insert(store_id, store);
+                    }
                 }
             }
         }
@@ -2612,12 +3478,36 @@ impl AccountsDB {
         dead_slots
     }
 
-    fn clean_dead_slots(
+    fn finalize_dead_slot_removal(
         &self,
         dead_slots: &HashSet<Slot>,
+        purged_slot_pubkeys: HashSet<(Slot, Pubkey)>,
         mut purged_account_slots: Option<&mut AccountSlots>,
     ) {
-        let mut measure = Measure::start("clean_dead_slots-ms");
+        for (slot, pubkey) in purged_slot_pubkeys {
+            if let Some(ref mut purged_account_slots) = purged_account_slots {
+                purged_account_slots.entry(pubkey).or_default().insert(slot);
+            }
+            self.accounts_index.unref_from_storage(&pubkey);
+        }
+
+        for slot in dead_slots.iter() {
+            self.accounts_index.clean_dead_slot(*slot);
+        }
+        {
+            let mut bank_hashes = self.bank_hashes.write().unwrap();
+            for slot in dead_slots.iter() {
+                bank_hashes.remove(slot);
+            }
+        }
+    }
+
+    fn clean_stored_dead_slots(
+        &self,
+        dead_slots: &HashSet<Slot>,
+        purged_account_slots: Option<&mut AccountSlots>,
+    ) {
+        let mut measure = Measure::start("clean_stored_dead_slots-ms");
         let mut stores: Vec<Arc<AccountStorageEntry>> = vec![];
         for slot in dead_slots.iter() {
             if let Some(slot_storage) = self.storage.get_slot_stores(*slot) {
@@ -2626,8 +3516,7 @@ impl AccountsDB {
                 }
             }
         }
-        datapoint_debug!("clean_dead_slots", ("stores", stores.len(), i64));
-        let slot_pubkeys: HashSet<(Slot, Pubkey)> = {
+        let purged_slot_pubkeys: HashSet<(Slot, Pubkey)> = {
             self.thread_pool_clean.install(|| {
                 stores
                     .into_par_iter()
@@ -2644,24 +3533,9 @@ impl AccountsDB {
                 })
             })
         };
-        for (slot, pubkey) in slot_pubkeys {
-            if let Some(ref mut purged_account_slots) = purged_account_slots {
-                purged_account_slots.entry(pubkey).or_default().insert(slot);
-            }
-            self.accounts_index.unref_from_storage(&pubkey);
-        }
+        self.finalize_dead_slot_removal(dead_slots, purged_slot_pubkeys, purged_account_slots);
         measure.stop();
-        inc_new_counter_info!("clean_dead_slots-unref-ms", measure.as_ms() as usize);
-
-        for slot in dead_slots.iter() {
-            self.accounts_index.clean_dead_slot(*slot);
-        }
-        {
-            let mut bank_hashes = self.bank_hashes.write().unwrap();
-            for slot in dead_slots.iter() {
-                bank_hashes.remove(slot);
-            }
-        }
+        inc_new_counter_info!("clean_stored_dead_slots-ms", measure.as_ms() as usize);
     }
 
     fn hash_accounts(
@@ -2743,8 +3617,16 @@ impl AccountsDB {
         }
     }
 
+    pub fn store_cached(&self, slot: Slot, accounts: &[(&Pubkey, &Account)]) {
+        self.store(slot, accounts, self.caching_enabled);
+    }
+
     /// Store the account update.
-    pub fn store(&self, slot: Slot, accounts: &[(&Pubkey, &Account)]) {
+    pub fn store_uncached(&self, slot: Slot, accounts: &[(&Pubkey, &Account)]) {
+        self.store(slot, accounts, false);
+    }
+
+    fn store(&self, slot: Slot, accounts: &[(&Pubkey, &Account)], is_cached_store: bool) {
         // If all transactions in a batch are errored,
         // it's possible to get a store with no accounts.
         if accounts.is_empty() {
@@ -2763,7 +3645,7 @@ impl AccountsDB {
         self.stats
             .store_hash_accounts
             .fetch_add(hash_time.as_us(), Ordering::Relaxed);
-        self.store_accounts_default(slot, accounts, &hashes);
+        self.store_accounts_default(slot, accounts, &hashes, is_cached_store);
         self.report_store_timings();
     }
 
@@ -2852,11 +3734,12 @@ impl AccountsDB {
         }
     }
 
-    fn store_accounts_default<'a>(
-        &'a self,
+    fn store_accounts_default(
+        &self,
         slot: Slot,
         accounts: &[(&Pubkey, &Account)],
         hashes: &[Hash],
+        is_cached_store: bool,
     ) {
         self.store_accounts_custom(
             slot,
@@ -2864,6 +3747,7 @@ impl AccountsDB {
             hashes,
             None::<StorageFinder>,
            None::<Box<dyn Iterator<Item = u64>>>,
+            is_cached_store,
         );
     }
 
@@ -2874,6 +3758,7 @@ impl AccountsDB {
         hashes: &[Hash],
         storage_finder: Option<StorageFinder<'a>>,
         write_version_producer: Option<Box<dyn Iterator<Item = u64>>>,
+        is_cached_store: bool,
     ) -> StoreAccountsTiming {
         let storage_finder: StorageFinder<'a> = storage_finder
             .unwrap_or_else(|| Box::new(move |slot, size| self.find_storage_candidate(slot, size)));
@@ -2898,13 +3783,33 @@ impl AccountsDB {
             hashes,
             storage_finder,
             write_version_producer,
+            is_cached_store,
         );
         store_accounts_time.stop();
         self.stats
             .store_accounts
             .fetch_add(store_accounts_time.as_us(), Ordering::Relaxed);
         let mut update_index_time = Measure::start("update_index");
-        let reclaims = self.update_index(slot, infos, accounts);
+
+        // If the cache was flushed, then because `update_index` occurs after the
+        // accounts are stored by the above `store_accounts_to` call, all reads
+        // after this point will know to not check the cache anymore
+        let mut reclaims = self.update_index(slot, infos, accounts);
+
+        // For each updated account, `reclaims` should only have at most one
+        // item (if the account was previously updated in this slot).
+        // filter out the cached reclaims as those don't actually map
+        // to anything that needs to be cleaned in the backing storage
+        // entries
+        if self.caching_enabled {
+            reclaims.retain(|(_, r)| r.store_id != CACHE_VIRTUAL_STORAGE_ID);
+
+            if is_cached_store {
+                assert!(reclaims.is_empty());
+            }
+        }
+
         update_index_time.stop();
         self.stats
             .store_update_index
             .fetch_add(update_index_time.as_us(), Ordering::Relaxed);
@@ -2933,7 +3838,10 @@ impl AccountsDB {
     }
 
     pub fn add_root(&self, slot: Slot) {
-        self.accounts_index.add_root(slot)
+        self.accounts_index.add_root(slot, self.caching_enabled);
+        if self.caching_enabled {
+            self.accounts_cache.add_root(slot);
+        }
     }
 
     pub fn get_snapshot_storages(&self, snapshot_slot: Slot) -> SnapshotStorages {
@@ -2974,7 +3882,7 @@ impl AccountsDB {
 
     pub fn generate_index(&self) {
         type AccountsMap<'a> =
-            DashMap<Pubkey, Mutex<BTreeMap<u64, (AppendVecId, StoredAccount<'a>)>>>;
+            DashMap<Pubkey, Mutex<BTreeMap<u64, (AppendVecId, StoredAccountMeta<'a>)>>>;
         let mut slots = self.storage.all_slots();
         #[allow(clippy::stable_sort_primitive)]
         slots.sort();
@@ -3032,6 +3940,7 @@ impl AccountsDB {
                     let account_info = AccountInfo {
                         store_id,
                         offset: stored_account.offset,
+                        stored_size: stored_account.stored_size,
                         lamports: stored_account.account_meta.lamports,
                     };
                     self.accounts_index.upsert(
@@ -3050,18 +3959,24 @@ impl AccountsDB {
 
         // Need to add these last, otherwise older updates will be cleaned
         for slot in slots {
-            self.accounts_index.add_root(slot);
+            self.accounts_index.add_root(slot, false);
        }
 
-        let mut counts = HashMap::new();
+        let mut stored_sizes_and_counts = HashMap::new();
         for account_entry in self.accounts_index.account_maps.read().unwrap().values() {
             for (_slot, account_entry) in account_entry.slot_list.read().unwrap().iter() {
-                *counts.entry(account_entry.store_id).or_insert(0) += 1;
+                let storage_entry_meta = stored_sizes_and_counts
+                    .entry(account_entry.store_id)
+                    .or_insert((0, 0));
+                storage_entry_meta.0 += account_entry.stored_size;
+                storage_entry_meta.1 += 1;
             }
         }
         for slot_stores in self.storage.0.iter() {
             for (id, store) in slot_stores.value().read().unwrap().iter() {
-                if let Some(count) = counts.get(&id) {
+                // Should be default at this point
+                assert_eq!(store.alive_bytes(), 0);
+                if let Some((stored_size, count)) = stored_sizes_and_counts.get(&id) {
                     trace!(
                         "id: {} setting count: {} cur: {}",
                         id,
@@ -3069,6 +3984,7 @@ impl AccountsDB {
                         store.count_and_status.read().unwrap().0
                     );
                     store.count_and_status.write().unwrap().0 = *count;
+                    store.alive_bytes.store(*stored_size, Ordering::SeqCst);
                 } else {
                     trace!("id: {} clearing count", id);
                     store.count_and_status.write().unwrap().0 = 0;
@@ -3157,7 +4073,12 @@ pub mod tests {
     use assert_matches::assert_matches;
     use rand::{thread_rng, Rng};
     use solana_sdk::{account::Account, hash::HASH_BYTES, pubkey::PUBKEY_BYTES};
-    use std::{iter::FromIterator, str::FromStr};
+    use std::{
+        iter::FromIterator,
+        str::FromStr,
+        thread::{sleep, Builder},
+        time::Duration,
+    };
 
     fn linear_ancestors(end_slot: u64) -> Ancestors {
         let mut ancestors: Ancestors = vec![(0, 0)].into_iter().collect();
@@ -3174,7 +4095,7 @@ pub mod tests {
         let key = Pubkey::default();
         let account0 = Account::new(1, 0, &key);
 
-        db.store(0, &[(&key, &account0)]);
+        db.store_uncached(0, &[(&key, &account0)]);
         db.add_root(0);
         let ancestors = vec![(1, 1)].into_iter().collect();
         assert_eq!(db.load_slow(&ancestors, &key), Some((account0, 0)));
@@ -3187,10 +4108,10 @@ pub mod tests {
         let key = Pubkey::default();
         let account0 = Account::new(1, 0, &key);
 
-        db.store(0, &[(&key, &account0)]);
+        db.store_uncached(0, &[(&key, &account0)]);
 
         let account1 = Account::new(0, 0, &key);
-        db.store(1, &[(&key, &account1)]);
+        db.store_uncached(1,
&[(&key, &account1)]); let ancestors = vec![(1, 1)].into_iter().collect(); assert_eq!(&db.load_slow(&ancestors, &key).unwrap().0, &account1); @@ -3214,10 +4135,10 @@ pub mod tests { let key = Pubkey::default(); let account0 = Account::new(1, 0, &key); - db.store(0, &[(&key, &account0)]); + db.store_uncached(0, &[(&key, &account0)]); let account1 = Account::new(0, 0, &key); - db.store(1, &[(&key, &account1)]); + db.store_uncached(1, &[(&key, &account1)]); db.add_root(0); let ancestors = vec![(1, 1)].into_iter().collect(); @@ -3236,7 +4157,7 @@ pub mod tests { let account0 = Account::new(1, 0, &key); // store value 1 in the "root", i.e. db zero - db.store(0, &[(&key, &account0)]); + db.store_uncached(0, &[(&key, &account0)]); // now we have: // @@ -3249,7 +4170,7 @@ pub mod tests { // store value 0 in one child let account1 = Account::new(0, 0, &key); - db.store(1, &[(&key, &account1)]); + db.store_uncached(1, &[(&key, &account1)]); // masking accounts is done at the Accounts level, at accountsDB we see // original account (but could also accept "None", which is implemented @@ -3315,8 +4236,8 @@ pub mod tests { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, DEFAULT_FILE_SIZE as usize / 3, &pubkey); - db.store(1, &[(&pubkey, &account)]); - db.store(1, &[(&pubkeys[0], &account)]); + db.store_uncached(1, &[(&pubkey, &account)]); + db.store_uncached(1, &[(&pubkeys[0], &account)]); { let slot_0_stores = &db.storage.get_slot_stores(0).unwrap(); let slot_1_stores = &db.storage.get_slot_stores(1).unwrap(); @@ -3347,7 +4268,7 @@ pub mod tests { // overwrite old rooted account version; only the r_slot_0_stores.count() should be // decremented - db.store(2, &[(&pubkeys[0], &account)]); + db.store_uncached(2, &[(&pubkeys[0], &account)]); db.clean_accounts(None); { let slot_0_stores = &db.storage.get_slot_stores(0).unwrap(); @@ -3370,11 +4291,11 @@ pub mod tests { // 1 token in the "root", i.e. 
db zero let db0 = AccountsDB::new(Vec::new(), &ClusterType::Development); let account0 = Account::new(1, 0, &key); - db0.store(0, &[(&key, &account0)]); + db0.store_uncached(0, &[(&key, &account0)]); // 0 lamports in the child let account1 = Account::new(0, 0, &key); - db0.store(1, &[(&key, &account1)]); + db0.store_uncached(1, &[(&key, &account1)]); // masking accounts is done at the Accounts level, at accountsDB we see // original account @@ -3387,11 +4308,12 @@ pub mod tests { #[test] fn test_remove_unrooted_slot() { let unrooted_slot = 9; - let db = AccountsDB::new(Vec::new(), &ClusterType::Development); + let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); + db.caching_enabled = true; let key = Pubkey::default(); let account0 = Account::new(1, 0, &key); let ancestors: HashMap<_, _> = vec![(unrooted_slot, 1)].into_iter().collect(); - db.store(unrooted_slot, &[(&key, &account0)]); + db.store_cached(unrooted_slot, &[(&key, &account0)]); db.bank_hashes .write() .unwrap() @@ -3419,7 +4341,7 @@ pub mod tests { // Test we can store for the same slot again and get the right information let account0 = Account::new(2, 0, &key); - db.store(unrooted_slot, &[(&key, &account0)]); + db.store_uncached(unrooted_slot, &[(&key, &account0)]); assert_load_account(&db, unrooted_slot, key, 2); } @@ -3430,7 +4352,7 @@ pub mod tests { let db = AccountsDB::new(Vec::new(), &ClusterType::Development); let key = solana_sdk::pubkey::new_rand(); let account0 = Account::new(1, 0, &key); - db.store(unrooted_slot, &[(&key, &account0)]); + db.store_uncached(unrooted_slot, &[(&key, &account0)]); // Purge the slot db.remove_unrooted_slot(unrooted_slot); @@ -3438,7 +4360,7 @@ pub mod tests { // Add a new root let key2 = solana_sdk::pubkey::new_rand(); let new_root = unrooted_slot + 1; - db.store(new_root, &[(&key2, &account0)]); + db.store_uncached(new_root, &[(&key2, &account0)]); db.add_root(new_root); // Simulate reconstruction from snapshot @@ -3466,7 +4388,7 @@ pub mod tests { let account = Account::new((t + 1) as u64, space, &Account::default().owner); pubkeys.push(pubkey); assert!(accounts.load_slow(&ancestors, &pubkey).is_none()); - accounts.store(slot, &[(&pubkey, &account)]); + accounts.store_uncached(slot, &[(&pubkey, &account)]); } for t in 0..num_vote { let pubkey = solana_sdk::pubkey::new_rand(); @@ -3474,7 +4396,7 @@ pub mod tests { pubkeys.push(pubkey); let ancestors = vec![(slot, 0)].into_iter().collect(); assert!(accounts.load_slow(&ancestors, &pubkey).is_none()); - accounts.store(slot, &[(&pubkey, &account)]); + accounts.store_uncached(slot, &[(&pubkey, &account)]); } } @@ -3484,7 +4406,7 @@ pub mod tests { let ancestors = vec![(slot, 0)].into_iter().collect(); if let Some((mut account, _)) = accounts.load_slow(&ancestors, &pubkeys[idx]) { account.lamports += 1; - accounts.store(slot, &[(&pubkeys[idx], &account)]); + accounts.store_uncached(slot, &[(&pubkeys[idx], &account)]); if account.lamports == 0 { let ancestors = vec![(slot, 0)].into_iter().collect(); assert!(accounts.load_slow(&ancestors, &pubkeys[idx]).is_none()); @@ -3561,7 +4483,7 @@ pub mod tests { ) { for idx in 0..num { let account = Account::new((idx + count) as u64, 0, &Account::default().owner); - accounts.store(slot, &[(&pubkeys[idx], &account)]); + accounts.store_uncached(slot, &[(&pubkeys[idx], &account)]); } } @@ -3607,7 +4529,7 @@ pub mod tests { for i in 0..9 { let key = solana_sdk::pubkey::new_rand(); let account = Account::new(i + 1, size as usize / 4, &key); - accounts.store(0, &[(&key, &account)]); + 
accounts.store_uncached(0, &[(&key, &account)]); keys.push(key); } let ancestors = vec![(0, 0)].into_iter().collect(); @@ -3638,7 +4560,7 @@ pub mod tests { let status = [AccountStorageStatus::Available, AccountStorageStatus::Full]; let pubkey1 = solana_sdk::pubkey::new_rand(); let account1 = Account::new(1, DEFAULT_FILE_SIZE as usize / 2, &pubkey1); - accounts.store(0, &[(&pubkey1, &account1)]); + accounts.store_uncached(0, &[(&pubkey1, &account1)]); { let stores = &accounts.storage.get_slot_stores(0).unwrap(); let r_stores = stores.read().unwrap(); @@ -3649,7 +4571,7 @@ pub mod tests { let pubkey2 = solana_sdk::pubkey::new_rand(); let account2 = Account::new(1, DEFAULT_FILE_SIZE as usize / 2, &pubkey2); - accounts.store(0, &[(&pubkey2, &account2)]); + accounts.store_uncached(0, &[(&pubkey2, &account2)]); { assert_eq!(accounts.storage.0.len(), 1); let stores = &accounts.storage.get_slot_stores(0).unwrap(); @@ -3672,7 +4594,7 @@ pub mod tests { // lots of stores, but 3 storages should be enough for everything for _ in 0..25 { - accounts.store(0, &[(&pubkey1, &account1)]); + accounts.store_uncached(0, &[(&pubkey1, &account1)]); { assert_eq!(accounts.storage.0.len(), 1); let stores = &accounts.storage.get_slot_stores(0).unwrap(); @@ -3692,28 +4614,6 @@ pub mod tests { } } - #[test] - fn test_purge_slot_not_root() { - let accounts = AccountsDB::new(Vec::new(), &ClusterType::Development); - let mut pubkeys: Vec = vec![]; - create_account(&accounts, &mut pubkeys, 0, 1, 0, 0); - let ancestors = vec![(0, 0)].into_iter().collect(); - assert!(accounts.load_slow(&ancestors, &pubkeys[0]).is_some()); - accounts.purge_slot(0); - assert!(accounts.load_slow(&ancestors, &pubkeys[0]).is_none()); - } - - #[test] - fn test_purge_slot_after_root() { - let accounts = AccountsDB::new(Vec::new(), &ClusterType::Development); - let mut pubkeys: Vec = vec![]; - create_account(&accounts, &mut pubkeys, 0, 1, 0, 0); - let ancestors = vec![(0, 0)].into_iter().collect(); - accounts.add_root(0); - accounts.purge_slot(0); - assert!(accounts.load_slow(&ancestors, &pubkeys[0]).is_some()); - } - #[test] fn test_lazy_gc_slot() { solana_logger::setup(); @@ -3724,7 +4624,7 @@ pub mod tests { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, 0, &Account::default().owner); //store an account - accounts.store(0, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); let ancestors = vec![(0, 0)].into_iter().collect(); let id = { let (lock, idx) = accounts @@ -3746,7 +4646,7 @@ pub mod tests { .is_some()); //store causes clean - accounts.store(1, &[(&pubkey, &account)]); + accounts.store_uncached(1, &[(&pubkey, &account)]); //slot is gone accounts.print_accounts_stats("pre-clean"); @@ -3759,20 +4659,6 @@ pub mod tests { } impl AccountsDB { - fn alive_account_count_in_store(&self, slot: Slot) -> usize { - let slot_storage = self.storage.get_slot_stores(slot); - if let Some(slot_storage) = slot_storage { - slot_storage - .read() - .unwrap() - .values() - .map(|store| store.count()) - .sum() - } else { - 0 - } - } - fn all_account_count_in_append_vec(&self, slot: Slot) -> usize { let slot_storage = self.storage.get_slot_stores(slot); if let Some(slot_storage) = slot_storage { @@ -3808,8 +4694,8 @@ pub mod tests { let zero_lamport_account = Account::new(0, 0, &Account::default().owner); // Store two accounts - accounts.store(0, &[(&pubkey1, &account)]); - accounts.store(0, &[(&pubkey2, &account)]); + accounts.store_uncached(0, &[(&pubkey1, &account)]); + accounts.store_uncached(0, 
&[(&pubkey2, &account)]); // Make sure both accounts are in the same AppendVec in slot 0, which // will prevent pubkey1 from being cleaned up later even when it's a @@ -3830,10 +4716,10 @@ pub mod tests { assert_eq!(account_info1.store_id, account_info2.store_id); // Update account 1 in slot 1 - accounts.store(1, &[(&pubkey1, &account)]); + accounts.store_uncached(1, &[(&pubkey1, &account)]); // Update account 1 as zero lamports account - accounts.store(2, &[(&pubkey1, &zero_lamport_account)]); + accounts.store_uncached(2, &[(&pubkey1, &zero_lamport_account)]); // Pubkey 1 was the only account in slot 1, and it was updated in slot 2, so // slot 1 should be purged @@ -3850,7 +4736,7 @@ pub mod tests { // Slot 1 should be cleaned because all it's accounts are // zero lamports, and are not present in any other slot's // storage entries - assert_eq!(accounts.alive_account_count_in_store(1), 0); + assert_eq!(accounts.alive_account_count_in_slot(1), 0); } #[test] @@ -3863,8 +4749,8 @@ pub mod tests { let zero_lamport_account = Account::new(0, 0, &Account::default().owner); // Store a zero-lamport account - accounts.store(0, &[(&pubkey, &account)]); - accounts.store(1, &[(&pubkey, &zero_lamport_account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); + accounts.store_uncached(1, &[(&pubkey, &zero_lamport_account)]); // Simulate rooting the zero-lamport account, should be a // candidate for cleaning @@ -3880,12 +4766,12 @@ pub mod tests { // Slot 0 should be cleaned because all it's accounts have been // updated in the rooted slot 1 - assert_eq!(accounts.alive_account_count_in_store(0), 0); + assert_eq!(accounts.alive_account_count_in_slot(0), 0); // Slot 1 should be cleaned because all it's accounts are // zero lamports, and are not present in any other slot's // storage entries - assert_eq!(accounts.alive_account_count_in_store(1), 0); + assert_eq!(accounts.alive_account_count_in_slot(1), 0); // zero lamport account, should no longer exist in accounts index // because it has been removed @@ -3900,22 +4786,22 @@ pub mod tests { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, 0, &Account::default().owner); //store an account - accounts.store(0, &[(&pubkey, &account)]); - accounts.store(1, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); + accounts.store_uncached(1, &[(&pubkey, &account)]); // simulate slots are rooted after while accounts.add_root(0); accounts.add_root(1); //even if rooted, old state isn't cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 1); - assert_eq!(accounts.alive_account_count_in_store(1), 1); + assert_eq!(accounts.alive_account_count_in_slot(0), 1); + assert_eq!(accounts.alive_account_count_in_slot(1), 1); accounts.clean_accounts(None); //now old state is cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 0); - assert_eq!(accounts.alive_account_count_in_store(1), 1); + assert_eq!(accounts.alive_account_count_in_slot(0), 0); + assert_eq!(accounts.alive_account_count_in_slot(1), 1); } #[test] @@ -3928,34 +4814,35 @@ pub mod tests { let normal_account = Account::new(1, 0, &Account::default().owner); let zero_account = Account::new(0, 0, &Account::default().owner); //store an account - accounts.store(0, &[(&pubkey1, &normal_account)]); - accounts.store(1, &[(&pubkey1, &zero_account)]); - accounts.store(0, &[(&pubkey2, &normal_account)]); - accounts.store(1, &[(&pubkey2, &normal_account)]); + accounts.store_uncached(0, &[(&pubkey1, &normal_account)]); + accounts.store_uncached(1, 
&[(&pubkey1, &zero_account)]); + accounts.store_uncached(0, &[(&pubkey2, &normal_account)]); + accounts.store_uncached(1, &[(&pubkey2, &normal_account)]); //simulate slots are rooted after while accounts.add_root(0); accounts.add_root(1); //even if rooted, old state isn't cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 2); - assert_eq!(accounts.alive_account_count_in_store(1), 2); + assert_eq!(accounts.alive_account_count_in_slot(0), 2); + assert_eq!(accounts.alive_account_count_in_slot(1), 2); accounts.clean_accounts(None); //Old state behind zero-lamport account is cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 0); - assert_eq!(accounts.alive_account_count_in_store(1), 2); + assert_eq!(accounts.alive_account_count_in_slot(0), 0); + assert_eq!(accounts.alive_account_count_in_slot(1), 2); } #[test] fn test_clean_old_with_both_normal_and_zero_lamport_accounts() { solana_logger::setup(); - let accounts = AccountsDB::new_with_indexes( + let accounts = AccountsDB::new_with_config( Vec::new(), &ClusterType::Development, spl_token_mint_index_enabled(), + false, ); let pubkey1 = solana_sdk::pubkey::new_rand(); let pubkey2 = solana_sdk::pubkey::new_rand(); @@ -3974,11 +4861,11 @@ pub mod tests { zero_account.data = account_data_with_mint; //store an account - accounts.store(0, &[(&pubkey1, &normal_account)]); - accounts.store(0, &[(&pubkey1, &normal_account)]); - accounts.store(1, &[(&pubkey1, &zero_account)]); - accounts.store(0, &[(&pubkey2, &normal_account)]); - accounts.store(2, &[(&pubkey2, &normal_account)]); + accounts.store_uncached(0, &[(&pubkey1, &normal_account)]); + accounts.store_uncached(0, &[(&pubkey1, &normal_account)]); + accounts.store_uncached(1, &[(&pubkey1, &zero_account)]); + accounts.store_uncached(0, &[(&pubkey2, &normal_account)]); + accounts.store_uncached(2, &[(&pubkey2, &normal_account)]); //simulate slots are rooted after while accounts.add_root(0); @@ -3986,9 +4873,9 @@ pub mod tests { accounts.add_root(2); //even if rooted, old state isn't cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 2); - assert_eq!(accounts.alive_account_count_in_store(1), 1); - assert_eq!(accounts.alive_account_count_in_store(2), 1); + assert_eq!(accounts.alive_account_count_in_slot(0), 2); + assert_eq!(accounts.alive_account_count_in_slot(1), 1); + assert_eq!(accounts.alive_account_count_in_slot(2), 1); // Secondary index should still find both pubkeys let mut found_accounts = HashSet::new(); @@ -4006,12 +4893,12 @@ pub mod tests { accounts.clean_accounts(None); //both zero lamport and normal accounts are cleaned up - assert_eq!(accounts.alive_account_count_in_store(0), 0); + assert_eq!(accounts.alive_account_count_in_slot(0), 0); // The only store to slot 1 was a zero lamport account, should // be purged by zero-lamport cleaning logic because slot 1 is // rooted - assert_eq!(accounts.alive_account_count_in_store(1), 0); - assert_eq!(accounts.alive_account_count_in_store(2), 1); + assert_eq!(accounts.alive_account_count_in_slot(1), 0); + assert_eq!(accounts.alive_account_count_in_slot(2), 1); // `pubkey1`, a zero lamport account, should no longer exist in accounts index // because it has been removed by the clean @@ -4038,8 +4925,8 @@ pub mod tests { // store an account, make it a zero lamport account // in slot 1 - accounts.store(0, &[(&pubkey, &account)]); - accounts.store(1, &[(&pubkey, &zero_account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); + accounts.store_uncached(1, &[(&pubkey, &zero_account)]); // simulate slots are 
rooted after while accounts.add_root(0); @@ -4047,17 +4934,17 @@ pub mod tests { // Only clean up to account 0, should not purge slot 0 based on // updates in later slots in slot 1 - assert_eq!(accounts.alive_account_count_in_store(0), 1); - assert_eq!(accounts.alive_account_count_in_store(1), 1); + assert_eq!(accounts.alive_account_count_in_slot(0), 1); + assert_eq!(accounts.alive_account_count_in_slot(1), 1); accounts.clean_accounts(Some(0)); - assert_eq!(accounts.alive_account_count_in_store(0), 1); - assert_eq!(accounts.alive_account_count_in_store(1), 1); + assert_eq!(accounts.alive_account_count_in_slot(0), 1); + assert_eq!(accounts.alive_account_count_in_slot(1), 1); assert!(accounts.accounts_index.get(&pubkey, None, None).is_some()); // Now the account can be cleaned up accounts.clean_accounts(Some(1)); - assert_eq!(accounts.alive_account_count_in_store(0), 0); - assert_eq!(accounts.alive_account_count_in_store(1), 0); + assert_eq!(accounts.alive_account_count_in_slot(0), 0); + assert_eq!(accounts.alive_account_count_in_slot(1), 0); // The zero lamport account, should no longer exist in accounts index // because it has been removed @@ -4072,7 +4959,7 @@ pub mod tests { let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, 0, &Account::default().owner); //store an account - accounts.store(0, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); assert_eq!(accounts.accounts_index.uncleaned_roots_len(), 0); // simulate slots are rooted after while @@ -4128,7 +5015,7 @@ pub mod tests { // Overwrite account 30 from slot 0 with lamports=0 into slot 1. // Slot 1 should now have 10 + 1 = 11 accounts let account = Account::new(0, 0, &Account::default().owner); - accounts.store(latest_slot, &[(&pubkeys[30], &account)]); + accounts.store_uncached(latest_slot, &[(&pubkeys[30], &account)]); // Create 10 new accounts in slot 1, should now have 11 + 10 = 21 // accounts @@ -4147,7 +5034,7 @@ pub mod tests { // Overwrite account 31 from slot 0 with lamports=0 into slot 2. // Slot 2 should now have 20 + 1 = 21 accounts let account = Account::new(0, 0, &Account::default().owner); - accounts.store(latest_slot, &[(&pubkeys[31], &account)]); + accounts.store_uncached(latest_slot, &[(&pubkeys[31], &account)]); // Create 10 new accounts in slot 2. 
Slot 2 should now have // 21 + 10 = 31 accounts @@ -4256,10 +5143,10 @@ pub mod tests { // Step A let mut current_slot = 1; - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); // Store another live account to slot 1 which will prevent any purge // since the store count will not be zero - accounts.store(current_slot, &[(&pubkey2, &account2)]); + accounts.store_uncached(current_slot, &[(&pubkey2, &account2)]); accounts.add_root(current_slot); let (slot1, account_info1) = accounts .accounts_index @@ -4278,7 +5165,7 @@ pub mod tests { // Step B current_slot += 1; let zero_lamport_slot = current_slot; - accounts.store(current_slot, &[(&pubkey, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &zero_lamport_account)]); accounts.add_root(current_slot); assert_load_account(&accounts, current_slot, pubkey, zero_lamport); @@ -4332,12 +5219,12 @@ pub mod tests { let mut current_slot = 1; accounts.set_hash(current_slot, current_slot - 1); - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); accounts.add_root(current_slot); current_slot += 1; accounts.set_hash(current_slot, current_slot - 1); - accounts.store(current_slot, &[(&pubkey, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &zero_lamport_account)]); accounts.add_root(current_slot); assert_load_account(&accounts, current_slot, pubkey, zero_lamport); @@ -4395,12 +5282,12 @@ pub mod tests { let accounts = AccountsDB::new_single(); let mut current_slot = 1; - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&pubkey, &zero_lamport_account)]); - accounts.store(current_slot, &[(&pubkey2, &account2)]); + accounts.store_uncached(current_slot, &[(&pubkey, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&pubkey2, &account2)]); // Store enough accounts such that an additional store for slot 2 is created. 
while accounts @@ -4412,7 +5299,7 @@ pub mod tests { .len() < 2 { - accounts.store(current_slot, &[(&filler_account_pubkey, &filler_account)]); + accounts.store_uncached(current_slot, &[(&filler_account_pubkey, &filler_account)]); } accounts.add_root(current_slot); @@ -4455,21 +5342,21 @@ pub mod tests { let accounts = AccountsDB::new_single(); let mut current_slot = 1; - accounts.store(current_slot, &[(&pubkey, &account)]); - accounts.store(current_slot, &[(&purged_pubkey1, &account2)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &account2)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey1, &zero_lamport_account)]); - accounts.store(current_slot, &[(&purged_pubkey2, &account3)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey2, &account3)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey2, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey2, &zero_lamport_account)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&dummy_pubkey, &dummy_account)]); + accounts.store_uncached(current_slot, &[(&dummy_pubkey, &dummy_account)]); accounts.add_root(current_slot); accounts.print_accounts_stats("pre_f"); @@ -4533,7 +5420,7 @@ pub mod tests { loop { let account_bal = thread_rng().gen_range(1, 99); account.lamports = account_bal; - db.store(slot, &[(&pubkey, &account)]); + db.store_uncached(slot, &[(&pubkey, &account)]); let (account, slot) = db.load_slow(&HashMap::new(), &pubkey).unwrap_or_else(|| { @@ -4561,11 +5448,11 @@ pub mod tests { let key0 = solana_sdk::pubkey::new_rand(); let account0 = Account::new(1, 0, &key); - db.store(0, &[(&key0, &account0)]); + db.store_uncached(0, &[(&key0, &account0)]); let key1 = solana_sdk::pubkey::new_rand(); let account1 = Account::new(2, 0, &key); - db.store(1, &[(&key1, &account1)]); + db.store_uncached(1, &[(&key1, &account1)]); let ancestors = vec![(0, 0)].into_iter().collect(); let accounts: Vec = @@ -4595,23 +5482,20 @@ pub mod tests { let key0 = solana_sdk::pubkey::new_rand(); let account0 = Account::new(1, 0, &key); - db.store(0, &[(&key0, &account0)]); + db.store_uncached(0, &[(&key0, &account0)]); let key1 = solana_sdk::pubkey::new_rand(); let account1 = Account::new(2, 0, &key); - db.store(1, &[(&key1, &account1)]); + db.store_uncached(1, &[(&key1, &account1)]); db.print_accounts_stats("pre"); let slots: HashSet = vec![1].into_iter().collect(); let purge_keys = vec![(key1, slots)]; - let (_reclaims, dead_keys) = db.purge_keys_exact(purge_keys); + db.purge_keys_exact(purge_keys); let account2 = Account::new(3, 0, &key); - db.store(2, &[(&key1, &account2)]); - - db.accounts_index - .handle_dead_keys(&dead_keys, &HashSet::new()); + db.store_uncached(2, &[(&key1, &account2)]); db.print_accounts_stats("post"); let ancestors = vec![(2, 0)].into_iter().collect(); @@ -4627,7 +5511,7 @@ pub mod tests { let data_len = DEFAULT_FILE_SIZE as usize + 7; let account = Account::new(1, data_len, &key); - db.store(0, &[(&key, &account)]); + db.store_uncached(0, &[(&key, &account)]); let ancestors = vec![(0, 0)].into_iter().collect(); let ret = db.load_slow(&ancestors, &key).unwrap(); @@ -4690,29 +5574,29 @@ pub mod tests { let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); let mut account = 
Account::new(1, 42, &frozen_pubkey); - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); let ancestors = vec![(0, 0)].into_iter().collect(); db.freeze_accounts(&ancestors, &[frozen_pubkey]); // Store with no account changes is ok - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); // Store with an increase in lamports is ok account.lamports = 2; - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); // Store with an decrease that does not go below the frozen amount of lamports is tolerated account.lamports = 1; - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); // A store of any value over the frozen value of '1' across different slots is also ok account.lamports = 3; - db.store(1, &[(&frozen_pubkey, &account)]); + db.store_uncached(1, &[(&frozen_pubkey, &account)]); account.lamports = 2; - db.store(2, &[(&frozen_pubkey, &account)]); + db.store_uncached(2, &[(&frozen_pubkey, &account)]); account.lamports = 1; - db.store(3, &[(&frozen_pubkey, &account)]); + db.store_uncached(3, &[(&frozen_pubkey, &account)]); } #[test] @@ -4725,14 +5609,14 @@ pub mod tests { let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); let mut account = Account::new(1, 42, &frozen_pubkey); - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); let ancestors = vec![(0, 0)].into_iter().collect(); db.freeze_accounts(&ancestors, &[frozen_pubkey]); // Store with a decrease below the frozen amount of lamports is not ok account.lamports -= 1; - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); } #[test] @@ -4758,13 +5642,13 @@ pub mod tests { let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); let mut account = Account::new(1, 42, &frozen_pubkey); - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); let ancestors = vec![(0, 0)].into_iter().collect(); db.freeze_accounts(&ancestors, &[frozen_pubkey]); account.data[0] = 42; - db.store(0, &[(&frozen_pubkey, &account)]); + db.store_uncached(0, &[(&frozen_pubkey, &account)]); } #[test] @@ -4780,7 +5664,7 @@ pub mod tests { StoredMeta, AccountMeta, [u8; ACCOUNT_DATA_LEN], - usize, // for StoredAccount::offset + usize, // for StoredAccountMeta::offset Hash, ); const INPUT_LEN: usize = std::mem::size_of::(); @@ -4796,11 +5680,12 @@ pub mod tests { let (slot, meta, account_meta, data, offset, hash): InputTuple = unsafe { std::mem::transmute::(blob) }; - let stored_account = StoredAccount { + let stored_account = StoredAccountMeta { meta: &meta, account_meta: &account_meta, data: &data, offset, + stored_size: CACHE_VIRTUAL_STORED_SIZE, hash: &hash, }; let account = stored_account.clone_account(); @@ -4810,7 +5695,7 @@ pub mod tests { assert_eq!( AccountsDB::hash_stored_account(slot, &stored_account, &ClusterType::Development), expected_account_hash, - "StoredAccount's data layout might be changed; update hashing if needed." + "StoredAccountMeta's data layout might be changed; update hashing if needed." ); assert_eq!( AccountsDB::hash_account( @@ -4820,7 +5705,7 @@ pub mod tests { &ClusterType::Development ), expected_account_hash, - "Account-based hashing must be consistent with StoredAccount-based one." + "Account-based hashing must be consistent with StoredAccountMeta-based one." 
); } @@ -4835,11 +5720,11 @@ pub mod tests { let account = Account::new(1, some_data_len, &key); let ancestors = vec![(some_slot, 0)].into_iter().collect(); - db.store(some_slot, &[(&key, &account)]); + db.store_uncached(some_slot, &[(&key, &account)]); let mut account = db.load_slow(&ancestors, &key).unwrap().0; account.lamports -= 1; account.executable = true; - db.store(some_slot, &[(&key, &account)]); + db.store_uncached(some_slot, &[(&key, &account)]); db.add_root(some_slot); let bank_hashes = db.bank_hashes.read().unwrap(); @@ -4863,7 +5748,7 @@ pub mod tests { let account = Account::new(1, some_data_len, &key); let ancestors = vec![(some_slot, 0)].into_iter().collect(); - db.store(some_slot, &[(&key, &account)]); + db.store_uncached(some_slot, &[(&key, &account)]); db.add_root(some_slot); db.update_accounts_hash(some_slot, &ancestors, true); assert_matches!( @@ -4905,7 +5790,7 @@ pub mod tests { let account = Account::new(1, some_data_len, &key); let ancestors = vec![(some_slot, 0)].into_iter().collect(); - db.store(some_slot, &[(&key, &account)]); + db.store_uncached(some_slot, &[(&key, &account)]); db.add_root(some_slot); db.update_accounts_hash(some_slot, &ancestors, true); assert_matches!( @@ -4914,7 +5799,7 @@ pub mod tests { ); let native_account_pubkey = solana_sdk::pubkey::new_rand(); - db.store( + db.store_uncached( some_slot, &[( &native_account_pubkey, @@ -4974,7 +5859,7 @@ pub mod tests { db.hash_accounts(some_slot, accounts, &ClusterType::Development); // provide bogus account hashes let some_hash = Hash::new(&[0xca; HASH_BYTES]); - db.store_accounts_default(some_slot, accounts, &[some_hash]); + db.store_accounts_default(some_slot, accounts, &[some_hash], false); db.add_root(some_slot); assert_matches!( db.verify_bank_hash_and_lamports(some_slot, &ancestors, 1, true), @@ -4992,7 +5877,7 @@ pub mod tests { let account = Account::new(lamports, data_len, &solana_sdk::pubkey::new_rand()); // pre-populate with a smaller empty store db.create_and_insert_store(1, 8192, "test_storage_finder"); - db.store(1, &[(&key, &account)]); + db.store_uncached(1, &[(&key, &account)]); } #[test] @@ -5012,7 +5897,7 @@ pub mod tests { let after_slot = base_slot + 1; db.add_root(base_slot); - db.store(base_slot, &[(&key, &account)]); + db.store_uncached(base_slot, &[(&key, &account)]); assert!(db.get_snapshot_storages(before_slot).is_empty()); assert_eq!(1, db.get_snapshot_storages(base_slot).len()); @@ -5028,7 +5913,7 @@ pub mod tests { let base_slot = 0; let after_slot = base_slot + 1; - db.store(base_slot, &[(&key, &account)]); + db.store_uncached(base_slot, &[(&key, &account)]); db.storage .get_slot_stores(base_slot) .unwrap() @@ -5038,7 +5923,7 @@ pub mod tests { db.add_root(base_slot); assert!(db.get_snapshot_storages(after_slot).is_empty()); - db.store(base_slot, &[(&key, &account)]); + db.store_uncached(base_slot, &[(&key, &account)]); assert_eq!(1, db.get_snapshot_storages(after_slot).len()); } @@ -5051,7 +5936,7 @@ pub mod tests { let base_slot = 0; let after_slot = base_slot + 1; - db.store(base_slot, &[(&key, &account)]); + db.store_uncached(base_slot, &[(&key, &account)]); assert!(db.get_snapshot_storages(after_slot).is_empty()); db.add_root(base_slot); @@ -5067,7 +5952,7 @@ pub mod tests { let base_slot = 0; let after_slot = base_slot + 1; - db.store(base_slot, &[(&key, &account)]); + db.store_uncached(base_slot, &[(&key, &account)]); db.add_root(base_slot); assert_eq!(1, db.get_snapshot_storages(after_slot).len()); @@ -5079,7 +5964,7 @@ pub mod tests { .values() .next() 
.unwrap() - .remove_account(); + .remove_account(0); assert!(db.get_snapshot_storages(after_slot).is_empty()); } @@ -5089,7 +5974,7 @@ pub mod tests { let accounts = AccountsDB::new(Vec::new(), &ClusterType::Development); let pubkey = solana_sdk::pubkey::new_rand(); let account = Account::new(1, 0, &Account::default().owner); - accounts.store(0, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); let storage_entry = accounts .storage .get_slot_stores(0) @@ -5100,8 +5985,8 @@ pub mod tests { .next() .unwrap() .clone(); - storage_entry.remove_account(); - storage_entry.remove_account(); + storage_entry.remove_account(0); + storage_entry.remove_account(0); } #[test] @@ -5129,29 +6014,29 @@ pub mod tests { // create intermediate updates to purged_pubkey1 so that // generate_index must add slots as root last at once current_slot += 1; - accounts.store(current_slot, &[(&pubkey, &account)]); - accounts.store(current_slot, &[(&purged_pubkey1, &account2)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &account2)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey1, &account2)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &account2)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey1, &account2)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &account2)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey1, &zero_lamport_account)]); - accounts.store(current_slot, &[(&purged_pubkey2, &account3)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey1, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey2, &account3)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&purged_pubkey2, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&purged_pubkey2, &zero_lamport_account)]); accounts.add_root(current_slot); current_slot += 1; - accounts.store(current_slot, &[(&dummy_pubkey, &dummy_account)]); + accounts.store_uncached(current_slot, &[(&dummy_pubkey, &dummy_account)]); accounts.add_root(current_slot); accounts.print_count_and_status("before reconstruct"); @@ -5189,17 +6074,17 @@ pub mod tests { // A: Initialize AccountsDB with pubkey1 and pubkey2 current_slot += 1; - accounts.store(current_slot, &[(&pubkey1, &account)]); - accounts.store(current_slot, &[(&pubkey2, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey1, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey2, &account)]); accounts.add_root(current_slot); // B: Test multiple updates to pubkey1 in a single slot/storage current_slot += 1; - assert_eq!(0, accounts.alive_account_count_in_store(current_slot)); + assert_eq!(0, accounts.alive_account_count_in_slot(current_slot)); assert_eq!(1, accounts.ref_count_for_pubkey(&pubkey1)); - accounts.store(current_slot, &[(&pubkey1, &account2)]); - accounts.store(current_slot, &[(&pubkey1, &account2)]); - assert_eq!(1, accounts.alive_account_count_in_store(current_slot)); + accounts.store_uncached(current_slot, &[(&pubkey1, &account2)]); + accounts.store_uncached(current_slot, &[(&pubkey1, &account2)]); + assert_eq!(1, accounts.alive_account_count_in_slot(current_slot)); // Stores to same pubkey, same slot only count once towards the // ref count assert_eq!(2, 
accounts.ref_count_for_pubkey(&pubkey1)); @@ -5208,14 +6093,14 @@ pub mod tests { // C: Yet more update to trigger lazy clean of step A current_slot += 1; assert_eq!(2, accounts.ref_count_for_pubkey(&pubkey1)); - accounts.store(current_slot, &[(&pubkey1, &account3)]); + accounts.store_uncached(current_slot, &[(&pubkey1, &account3)]); assert_eq!(3, accounts.ref_count_for_pubkey(&pubkey1)); accounts.add_root(current_slot); // D: Make pubkey1 0-lamport; also triggers clean of step B current_slot += 1; assert_eq!(3, accounts.ref_count_for_pubkey(&pubkey1)); - accounts.store(current_slot, &[(&pubkey1, &zero_lamport_account)]); + accounts.store_uncached(current_slot, &[(&pubkey1, &zero_lamport_account)]); accounts.clean_accounts(None); assert_eq!( @@ -5228,7 +6113,7 @@ pub mod tests { // E: Avoid missing bank hash error current_slot += 1; - accounts.store(current_slot, &[(&dummy_pubkey, &dummy_account)]); + accounts.store_uncached(current_slot, &[(&dummy_pubkey, &dummy_account)]); accounts.add_root(current_slot); assert_load_account(&accounts, current_slot, pubkey1, zero_lamport); @@ -5249,7 +6134,7 @@ pub mod tests { // F: Finally, make Step A cleanable current_slot += 1; - accounts.store(current_slot, &[(&pubkey2, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey2, &account)]); accounts.add_root(current_slot); // Do clean @@ -5262,11 +6147,11 @@ pub mod tests { } #[test] - fn test_clean_dead_slots_empty() { + fn test_clean_stored_dead_slots_empty() { let accounts = AccountsDB::new_single(); let mut dead_slots = HashSet::new(); dead_slots.insert(10); - accounts.clean_dead_slots(&dead_slots, None); + accounts.clean_stored_dead_slots(&dead_slots, None); } #[test] @@ -5274,7 +6159,7 @@ pub mod tests { let accounts = AccountsDB::new_single(); for _ in 0..10 { - assert_eq!(0, accounts.process_stale_slot()); + accounts.shrink_candidate_slots(); } accounts.shrink_all_slots(); @@ -5282,14 +6167,15 @@ pub mod tests { #[test] fn test_shrink_next_slots() { - let accounts = AccountsDB::new_single(); + let mut accounts = AccountsDB::new_single(); + accounts.caching_enabled = false; let mut current_slot = 7; assert_eq!( vec![None, None, None], (0..3) - .map(|_| accounts.next_shrink_slot()) + .map(|_| accounts.next_shrink_slot_v1()) .collect::>() ); @@ -5298,7 +6184,7 @@ pub mod tests { assert_eq!( vec![Some(7), Some(7), Some(7)], (0..3) - .map(|_| accounts.next_shrink_slot()) + .map(|_| accounts.next_shrink_slot_v1()) .collect::>() ); @@ -5306,7 +6192,7 @@ pub mod tests { accounts.add_root(current_slot); let slots = (0..6) - .map(|_| accounts.next_shrink_slot()) + .map(|_| accounts.next_shrink_slot_v1()) .collect::>(); // Because the origin of this data is HashMap (not BTreeMap), key order is arbitrary per cycle. 
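An illustrative aside on the comment above, not part of the patch: Rust's HashMap makes no iteration-order promise, while BTreeMap always yields keys in sorted order, which is why the test sorts its candidate slots before asserting. A minimal standalone check:

    use std::collections::{BTreeMap, HashMap};

    fn main() {
        let pairs = vec![(2u64, "b"), (0, "a"), (1, "c")];

        // BTreeMap always yields keys in sorted order.
        let btree: BTreeMap<u64, &str> = pairs.iter().cloned().collect();
        assert_eq!(btree.keys().copied().collect::<Vec<_>>(), vec![0, 1, 2]);

        // HashMap gives no ordering guarantee, so sort before comparing.
        let hash: HashMap<u64, &str> = pairs.into_iter().collect();
        let mut keys: Vec<u64> = hash.keys().copied().collect();
        keys.sort_unstable();
        assert_eq!(keys, vec![0, 1, 2]);
    }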
@@ -5318,11 +6204,12 @@ pub mod tests { #[test] fn test_shrink_reset_uncleaned_roots() { - let accounts = AccountsDB::new_single(); + let mut accounts = AccountsDB::new_single(); + accounts.caching_enabled = false; - accounts.reset_uncleaned_roots(); + accounts.reset_uncleaned_roots_v1(); assert_eq!( - *accounts.shrink_candidate_slots.lock().unwrap(), + *accounts.shrink_candidate_slots_v1.lock().unwrap(), vec![] as Vec ); @@ -5330,18 +6217,18 @@ pub mod tests { accounts.add_root(1); accounts.add_root(2); - accounts.reset_uncleaned_roots(); - let actual_slots = accounts.shrink_candidate_slots.lock().unwrap().clone(); + accounts.reset_uncleaned_roots_v1(); + let actual_slots = accounts.shrink_candidate_slots_v1.lock().unwrap().clone(); assert_eq!(actual_slots, vec![] as Vec); - accounts.reset_uncleaned_roots(); - let mut actual_slots = accounts.shrink_candidate_slots.lock().unwrap().clone(); + accounts.reset_uncleaned_roots_v1(); + let mut actual_slots = accounts.shrink_candidate_slots_v1.lock().unwrap().clone(); actual_slots.sort_unstable(); assert_eq!(actual_slots, vec![0, 1, 2]); accounts.accounts_index.clear_roots(); let mut actual_slots = (0..5) - .map(|_| accounts.next_shrink_slot()) + .map(|_| accounts.next_shrink_slot_v1()) .collect::>(); actual_slots.sort(); assert_eq!(actual_slots, vec![None, None, Some(0), Some(1), Some(2)],); @@ -5368,7 +6255,7 @@ pub mod tests { current_slot += 1; for pubkey in &pubkeys { - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); } let shrink_slot = current_slot; accounts.add_root(current_slot); @@ -5378,7 +6265,7 @@ pub mod tests { let updated_pubkeys = &pubkeys[0..pubkey_count - pubkey_count_after_shrink]; for pubkey in updated_pubkeys { - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); } accounts.add_root(current_slot); @@ -5414,7 +6301,7 @@ pub mod tests { } #[test] - fn test_shrink_stale_slots_skipped() { + fn test_shrink_candidate_slots() { solana_logger::setup(); let accounts = AccountsDB::new_single(); @@ -5434,7 +6321,7 @@ pub mod tests { current_slot += 1; for pubkey in &pubkeys { - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); } let shrink_slot = current_slot; accounts.add_root(current_slot); @@ -5444,7 +6331,65 @@ pub mod tests { let updated_pubkeys = &pubkeys[0..pubkey_count - pubkey_count_after_shrink]; for pubkey in updated_pubkeys { - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); + } + accounts.add_root(current_slot); + accounts.clean_accounts(None); + + assert_eq!( + pubkey_count, + accounts.all_account_count_in_append_vec(shrink_slot) + ); + + // Only, try to shrink stale slots, nothing happens because 90/100 + // is not small enough to do a shrink + accounts.shrink_candidate_slots(); + assert_eq!( + pubkey_count, + accounts.all_account_count_in_append_vec(shrink_slot) + ); + + // Now, do full-shrink. 
+ accounts.shrink_all_slots(); + assert_eq!( + pubkey_count_after_shrink, + accounts.all_account_count_in_append_vec(shrink_slot) + ); + } + + #[test] + fn test_shrink_stale_slots_skipped() { + solana_logger::setup(); + + let mut accounts = AccountsDB::new_single(); + accounts.caching_enabled = false; + + let pubkey_count = 30000; + let pubkeys: Vec<_> = (0..pubkey_count) + .map(|_| solana_sdk::pubkey::new_rand()) + .collect(); + + let some_lamport = 223; + let no_data = 0; + let owner = Account::default().owner; + + let account = Account::new(some_lamport, no_data, &owner); + + let mut current_slot = 0; + + current_slot += 1; + for pubkey in &pubkeys { + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); + } + let shrink_slot = current_slot; + accounts.add_root(current_slot); + + current_slot += 1; + let pubkey_count_after_shrink = 25000; + let updated_pubkeys = &pubkeys[0..pubkey_count - pubkey_count_after_shrink]; + + for pubkey in updated_pubkeys { + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); } accounts.add_root(current_slot); @@ -5456,7 +6401,7 @@ pub mod tests { ); // Only, try to shrink stale slots. - accounts.shrink_all_stale_slots(); + accounts.shrink_all_stale_slots_v1(); assert_eq!( pubkey_count, accounts.all_account_count_in_append_vec(shrink_slot) @@ -5480,21 +6425,25 @@ pub mod tests { let info0 = AccountInfo { store_id: 0, offset: 0, + stored_size: 0, lamports: 0, }; let info1 = AccountInfo { store_id: 1, offset: 0, + stored_size: 0, lamports: 0, }; let info2 = AccountInfo { store_id: 2, offset: 0, + stored_size: 0, lamports: 0, }; let info3 = AccountInfo { store_id: 3, offset: 0, + stored_size: 0, lamports: 0, }; let mut reclaims = vec![]; @@ -5552,10 +6501,10 @@ pub mod tests { info3, &mut reclaims, ); - accounts_index.add_root(0); - accounts_index.add_root(1); - accounts_index.add_root(2); - accounts_index.add_root(3); + accounts_index.add_root(0, false); + accounts_index.add_root(1, false); + accounts_index.add_root(2, false); + accounts_index.add_root(3, false); let mut purges = HashMap::new(); let (key0_entry, _) = accounts_index.get(&key0, None, None).unwrap(); purges.insert(key0, accounts_index.roots_and_ref_count(&key0_entry, None)); @@ -5730,7 +6679,7 @@ pub mod tests { let accounts = AccountsDB::new_single(); let account = Account::default(); let pubkey = solana_sdk::pubkey::new_rand(); - accounts.store(0, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); let slot_stores = accounts.storage.get_slot_stores(0).unwrap(); let mut total_len = 0; for (_id, store) in slot_stores.read().unwrap().iter() { @@ -5751,14 +6700,14 @@ pub mod tests { for i in 0..num_accounts { let account = Account::new((i + 1) as u64, size, &Pubkey::default()); let pubkey = solana_sdk::pubkey::new_rand(); - accounts.store(0, &[(&pubkey, &account)]); + accounts.store_uncached(0, &[(&pubkey, &account)]); keys.push(pubkey); } accounts.add_root(0); for (i, key) in keys[1..].iter().enumerate() { let account = Account::new((1 + i + num_accounts) as u64, size, &Pubkey::default()); - accounts.store(1, &[(key, &account)]); + accounts.store_uncached(1, &[(key, &account)]); } accounts.add_root(1); accounts.clean_accounts(None); @@ -5775,7 +6724,7 @@ pub mod tests { i + 20, &Pubkey::default(), ); - accounts.store(2, &[(key, &account)]); + accounts.store_uncached(2, &[(key, &account)]); account_refs.push(account); } assert!(accounts.recycle_stores.read().unwrap().len() < num_stores); @@ -5797,8 +6746,8 @@ pub mod tests { let zero_lamport_account = 
Account::new(0, 0, &Account::default().owner); // Store zero lamport account into slots 0 and 1, root both slots - db.store(0, &[(&account_key, &zero_lamport_account)]); - db.store(1, &[(&account_key, &zero_lamport_account)]); + db.store_uncached(0, &[(&account_key, &zero_lamport_account)]); + db.store_uncached(1, &[(&account_key, &zero_lamport_account)]); db.add_root(0); db.add_root(1); @@ -5811,4 +6760,354 @@ pub mod tests { Some((zero_lamport_account, 1)) ); } + + #[test] + fn test_store_load_cached() { + let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); + db.caching_enabled = true; + let key = Pubkey::default(); + let account0 = Account::new(1, 0, &key); + let slot = 0; + db.store_cached(slot, &[(&key, &account0)]); + + // Load with no ancestors and no root will return nothing + assert!(db.load_slow(&HashMap::new(), &key).is_none()); + + // Load with ancestors not equal to `slot` will return nothing + let ancestors = vec![(slot + 1, 1)].into_iter().collect(); + assert!(db.load_slow(&ancestors, &key).is_none()); + + // Load with ancestors equal to `slot` will return the account + let ancestors = vec![(slot, 1)].into_iter().collect(); + assert_eq!( + db.load_slow(&ancestors, &key), + Some((account0.clone(), slot)) + ); + + // Adding root will return the account even without ancestors + db.add_root(slot); + assert_eq!(db.load_slow(&HashMap::new(), &key), Some((account0, slot))); + } + + #[test] + fn test_store_flush_load_cached() { + let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); + db.caching_enabled = true; + let key = Pubkey::default(); + let account0 = Account::new(1, 0, &key); + let slot = 0; + db.store_cached(slot, &[(&key, &account0)]); + db.mark_slot_frozen(slot); + + // No root was added yet, requires an ancestor to find + // the account + db.force_flush_accounts_cache(); + let ancestors = vec![(slot, 1)].into_iter().collect(); + assert_eq!( + db.load_slow(&ancestors, &key), + Some((account0.clone(), slot)) + ); + + // Add root then flush + db.add_root(slot); + db.force_flush_accounts_cache(); + assert_eq!(db.load_slow(&HashMap::new(), &key), Some((account0, slot))); + } + + #[test] + fn test_flush_accounts_cache() { + let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development); + db.caching_enabled = true; + let account0 = Account::new(1, 0, &Pubkey::default()); + + let unrooted_slot = 4; + let root5 = 5; + let root6 = 6; + let unrooted_key = solana_sdk::pubkey::new_rand(); + let key5 = solana_sdk::pubkey::new_rand(); + let key6 = solana_sdk::pubkey::new_rand(); + db.store_cached(unrooted_slot, &[(&unrooted_key, &account0)]); + db.store_cached(root5, &[(&key5, &account0)]); + db.store_cached(root6, &[(&key6, &account0)]); + for slot in &[unrooted_slot, root5, root6] { + db.mark_slot_frozen(*slot); + } + db.add_root(root5); + db.add_root(root6); + + // Unrooted slot should be able to be fetched before the flush + let ancestors = vec![(unrooted_slot, 1)].into_iter().collect(); + assert_eq!( + db.load_slow(&ancestors, &unrooted_key), + Some((account0.clone(), unrooted_slot)) + ); + db.force_flush_accounts_cache(); + + // After the flush, the unrooted slot is still in the cache + assert!(db.load_slow(&ancestors, &unrooted_key).is_some()); + assert!(db + .accounts_index + .get_account_read_entry(&unrooted_key) + .is_some()); + assert_eq!(db.accounts_cache.num_slots(), 1); + assert!(db.accounts_cache.slot_cache(unrooted_slot).is_some()); + assert_eq!( + db.load_slow(&HashMap::new(), &key5), + Some((account0.clone(), root5)) + ); + 
assert_eq!(
+            db.load_slow(&HashMap::new(), &key6),
+            Some((account0, root6))
+        );
+    }
+
+    #[test]
+    fn test_flush_accounts_cache_if_needed() {
+        run_test_flush_accounts_cache_if_needed(0, 2 * MAX_CACHE_SLOTS);
+        run_test_flush_accounts_cache_if_needed(2 * MAX_CACHE_SLOTS, 0);
+        run_test_flush_accounts_cache_if_needed(MAX_CACHE_SLOTS - 1, 0);
+        run_test_flush_accounts_cache_if_needed(0, MAX_CACHE_SLOTS - 1);
+        run_test_flush_accounts_cache_if_needed(MAX_CACHE_SLOTS, 0);
+        run_test_flush_accounts_cache_if_needed(0, MAX_CACHE_SLOTS);
+        run_test_flush_accounts_cache_if_needed(2 * MAX_CACHE_SLOTS, 2 * MAX_CACHE_SLOTS);
+        run_test_flush_accounts_cache_if_needed(MAX_CACHE_SLOTS - 1, MAX_CACHE_SLOTS - 1);
+        run_test_flush_accounts_cache_if_needed(MAX_CACHE_SLOTS, MAX_CACHE_SLOTS);
+    }
+
+    fn run_test_flush_accounts_cache_if_needed(num_roots: usize, num_unrooted: usize) {
+        let mut db = AccountsDB::new(Vec::new(), &ClusterType::Development);
+        db.caching_enabled = true;
+        let account0 = Account::new(1, 0, &Pubkey::default());
+        let mut keys = vec![];
+        let num_slots = 2 * MAX_CACHE_SLOTS;
+        for i in 0..num_roots + num_unrooted {
+            let key = Pubkey::new_unique();
+            db.store_cached(i as Slot, &[(&key, &account0)]);
+            keys.push(key);
+            db.mark_slot_frozen(i as Slot);
+            if i < num_roots {
+                db.add_root(i as Slot);
+            }
+        }
+
+        db.flush_accounts_cache_if_needed();
+
+        let total_slots = num_roots + num_unrooted;
+        // If the total number of slots is at most MAX_CACHE_SLOTS, nothing is flushed from the cache
+        if total_slots <= MAX_CACHE_SLOTS {
+            assert_eq!(db.accounts_cache.num_slots(), total_slots);
+        } else {
+            // Otherwise, all the roots are flushed, and only at most MAX_CACHE_SLOTS
+            // of the unrooted slots are kept in the cache
+            let expected_size = std::cmp::min(num_unrooted, MAX_CACHE_SLOTS);
+            if expected_size > 0 {
+                for unrooted_slot in total_slots - expected_size..total_slots {
+                    assert!(db
+                        .accounts_cache
+                        .slot_cache(unrooted_slot as Slot)
+                        .is_some());
+                }
+            }
+        }
+
+        // Should still be able to fetch all the accounts after flush
+        for (slot, key) in (0..num_slots as Slot).zip(keys) {
+            let ancestors = if slot < num_roots as Slot {
+                HashMap::new()
+            } else {
+                vec![(slot, 1)].into_iter().collect()
+            };
+            assert_eq!(
+                db.load_slow(&ancestors, &key),
+                Some((account0.clone(), slot))
+            );
+        }
+    }
+
+    #[test]
+    fn test_flush_cache_clean() {
+        let caching_enabled = true;
+        let db = Arc::new(AccountsDB::new_with_config(
+            Vec::new(),
+            &ClusterType::Development,
+            HashSet::new(),
+            caching_enabled,
+        ));
+
+        let account_key = Pubkey::new_unique();
+        let zero_lamport_account = Account::new(0, 0, &Account::default().owner);
+        let slot1_account = Account::new(1, 1, &Account::default().owner);
+        db.store_cached(0, &[(&account_key, &zero_lamport_account)]);
+        db.store_cached(1, &[(&account_key, &slot1_account)]);
+
+        db.add_root(0);
+        db.add_root(1);
+
+        // Clean should not remove anything yet as nothing has been flushed
+        db.clean_accounts(None);
+        let account = db
+            .do_load(&Ancestors::default(), &account_key, Some(0))
+            .unwrap();
+        assert_eq!(account.0.lamports, 0);
+
+        // Flush, then clean again. Should not need another root to initiate the cleaning
+        // because `accounts_index.uncleaned_roots` should be correct
+        db.force_flush_accounts_cache();
+        db.clean_accounts(None);
+        assert!(db
+            .do_load(&Ancestors::default(), &account_key, Some(0))
+            .is_none());
+    }
+
+    #[test]
+    fn test_scan_flush_accounts_cache_then_clean_drop() {
+        let caching_enabled = true;
+        let db = Arc::new(AccountsDB::new_with_config(
+            Vec::new(),
+            &ClusterType::Development,
+            HashSet::new(),
+            caching_enabled,
+        ));
+        let db_ = db.clone();
+        let account_key = Pubkey::new_unique();
+        let account_key2 = Pubkey::new_unique();
+        let zero_lamport_account = Account::new(0, 0, &Account::default().owner);
+        let slot1_account = Account::new(1, 1, &Account::default().owner);
+        let slot2_account = Account::new(2, 1, &Account::default().owner);
+        let exit = Arc::new(AtomicBool::new(false));
+        let exit_ = exit.clone();
+        let ready = Arc::new(AtomicBool::new(false));
+        let ready_ = ready.clone();
+
+        /*
+           Store the same account key into slots 0, 1, and 2 (zero lamports in
+           slot 0), where the root slots are 0 and 2, and slot 1 is unrooted.
+                   0 (root)
+                  / \
+                 1   2 (root)
+        */
+        db.store_cached(0, &[(&account_key, &zero_lamport_account)]);
+        db.store_cached(1, &[(&account_key, &slot1_account)]);
+        db.store_cached(2, &[(&account_key, &slot2_account)]);
+        // Fodder for the scan so that the lock on `account_key` is not held
+        db.store_cached(2, &[(&account_key2, &slot2_account)]);
+        db.add_root(0);
+        let max_scan_root = 0;
+        let scan_ancestors: Arc<Ancestors> = Arc::new(vec![(0, 1), (1, 1)].into_iter().collect());
+        let scan_ancestors_ = scan_ancestors.clone();
+        let t_scan = Builder::new()
+            .name("scan".to_string())
+            .spawn(move || {
+                db_.scan_accounts(
+                    &scan_ancestors_,
+                    |_collector: &mut Vec<(Pubkey, Account)>, maybe_account| {
+                        ready_.store(true, Ordering::Relaxed);
+                        if let Some((pubkey, _, _)) = maybe_account {
+                            // Do the wait on account_key2, because clean is happening
+                            // on `account_key`'s index and we don't want to block the clean.
+                            if *pubkey == account_key2 {
+                                loop {
+                                    if exit_.load(Ordering::Relaxed) {
+                                        break;
+                                    } else {
+                                        sleep(Duration::from_millis(10));
+                                    }
+                                }
+                            }
+                        }
+                    },
+                );
+            })
+            .unwrap();
+
+        // Wait for scan to start
+        while !ready.load(Ordering::Relaxed) {
+            sleep(Duration::from_millis(10));
+        }
+
+        // Add a new root 2
+        db.add_root(2);
+
+        // Flush the cache; slot 1 should remain in the cache, everything else should be flushed
+        db.force_flush_accounts_cache();
+        assert_eq!(db.accounts_cache.num_slots(), 1);
+        assert!(db.accounts_cache.slot_cache(1).is_some());
+
+        // Run clean: unrooted slot 1 should not be purged, and is still readable from the cache,
+        // because we're still doing a scan on it.
+        db.clean_accounts(None);
+        let account = db
+            .do_load(&scan_ancestors, &account_key, Some(max_scan_root))
+            .unwrap();
+        assert_eq!(account.0.lamports, slot1_account.lamports);
+
+        // When the scan is over, clean should not panic and should not purge something
+        // still in the cache.
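A hedged sketch of why clean leaves the scanned slot alone; the names here (ScanTracker, begin_scan, max_clean_root) are assumed for illustration and are not this patch's API. The idea is that the index counts in-flight scans per scan root, and clean caps its maximum clean root at the oldest pinned root. The test resumes right after this aside by signaling `exit` and joining the scan thread.

    use std::collections::BTreeMap;
    use std::sync::RwLock;

    // Assumed shape: map of scan root -> number of active scans pinned to it.
    struct ScanTracker {
        ongoing_scan_roots: RwLock<BTreeMap<u64, u64>>,
    }

    impl ScanTracker {
        fn begin_scan(&self, root: u64) {
            *self
                .ongoing_scan_roots
                .write()
                .unwrap()
                .entry(root)
                .or_default() += 1;
        }

        fn end_scan(&self, root: u64) {
            let mut roots = self.ongoing_scan_roots.write().unwrap();
            if let Some(count) = roots.get_mut(&root) {
                *count -= 1;
                if *count == 0 {
                    roots.remove(&root);
                }
            }
        }

        // Clean never purges state a still-running scan may read: it is
        // capped at the oldest root any scan is pinned to.
        fn max_clean_root(&self, requested: Option<u64>) -> Option<u64> {
            let roots = self.ongoing_scan_roots.read().unwrap();
            let min_scan_root = roots.keys().next().copied();
            match (requested, min_scan_root) {
                (Some(r), Some(s)) => Some(r.min(s)),
                (None, s) => s,
                (r, None) => r,
            }
        }
    }

    fn main() {
        let tracker = ScanTracker { ongoing_scan_roots: RwLock::new(BTreeMap::new()) };
        tracker.begin_scan(0);
        // While the scan at root 0 is live, a clean request for root 2 is capped at 0.
        assert_eq!(tracker.max_clean_root(Some(2)), Some(0));
        tracker.end_scan(0);
        assert_eq!(tracker.max_clean_root(Some(2)), Some(2));
    }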
+ exit.store(true, Ordering::Relaxed); + t_scan.join().unwrap(); + db.clean_accounts(None); + let account = db + .do_load(&scan_ancestors, &account_key, Some(max_scan_root)) + .unwrap(); + assert_eq!(account.0.lamports, slot1_account.lamports); + + // Simulate dropping the bank, which finally removes the slot from the cache + db.purge_slot(1); + assert!(db + .do_load(&scan_ancestors, &account_key, Some(max_scan_root)) + .is_none()); + } + + #[test] + fn test_alive_bytes() { + let caching_enabled = true; + let accounts_db = AccountsDB::new_with_config( + Vec::new(), + &ClusterType::Development, + HashSet::new(), + caching_enabled, + ); + let slot: Slot = 0; + let num_keys = 10; + + for data_size in 0..num_keys { + let account = Account::new(1, data_size, &Pubkey::default()); + accounts_db.store_cached(slot, &[(&Pubkey::new_unique(), &account)]); + } + + accounts_db.add_root(slot); + accounts_db.force_flush_accounts_cache(); + + let mut storage_maps: Vec> = accounts_db + .storage + .get_slot_stores(slot) + .map(|res| res.read().unwrap().values().cloned().collect()) + .unwrap_or_default(); + + // Flushing cache should only create one storage entry + assert_eq!(storage_maps.len(), 1); + let storage0 = storage_maps.pop().unwrap(); + let accounts = storage0.accounts.accounts(0); + + for account in accounts { + let before_size = storage0.alive_bytes.load(Ordering::Relaxed); + let account_info = accounts_db + .accounts_index + .get_account_read_entry(&account.meta.pubkey) + .map(|locked_entry| { + // Should only be one entry per key, since every key was only stored to slot 0 + locked_entry.slot_list()[0].clone() + }) + .unwrap(); + let removed_data_size = account_info.1.stored_size; + // Fetching the account from storage should return the same + // stored size as in the index. 
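Before the assertions that follow, a sketch of the accounting they verify. Field and method names here are assumed for illustration; the patch's real storage entry threads `stored_size` through `remove_account(..)`, as seen in earlier hunks, and the invariant checked below is exactly `before == after + removed stored_size`.

    use std::sync::atomic::{AtomicUsize, Ordering};

    struct StorageEntry {
        alive_bytes: AtomicUsize, // bytes occupied by accounts still alive here
    }

    impl StorageEntry {
        fn add_account(&self, stored_size: usize) {
            self.alive_bytes.fetch_add(stored_size, Ordering::Relaxed);
        }

        fn remove_account(&self, stored_size: usize) {
            self.alive_bytes.fetch_sub(stored_size, Ordering::Relaxed);
        }
    }

    fn main() {
        let store = StorageEntry { alive_bytes: AtomicUsize::new(0) };
        store.add_account(136);
        store.add_account(200);

        let before_size = store.alive_bytes.load(Ordering::Relaxed);
        store.remove_account(136);
        let after_size = store.alive_bytes.load(Ordering::Relaxed);

        // Mirrors the test's invariant: before == after + removed stored_size.
        assert_eq!(before_size, after_size + 136);
    }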
+ assert_eq!(removed_data_size, account.stored_size); + assert_eq!(account_info.0, slot); + let reclaims = vec![account_info]; + accounts_db.remove_dead_accounts(&reclaims, None, None); + let after_size = storage0.alive_bytes.load(Ordering::Relaxed); + assert_eq!(before_size, after_size + account.stored_size); + } + } } diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index 3f99779711..8c062a8709 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -34,6 +34,22 @@ pub type AccountMap = BTreeMap; type AccountMapEntry = Arc>; +pub trait IsCached { + fn is_cached(&self) -> bool; +} + +impl IsCached for bool { + fn is_cached(&self) -> bool { + false + } +} + +impl IsCached for u64 { + fn is_cached(&self) -> bool { + false + } +} + enum ScanTypes> { Unindexed(Option), Indexed(IndexKey), @@ -220,7 +236,7 @@ pub struct AccountsIndex { ongoing_scan_roots: RwLock>, } -impl AccountsIndex { +impl AccountsIndex { fn iter(&self, range: Option) -> AccountsIndexIterator where R: RangeBounds, @@ -592,22 +608,24 @@ impl AccountsIndex { pub fn purge_exact( &self, pubkey: &Pubkey, - slots: HashSet, + slots_to_purge: &HashSet, + reclaims: &mut SlotList, account_indexes: &HashSet, - ) -> (SlotList, bool) { + ) -> bool { let res = { let mut write_account_map_entry = self.get_account_write_entry(pubkey).unwrap(); write_account_map_entry.slot_list_mut(|slot_list| { - let reclaims = slot_list - .iter() - .filter(|(slot, _)| slots.contains(&slot)) - .cloned() - .collect(); - slot_list.retain(|(slot, _)| !slots.contains(slot)); - (reclaims, slot_list.is_empty()) + slot_list.retain(|(slot, item)| { + let should_purge = slots_to_purge.contains(&slot); + if should_purge { + reclaims.push((*slot, item.clone())); + } + !should_purge + }); + slot_list.is_empty() }) }; - self.purge_secondary_indexes_by_inner_key(pubkey, Some(&slots), account_indexes); + self.purge_secondary_indexes_by_inner_key(pubkey, Some(&slots_to_purge), account_indexes); res } @@ -820,7 +838,7 @@ impl AccountsIndex { let mut purged_slots: HashSet = HashSet::new(); list.retain(|(slot, value)| { - let should_purge = Self::can_purge(max_root, *slot); + let should_purge = Self::can_purge(max_root, *slot) && !value.is_cached(); if should_purge { reclaims.push((*slot, value.clone())); purged_slots.insert(*slot); @@ -831,6 +849,7 @@ impl AccountsIndex { self.purge_secondary_indexes_by_inner_key(pubkey, Some(&purged_slots), account_indexes); } + // `is_cached` closure is needed to work around the generic (`T`) indexed type. 
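To expand on that comment with a hedged sketch: every value type stored in the index implements IsCached (the trait and the bool/u64 impls appear earlier in this hunk), and purge logic keeps entries that are still cache-resident. ExampleInfo below is an assumed stand-in, not the real AccountInfo, which encodes cache residency differently.

    pub trait IsCached {
        fn is_cached(&self) -> bool;
    }

    // Assumed example value type for illustration only.
    #[derive(Clone)]
    struct ExampleInfo {
        in_cache: bool,
    }

    impl IsCached for ExampleInfo {
        fn is_cached(&self) -> bool {
            self.in_cache
        }
    }

    fn can_purge(max_root: u64, slot: u64) -> bool {
        slot < max_root
    }

    fn main() {
        let max_root = 5;
        let entries = vec![
            (1u64, ExampleInfo { in_cache: true }),  // old but still cached
            (2u64, ExampleInfo { in_cache: false }), // old and flushed to storage
        ];
        let mut reclaims = vec![];
        let kept: Vec<_> = entries
            .into_iter()
            .filter(|(slot, value)| {
                // Same shape as the retain() above: purge only old, non-cached entries.
                let should_purge = can_purge(max_root, *slot) && !value.is_cached();
                if should_purge {
                    reclaims.push(*slot);
                }
                !should_purge
            })
            .collect();
        assert_eq!(reclaims, vec![2]);
        assert_eq!(kept.len(), 1);
    }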
pub fn clean_rooted_entries( &self, pubkey: &Pubkey, @@ -851,28 +870,6 @@ impl AccountsIndex { } } - pub fn clean_unrooted_entries_by_slot( - &self, - purge_slot: Slot, - pubkey: &Pubkey, - reclaims: &mut SlotList, - account_indexes: &HashSet, - ) { - if let Some(mut locked_entry) = self.get_account_write_entry(pubkey) { - locked_entry.slot_list_mut(|slot_list| { - slot_list.retain(|(slot, entry)| { - if *slot == purge_slot { - reclaims.push((*slot, entry.clone())); - } - *slot != purge_slot - }); - }); - } - - let purge_slot: HashSet = vec![purge_slot].into_iter().collect(); - self.purge_secondary_indexes_by_inner_key(pubkey, Some(&purge_slot), account_indexes); - } - pub fn can_purge(max_root: Slot, slot: Slot) -> bool { slot < max_root } @@ -881,11 +878,24 @@ impl AccountsIndex { self.roots_tracker.read().unwrap().roots.contains(&slot) } - pub fn add_root(&self, slot: Slot) { + pub fn add_root(&self, slot: Slot, caching_enabled: bool) { let mut w_roots_tracker = self.roots_tracker.write().unwrap(); w_roots_tracker.roots.insert(slot); - w_roots_tracker.uncleaned_roots.insert(slot); - w_roots_tracker.max_root = std::cmp::max(slot, w_roots_tracker.max_root); + // we delay cleaning until flushing! + if !caching_enabled { + w_roots_tracker.uncleaned_roots.insert(slot); + } + // `AccountsDb::flush_accounts_cache()` relies on roots being added in order + assert!(slot >= w_roots_tracker.max_root); + w_roots_tracker.max_root = slot; + } + + pub fn add_uncleaned_roots(&self, roots: I) + where + I: IntoIterator, + { + let mut w_roots_tracker = self.roots_tracker.write().unwrap(); + w_roots_tracker.uncleaned_roots.extend(roots); } fn max_root(&self) -> Slot { @@ -895,10 +905,29 @@ impl AccountsIndex { /// Remove the slot when the storage for the slot is freed /// Accounts no longer reference this slot. 
pub fn clean_dead_slot(&self, slot: Slot) { - let mut w_roots_tracker = self.roots_tracker.write().unwrap(); - w_roots_tracker.roots.remove(&slot); - w_roots_tracker.uncleaned_roots.remove(&slot); - w_roots_tracker.previous_uncleaned_roots.remove(&slot); + if self.is_root(slot) { + let (roots_len, uncleaned_roots_len, previous_uncleaned_roots_len) = { + let mut w_roots_tracker = self.roots_tracker.write().unwrap(); + w_roots_tracker.roots.remove(&slot); + w_roots_tracker.uncleaned_roots.remove(&slot); + w_roots_tracker.previous_uncleaned_roots.remove(&slot); + ( + w_roots_tracker.roots.len(), + w_roots_tracker.uncleaned_roots.len(), + w_roots_tracker.previous_uncleaned_roots.len(), + ) + }; + datapoint_info!( + "accounts_index_roots_len", + ("roots_len", roots_len as i64, i64), + ("uncleaned_roots_len", uncleaned_roots_len as i64, i64), + ( + "previous_uncleaned_roots_len", + previous_uncleaned_roots_len as i64, + i64 + ), + ); + } } pub fn reset_uncleaned_roots(&self, max_clean_root: Option) -> HashSet { @@ -1136,7 +1165,7 @@ pub mod tests { ); } - index.add_root(root_slot); + index.add_root(root_slot, false); (index, pubkeys) } @@ -1272,7 +1301,7 @@ pub mod tests { fn test_is_root() { let index = AccountsIndex::::default(); assert!(!index.is_root(0)); - index.add_root(0); + index.add_root(0, false); assert!(index.is_root(0)); } @@ -1292,7 +1321,7 @@ pub mod tests { ); assert!(gc.is_empty()); - index.add_root(0); + index.add_root(0, false); let (list, idx) = index.get(&key.pubkey(), None, None).unwrap(); assert_eq!(list.slot_list()[idx], (0, true)); } @@ -1300,8 +1329,8 @@ pub mod tests { #[test] fn test_clean_first() { let index = AccountsIndex::::default(); - index.add_root(0); - index.add_root(1); + index.add_root(0, false); + index.add_root(1, false); index.clean_dead_slot(0); assert!(index.is_root(1)); assert!(!index.is_root(0)); @@ -1311,8 +1340,8 @@ pub mod tests { fn test_clean_last() { //this behavior might be undefined, clean up should only occur on older slots let index = AccountsIndex::::default(); - index.add_root(0); - index.add_root(1); + index.add_root(0, false); + index.add_root(1, false); index.clean_dead_slot(1); assert!(!index.is_root(1)); assert!(index.is_root(0)); @@ -1322,8 +1351,8 @@ pub mod tests { fn test_clean_and_unclean_slot() { let index = AccountsIndex::::default(); assert_eq!(0, index.roots_tracker.read().unwrap().uncleaned_roots.len()); - index.add_root(0); - index.add_root(1); + index.add_root(0, false); + index.add_root(1, false); assert_eq!(2, index.roots_tracker.read().unwrap().uncleaned_roots.len()); assert_eq!( @@ -1348,8 +1377,8 @@ pub mod tests { .len() ); - index.add_root(2); - index.add_root(3); + index.add_root(2, false); + index.add_root(3, false); assert_eq!(4, index.roots_tracker.read().unwrap().roots.len()); assert_eq!(2, index.roots_tracker.read().unwrap().uncleaned_roots.len()); assert_eq!( @@ -1500,9 +1529,9 @@ pub mod tests { true, &mut gc, ); - index.add_root(0); - index.add_root(1); - index.add_root(3); + index.add_root(0, false); + index.add_root(1, false); + index.add_root(3, false); index.upsert( 4, &key.pubkey(), @@ -1559,7 +1588,7 @@ pub mod tests { let purges = index.purge_roots(&key.pubkey()); assert_eq!(purges, (vec![], false)); - index.add_root(1); + index.add_root(1, false); let purges = index.purge_roots(&key.pubkey()); assert_eq!(purges, (vec![(1, 10)], true)); @@ -1584,7 +1613,7 @@ pub mod tests { assert!(index.latest_slot(None, &slot_slice, None).is_none()); // Given a root, should return the root - index.add_root(5); + 
index.add_root(5, false); assert_eq!(index.latest_slot(None, &slot_slice, None).unwrap(), 1); // Given a max_root == root, should still return the root @@ -1666,7 +1695,12 @@ pub mod tests { slots.len() ); - index.purge_exact(&account_key, slots.into_iter().collect(), account_index); + index.purge_exact( + &account_key, + &slots.into_iter().collect(), + &mut vec![], + account_index, + ); assert!(secondary_index.index.is_empty()); assert!(secondary_index.reverse_index.is_empty()); @@ -1716,9 +1750,9 @@ pub mod tests { // Add a later root, earlier slots should be reclaimed slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; - index.add_root(1); + index.add_root(1, false); // Note 2 is not a root - index.add_root(5); + index.add_root(5, false); reclaims = vec![]; index.purge_older_root_entries( &Pubkey::default(), @@ -1732,7 +1766,7 @@ pub mod tests { // Add a later root that is not in the list, should not affect the outcome slot_list = vec![(1, true), (2, true), (5, true), (9, true)]; - index.add_root(6); + index.add_root(6, false); reclaims = vec![]; index.purge_older_root_entries( &Pubkey::default(), @@ -1977,7 +2011,7 @@ pub mod tests { assert!(secondary_index.get(&secondary_key1).is_empty()); assert_eq!(secondary_index.get(&secondary_key2), vec![account_key]); - // If another fork reintroduces secondary_key1, then it should be readded to the + // If another fork reintroduces secondary_key1, then it should be re-added to the // index let fork = slot + 1; index.upsert( @@ -1993,7 +2027,7 @@ pub mod tests { // If we set a root at fork, and clean, then the secondary_key1 should no longer // be findable - index.add_root(fork); + index.add_root(fork, false); index .get_account_write_entry(&account_key) .unwrap() diff --git a/runtime/src/append_vec.rs b/runtime/src/append_vec.rs index 5eb23f43dd..adb2162ee6 100644 --- a/runtime/src/append_vec.rs +++ b/runtime/src/append_vec.rs @@ -54,19 +54,31 @@ pub struct AccountMeta { pub rent_epoch: Epoch, } +impl<'a> From<&'a Account> for AccountMeta { + fn from(account: &'a Account) -> Self { + Self { + lamports: account.lamports, + owner: account.owner, + executable: account.executable, + rent_epoch: account.rent_epoch, + } + } +} + /// References to Memory Mapped memory /// The Account is stored separately from its data, so getting the actual account requires a clone #[derive(PartialEq, Debug)] -pub struct StoredAccount<'a> { +pub struct StoredAccountMeta<'a> { pub meta: &'a StoredMeta, /// account data pub account_meta: &'a AccountMeta, pub data: &'a [u8], pub offset: usize, + pub stored_size: usize, pub hash: &'a Hash, } -impl<'a> StoredAccount<'a> { +impl<'a> StoredAccountMeta<'a> { pub fn clone_account(&self) -> Account { Account { lamports: self.account_meta.lamports, @@ -366,17 +378,19 @@ impl AppendVec { Some((unsafe { &*ptr }, next)) } - pub fn get_account<'a>(&'a self, offset: usize) -> Option<(StoredAccount<'a>, usize)> { + pub fn get_account<'a>(&'a self, offset: usize) -> Option<(StoredAccountMeta<'a>, usize)> { let (meta, next): (&'a StoredMeta, _) = self.get_type(offset)?; let (account_meta, next): (&'a AccountMeta, _) = self.get_type(next)?; let (hash, next): (&'a Hash, _) = self.get_type(next)?; let (data, next) = self.get_slice(next, meta.data_len as usize)?; + let stored_size = next - offset; Some(( - StoredAccount { + StoredAccountMeta { meta, account_meta, data, offset, + stored_size, hash, }, next, @@ -392,7 +406,7 @@ impl AppendVec { self.path.clone() } - pub fn accounts(&self, mut start: usize) -> Vec { + pub fn 
accounts(&self, mut start: usize) -> Vec { let mut accounts = vec![]; while let Some((account, next)) = self.get_account(start) { accounts.push(account); @@ -411,12 +425,7 @@ impl AppendVec { let mut rv = Vec::with_capacity(accounts.len()); for ((stored_meta, account), hash) in accounts.iter().zip(hashes) { let meta_ptr = stored_meta as *const StoredMeta; - let account_meta = AccountMeta { - lamports: account.lamports, - owner: account.owner, - executable: account.executable, - rent_epoch: account.rent_epoch, - }; + let account_meta = AccountMeta::from(*account); let account_meta_ptr = &account_meta as *const AccountMeta; let data_len = stored_meta.data_len as usize; let data_ptr = account.data.as_ptr(); @@ -433,6 +442,11 @@ impl AppendVec { break; } } + + // The last entry in this offset needs to be the u64 aligned offset, because that's + // where the *next* entry will begin to be stored. + rv.push(u64_align!(*offset)); + rv } @@ -442,9 +456,12 @@ impl AppendVec { account: &Account, hash: Hash, ) -> Option { - self.append_accounts(&[(storage_meta, account)], &[hash]) - .first() - .cloned() + let res = self.append_accounts(&[(storage_meta, account)], &[hash]); + if res.len() == 1 { + None + } else { + res.first().cloned() + } } } @@ -511,7 +528,7 @@ pub mod tests { } } - impl<'a> StoredAccount<'a> { + impl<'a> StoredAccountMeta<'a> { #[allow(clippy::cast_ref_to_mut)] fn set_data_len_unsafe(&self, new_data_len: u64) { // UNSAFE: cast away & (= const ref) to &mut to force to mutate append-only (=read-only) AppendVec diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index fd87a30dfb..7ce87e404d 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -861,13 +861,22 @@ impl Default for BlockhashQueue { impl Bank { pub fn new(genesis_config: &GenesisConfig) -> Self { - Self::new_with_paths(&genesis_config, Vec::new(), &[], None, None, HashSet::new()) + Self::new_with_paths( + &genesis_config, + Vec::new(), + &[], + None, + None, + HashSet::new(), + false, + ) } #[cfg(test)] - pub(crate) fn new_with_indexes( + pub(crate) fn new_with_config( genesis_config: &GenesisConfig, account_indexes: HashSet, + accounts_db_caching_enabled: bool, ) -> Self { Self::new_with_paths( &genesis_config, @@ -876,6 +885,7 @@ impl Bank { None, None, account_indexes, + accounts_db_caching_enabled, ) } @@ -886,16 +896,18 @@ impl Bank { debug_keys: Option>>, additional_builtins: Option<&Builtins>, account_indexes: HashSet, + accounts_db_caching_enabled: bool, ) -> Self { let mut bank = Self::default(); bank.ancestors.insert(bank.slot(), 0); bank.transaction_debug_keys = debug_keys; bank.cluster_type = Some(genesis_config.cluster_type); - bank.rc.accounts = Arc::new(Accounts::new_with_indexes( + bank.rc.accounts = Arc::new(Accounts::new_with_config( paths, &genesis_config.cluster_type, account_indexes, + accounts_db_caching_enabled, )); bank.process_genesis_config(genesis_config); bank.finish_init(genesis_config, additional_builtins); @@ -1998,7 +2010,6 @@ impl Bank { // record and commit are finished, those transactions will be // committed before this write lock can be obtained here. 
let mut hash = self.hash.write().unwrap(); - if *hash == Hash::default() { // finish up any deferred changes to account state self.collect_rent_eagerly(); @@ -2010,12 +2021,19 @@ impl Bank { // freeze is a one-way trip, idempotent self.freeze_started.store(true, Relaxed); *hash = self.hash_internal_state(); + self.rc.accounts.accounts_db.mark_slot_frozen(self.slot()); } } // Should not be called outside of startup, will race with // concurrent cleaning logic in AccountsBackgroundService pub fn exhaustively_free_unused_resource(&self) { + let mut flush = Measure::start("flush"); + // Flush all the rooted accounts. Must be called after `squash()`, + // so that AccountsDb knows what the roots are. + self.force_flush_accounts_cache(); + flush.stop(); + let mut clean = Measure::start("clean"); // Don't clean the slot we're snapshotting because it may have zero-lamport // accounts that were included in the bank delta hash when the bank was frozen, @@ -2030,9 +2048,10 @@ impl Bank { info!( "exhaustively_free_unused_resource() + flush: {}, clean: {}, shrink: {}", - clean, shrink, + flush, clean, shrink, ); } @@ -3144,7 +3163,7 @@ impl Bank { } let mut write_time = Measure::start("write_time"); - self.rc.accounts.store_accounts( + self.rc.accounts.store_cached( self.slot(), txs, iteration_order, @@ -3841,7 +3860,9 @@ impl Bank { pub fn store_account(&self, pubkey: &Pubkey, account: &Account) { assert!(!self.freeze_started()); - self.rc.accounts.store_slow(self.slot(), pubkey, account); + self.rc + .accounts + .store_slow_cached(self.slot(), pubkey, account); if Stakes::is_stake(account) { self.stakes @@ -3851,6 +3872,17 @@ impl Bank { } } + pub fn force_flush_accounts_cache(&self) { + self.rc.accounts.accounts_db.force_flush_accounts_cache() + } + + pub fn flush_accounts_cache_if_needed(&self) { + self.rc + .accounts + .accounts_db + .flush_accounts_cache_if_needed() + } + fn store_account_and_update_capitalization(&self, pubkey: &Pubkey, new_account: &Account) { if let Some(old_account) = self.get_account(&pubkey) { match new_account.lamports.cmp(&old_account.lamports) { @@ -4538,7 +4570,7 @@ impl Bank { budget_recovery_delta: usize, ) -> usize { if consumed_budget == 0 { - let shrunken_account_count = self.rc.accounts.accounts_db.process_stale_slot(); + let shrunken_account_count = self.rc.accounts.accounts_db.process_stale_slot_v1(); if shrunken_account_count > 0 { datapoint_info!( "stale_slot_shrink", @@ -4550,6 +4582,10 @@ impl Bank { consumed_budget.saturating_sub(budget_recovery_delta) } + pub fn shrink_candidate_slots(&self) -> usize { + self.rc.accounts.accounts_db.shrink_candidate_slots() + } + pub fn secp256k1_program_enabled(&self) -> bool { self.feature_set .is_active(&feature_set::secp256k1_program_enabled::id()) @@ -4874,6 +4910,7 @@ fn is_simple_vote_transaction(transaction: &Transaction) -> bool { pub(crate) mod tests { use super::*; use crate::{ + accounts_db::SHRINK_RATIO, accounts_index::{AccountMap, Ancestors, ITER_BATCH_SIZE}, genesis_utils::{ activate_all_features, bootstrap_validator_stake_lamports, @@ -6726,7 +6763,7 @@ pub(crate) mod tests { .accounts .accounts_db .accounts_index - .add_root(genesis_bank1.slot() + 1); + .add_root(genesis_bank1.slot() + 1, false); bank1_without_zero .rc .accounts @@ -6997,10 +7034,14 @@ pub(crate) mod tests { let pubkey = solana_sdk::pubkey::new_rand(); let tx = system_transaction::transfer(&mint_keypair, &pubkey, 0, blockhash); bank.process_transaction(&tx).unwrap(); + bank.freeze(); bank.squash(); bank = Arc::new(new_from_parent(&bank)); 
} + bank.freeze(); + bank.squash(); + bank.force_flush_accounts_cache(); let hash = bank.update_accounts_hash(); bank.clean_accounts(false); assert_eq!(bank.update_accounts_hash(), hash); @@ -7037,9 +7078,11 @@ pub(crate) mod tests { assert!(bank0.verify_bank_hash()); // Squash and then verify hash_internal value + bank0.freeze(); bank0.squash(); assert!(bank0.verify_bank_hash()); + bank1.freeze(); bank1.squash(); bank1.update_accounts_hash(); assert!(bank1.verify_bank_hash()); @@ -7047,6 +7090,7 @@ pub(crate) mod tests { // keypair should have 0 tokens on both forks assert_eq!(bank0.get_account(&keypair.pubkey()), None); assert_eq!(bank1.get_account(&keypair.pubkey()), None); + bank1.force_flush_accounts_cache(); bank1.clean_accounts(false); assert!(bank1.verify_bank_hash()); @@ -8714,7 +8758,11 @@ pub(crate) mod tests { let (genesis_config, _mint_keypair) = create_genesis_config(500); let mut account_indexes = HashSet::new(); account_indexes.insert(AccountIndex::ProgramId); - let bank = Arc::new(Bank::new_with_indexes(&genesis_config, account_indexes)); + let bank = Arc::new(Bank::new_with_config( + &genesis_config, + account_indexes, + false, + )); let address = Pubkey::new_unique(); let program_id = Pubkey::new_unique(); @@ -10151,6 +10199,94 @@ pub(crate) mod tests { assert_eq!(42, bank.get_balance(&program2_pubkey)); } + fn get_shrink_account_size() -> usize { + let (genesis_config, _mint_keypair) = create_genesis_config(1_000_000_000); + + // Set root for bank 0, with caching enabled + let mut bank0 = Arc::new(Bank::new_with_config( + &genesis_config, + HashSet::new(), + false, + )); + bank0.restore_old_behavior_for_fragile_tests(); + goto_end_of_slot(Arc::::get_mut(&mut bank0).unwrap()); + bank0.freeze(); + bank0.squash(); + + let sizes = bank0 + .rc + .accounts + .scan_slot(0, |stored_account| Some(stored_account.stored_size())); + + // Create an account such that it takes SHRINK_RATIO of the total account space for + // the slot, so when it gets pruned, the storage entry will become a shrink candidate. 
+ let bank0_total_size: usize = sizes.into_iter().sum(); + let pubkey0_size = (bank0_total_size as f64 / (1.0 - SHRINK_RATIO)).ceil(); + assert!(pubkey0_size / (pubkey0_size + bank0_total_size as f64) > SHRINK_RATIO); + pubkey0_size as usize + } + + #[test] + fn test_shrink_candidate_slots_cached() { + solana_logger::setup(); + + let (genesis_config, _mint_keypair) = create_genesis_config(1_000_000_000); + let pubkey0 = solana_sdk::pubkey::new_rand(); + let pubkey1 = solana_sdk::pubkey::new_rand(); + let pubkey2 = solana_sdk::pubkey::new_rand(); + + // Set root for bank 0, with caching enabled + let mut bank0 = Arc::new(Bank::new_with_config(&genesis_config, HashSet::new(), true)); + bank0.restore_old_behavior_for_fragile_tests(); + + let pubkey0_size = get_shrink_account_size(); + let account0 = Account::new(1000, pubkey0_size as usize, &Pubkey::new_unique()); + bank0.store_account(&pubkey0, &account0); + + goto_end_of_slot(Arc::::get_mut(&mut bank0).unwrap()); + bank0.freeze(); + bank0.squash(); + + // Store some lamports in bank 1 + let some_lamports = 123; + let mut bank1 = Arc::new(new_from_parent(&bank0)); + bank1.deposit(&pubkey1, some_lamports); + bank1.deposit(&pubkey2, some_lamports); + goto_end_of_slot(Arc::::get_mut(&mut bank1).unwrap()); + + // Store some lamports for pubkey1 in bank 2, root bank 2 + let mut bank2 = Arc::new(new_from_parent(&bank1)); + bank2.deposit(&pubkey1, some_lamports); + bank2.store_account(&pubkey0, &account0); + goto_end_of_slot(Arc::::get_mut(&mut bank2).unwrap()); + bank2.freeze(); + bank2.squash(); + bank2.force_flush_accounts_cache(); + + // Clean accounts, which should add earlier slots to the shrink + // candidate set + bank2.clean_accounts(false); + + // Slots 0 and 1 should be candidates for shrinking, but slot 2 + // shouldn't because none of its accounts are outdated by a later + // root + assert_eq!(bank2.shrink_candidate_slots(), 2); + let alive_counts: Vec = (0..3) + .map(|slot| { + bank2 + .rc + .accounts + .accounts_db + .alive_account_count_in_slot(slot) + }) + .collect(); + + // No more slots should be shrunk + assert_eq!(bank2.shrink_candidate_slots(), 0); + // alive_counts represents the count of alive accounts in the three slots 0,1,2 + assert_eq!(alive_counts, vec![9, 1, 7]); + } + #[test] fn test_process_stale_slot_with_budget() { solana_logger::setup(); @@ -11120,6 +11256,7 @@ pub(crate) mod tests { None, Some(&builtins), HashSet::new(), + false, )); // move to next epoch to create now deprecated rewards sysvar intentionally let bank1 = Arc::new(Bank::new_from_parent( @@ -11347,100 +11484,6 @@ pub(crate) mod tests { ); } - fn setup_bank_with_removable_zero_lamport_account() -> Arc { - let (genesis_config, _mint_keypair) = create_genesis_config(2000); - let bank0 = Bank::new(&genesis_config); - bank0.freeze(); - - let bank1 = Arc::new(Bank::new_from_parent( - &Arc::new(bank0), - &Pubkey::default(), - 1, - )); - - let zero_lamport_pubkey = solana_sdk::pubkey::new_rand(); - - bank1.store_account_and_update_capitalization( - &zero_lamport_pubkey, - &Account::new(0, 0, &Pubkey::default()), - ); - // Store another account in a separate AppendVec than `zero_lamport_pubkey` - // (guaranteed because of large file size). We need this to ensure slot is - // not cleaned up after clean is called, so that the bank hash still exists - // when we call rehash() later in this test. 
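// Editor's note on the sizing in get_shrink_account_size() above: with total
// live bytes t and r = SHRINK_RATIO, choosing s = t / (1 - r) gives
// s / (s + t) = 1 / (2 - r), which exceeds r for every 0 < r < 1 because
// r^2 - 2r + 1 = (r - 1)^2 > 0. A standalone check with assumed values:
fn main() {
    let r = 0.8_f64; // hypothetical stand-in for SHRINK_RATIO
    let t = 10_000.0_f64; // hypothetical total live bytes in the slot
    let s = (t / (1.0 - r)).ceil();
    assert!(s / (s + t) > r); // mirrors the assert in the test above
    assert!((s / (s + t) - 1.0 / (2.0 - r)).abs() < 1e-6); // exact for these values
}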
- let large_account_pubkey = solana_sdk::pubkey::new_rand(); - bank1.store_account_and_update_capitalization( - &large_account_pubkey, - &Account::new( - 1000, - bank1.rc.accounts.accounts_db.file_size() as usize, - &Pubkey::default(), - ), - ); - assert_ne!( - bank1 - .rc - .accounts - .accounts_db - .get_append_vec_id(&large_account_pubkey, 1) - .unwrap(), - bank1 - .rc - .accounts - .accounts_db - .get_append_vec_id(&zero_lamport_pubkey, 1) - .unwrap() - ); - - // Make sure rent collection doesn't overwrite `large_account_pubkey`, which - // keeps slot 1 alive in the accounts database. Otherwise, slot 1 and it's bank - // hash would be removed from accounts, preventing `rehash()` from succeeding - bank1.restore_old_behavior_for_fragile_tests(); - bank1.freeze(); - let bank1_hash = bank1.hash(); - - let bank2 = Bank::new_from_parent(&bank1, &Pubkey::default(), 2); - bank2.freeze(); - - // Set a root so clean will happen on this slot - bank1.squash(); - - // All accounts other than `zero_lamport_pubkey` should be updated, which - // means clean should be able to delete the `zero_lamport_pubkey` - bank2.squash(); - - // Bank 1 hash should not change - bank1.rehash(); - let new_bank1_hash = bank1.hash(); - assert_eq!(bank1_hash, new_bank1_hash); - - bank1 - } - - #[test] - fn test_clean_zero_lamport_account_different_hash() { - let bank1 = setup_bank_with_removable_zero_lamport_account(); - let old_hash = bank1.hash(); - - // `zero_lamport_pubkey` should have been deleted, hashes will not match - bank1.clean_accounts(false); - bank1.rehash(); - let new_bank1_hash = bank1.hash(); - assert_ne!(old_hash, new_bank1_hash); - } - - #[test] - fn test_clean_zero_lamport_account_same_hash() { - let bank1 = setup_bank_with_removable_zero_lamport_account(); - let old_hash = bank1.hash(); - - // `zero_lamport_pubkey` will not be deleted, hashes will match - bank1.clean_accounts(true); - bank1.rehash(); - let new_bank1_hash = bank1.hash(); - assert_eq!(old_hash, new_bank1_hash); - } - #[test] fn test_program_is_native_loader() { let (genesis_config, mint_keypair) = create_genesis_config(50000); @@ -11531,7 +11574,7 @@ pub(crate) mod tests { assert!(!debug.is_empty()); } - fn test_store_scan_consistency(update_f: F) + fn test_store_scan_consistency(accounts_db_caching_enabled: bool, update_f: F) where F: Fn(Arc, crossbeam_channel::Sender>, Arc>, Pubkey, u64) + std::marker::Send, @@ -11544,7 +11587,11 @@ pub(crate) mod tests { ) .genesis_config; genesis_config.rent = Rent::free(); - let bank0 = Arc::new(Bank::new(&genesis_config)); + let bank0 = Arc::new(Bank::new_with_config( + &genesis_config, + HashSet::new(), + accounts_db_caching_enabled, + )); // Set up pubkeys to write to let total_pubkeys = ITER_BATCH_SIZE * 10; @@ -11590,7 +11637,7 @@ pub(crate) mod tests { bank_to_scan_receiver.recv_timeout(Duration::from_millis(10)) { let accounts = bank_to_scan.get_program_accounts(&program_id); - // Should never seen empty accounts because no slot ever deleted + // Should never see empty accounts because no slot ever deleted // any of the original accounts, and the scan should reflect the // account state at some frozen slot `X` (no partial updates). 
assert!(!accounts.is_empty()); @@ -11640,87 +11687,91 @@ pub(crate) mod tests { #[test] fn test_store_scan_consistency_unrooted() { - test_store_scan_consistency( - |bank0, bank_to_scan_sender, pubkeys_to_modify, program_id, starting_lamports| { - let mut current_major_fork_bank = bank0; - loop { - let mut current_minor_fork_bank = current_major_fork_bank.clone(); - let num_new_banks = 2; - let lamports = current_minor_fork_bank.slot() + starting_lamports + 1; - // Modify banks on the two banks on the minor fork - for pubkeys_to_modify in &pubkeys_to_modify - .iter() - .chunks(pubkeys_to_modify.len() / num_new_banks) - { - current_minor_fork_bank = Arc::new(Bank::new_from_parent( - ¤t_minor_fork_bank, - &solana_sdk::pubkey::new_rand(), - current_minor_fork_bank.slot() + 2, - )); - let account = Account::new(lamports, 0, &program_id); - // Write partial updates to each of the banks in the minor fork so if any of them - // get cleaned up, there will be keys with the wrong account value/missing. - for key in pubkeys_to_modify { - current_minor_fork_bank.store_account(key, &account); + for accounts_db_caching_enabled in &[false, true] { + test_store_scan_consistency( + *accounts_db_caching_enabled, + |bank0, bank_to_scan_sender, pubkeys_to_modify, program_id, starting_lamports| { + let mut current_major_fork_bank = bank0; + loop { + let mut current_minor_fork_bank = current_major_fork_bank.clone(); + let num_new_banks = 2; + let lamports = current_minor_fork_bank.slot() + starting_lamports + 1; + // Modify banks on the two banks on the minor fork + for pubkeys_to_modify in &pubkeys_to_modify + .iter() + .chunks(pubkeys_to_modify.len() / num_new_banks) + { + current_minor_fork_bank = Arc::new(Bank::new_from_parent( + ¤t_minor_fork_bank, + &solana_sdk::pubkey::new_rand(), + current_minor_fork_bank.slot() + 2, + )); + let account = Account::new(lamports, 0, &program_id); + // Write partial updates to each of the banks in the minor fork so if any of them + // get cleaned up, there will be keys with the wrong account value/missing. + for key in pubkeys_to_modify { + current_minor_fork_bank.store_account(key, &account); + } + current_minor_fork_bank.freeze(); } - current_minor_fork_bank.freeze(); + + // All the parent banks made in this iteration of the loop + // are currently discoverable, previous parents should have + // been squashed + assert_eq!( + current_minor_fork_bank.clone().parents_inclusive().len(), + num_new_banks + 1, + ); + + // `next_major_bank` needs to be sandwiched between the minor fork banks + // That way, after the squash(), the minor fork has the potential to see a + // *partial* clean of the banks < `next_major_bank`. + current_major_fork_bank = Arc::new(Bank::new_from_parent( + ¤t_major_fork_bank, + &solana_sdk::pubkey::new_rand(), + current_minor_fork_bank.slot() - 1, + )); + let lamports = current_major_fork_bank.slot() + starting_lamports + 1; + let account = Account::new(lamports, 0, &program_id); + for key in pubkeys_to_modify.iter() { + // Store rooted updates to these pubkeys such that the minor + // fork updates to the same keys will be deleted by clean + current_major_fork_bank.store_account(key, &account); + } + + // Send the last new bank to the scan thread to perform the scan. + // Meanwhile this thread will continually set roots on a separate fork + // and squash. 
+ /* + bank 0 + / \ + minor bank 1 \ + / current_major_fork_bank + minor bank 2 + + */ + // The capacity of the channel is 1 so that this thread will wait for the scan to finish before starting + // the next iteration, allowing the scan to stay in sync with these updates + // such that every scan will see this interruption. + if bank_to_scan_sender.send(current_minor_fork_bank).is_err() { + // Channel was disconnected, exit + return; + } + current_major_fork_bank.freeze(); + current_major_fork_bank.squash(); + // Try to get cache flush/clean to overlap with the scan + current_major_fork_bank.force_flush_accounts_cache(); + current_major_fork_bank.clean_accounts(false); } - - // All the parent banks made in this iteration of the loop - // are currently discoverable, previous parents should have - // been squashed - assert_eq!( - current_minor_fork_bank.clone().parents_inclusive().len(), - num_new_banks + 1, - ); - - // `next_major_bank` needs to be sandwiched between the minor fork banks - // That way, after the squash(), the minor fork has the potential to see a - // *partial* clean of the banks < `next_major_bank`. - current_major_fork_bank = Arc::new(Bank::new_from_parent( - ¤t_major_fork_bank, - &solana_sdk::pubkey::new_rand(), - current_minor_fork_bank.slot() - 1, - )); - let lamports = current_major_fork_bank.slot() + starting_lamports + 1; - let account = Account::new(lamports, 0, &program_id); - for key in pubkeys_to_modify.iter() { - // Store rooted updates to these pubkeys such that the minor - // fork updates to the same keys will be deleted by clean - current_major_fork_bank.store_account(key, &account); - } - - // Send the last new bank to the scan thread to perform the scan. - // Meanwhile this thread will continually set roots on a separate fork - // and squash. - /* - bank 0 - / \ - minor bank 1 \ - / current_major_fork_bank - minor bank 2 - - */ - // The capacity of the channel is 1 so that this thread will wait for the scan to finish before starting - // the next iteration, allowing the scan to stay in sync with these updates - // such that every scan will see this interruption. 
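// Editor's sketch of the capacity-1 handoff described in the comment above,
// with u64 slots standing in for Arc<Bank>. With crossbeam's bounded(1), a
// second send blocks until the scanner has taken the previous bank off the
// channel, so the producer runs at most one bank ahead of the scan.
use crossbeam_channel::bounded;
use std::thread;

fn main() {
    let (sender, receiver) = bounded::<u64>(1);
    let scanner = thread::spawn(move || {
        while let Ok(slot) = receiver.recv() {
            // stand-in for bank_to_scan.get_program_accounts(&program_id)
            println!("scanned bank at slot {}", slot);
        }
    });
    for slot in 0..3 {
        if sender.send(slot).is_err() {
            break; // channel disconnected, scan thread exited
        }
    }
    drop(sender); // close the channel so the scanner's recv() unblocks
    scanner.join().unwrap();
}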
- current_major_fork_bank.freeze(); - current_major_fork_bank.squash(); - if bank_to_scan_sender.send(current_minor_fork_bank).is_err() { - // Channel was disconnected, exit - return; - } - - // Try to get clean to overlap with the scan - current_major_fork_bank.clean_accounts(false); - } - }, - ) + }, + ) + } } #[test] fn test_store_scan_consistency_root() { test_store_scan_consistency( + false, |bank0, bank_to_scan_sender, pubkeys_to_modify, program_id, starting_lamports| { let mut current_bank = bank0.clone(); let mut prev_bank = bank0; @@ -11742,7 +11793,10 @@ pub(crate) mod tests { // Channel was disconnected, exit return; } + + current_bank.freeze(); current_bank.squash(); + current_bank.force_flush_accounts_cache(); current_bank.clean_accounts(true); prev_bank = current_bank.clone(); current_bank = Arc::new(Bank::new_from_parent( diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 95ba4943b4..a9a9884a28 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(RUSTC_WITH_SPECIALIZATION, feature(specialization))] pub mod accounts; pub mod accounts_background_service; +pub mod accounts_cache; pub mod accounts_db; pub mod accounts_index; pub mod append_vec; diff --git a/runtime/src/serde_snapshot.rs b/runtime/src/serde_snapshot.rs index 61482593e8..8067da14b1 100644 --- a/runtime/src/serde_snapshot.rs +++ b/runtime/src/serde_snapshot.rs @@ -117,6 +117,7 @@ where .deserialize_from::(reader) } +#[allow(clippy::too_many_arguments)] pub(crate) fn bank_from_stream( serde_style: SerdeStyle, stream: &mut BufReader, @@ -127,6 +128,7 @@ pub(crate) fn bank_from_stream( debug_keys: Option>>, additional_builtins: Option<&Builtins>, account_indexes: HashSet, + caching_enabled: bool, ) -> std::result::Result where R: Read, @@ -146,6 +148,7 @@ where debug_keys, additional_builtins, account_indexes, + caching_enabled, )?; Ok(bank) }}; @@ -223,6 +226,7 @@ impl<'a, C: TypeContext<'a>> Serialize for SerializableAccountsDB<'a, C> { #[cfg(RUSTC_WITH_SPECIALIZATION)] impl<'a, C> IgnoreAsHelper for SerializableAccountsDB<'a, C> {} +#[allow(clippy::too_many_arguments)] fn reconstruct_bank_from_fields( bank_fields: BankFieldsToDeserialize, accounts_db_fields: AccountsDbFields, @@ -233,6 +237,7 @@ fn reconstruct_bank_from_fields( debug_keys: Option>>, additional_builtins: Option<&Builtins>, account_indexes: HashSet, + caching_enabled: bool, ) -> Result where E: Into, @@ -244,6 +249,7 @@ where append_vecs_path, &genesis_config.cluster_type, account_indexes, + caching_enabled, )?; accounts_db.freeze_accounts(&bank_fields.ancestors, frozen_account_pubkeys); @@ -265,13 +271,18 @@ fn reconstruct_accountsdb_from_fields( stream_append_vecs_path: P, cluster_type: &ClusterType, account_indexes: HashSet, + caching_enabled: bool, ) -> Result where E: Into, P: AsRef, { - let mut accounts_db = - AccountsDB::new_with_indexes(account_paths.to_vec(), cluster_type, account_indexes); + let mut accounts_db = AccountsDB::new_with_config( + account_paths.to_vec(), + cluster_type, + account_indexes, + caching_enabled, + ); let AccountsDbFields(storage, version, slot, bank_hash_info) = accounts_db_fields; // convert to two level map of slot -> id -> account storage entry @@ -365,6 +376,10 @@ where ); } + if max_id > AppendVecId::MAX / 2 { + panic!("Storage id {} larger than allowed max", max_id); + } + accounts_db.next_id.store(max_id + 1, Ordering::Relaxed); accounts_db .write_version diff --git a/runtime/src/serde_snapshot/tests.rs b/runtime/src/serde_snapshot/tests.rs index 6abfefadba..710c254883 
100644 --- a/runtime/src/serde_snapshot/tests.rs +++ b/runtime/src/serde_snapshot/tests.rs @@ -70,6 +70,7 @@ where stream_append_vecs_path, &ClusterType::Development, HashSet::new(), + false, ) } @@ -121,7 +122,8 @@ where fn test_accounts_serialize_style(serde_style: SerdeStyle) { solana_logger::setup(); let (_accounts_dir, paths) = get_temp_accounts_paths(4).unwrap(); - let accounts = Accounts::new(paths, &ClusterType::Development); + let accounts = + Accounts::new_with_config(paths, &ClusterType::Development, HashSet::new(), false); let mut pubkeys: Vec = vec![]; create_test_accounts(&accounts, &mut pubkeys, 100, 0); @@ -181,7 +183,9 @@ fn test_bank_serialize_style(serde_style: SerdeStyle) { let key3 = Keypair::new(); bank2.deposit(&key3.pubkey(), 0); + bank2.freeze(); bank2.squash(); + bank2.force_flush_accounts_cache(); let snapshot_storages = bank2.get_snapshot_storages(); let mut buf = vec![]; @@ -214,6 +218,7 @@ fn test_bank_serialize_style(serde_style: SerdeStyle) { None, None, HashSet::new(), + false, ) .unwrap(); dbank.src = ref_sc; diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index b83ce343e5..b0211aca58 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -581,6 +581,7 @@ pub fn remove_snapshot>(slot: Slot, snapshot_path: P) -> Result<( Ok(()) } +#[allow(clippy::too_many_arguments)] pub fn bank_from_archive>( account_paths: &[PathBuf], frozen_account_pubkeys: &[Pubkey], @@ -591,6 +592,7 @@ pub fn bank_from_archive>( debug_keys: Option>>, additional_builtins: Option<&Builtins>, account_indexes: HashSet, + accounts_db_caching_enabled: bool, ) -> Result { // Untar the snapshot into a temporary directory let unpack_dir = tempfile::Builder::new() @@ -616,6 +618,7 @@ pub fn bank_from_archive>( debug_keys, additional_builtins, account_indexes, + accounts_db_caching_enabled, )?; if !bank.verify_snapshot_bank() { @@ -752,6 +755,7 @@ pub fn untar_snapshot_in, Q: AsRef>( Ok(()) } +#[allow(clippy::too_many_arguments)] fn rebuild_bank_from_snapshots
<P>
( snapshot_version: &str, account_paths: &[PathBuf], @@ -762,6 +766,7 @@ fn rebuild_bank_from_snapshots
<P>
( debug_keys: Option>>, additional_builtins: Option<&Builtins>, account_indexes: HashSet, + accounts_db_caching_enabled: bool, ) -> Result where P: AsRef, @@ -799,6 +804,7 @@ where debug_keys, additional_builtins, account_indexes, + accounts_db_caching_enabled, ), }?) })?; diff --git a/runtime/src/system_instruction_processor.rs b/runtime/src/system_instruction_processor.rs index 9fa9f2a3aa..1441ef395c 100644 --- a/runtime/src/system_instruction_processor.rs +++ b/runtime/src/system_instruction_processor.rs @@ -1144,7 +1144,9 @@ mod tests { #[test] fn test_create_zero_lamport_with_clean() { with_create_zero_lamport(|bank| { + bank.freeze(); bank.squash(); + bank.force_flush_accounts_cache(); // do clean and assert that it actually did its job assert_eq!(3, bank.get_snapshot_storages().len()); bank.clean_accounts(false); diff --git a/runtime/tests/accounts.rs b/runtime/tests/accounts.rs index 2ee328d07c..5593feae07 100644 --- a/runtime/tests/accounts.rs +++ b/runtime/tests/accounts.rs @@ -25,7 +25,7 @@ fn test_shrink_and_clean() { if exit_for_shrink.load(Ordering::Relaxed) { break; } - accounts_for_shrink.process_stale_slot(); + accounts_for_shrink.process_stale_slot_v1(); }); let mut alive_accounts = vec![]; @@ -45,7 +45,7 @@ fn test_shrink_and_clean() { for (pubkey, account) in alive_accounts.iter_mut() { account.lamports -= 1; - accounts.store(current_slot, &[(&pubkey, &account)]); + accounts.store_uncached(current_slot, &[(&pubkey, &account)]); } accounts.add_root(current_slot); } @@ -108,7 +108,7 @@ fn test_bad_bank_hash() { .iter() .map(|idx| (&accounts_keys[*idx].0, &accounts_keys[*idx].1)) .collect(); - db.store(some_slot, &account_refs); + db.store_uncached(some_slot, &account_refs); for (key, account) in &account_refs { assert_eq!( diff --git a/validator/src/main.rs b/validator/src/main.rs index 5e89a40ad3..fe89fc1ac9 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1423,6 +1423,11 @@ pub fn main() { .value_name("INDEX") .help("Enable an accounts index, indexed by the selected account field"), ) + .arg( + Arg::with_name("accounts_db_caching_enabled") + .long("accounts-db-caching-enabled") + .help("Enable accounts caching"), + ) .get_matches(); let identity_keypair = Arc::new(keypair_of(&matches, "identity").unwrap_or_else(Keypair::new)); @@ -1593,6 +1598,7 @@ pub fn main() { poh_pinned_cpu_core: value_of(&matches, "poh_pinned_cpu_core") .unwrap_or(poh_service::DEFAULT_PINNED_CPU_CORE), account_indexes, + accounts_db_caching_enabled: matches.is_present("accounts_db_caching_enabled"), ..ValidatorConfig::default() };
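For readers wiring this up outside the CLI, a minimal sketch of enabling the cache programmatically; the `accounts_db_caching_enabled` field is exactly the one this patch adds, and every other field is left at its default:

use solana_core::validator::ValidatorConfig;

fn main() {
    // Equivalent of passing --accounts-db-caching-enabled on the command line.
    let config = ValidatorConfig {
        accounts_db_caching_enabled: true,
        ..ValidatorConfig::default()
    };
    assert!(config.accounts_db_caching_enabled);
}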