diff --git a/clap-utils/src/input_validators.rs b/clap-utils/src/input_validators.rs index e9a8b9f3f9..e96fa46580 100644 --- a/clap-utils/src/input_validators.rs +++ b/clap-utils/src/input_validators.rs @@ -237,16 +237,16 @@ where is_parsable_generic::(slot) } -pub fn is_bin(bins: T) -> Result<(), String> +pub fn is_pow2(bins: T) -> Result<(), String> where T: AsRef + Display, { bins.as_ref() .parse::() - .map_err(|e| format!("Unable to parse bins, provided: {}, err: {}", bins, e)) + .map_err(|e| format!("Unable to parse, provided: {}, err: {}", bins, e)) .and_then(|v| { if !v.is_power_of_two() { - Err(format!("Bins must be a power of 2: {}", v)) + Err(format!("Must be a power of 2: {}", v)) } else { Ok(()) } diff --git a/core/src/accounts_hash_verifier.rs b/core/src/accounts_hash_verifier.rs index 62ff6af61d..ce5c56bffa 100644 --- a/core/src/accounts_hash_verifier.rs +++ b/core/src/accounts_hash_verifier.rs @@ -135,6 +135,7 @@ impl AccountsHashVerifier { false, None, None, // this will fail with filler accounts + None, // this code path is only for testing, so use default # passes here ) .unwrap(); diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index d004c202b5..564f21c95a 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -12,7 +12,7 @@ use serde_json::json; use solana_clap_utils::{ input_parsers::{cluster_type_of, pubkey_of, pubkeys_of}, input_validators::{ - is_bin, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot, is_valid_percentage, + is_parsable, is_pow2, is_pubkey, is_pubkey_or_keypair, is_slot, is_valid_percentage, }, }; use solana_entry::entry::Entry; @@ -870,7 +870,7 @@ fn main() { let accounts_index_bins = Arg::with_name("accounts_index_bins") .long("accounts-index-bins") .value_name("BINS") - .validator(is_bin) + .validator(is_pow2) .takes_value(true) .help("Number of bins to divide the accounts index into"); let accounts_index_limit = Arg::with_name("accounts_index_memory_limit_mb") @@ -1996,6 +1996,7 @@ fn main() { index: Some(accounts_index_config), accounts_hash_cache_path: Some(ledger_path.clone()), filler_account_count, + ..AccountsDbConfig::default() }); let process_options = ProcessOptions { diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index a80a327196..bb0eed9700 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -93,12 +93,7 @@ pub const DEFAULT_NUM_DIRS: u32 = 4; // When calculating hashes, it is helpful to break the pubkeys found into bins based on the pubkey value. // More bins means smaller vectors to sort, copy, etc. pub const PUBKEY_BINS_FOR_CALCULATING_HASHES: usize = 65536; -// # of passes should be a function of the total # of accounts that are active. -// higher passes = slower total time, lower dynamic memory usage -// lower passes = faster total time, higher dynamic memory usage -// passes=2 cuts dynamic memory usage in approximately half. -pub const NUM_SCAN_PASSES: usize = 2; -pub const BINS_PER_PASS: usize = PUBKEY_BINS_FOR_CALCULATING_HASHES / NUM_SCAN_PASSES; +pub const NUM_SCAN_PASSES_DEFAULT: usize = 2; // Without chunks, we end up with 1 output vec for each outer snapshot storage. // This results in too many vectors to be efficient. @@ -131,11 +126,13 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig { index: Some(ACCOUNTS_INDEX_CONFIG_FOR_TESTING), accounts_hash_cache_path: None, filler_account_count: None, + hash_calc_num_passes: None, }; pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig { index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS), accounts_hash_cache_path: None, filler_account_count: None, + hash_calc_num_passes: None, }; pub type BinnedHashData = Vec>; @@ -145,6 +142,7 @@ pub struct AccountsDbConfig { pub index: Option, pub accounts_hash_cache_path: Option, pub filler_account_count: Option, + pub hash_calc_num_passes: Option, } struct FoundStoredAccount<'a> { @@ -1053,6 +1051,12 @@ pub struct AccountsDb { filler_account_count: usize, pub filler_account_suffix: Option, + + // # of passes should be a function of the total # of accounts that are active. + // higher passes = slower total time, lower dynamic memory usage + // lower passes = faster total time, higher dynamic memory usage + // passes=2 cuts dynamic memory usage in approximately half. + pub num_hash_scan_passes: Option, } #[derive(Debug, Default)] @@ -1491,12 +1495,30 @@ type GenerateIndexAccountsMap<'a> = HashMap>; impl AccountsDb { pub fn default_for_tests() -> Self { - Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None) + Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None, None) + } + + /// return (num_hash_scan_passes, bins_per_pass) + fn bins_per_pass(num_hash_scan_passes: Option) -> (usize, usize) { + let num_hash_scan_passes = num_hash_scan_passes.unwrap_or(NUM_SCAN_PASSES_DEFAULT); + let bins_per_pass = PUBKEY_BINS_FOR_CALCULATING_HASHES / num_hash_scan_passes; + assert!( + num_hash_scan_passes <= PUBKEY_BINS_FOR_CALCULATING_HASHES, + "num_hash_scan_passes must be <= {}", + PUBKEY_BINS_FOR_CALCULATING_HASHES + ); + assert_eq!( + bins_per_pass * num_hash_scan_passes, + PUBKEY_BINS_FOR_CALCULATING_HASHES + ); // evenly divisible + + (num_hash_scan_passes, bins_per_pass) } fn default_with_accounts_index( accounts_index: AccountInfoAccountsIndex, accounts_hash_cache_path: Option, + num_hash_scan_passes: Option, ) -> Self { let num_threads = get_thread_count(); const MAX_READ_ONLY_CACHE_DATA_SIZE: usize = 200_000_000; @@ -1513,6 +1535,10 @@ impl AccountsDb { let mut bank_hashes = HashMap::new(); bank_hashes.insert(0, BankHashInfo::default()); + + // validate inside here + Self::bins_per_pass(num_hash_scan_passes); + AccountsDb { accounts_index, storage: AccountStorage::default(), @@ -1559,6 +1585,7 @@ impl AccountsDb { accounts_update_notifier: None, filler_account_count: 0, filler_account_suffix: None, + num_hash_scan_passes, } } @@ -1607,7 +1634,13 @@ impl AccountsDb { accounts_update_notifier, filler_account_count, filler_account_suffix, - ..Self::default_with_accounts_index(accounts_index, accounts_hash_cache_path) + ..Self::default_with_accounts_index( + accounts_index, + accounts_hash_cache_path, + accounts_db_config + .as_ref() + .and_then(|cfg| cfg.hash_calc_num_passes), + ) }; if paths_is_empty { // Create a temporary set of accounts directories, used primarily @@ -5408,6 +5441,7 @@ impl AccountsDb { } else { None }, + self.num_hash_scan_passes, ) } else { self.calculate_accounts_hash(slot, ancestors, check_hash) @@ -5608,21 +5642,19 @@ impl AccountsDb { &AccountInfoAccountsIndex, )>, filler_account_suffix: Option<&Pubkey>, + num_hash_scan_passes: Option, ) -> Result<(Hash, u64), BankHashVerificationError> { + let (num_hash_scan_passes, bins_per_pass) = Self::bins_per_pass(num_hash_scan_passes); let mut scan_and_hash = move || { - assert_eq!( - BINS_PER_PASS * NUM_SCAN_PASSES, - PUBKEY_BINS_FOR_CALCULATING_HASHES - ); // evenly divisible let mut previous_pass = PreviousPass::default(); let mut final_result = (Hash::default(), 0); let cache_hash_data = CacheHashData::new(&accounts_hash_cache_path); - for pass in 0..NUM_SCAN_PASSES { + for pass in 0..num_hash_scan_passes { let bounds = Range { - start: pass * BINS_PER_PASS, - end: (pass + 1) * BINS_PER_PASS, + start: pass * bins_per_pass, + end: (pass + 1) * bins_per_pass, }; let result = Self::scan_snapshot_stores_with_cache( @@ -5642,9 +5674,9 @@ impl AccountsDb { let (hash, lamports, for_next_pass) = hash.rest_of_hash_calculation( result, &mut stats, - pass == NUM_SCAN_PASSES - 1, + pass == num_hash_scan_passes - 1, previous_pass, - BINS_PER_PASS, + bins_per_pass, ); previous_pass = for_next_pass; final_result = (hash, lamports); @@ -7669,6 +7701,7 @@ pub mod tests { false, None, None, + None, ) .unwrap(); let expected_hash = Hash::from_str("GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn").unwrap(); @@ -7693,6 +7726,7 @@ pub mod tests { false, None, None, + None, ) .unwrap(); diff --git a/validator/src/main.rs b/validator/src/main.rs index 8a0774d2a9..ad63017ad7 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -10,7 +10,7 @@ use { solana_clap_utils::{ input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of}, input_validators::{ - is_bin, is_keypair, is_keypair_or_ask_keyword, is_parsable, is_pubkey, + is_keypair, is_keypair_or_ask_keyword, is_parsable, is_pow2, is_pubkey, is_pubkey_or_keypair, is_slot, is_valid_percentage, }, keypair::SKIP_SEED_PHRASE_VALIDATION_ARG, @@ -1396,10 +1396,18 @@ pub fn main() { Arg::with_name("accounts_index_bins") .long("accounts-index-bins") .value_name("BINS") - .validator(is_bin) + .validator(is_pow2) .takes_value(true) .help("Number of bins to divide the accounts index into"), ) + .arg( + Arg::with_name("accounts_hash_num_passes") + .long("accounts-hash-num-passes") + .value_name("PASSES") + .validator(is_pow2) + .takes_value(true) + .help("Number of passes to calculate the hash of all accounts"), + ) .arg( Arg::with_name("accounts_index_path") .long("accounts-index-path") @@ -1987,11 +1995,17 @@ pub fn main() { } let filler_account_count = value_t!(matches, "accounts_filler_count", usize).ok(); - let accounts_db_config = Some(AccountsDbConfig { + let mut accounts_db_config = AccountsDbConfig { index: Some(accounts_index_config), accounts_hash_cache_path: Some(ledger_path.clone()), filler_account_count, - }); + ..AccountsDbConfig::default() + }; + + if let Some(passes) = value_t!(matches, "accounts_hash_num_passes", usize).ok() { + accounts_db_config.hash_calc_num_passes = Some(passes); + } + let accounts_db_config = Some(accounts_db_config); let accountsdb_repl_service_config = if matches.is_present("enable_accountsdb_repl") { let accountsdb_repl_bind_address = if matches.is_present("accountsdb_repl_bind_address") {