add cli for --accounts-hash-num-passes (#20827)

This commit is contained in:
Jeff Washington (jwash)
2021-10-25 09:45:46 -05:00
committed by GitHub
parent e03dc9e8e3
commit 43ea579f63
5 changed files with 76 additions and 26 deletions

View File

@ -237,16 +237,16 @@ where
is_parsable_generic::<Slot, _>(slot) is_parsable_generic::<Slot, _>(slot)
} }
pub fn is_bin<T>(bins: T) -> Result<(), String> pub fn is_pow2<T>(bins: T) -> Result<(), String>
where where
T: AsRef<str> + Display, T: AsRef<str> + Display,
{ {
bins.as_ref() bins.as_ref()
.parse::<usize>() .parse::<usize>()
.map_err(|e| format!("Unable to parse bins, provided: {}, err: {}", bins, e)) .map_err(|e| format!("Unable to parse, provided: {}, err: {}", bins, e))
.and_then(|v| { .and_then(|v| {
if !v.is_power_of_two() { if !v.is_power_of_two() {
Err(format!("Bins must be a power of 2: {}", v)) Err(format!("Must be a power of 2: {}", v))
} else { } else {
Ok(()) Ok(())
} }

View File

@ -135,6 +135,7 @@ impl AccountsHashVerifier {
false, false,
None, None,
None, // this will fail with filler accounts None, // this will fail with filler accounts
None, // this code path is only for testing, so use default # passes here
) )
.unwrap(); .unwrap();

View File

@ -12,7 +12,7 @@ use serde_json::json;
use solana_clap_utils::{ use solana_clap_utils::{
input_parsers::{cluster_type_of, pubkey_of, pubkeys_of}, input_parsers::{cluster_type_of, pubkey_of, pubkeys_of},
input_validators::{ input_validators::{
is_bin, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot, is_valid_percentage, is_parsable, is_pow2, is_pubkey, is_pubkey_or_keypair, is_slot, is_valid_percentage,
}, },
}; };
use solana_entry::entry::Entry; use solana_entry::entry::Entry;
@ -870,7 +870,7 @@ fn main() {
let accounts_index_bins = Arg::with_name("accounts_index_bins") let accounts_index_bins = Arg::with_name("accounts_index_bins")
.long("accounts-index-bins") .long("accounts-index-bins")
.value_name("BINS") .value_name("BINS")
.validator(is_bin) .validator(is_pow2)
.takes_value(true) .takes_value(true)
.help("Number of bins to divide the accounts index into"); .help("Number of bins to divide the accounts index into");
let accounts_index_limit = Arg::with_name("accounts_index_memory_limit_mb") let accounts_index_limit = Arg::with_name("accounts_index_memory_limit_mb")
@ -1996,6 +1996,7 @@ fn main() {
index: Some(accounts_index_config), index: Some(accounts_index_config),
accounts_hash_cache_path: Some(ledger_path.clone()), accounts_hash_cache_path: Some(ledger_path.clone()),
filler_account_count, filler_account_count,
..AccountsDbConfig::default()
}); });
let process_options = ProcessOptions { let process_options = ProcessOptions {

View File

@ -93,12 +93,7 @@ pub const DEFAULT_NUM_DIRS: u32 = 4;
// When calculating hashes, it is helpful to break the pubkeys found into bins based on the pubkey value. // When calculating hashes, it is helpful to break the pubkeys found into bins based on the pubkey value.
// More bins means smaller vectors to sort, copy, etc. // More bins means smaller vectors to sort, copy, etc.
pub const PUBKEY_BINS_FOR_CALCULATING_HASHES: usize = 65536; pub const PUBKEY_BINS_FOR_CALCULATING_HASHES: usize = 65536;
// # of passes should be a function of the total # of accounts that are active. pub const NUM_SCAN_PASSES_DEFAULT: usize = 2;
// higher passes = slower total time, lower dynamic memory usage
// lower passes = faster total time, higher dynamic memory usage
// passes=2 cuts dynamic memory usage in approximately half.
pub const NUM_SCAN_PASSES: usize = 2;
pub const BINS_PER_PASS: usize = PUBKEY_BINS_FOR_CALCULATING_HASHES / NUM_SCAN_PASSES;
// Without chunks, we end up with 1 output vec for each outer snapshot storage. // Without chunks, we end up with 1 output vec for each outer snapshot storage.
// This results in too many vectors to be efficient. // This results in too many vectors to be efficient.
@ -131,11 +126,13 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig {
index: Some(ACCOUNTS_INDEX_CONFIG_FOR_TESTING), index: Some(ACCOUNTS_INDEX_CONFIG_FOR_TESTING),
accounts_hash_cache_path: None, accounts_hash_cache_path: None,
filler_account_count: None, filler_account_count: None,
hash_calc_num_passes: None,
}; };
pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig { pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig {
index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS), index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS),
accounts_hash_cache_path: None, accounts_hash_cache_path: None,
filler_account_count: None, filler_account_count: None,
hash_calc_num_passes: None,
}; };
pub type BinnedHashData = Vec<Vec<CalculateHashIntermediate>>; pub type BinnedHashData = Vec<Vec<CalculateHashIntermediate>>;
@ -145,6 +142,7 @@ pub struct AccountsDbConfig {
pub index: Option<AccountsIndexConfig>, pub index: Option<AccountsIndexConfig>,
pub accounts_hash_cache_path: Option<PathBuf>, pub accounts_hash_cache_path: Option<PathBuf>,
pub filler_account_count: Option<usize>, pub filler_account_count: Option<usize>,
pub hash_calc_num_passes: Option<usize>,
} }
struct FoundStoredAccount<'a> { struct FoundStoredAccount<'a> {
@ -1053,6 +1051,12 @@ pub struct AccountsDb {
filler_account_count: usize, filler_account_count: usize,
pub filler_account_suffix: Option<Pubkey>, pub filler_account_suffix: Option<Pubkey>,
// # of passes should be a function of the total # of accounts that are active.
// higher passes = slower total time, lower dynamic memory usage
// lower passes = faster total time, higher dynamic memory usage
// passes=2 cuts dynamic memory usage in approximately half.
pub num_hash_scan_passes: Option<usize>,
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -1491,12 +1495,30 @@ type GenerateIndexAccountsMap<'a> = HashMap<Pubkey, IndexAccountMapEntry<'a>>;
impl AccountsDb { impl AccountsDb {
pub fn default_for_tests() -> Self { pub fn default_for_tests() -> Self {
Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None) Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None, None)
}
/// return (num_hash_scan_passes, bins_per_pass)
fn bins_per_pass(num_hash_scan_passes: Option<usize>) -> (usize, usize) {
let num_hash_scan_passes = num_hash_scan_passes.unwrap_or(NUM_SCAN_PASSES_DEFAULT);
let bins_per_pass = PUBKEY_BINS_FOR_CALCULATING_HASHES / num_hash_scan_passes;
assert!(
num_hash_scan_passes <= PUBKEY_BINS_FOR_CALCULATING_HASHES,
"num_hash_scan_passes must be <= {}",
PUBKEY_BINS_FOR_CALCULATING_HASHES
);
assert_eq!(
bins_per_pass * num_hash_scan_passes,
PUBKEY_BINS_FOR_CALCULATING_HASHES
); // evenly divisible
(num_hash_scan_passes, bins_per_pass)
} }
fn default_with_accounts_index( fn default_with_accounts_index(
accounts_index: AccountInfoAccountsIndex, accounts_index: AccountInfoAccountsIndex,
accounts_hash_cache_path: Option<PathBuf>, accounts_hash_cache_path: Option<PathBuf>,
num_hash_scan_passes: Option<usize>,
) -> Self { ) -> Self {
let num_threads = get_thread_count(); let num_threads = get_thread_count();
const MAX_READ_ONLY_CACHE_DATA_SIZE: usize = 200_000_000; const MAX_READ_ONLY_CACHE_DATA_SIZE: usize = 200_000_000;
@ -1513,6 +1535,10 @@ impl AccountsDb {
let mut bank_hashes = HashMap::new(); let mut bank_hashes = HashMap::new();
bank_hashes.insert(0, BankHashInfo::default()); bank_hashes.insert(0, BankHashInfo::default());
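// validate num_hash_scan_passes up front: bins_per_pass panics on an invalid value (its result is intentionally discarded here)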
Self::bins_per_pass(num_hash_scan_passes);
AccountsDb { AccountsDb {
accounts_index, accounts_index,
storage: AccountStorage::default(), storage: AccountStorage::default(),
@ -1559,6 +1585,7 @@ impl AccountsDb {
accounts_update_notifier: None, accounts_update_notifier: None,
filler_account_count: 0, filler_account_count: 0,
filler_account_suffix: None, filler_account_suffix: None,
num_hash_scan_passes,
} }
} }
@ -1607,7 +1634,13 @@ impl AccountsDb {
accounts_update_notifier, accounts_update_notifier,
filler_account_count, filler_account_count,
filler_account_suffix, filler_account_suffix,
..Self::default_with_accounts_index(accounts_index, accounts_hash_cache_path) ..Self::default_with_accounts_index(
accounts_index,
accounts_hash_cache_path,
accounts_db_config
.as_ref()
.and_then(|cfg| cfg.hash_calc_num_passes),
)
}; };
if paths_is_empty { if paths_is_empty {
// Create a temporary set of accounts directories, used primarily // Create a temporary set of accounts directories, used primarily
@ -5408,6 +5441,7 @@ impl AccountsDb {
} else { } else {
None None
}, },
self.num_hash_scan_passes,
) )
} else { } else {
self.calculate_accounts_hash(slot, ancestors, check_hash) self.calculate_accounts_hash(slot, ancestors, check_hash)
@ -5608,21 +5642,19 @@ impl AccountsDb {
&AccountInfoAccountsIndex, &AccountInfoAccountsIndex,
)>, )>,
filler_account_suffix: Option<&Pubkey>, filler_account_suffix: Option<&Pubkey>,
num_hash_scan_passes: Option<usize>,
) -> Result<(Hash, u64), BankHashVerificationError> { ) -> Result<(Hash, u64), BankHashVerificationError> {
let (num_hash_scan_passes, bins_per_pass) = Self::bins_per_pass(num_hash_scan_passes);
let mut scan_and_hash = move || { let mut scan_and_hash = move || {
assert_eq!(
BINS_PER_PASS * NUM_SCAN_PASSES,
PUBKEY_BINS_FOR_CALCULATING_HASHES
); // evenly divisible
let mut previous_pass = PreviousPass::default(); let mut previous_pass = PreviousPass::default();
let mut final_result = (Hash::default(), 0); let mut final_result = (Hash::default(), 0);
let cache_hash_data = CacheHashData::new(&accounts_hash_cache_path); let cache_hash_data = CacheHashData::new(&accounts_hash_cache_path);
for pass in 0..NUM_SCAN_PASSES { for pass in 0..num_hash_scan_passes {
let bounds = Range { let bounds = Range {
start: pass * BINS_PER_PASS, start: pass * bins_per_pass,
end: (pass + 1) * BINS_PER_PASS, end: (pass + 1) * bins_per_pass,
}; };
let result = Self::scan_snapshot_stores_with_cache( let result = Self::scan_snapshot_stores_with_cache(
@ -5642,9 +5674,9 @@ impl AccountsDb {
let (hash, lamports, for_next_pass) = hash.rest_of_hash_calculation( let (hash, lamports, for_next_pass) = hash.rest_of_hash_calculation(
result, result,
&mut stats, &mut stats,
pass == NUM_SCAN_PASSES - 1, pass == num_hash_scan_passes - 1,
previous_pass, previous_pass,
BINS_PER_PASS, bins_per_pass,
); );
previous_pass = for_next_pass; previous_pass = for_next_pass;
final_result = (hash, lamports); final_result = (hash, lamports);
@ -7669,6 +7701,7 @@ pub mod tests {
false, false,
None, None,
None, None,
None,
) )
.unwrap(); .unwrap();
let expected_hash = Hash::from_str("GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn").unwrap(); let expected_hash = Hash::from_str("GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn").unwrap();
@ -7693,6 +7726,7 @@ pub mod tests {
false, false,
None, None,
None, None,
None,
) )
.unwrap(); .unwrap();

View File

@ -10,7 +10,7 @@ use {
solana_clap_utils::{ solana_clap_utils::{
input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of}, input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of},
input_validators::{ input_validators::{
is_bin, is_keypair, is_keypair_or_ask_keyword, is_parsable, is_pubkey, is_keypair, is_keypair_or_ask_keyword, is_parsable, is_pow2, is_pubkey,
is_pubkey_or_keypair, is_slot, is_valid_percentage, is_pubkey_or_keypair, is_slot, is_valid_percentage,
}, },
keypair::SKIP_SEED_PHRASE_VALIDATION_ARG, keypair::SKIP_SEED_PHRASE_VALIDATION_ARG,
@ -1396,10 +1396,18 @@ pub fn main() {
Arg::with_name("accounts_index_bins") Arg::with_name("accounts_index_bins")
.long("accounts-index-bins") .long("accounts-index-bins")
.value_name("BINS") .value_name("BINS")
.validator(is_bin) .validator(is_pow2)
.takes_value(true) .takes_value(true)
.help("Number of bins to divide the accounts index into"), .help("Number of bins to divide the accounts index into"),
) )
.arg(
Arg::with_name("accounts_hash_num_passes")
.long("accounts-hash-num-passes")
.value_name("PASSES")
.validator(is_pow2)
.takes_value(true)
.help("Number of passes to calculate the hash of all accounts"),
)
.arg( .arg(
Arg::with_name("accounts_index_path") Arg::with_name("accounts_index_path")
.long("accounts-index-path") .long("accounts-index-path")
@ -1987,11 +1995,17 @@ pub fn main() {
} }
let filler_account_count = value_t!(matches, "accounts_filler_count", usize).ok(); let filler_account_count = value_t!(matches, "accounts_filler_count", usize).ok();
let accounts_db_config = Some(AccountsDbConfig { let mut accounts_db_config = AccountsDbConfig {
index: Some(accounts_index_config), index: Some(accounts_index_config),
accounts_hash_cache_path: Some(ledger_path.clone()), accounts_hash_cache_path: Some(ledger_path.clone()),
filler_account_count, filler_account_count,
}); ..AccountsDbConfig::default()
};
if let Some(passes) = value_t!(matches, "accounts_hash_num_passes", usize).ok() {
accounts_db_config.hash_calc_num_passes = Some(passes);
}
let accounts_db_config = Some(accounts_db_config);
let accountsdb_repl_service_config = if matches.is_present("enable_accountsdb_repl") { let accountsdb_repl_service_config = if matches.is_present("enable_accountsdb_repl") {
let accountsdb_repl_bind_address = if matches.is_present("accountsdb_repl_bind_address") { let accountsdb_repl_bind_address = if matches.is_present("accountsdb_repl_bind_address") {