HashCalc: Perf: use refs for hashes (#21280)

Author: Jeff Washington (jwash)
Date: 2021-11-16 10:30:55 -06:00
Committed by: GitHub
Parent: 7e600bd451
Commit: ae497715cc

@@ -5,8 +5,8 @@ use solana_sdk::{
     hash::{Hash, Hasher},
     pubkey::Pubkey,
 };
+use std::borrow::Borrow;
 use std::{convert::TryInto, sync::Mutex};
 
 pub const ZERO_RAW_LAMPORTS_SENTINEL: u64 = std::u64::MAX;
 pub const MERKLE_FANOUT: usize = 16;
@@ -308,7 +308,7 @@ impl AccountsHash {
     // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
     // The caller provides a function to return a slice from the source data.
-    pub fn compute_merkle_root_from_slices<'a, F>(
+    pub fn compute_merkle_root_from_slices<'a, F, T>(
         total_hashes: usize,
         fanout: usize,
         max_levels_per_pass: Option<usize>,
@@ -316,7 +316,8 @@ impl AccountsHash {
         specific_level_count: Option<usize>,
     ) -> (Hash, Vec<Hash>)
     where
-        F: Fn(usize) -> &'a [Hash] + std::marker::Sync,
+        F: Fn(usize) -> &'a [T] + std::marker::Sync,
+        T: Borrow<Hash> + std::marker::Sync + 'a,
    {
        if total_hashes == 0 {
            return (Hasher::default().result(), vec![]);
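With the extra type parameter, compute_merkle_root_from_slices accepts slices of anything that can be viewed as a Hash, so callers can hand it either &[Hash] or &[&Hash]. A minimal sketch of how the Borrow<Hash> bound covers both cases (hash_all is an illustrative function, not the crate's):

    use std::borrow::Borrow;

    struct Hash([u8; 32]);

    // Accepts &[Hash] as well as &[&Hash], because both Hash and &Hash
    // implement Borrow<Hash>.
    fn hash_all<T: Borrow<Hash>>(hashes: &[T]) -> usize {
        let mut total = 0;
        for h in hashes {
            // .borrow() yields &Hash for either element type.
            total += h.borrow().0.len();
        }
        total
    }

    fn main() {
        let owned = vec![Hash([0u8; 32]), Hash([1u8; 32])];
        let refs: Vec<&Hash> = owned.iter().collect();
        assert_eq!(hash_all(&owned), hash_all(&refs));
    }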
@@ -334,7 +335,7 @@ impl AccountsHash {
         let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
 
         // initial fetch - could return entire slice
-        let data: &[Hash] = get_hash_slice_starting_at_index(0);
+        let data = get_hash_slice_starting_at_index(0);
         let data_len = data.len();
 
         let result: Vec<_> = (0..chunks)
@@ -370,7 +371,7 @@ impl AccountsHash {
                         data_len = data.len();
                         data_index = 0;
                     }
-                    hasher.hash(data[data_index].as_ref());
+                    hasher.hash(data[data_index].borrow().as_ref());
                     data_index += 1;
                 }
             } else {
@@ -425,7 +426,7 @@ impl AccountsHash {
                             data_len = data.len();
                             data_index = 0;
                         }
-                        hasher_k.hash(data[data_index].as_ref());
+                        hasher_k.hash(data[data_index].borrow().as_ref());
                         data_index += 1;
                         i += 1;
                     }
@@ -509,12 +510,12 @@ impl AccountsHash {
             .expect("overflow is detected while summing capitalization")
     }
 
-    fn de_dup_and_eliminate_zeros(
+    fn de_dup_and_eliminate_zeros<'a>(
         &self,
-        sorted_data_by_pubkey: Vec<Vec<Vec<CalculateHashIntermediate>>>,
+        sorted_data_by_pubkey: &'a [Vec<Vec<CalculateHashIntermediate>>],
         stats: &mut HashStats,
         max_bin: usize,
-    ) -> (Vec<Vec<Hash>>, u64) {
+    ) -> (Vec<Vec<&'a Hash>>, u64) {
        // 1. eliminate zero lamport accounts
        // 2. pick the highest slot or (slot = and highest version) of each pubkey
        // 3. produce this output:
@@ -523,11 +524,11 @@ impl AccountsHash {
         //   b. lamports
         let mut zeros = Measure::start("eliminate zeros");
         let min_max_sum_entries_hashes = Mutex::new((usize::MAX, usize::MIN, 0u64, 0usize, 0usize));
-        let hashes: Vec<Vec<Hash>> = (0..max_bin)
+        let hashes: Vec<Vec<&Hash>> = (0..max_bin)
             .into_par_iter()
             .map(|bin| {
                 let (hashes, lamports_bin, unreduced_entries_count) =
-                    self.de_dup_accounts_in_parallel(&sorted_data_by_pubkey, bin);
+                    self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin);
                 {
                     let mut lock = min_max_sum_entries_hashes.lock().unwrap();
                     let (mut min, mut max, mut lamports_sum, mut entries, mut hash_total) = *lock;
@@ -600,11 +601,11 @@ impl AccountsHash {
     //   a. vec: individual hashes in pubkey order
     //   b. lamport sum
     //   c. unreduced count (ie. including duplicates and zero lamport)
-    fn de_dup_accounts_in_parallel(
+    fn de_dup_accounts_in_parallel<'a>(
         &self,
-        pubkey_division: &[Vec<Vec<CalculateHashIntermediate>>],
+        pubkey_division: &'a [Vec<Vec<CalculateHashIntermediate>>],
         pubkey_bin: usize,
-    ) -> (Vec<Hash>, u64, usize) {
+    ) -> (Vec<&'a Hash>, u64, usize) {
         let len = pubkey_division.len();
         let mut item_len = 0;
         let mut indexes = vec![0; len];
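The explicit 'a ties the returned Vec<&'a Hash> to the borrowed pubkey_division slice rather than to &self; with a &self receiver, an elided output lifetime would be taken from self, which is not where the hashes live. A minimal sketch of that relationship (pick_hashes and Intermediate are illustrative names, not the crate's):

    struct Hash([u8; 32]);

    struct Intermediate {
        hash: Hash,
    }

    // The returned references are valid exactly as long as `division` is,
    // which the explicit 'a records in the signature.
    fn pick_hashes<'a>(division: &'a [Intermediate]) -> Vec<&'a Hash> {
        division.iter().map(|item| &item.hash).collect()
    }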
@@ -626,7 +627,7 @@ impl AccountsHash {
             }
         });
         let mut overall_sum = 0;
-        let mut hashes: Vec<Hash> = Vec::with_capacity(item_len);
+        let mut hashes: Vec<&Hash> = Vec::with_capacity(item_len);
         let mut duplicate_pubkey_indexes = Vec::with_capacity(len);
         let filler_accounts_enabled = self.filler_accounts_enabled();
@@ -676,7 +677,7 @@ impl AccountsHash {
                 overall_sum = Self::checked_cast_for_capitalization(
                     item.lamports as u128 + overall_sum as u128,
                 );
-                hashes.push(item.hash);
+                hashes.push(&item.hash);
             }
             if !duplicate_pubkey_indexes.is_empty() {
                 // skip past duplicate keys in earlier slots
@@ -718,14 +719,23 @@ impl AccountsHash {
         max_bin: usize,
     ) -> (Hash, u64, PreviousPass) {
         let (mut hashes, mut total_lamports) =
-            self.de_dup_and_eliminate_zeros(data_sections_by_pubkey, stats, max_bin);
+            self.de_dup_and_eliminate_zeros(&data_sections_by_pubkey, stats, max_bin);
 
         total_lamports += previous_state.lamports;
 
+        let mut _remaining_unhashed = None;
         if !previous_state.remaining_unhashed.is_empty() {
             // These items were not hashed last iteration because they didn't divide evenly.
             // These are hashes for pubkeys that are < the pubkeys we are looking at now, so their hashes go first in order.
-            hashes.insert(0, previous_state.remaining_unhashed);
+            _remaining_unhashed = Some(previous_state.remaining_unhashed);
+            hashes.insert(
+                0,
+                _remaining_unhashed
+                    .as_ref()
+                    .unwrap()
+                    .iter()
+                    .collect::<Vec<_>>(),
+            );
             previous_state.remaining_unhashed = Vec::new();
         }
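Because hashes now holds references, the owned Vec<Hash> carried over in previous_state.remaining_unhashed can no longer be inserted directly; it is parked in the local _remaining_unhashed so it outlives the borrow, and a vector of references into it is prepended instead. A minimal sketch of that owned-holder pattern (names and data here are illustrative):

    struct Hash([u8; 32]);

    fn main() {
        // Owned hashes carried over from a previous pass (illustrative data).
        let carried: Vec<Hash> = vec![Hash([1; 32]), Hash([2; 32])];

        // Keep the owned vec alive in a local binding so references into it
        // remain valid while the per-bin vectors are assembled.
        let holder = Some(carried);

        let mut hashes: Vec<Vec<&Hash>> = vec![vec![]];
        hashes.insert(0, holder.as_ref().unwrap().iter().collect());

        assert_eq!(hashes[0].len(), 2);
    }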
@@ -748,7 +758,7 @@ impl AccountsHash {
             let mut i = hash_total - left_over_hashes;
             while i < hash_total {
                 let data = cumulative.get_slice(&hashes, i);
-                next_pass.remaining_unhashed.extend(data);
+                next_pass.remaining_unhashed.extend(data.iter().cloned());
                 i += data.len();
             }
@@ -780,7 +790,9 @@ impl AccountsHash {
             // we never made partial progress, so hash everything now
             hashes.into_iter().for_each(|v| {
                 if !v.is_empty() {
-                    next_pass.reduced_hashes.push(v);
+                    next_pass
+                        .reduced_hashes
+                        .push(v.into_iter().cloned().collect());
                 }
             });
         }
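Where hashes must outlive the borrowed input, at remaining_unhashed and reduced_hashes, the references are cloned back into owned Hash values, so copying is confined to the carried-over tail instead of the whole result. A minimal sketch of that round trip (illustrative data):

    #[derive(Clone, Debug, PartialEq)]
    struct Hash([u8; 32]);

    fn main() {
        let owned = vec![Hash([7; 32]), Hash([8; 32])];
        let refs: Vec<&Hash> = owned.iter().collect();

        // Cloning through the references materializes owned hashes again,
        // but only for the values that must outlive the borrowed input.
        let carried: Vec<Hash> = refs.into_iter().cloned().collect();
        assert_eq!(carried, owned);
    }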
@@ -1211,9 +1223,9 @@ pub mod tests {
     #[test]
     fn test_accountsdb_de_dup_accounts_zero_chunks() {
-        let (hashes, lamports, _) = AccountsHash::default()
-            .de_dup_accounts_in_parallel(&[vec![vec![CalculateHashIntermediate::default()]]], 0);
-        assert_eq!(vec![Hash::default()], hashes);
+        let vec = [vec![vec![CalculateHashIntermediate::default()]]];
+        let (hashes, lamports, _) = AccountsHash::default().de_dup_accounts_in_parallel(&vec, 0);
+        assert_eq!(vec![&Hash::default()], hashes);
         assert_eq!(lamports, 0);
     }
@@ -1222,32 +1234,27 @@ pub mod tests {
         solana_logger::setup();
 
         let accounts_hash = AccountsHash::default();
-        let (hashes, lamports) = accounts_hash.de_dup_and_eliminate_zeros(
-            vec![vec![], vec![]],
-            &mut HashStats::default(),
-            one_range(),
-        );
+        let vec = vec![vec![], vec![]];
+        let (hashes, lamports) =
+            accounts_hash.de_dup_and_eliminate_zeros(&vec, &mut HashStats::default(), one_range());
         assert_eq!(
-            vec![Hash::default(); 0],
+            vec![&Hash::default(); 0],
             hashes.into_iter().flatten().collect::<Vec<_>>()
         );
         assert_eq!(lamports, 0);
-        let (hashes, lamports) = accounts_hash.de_dup_and_eliminate_zeros(
-            vec![],
-            &mut HashStats::default(),
-            zero_range(),
-        );
-        let empty: Vec<Vec<Hash>> = Vec::default();
+        let vec = vec![];
+        let (hashes, lamports) =
+            accounts_hash.de_dup_and_eliminate_zeros(&vec, &mut HashStats::default(), zero_range());
+        let empty: Vec<Vec<&Hash>> = Vec::default();
         assert_eq!(empty, hashes);
         assert_eq!(lamports, 0);
 
         let (hashes, lamports, _) = accounts_hash.de_dup_accounts_in_parallel(&[], 1);
-        assert_eq!(vec![Hash::default(); 0], hashes);
+        assert_eq!(vec![&Hash::default(); 0], hashes);
         assert_eq!(lamports, 0);
 
         let (hashes, lamports, _) = accounts_hash.de_dup_accounts_in_parallel(&[], 2);
-        assert_eq!(vec![Hash::default(); 0], hashes);
+        assert_eq!(vec![&Hash::default(); 0], hashes);
         assert_eq!(lamports, 0);
     }
@@ -1330,18 +1337,21 @@ pub mod tests {
         let slice = &slice2[..];
         let (hashes2, lamports2, _) = hash.de_dup_accounts_in_parallel(slice, 0);
         let (hashes3, lamports3, _) = hash.de_dup_accounts_in_parallel(slice, 0);
+        let vec = slice.to_vec();
         let (hashes4, lamports4) = hash.de_dup_and_eliminate_zeros(
-            slice.to_vec(),
+            &vec,
             &mut HashStats::default(),
             end - start,
         );
+        let vec = slice.to_vec();
         let (hashes5, lamports5) = hash.de_dup_and_eliminate_zeros(
-            slice.to_vec(),
+            &vec,
             &mut HashStats::default(),
             end - start,
         );
+        let vec = slice.to_vec();
         let (hashes6, lamports6) = hash.de_dup_and_eliminate_zeros(
-            slice.to_vec(),
+            &vec,
             &mut HashStats::default(),
             end - start,
         );
@@ -1451,10 +1461,9 @@ pub mod tests {
     }
 
     fn test_de_dup_accounts_in_parallel(
-        account_maps: &[CalculateHashIntermediate],
-    ) -> (Vec<Hash>, u64, usize) {
-        AccountsHash::default()
-            .de_dup_accounts_in_parallel(&vec![vec![account_maps.to_vec()]][..], 0)
+        account_maps: &[Vec<Vec<CalculateHashIntermediate>>],
+    ) -> (Vec<&Hash>, u64, usize) {
+        AccountsHash::default().de_dup_accounts_in_parallel(account_maps, 0)
     }
 
     #[test]
@@ -1467,14 +1476,16 @@ pub mod tests {
         let val = CalculateHashIntermediate::new(hash, 1, key);
         account_maps.push(val.clone());
 
-        let result = test_de_dup_accounts_in_parallel(&account_maps[..]);
-        assert_eq!(result, (vec![val.hash], val.lamports as u64, 1));
+        let vecs = vec![vec![account_maps.to_vec()]];
+        let result = test_de_dup_accounts_in_parallel(&vecs);
+        assert_eq!(result, (vec![&val.hash], val.lamports as u64, 1));
 
         // zero original lamports, higher version
         let val = CalculateHashIntermediate::new(hash, ZERO_RAW_LAMPORTS_SENTINEL, key);
         account_maps.push(val); // has to be after previous entry since account_maps are in slot order
 
-        let result = test_de_dup_accounts_in_parallel(&account_maps[..]);
+        let vecs = vec![vec![account_maps.to_vec()]];
+        let result = test_de_dup_accounts_in_parallel(&vecs);
         assert_eq!(result, (vec![], 0, 2));
     }
@@ -1866,7 +1877,7 @@ pub mod tests {
             )],
         ];
         AccountsHash::default().de_dup_and_eliminate_zeros(
-            vec![input],
+            &[input],
             &mut HashStats::default(),
             2, // accounts above are in 2 groups
         );