Improve load_largest_accounts more (#15785)
* Add load_largest_accounts bench * Check lamports before address filter * Use BinaryHeap, add Accounts test * Use pubkey reference in the min-heap Also, flatten code with early returns Co-authored-by: Greg Fitzgerald <greg@solana.com>
This commit is contained in:
		@@ -7,7 +7,7 @@ use dashmap::DashMap;
 | 
			
		||||
use rand::Rng;
 | 
			
		||||
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 | 
			
		||||
use solana_runtime::{
 | 
			
		||||
    accounts::{create_test_accounts, Accounts},
 | 
			
		||||
    accounts::{create_test_accounts, AccountAddressFilter, Accounts},
 | 
			
		||||
    bank::*,
 | 
			
		||||
};
 | 
			
		||||
use solana_sdk::{
 | 
			
		||||
@@ -360,3 +360,25 @@ fn bench_dashmap_iter(bencher: &mut Bencher) {
 | 
			
		||||
        );
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[bench]
 | 
			
		||||
fn bench_load_largest_accounts(b: &mut Bencher) {
 | 
			
		||||
    let accounts =
 | 
			
		||||
        Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false);
 | 
			
		||||
    let mut rng = rand::thread_rng();
 | 
			
		||||
    for _ in 0..10_000 {
 | 
			
		||||
        let lamports = rng.gen();
 | 
			
		||||
        let pubkey = Pubkey::new_unique();
 | 
			
		||||
        let account = AccountSharedData::new(lamports, 0, &Pubkey::default());
 | 
			
		||||
        accounts.store_slow_uncached(0, &pubkey, &account);
 | 
			
		||||
    }
 | 
			
		||||
    let ancestors = vec![(0, 0)].into_iter().collect();
 | 
			
		||||
    b.iter(|| {
 | 
			
		||||
        accounts.load_largest_accounts(
 | 
			
		||||
            &ancestors,
 | 
			
		||||
            20,
 | 
			
		||||
            &HashSet::new(),
 | 
			
		||||
            AccountAddressFilter::Exclude,
 | 
			
		||||
        )
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -31,7 +31,8 @@ use solana_sdk::{
 | 
			
		||||
    transaction::{Transaction, TransactionError},
 | 
			
		||||
};
 | 
			
		||||
use std::{
 | 
			
		||||
    collections::{hash_map, HashMap, HashSet},
 | 
			
		||||
    cmp::Reverse,
 | 
			
		||||
    collections::{hash_map, BinaryHeap, HashMap, HashSet},
 | 
			
		||||
    ops::RangeBounds,
 | 
			
		||||
    path::PathBuf,
 | 
			
		||||
    sync::{Arc, Mutex},
 | 
			
		||||
@@ -538,36 +539,42 @@ impl Accounts {
 | 
			
		||||
        filter_by_address: &HashSet<Pubkey>,
 | 
			
		||||
        filter: AccountAddressFilter,
 | 
			
		||||
    ) -> Vec<(Pubkey, u64)> {
 | 
			
		||||
        self.accounts_db
 | 
			
		||||
            .scan_accounts(ancestors, |collector: &mut Vec<(Pubkey, u64)>, option| {
 | 
			
		||||
                if let Some(data) = option
 | 
			
		||||
                    .filter(|(pubkey, account, _)| {
 | 
			
		||||
                        let should_include_pubkey = match filter {
 | 
			
		||||
                            AccountAddressFilter::Exclude => !filter_by_address.contains(&pubkey),
 | 
			
		||||
                            AccountAddressFilter::Include => filter_by_address.contains(&pubkey),
 | 
			
		||||
        if num == 0 {
 | 
			
		||||
            return vec![];
 | 
			
		||||
        }
 | 
			
		||||
        let account_balances = self.accounts_db.scan_accounts(
 | 
			
		||||
            ancestors,
 | 
			
		||||
            |collector: &mut BinaryHeap<Reverse<(u64, Pubkey)>>, option| {
 | 
			
		||||
                if let Some((pubkey, account, _slot)) = option {
 | 
			
		||||
                    if account.lamports == 0 {
 | 
			
		||||
                        return;
 | 
			
		||||
                    }
 | 
			
		||||
                    let contains_address = filter_by_address.contains(pubkey);
 | 
			
		||||
                    let collect = match filter {
 | 
			
		||||
                        AccountAddressFilter::Exclude => !contains_address,
 | 
			
		||||
                        AccountAddressFilter::Include => contains_address,
 | 
			
		||||
                    };
 | 
			
		||||
                        should_include_pubkey && account.lamports != 0
 | 
			
		||||
                    })
 | 
			
		||||
                    .map(|(pubkey, account, _slot)| (*pubkey, account.lamports))
 | 
			
		||||
                {
 | 
			
		||||
                    let index_of_first_smaller_account =
 | 
			
		||||
                        collector.iter().position(|&(entry_pubkey, entry_balance)| {
 | 
			
		||||
                            entry_balance < data.1
 | 
			
		||||
                                || (entry_balance == data.1 && entry_pubkey > data.0)
 | 
			
		||||
                        });
 | 
			
		||||
                    match index_of_first_smaller_account {
 | 
			
		||||
                        Some(i) => {
 | 
			
		||||
                            collector.insert(i, data);
 | 
			
		||||
                    if !collect {
 | 
			
		||||
                        return;
 | 
			
		||||
                    }
 | 
			
		||||
                        None => {
 | 
			
		||||
                            if collector.len() <= num {
 | 
			
		||||
                                collector.push(data);
 | 
			
		||||
                    if collector.len() == num {
 | 
			
		||||
                        let Reverse(entry) = collector
 | 
			
		||||
                            .peek()
 | 
			
		||||
                            .expect("BinaryHeap::peek should succeed when len > 0");
 | 
			
		||||
                        if *entry >= (account.lamports, *pubkey) {
 | 
			
		||||
                            return;
 | 
			
		||||
                        }
 | 
			
		||||
                        collector.pop();
 | 
			
		||||
                    }
 | 
			
		||||
                    collector.push(Reverse((account.lamports, *pubkey)));
 | 
			
		||||
                }
 | 
			
		||||
                    collector.truncate(num);
 | 
			
		||||
                }
 | 
			
		||||
            })
 | 
			
		||||
            },
 | 
			
		||||
        );
 | 
			
		||||
        account_balances
 | 
			
		||||
            .into_sorted_vec()
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|Reverse((balance, pubkey))| (pubkey, balance))
 | 
			
		||||
            .collect()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn calculate_capitalization(
 | 
			
		||||
@@ -2395,4 +2402,129 @@ mod tests {
 | 
			
		||||
            &next_blockhash
 | 
			
		||||
        ));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_load_largest_accounts() {
 | 
			
		||||
        let accounts =
 | 
			
		||||
            Accounts::new_with_config(Vec::new(), &ClusterType::Development, HashSet::new(), false);
 | 
			
		||||
 | 
			
		||||
        let pubkey0 = Pubkey::new_unique();
 | 
			
		||||
        let account0 = AccountSharedData::new(42, 0, &Pubkey::default());
 | 
			
		||||
        accounts.store_slow_uncached(0, &pubkey0, &account0);
 | 
			
		||||
        let pubkey1 = Pubkey::new_unique();
 | 
			
		||||
        let account1 = AccountSharedData::new(42, 0, &Pubkey::default());
 | 
			
		||||
        accounts.store_slow_uncached(0, &pubkey1, &account1);
 | 
			
		||||
        let pubkey2 = Pubkey::new_unique();
 | 
			
		||||
        let account2 = AccountSharedData::new(41, 0, &Pubkey::default());
 | 
			
		||||
        accounts.store_slow_uncached(0, &pubkey2, &account2);
 | 
			
		||||
 | 
			
		||||
        let ancestors = vec![(0, 0)].into_iter().collect();
 | 
			
		||||
        let all_pubkeys: HashSet<_> = vec![pubkey0, pubkey1, pubkey2].into_iter().collect();
 | 
			
		||||
 | 
			
		||||
        // num == 0 should always return empty set
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                0,
 | 
			
		||||
                &HashSet::new(),
 | 
			
		||||
                AccountAddressFilter::Exclude
 | 
			
		||||
            ),
 | 
			
		||||
            vec![]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                0,
 | 
			
		||||
                &all_pubkeys,
 | 
			
		||||
                AccountAddressFilter::Include
 | 
			
		||||
            ),
 | 
			
		||||
            vec![]
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        // list should be sorted by balance, then pubkey, descending
 | 
			
		||||
        assert!(pubkey1 > pubkey0);
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                1,
 | 
			
		||||
                &HashSet::new(),
 | 
			
		||||
                AccountAddressFilter::Exclude
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                2,
 | 
			
		||||
                &HashSet::new(),
 | 
			
		||||
                AccountAddressFilter::Exclude
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42), (pubkey0, 42)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                3,
 | 
			
		||||
                &HashSet::new(),
 | 
			
		||||
                AccountAddressFilter::Exclude
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42), (pubkey0, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        // larger num should not affect results
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                6,
 | 
			
		||||
                &HashSet::new(),
 | 
			
		||||
                AccountAddressFilter::Exclude
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42), (pubkey0, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        // AccountAddressFilter::Exclude should exclude entry
 | 
			
		||||
        let exclude1: HashSet<_> = vec![pubkey1].into_iter().collect();
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(&ancestors, 1, &exclude1, AccountAddressFilter::Exclude),
 | 
			
		||||
            vec![(pubkey0, 42)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(&ancestors, 2, &exclude1, AccountAddressFilter::Exclude),
 | 
			
		||||
            vec![(pubkey0, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(&ancestors, 3, &exclude1, AccountAddressFilter::Exclude),
 | 
			
		||||
            vec![(pubkey0, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        // AccountAddressFilter::Include should limit entries
 | 
			
		||||
        let include1_2: HashSet<_> = vec![pubkey1, pubkey2].into_iter().collect();
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                1,
 | 
			
		||||
                &include1_2,
 | 
			
		||||
                AccountAddressFilter::Include
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                2,
 | 
			
		||||
                &include1_2,
 | 
			
		||||
                AccountAddressFilter::Include
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
        assert_eq!(
 | 
			
		||||
            accounts.load_largest_accounts(
 | 
			
		||||
                &ancestors,
 | 
			
		||||
                3,
 | 
			
		||||
                &include1_2,
 | 
			
		||||
                AccountAddressFilter::Include
 | 
			
		||||
            ),
 | 
			
		||||
            vec![(pubkey1, 42), (pubkey2, 41)]
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user