AcctIdx: introduce BucketApi for access to a specific bucket (#20359)

Author: Jeff Washington (jwash)
Date: 2021-10-04 12:48:09 -04:00
Committed by: GitHub
Parent: fb8a7cfa92
Commit: 8da2eb980a

4 changed files with 194 additions and 113 deletions
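
In short: callers of BucketMap no longer compute a bucket index and thread it
through every call. BucketMap routes by key itself, or callers hold a per-bucket
BucketApi handle. A minimal sketch of the new call pattern (method names and
shapes come from the diffs below; the u64 value type and the demo flow are
illustrative only):

    use solana_bucket_map::bucket_map::{BucketMap, BucketMapConfig};
    use solana_sdk::pubkey::Pubkey;

    fn demo() {
        // 16 buckets; the bucket count must be a power of two.
        let index: BucketMap<u64> = BucketMap::new(BucketMapConfig::new(1 << 4));
        let key = Pubkey::new_unique();

        // Before: index.insert(index.bucket_ix(&key), &key, (&[42], 1));
        // After: BucketMap resolves the bucket from the key internally,
        index.insert(&key, (&[42u64], 1));
        assert_eq!(index.read_value(&key), Some((vec![42u64], 1)));

        // ...or the caller holds the per-bucket handle and reuses it, as
        // InMemAccountsIndex now does with its cached `bucket` field.
        let bucket = index.get_bucket(&key);
        assert_eq!(bucket.read_value(&key), Some((vec![42u64], 1)));
    }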

bucket_map/src/bucket_api.rs (new file)

@@ -0,0 +1,134 @@
use crate::bucket::Bucket;
use crate::bucket_item::BucketItem;
use crate::bucket_map::BucketMapError;
use crate::bucket_stats::BucketMapStats;
use crate::{MaxSearch, RefCount};
use solana_sdk::pubkey::Pubkey;
use std::ops::RangeBounds;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::{RwLock, RwLockWriteGuard};

type LockedBucket<T> = Arc<RwLock<Option<Bucket<T>>>>;

pub struct BucketApi<T: Clone + Copy> {
    drives: Arc<Vec<PathBuf>>,
    max_search: MaxSearch,
    pub stats: Arc<BucketMapStats>,

    bucket: LockedBucket<T>,
}

impl<T: Clone + Copy> BucketApi<T> {
    pub fn new(
        drives: Arc<Vec<PathBuf>>,
        max_search: MaxSearch,
        stats: Arc<BucketMapStats>,
    ) -> Self {
        Self {
            drives,
            max_search,
            stats,
            bucket: Arc::default(),
        }
    }

    /// Get the items for bucket
    pub fn items_in_range<R>(&self, range: &Option<&R>) -> Vec<BucketItem<T>>
    where
        R: RangeBounds<Pubkey>,
    {
        self.bucket
            .read()
            .unwrap()
            .as_ref()
            .map(|bucket| bucket.items_in_range(range))
            .unwrap_or_default()
    }

    /// Get the Pubkeys
    pub fn keys(&self) -> Vec<Pubkey> {
        self.bucket
            .read()
            .unwrap()
            .as_ref()
            .map_or_else(Vec::default, |bucket| bucket.keys())
    }

    /// Get the values for Pubkey `key`
    pub fn read_value(&self, key: &Pubkey) -> Option<(Vec<T>, RefCount)> {
        self.bucket.read().unwrap().as_ref().and_then(|bucket| {
            bucket
                .read_value(key)
                .map(|(value, ref_count)| (value.to_vec(), ref_count))
        })
    }

    pub fn bucket_len(&self) -> u64 {
        self.bucket
            .read()
            .unwrap()
            .as_ref()
            .map(|bucket| bucket.bucket_len())
            .unwrap_or_default()
    }

    pub fn delete_key(&self, key: &Pubkey) {
        let mut bucket = self.get_write_bucket();
        if let Some(bucket) = bucket.as_mut() {
            bucket.delete_key(key)
        }
    }

    fn get_write_bucket(&self) -> RwLockWriteGuard<Option<Bucket<T>>> {
        let mut bucket = self.bucket.write().unwrap();
        if bucket.is_none() {
            *bucket = Some(Bucket::new(
                Arc::clone(&self.drives),
                self.max_search,
                Arc::clone(&self.stats),
            ));
        }
        bucket
    }

    pub fn addref(&self, key: &Pubkey) -> Option<RefCount> {
        self.get_write_bucket()
            .as_mut()
            .and_then(|bucket| bucket.addref(key))
    }

    pub fn unref(&self, key: &Pubkey) -> Option<RefCount> {
        self.get_write_bucket()
            .as_mut()
            .and_then(|bucket| bucket.unref(key))
    }

    pub fn insert(&self, pubkey: &Pubkey, value: (&[T], RefCount)) {
        let mut bucket = self.get_write_bucket();
        bucket.as_mut().unwrap().insert(pubkey, value)
    }

    pub fn grow(&self, err: BucketMapError) {
        let mut bucket = self.get_write_bucket();
        bucket.as_mut().unwrap().grow(err)
    }

    pub fn update<F>(&self, key: &Pubkey, updatefn: F)
    where
        F: Fn(Option<(&[T], RefCount)>) -> Option<(Vec<T>, RefCount)>,
    {
        let mut bucket = self.get_write_bucket();
        bucket.as_mut().unwrap().update(key, updatefn)
    }

    pub fn try_write(
        &self,
        pubkey: &Pubkey,
        value: (&[T], RefCount),
    ) -> Result<(), BucketMapError> {
        let mut bucket = self.get_write_bucket();
        bucket.as_mut().unwrap().try_write(pubkey, value.0, value.1)
    }
}
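
A behavioral note on the file above: BucketApi creates the inner Bucket lazily,
in get_write_bucket(), so read paths on a never-written bucket return empty
defaults instead of allocating anything. A sketch of that behavior, reached
through BucketMap because constructing BucketApi directly needs crate-private
types such as BucketMapStats (the u64 payload is illustrative):

    use solana_bucket_map::bucket_map::{BucketMap, BucketMapConfig};
    use solana_sdk::pubkey::Pubkey;

    fn lazy_bucket_demo() {
        let index: BucketMap<u64> = BucketMap::new(BucketMapConfig::new(1 << 1));
        let key = Pubkey::new_unique();
        let bucket = index.get_bucket(&key);

        // No inner Bucket exists yet; reads fall back to defaults.
        assert_eq!(bucket.bucket_len(), 0);
        assert!(bucket.keys().is_empty());
        assert_eq!(bucket.read_value(&key), None);

        // The first write path (insert/update/addref/...) creates the Bucket.
        bucket.insert(&key, (&[7u64], 2));
        assert_eq!(bucket.read_value(&key), Some((vec![7u64], 2)));
    }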

bucket_map/src/bucket_map.rs

@@ -1,17 +1,14 @@
 //! BucketMap is a mostly contention free concurrent map backed by MmapMut
-use crate::bucket::Bucket;
-use crate::bucket_item::BucketItem;
+use crate::bucket_api::BucketApi;
 use crate::bucket_stats::BucketMapStats;
 use crate::{MaxSearch, RefCount};
 use solana_sdk::pubkey::Pubkey;
 use std::convert::TryInto;
 use std::fmt::Debug;
 use std::fs;
-use std::ops::RangeBounds;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::sync::{RwLock, RwLockWriteGuard};
 use tempfile::TempDir;

 #[derive(Debug, Default, Clone)]
@@ -33,10 +30,9 @@ impl BucketMapConfig {
 }

 pub struct BucketMap<T: Clone + Copy + Debug> {
-    buckets: Vec<RwLock<Option<Bucket<T>>>>,
+    buckets: Vec<Arc<BucketApi<T>>>,
     drives: Arc<Vec<PathBuf>>,
     max_buckets_pow2: u8,
-    max_search: MaxSearch,
     pub stats: Arc<BucketMapStats>,
     pub temp_dir: Option<TempDir>,
 }
@@ -71,8 +67,6 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
             config.max_buckets.is_power_of_two(),
             "Max number of buckets must be a power of two"
         );
-        let mut buckets = Vec::with_capacity(config.max_buckets);
-        buckets.resize_with(config.max_buckets, || RwLock::new(None));
         let stats = Arc::new(BucketMapStats::default());
         // this should be <= 1 << DEFAULT_CAPACITY or we end up searching the same items over and over - probably not a big deal since it is so small anyway
         const MAX_SEARCH: MaxSearch = 32;
@@ -88,6 +82,15 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
         });
         let drives = Arc::new(drives);
+        let mut buckets = Vec::with_capacity(config.max_buckets);
+        buckets.resize_with(config.max_buckets, || {
+            Arc::new(BucketApi::new(
+                Arc::clone(&drives),
+                max_search,
+                Arc::clone(&stats),
+            ))
+        });

         // A simple log2 function that is correct if x is a power of two
         let log2 = |x: usize| usize::BITS - x.leading_zeros() - 1;
@@ -96,7 +99,6 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
             drives,
             max_buckets_pow2: log2(config.max_buckets) as u8,
             stats,
-            max_search,
             temp_dir,
         }
     }
@@ -112,92 +114,24 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
         self.buckets.len()
     }

-    pub fn bucket_len(&self, ix: usize) -> u64 {
-        self.buckets[ix]
-            .read()
-            .unwrap()
-            .as_ref()
-            .map(|bucket| bucket.bucket_len())
-            .unwrap_or_default()
-    }
-
-    /// Get the items for bucket `ix` in `range`
-    pub fn items_in_range<R>(&self, ix: usize, range: &Option<&R>) -> Vec<BucketItem<T>>
-    where
-        R: RangeBounds<Pubkey>,
-    {
-        self.buckets[ix]
-            .read()
-            .unwrap()
-            .as_ref()
-            .map(|bucket| bucket.items_in_range(range))
-            .unwrap_or_default()
-    }
-
-    /// Get the Pubkeys for bucket `ix`
-    pub fn keys(&self, ix: usize) -> Vec<Pubkey> {
-        self.buckets[ix]
-            .read()
-            .unwrap()
-            .as_ref()
-            .map_or_else(Vec::default, |bucket| bucket.keys())
-    }
-
     /// Get the values for Pubkey `key`
     pub fn read_value(&self, key: &Pubkey) -> Option<(Vec<T>, RefCount)> {
-        let ix = self.bucket_ix(key);
-        self.buckets[ix]
-            .read()
-            .unwrap()
-            .as_ref()
-            .and_then(|bucket| {
-                bucket
-                    .read_value(key)
-                    .map(|(value, ref_count)| (value.to_vec(), ref_count))
-            })
+        self.get_bucket(key).read_value(key)
     }

     /// Delete the Pubkey `key`
     pub fn delete_key(&self, key: &Pubkey) {
-        let ix = self.bucket_ix(key);
-        if let Some(bucket) = self.buckets[ix].write().unwrap().as_mut() {
-            bucket.delete_key(key);
-        }
+        self.get_bucket(key).delete_key(key);
     }

     /// Update Pubkey `key`'s value with 'value'
-    pub fn insert(&self, ix: usize, key: &Pubkey, value: (&[T], RefCount)) {
-        let mut bucket = self.get_bucket(ix);
-        bucket.as_mut().unwrap().insert(key, value)
-    }
-
-    fn get_bucket(&self, ix: usize) -> RwLockWriteGuard<Option<Bucket<T>>> {
-        let mut bucket = self.buckets[ix].write().unwrap();
-        if bucket.is_none() {
-            *bucket = Some(Bucket::new(
-                Arc::clone(&self.drives),
-                self.max_search,
-                Arc::clone(&self.stats),
-            ));
-        }
-        bucket
+    pub fn insert(&self, key: &Pubkey, value: (&[T], RefCount)) {
+        self.get_bucket(key).insert(key, value)
     }

     /// Update Pubkey `key`'s value with 'value'
-    pub fn try_insert(
-        &self,
-        ix: usize,
-        key: &Pubkey,
-        value: (&[T], RefCount),
-    ) -> Result<(), BucketMapError> {
-        let mut bucket = self.get_bucket(ix);
-        bucket.as_mut().unwrap().try_write(key, value.0, value.1)
-    }
-
-    /// if err is a grow error, then grow the appropriate piece
-    pub fn grow(&self, ix: usize, err: BucketMapError) {
-        let mut bucket = self.get_bucket(ix);
-        bucket.as_mut().unwrap().grow(err);
+    pub fn try_insert(&self, key: &Pubkey, value: (&[T], RefCount)) -> Result<(), BucketMapError> {
+        self.get_bucket(key).try_write(key, value)
     }

     /// Update Pubkey `key`'s value with function `updatefn`
@@ -205,9 +139,15 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
     where
         F: Fn(Option<(&[T], RefCount)>) -> Option<(Vec<T>, RefCount)>,
     {
-        let ix = self.bucket_ix(key);
-        let mut bucket = self.get_bucket(ix);
-        bucket.as_mut().unwrap().update(key, updatefn)
+        self.get_bucket(key).update(key, updatefn)
+    }
+
+    pub fn get_bucket(&self, key: &Pubkey) -> &Arc<BucketApi<T>> {
+        self.get_bucket_from_index(self.bucket_ix(key))
+    }
+
+    pub fn get_bucket_from_index(&self, ix: usize) -> &Arc<BucketApi<T>> {
+        &self.buckets[ix]
     }

     /// Get the bucket index for Pubkey `key`
@@ -223,15 +163,15 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
     /// Increment the refcount for Pubkey `key`
     pub fn addref(&self, key: &Pubkey) -> Option<RefCount> {
         let ix = self.bucket_ix(key);
-        let mut bucket = self.buckets[ix].write().unwrap();
-        bucket.as_mut()?.addref(key)
+        let bucket = &self.buckets[ix];
+        bucket.addref(key)
     }

     /// Decrement the refcount for Pubkey `key`
     pub fn unref(&self, key: &Pubkey) -> Option<RefCount> {
         let ix = self.bucket_ix(key);
-        let mut bucket = self.buckets[ix].write().unwrap();
-        bucket.as_mut()?.unref(key)
+        let bucket = &self.buckets[ix];
+        bucket.unref(key)
     }
 }
@@ -247,6 +187,7 @@ mod tests {
     use rand::thread_rng;
     use rand::Rng;
     use std::collections::HashMap;
+    use std::sync::RwLock;

     #[test]
     fn bucket_map_test_insert() {
@@ -263,21 +204,21 @@ mod tests {
             let key = Pubkey::new_unique();
             let config = BucketMapConfig::new(1 << 1);
             let index = BucketMap::new(config);
-            let ix = index.bucket_ix(&key);
+            let bucket = index.get_bucket(&key);
             if pass == 0 {
-                index.insert(ix, &key, (&[0], 0));
+                index.insert(&key, (&[0], 0));
             } else {
-                let result = index.try_insert(ix, &key, (&[0], 0));
+                let result = index.try_insert(&key, (&[0], 0));
                 assert!(result.is_err());
                 assert_eq!(index.read_value(&key), None);
                 if pass == 2 {
                     // another call to try insert again - should still return an error
-                    let result = index.try_insert(ix, &key, (&[0], 0));
+                    let result = index.try_insert(&key, (&[0], 0));
                     assert!(result.is_err());
                     assert_eq!(index.read_value(&key), None);
                 }
-                index.grow(ix, result.unwrap_err());
-                let result = index.try_insert(ix, &key, (&[0], 0));
+                bucket.grow(result.unwrap_err());
+                let result = index.try_insert(&key, (&[0], 0));
                 assert!(result.is_ok());
             }
             assert_eq!(index.read_value(&key), Some((vec![0], 0)));
@@ -289,9 +230,9 @@ mod tests {
         let key = Pubkey::new_unique();
         let config = BucketMapConfig::new(1 << 1);
         let index = BucketMap::new(config);
-        index.insert(index.bucket_ix(&key), &key, (&[0], 0));
+        index.insert(&key, (&[0], 0));
         assert_eq!(index.read_value(&key), Some((vec![0], 0)));
-        index.insert(index.bucket_ix(&key), &key, (&[1], 0));
+        index.insert(&key, (&[1], 0));
         assert_eq!(index.read_value(&key), Some((vec![1], 0)));
     }
@@ -465,8 +406,8 @@ mod tests {
         let mut r = vec![];
         for bin in 0..map.num_buckets() {
             r.append(
-                &mut map
-                    .items_in_range(bin, &None::<&std::ops::RangeInclusive<Pubkey>>),
+                &mut map.buckets[bin]
+                    .items_in_range(&None::<&std::ops::RangeInclusive<Pubkey>>),
             );
         }
         r
@@ -505,7 +446,7 @@ mod tests {
             let insert = thread_rng().gen_range(0, 2) == 0;
             maps.iter().for_each(|map| {
                 if insert {
-                    map.insert(map.bucket_ix(&k), &k, (&v.0, v.1))
+                    map.insert(&k, (&v.0, v.1))
                 } else {
                     map.update(&k, |current| {
                         assert!(current.is_none());
@@ -524,7 +465,7 @@ mod tests {
             let insert = thread_rng().gen_range(0, 2) == 0;
             maps.iter().for_each(|map| {
                 if insert {
-                    map.insert(map.bucket_ix(&k), &k, (&v, rc))
+                    map.insert(&k, (&v, rc))
                 } else {
                     map.update(&k, |current| {
                         assert_eq!(current, v_old.map(|(v, rc)| (&v[..], *rc)), "{}", k);
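
With the index-taking methods (bucket_len, items_in_range, keys, grow, and the
ix-based insert/try_insert) gone from BucketMap, map-wide work becomes a fold
over the per-bucket handles, as the test above now does with map.buckets[bin].
A hypothetical equivalent using only the accessors added in this diff (this
assumes num_buckets remains publicly visible):

    use solana_bucket_map::bucket_map::BucketMap;
    use std::fmt::Debug;

    // Hypothetical aggregate: total number of entries across all buckets.
    fn total_len<T: Clone + Copy + Debug>(map: &BucketMap<T>) -> u64 {
        (0..map.num_buckets())
            .map(|bin| map.get_bucket_from_index(bin).bucket_len())
            .sum()
    }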

bucket_map/src/lib.rs

@@ -1,5 +1,6 @@
 #![allow(clippy::integer_arithmetic)]
 mod bucket;
+pub mod bucket_api;
 mod bucket_item;
 pub mod bucket_map;
 mod bucket_stats;

runtime/src/in_mem_accounts_index.rs

@@ -6,6 +6,7 @@ use crate::bucket_map_holder::{Age, BucketMapHolder};
 use crate::bucket_map_holder_stats::BucketMapHolderStats;
 use rand::thread_rng;
 use rand::Rng;
+use solana_bucket_map::bucket_api::BucketApi;
 use solana_measure::measure::Measure;
 use solana_sdk::{clock::Slot, pubkey::Pubkey};
 use std::collections::{hash_map::Entry, HashMap};
@@ -28,6 +29,8 @@ pub struct InMemAccountsIndex<T: IndexValue> {
     storage: Arc<BucketMapHolder<T>>,
     bin: usize,

+    bucket: Option<Arc<BucketApi<SlotT<T>>>>,
+
     // pubkey ranges that this bin must hold in the cache while the range is present in this vec
     pub(crate) cache_ranges_held: CacheRangesHeld,
     // true while ranges are being manipulated. Used to keep an async flush from removing things while a range is being held.
@@ -51,6 +54,11 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
             map_internal: RwLock::default(),
             storage: Arc::clone(storage),
             bin,
+            bucket: storage
+                .disk
+                .as_ref()
+                .map(|disk| disk.get_bucket_from_index(bin))
+                .map(Arc::clone),
             cache_ranges_held: CacheRangesHeld::default(),
             stop_flush: AtomicU64::default(),
             bin_dirty: AtomicBool::default(),
@@ -111,7 +119,7 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
     }

     fn load_from_disk(&self, pubkey: &Pubkey) -> Option<(SlotList<T>, RefCount)> {
-        self.storage.disk.as_ref().and_then(|disk| {
+        self.bucket.as_ref().and_then(|disk| {
             let m = Measure::start("load_disk_found_count");
             let entry_disk = disk.read_value(pubkey);
             match &entry_disk {
@@ -209,7 +217,7 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
     }

     fn delete_disk_key(&self, pubkey: &Pubkey) {
-        if let Some(disk) = self.storage.disk.as_ref() {
+        if let Some(disk) = self.bucket.as_ref() {
             disk.delete_key(pubkey)
         }
     }
@@ -579,8 +587,8 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
         let m = Measure::start("range");

         // load from disk
-        if let Some(disk) = self.storage.disk.as_ref() {
-            let items = disk.items_in_range(self.bin, range);
+        if let Some(disk) = self.bucket.as_ref() {
+            let items = disk.items_in_range(range);
             let mut map = self.map().write().unwrap();
             let future_age = self.storage.future_age_to_flush();
             for item in items {
@@ -668,7 +676,7 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
         loop {
             let mut removes = Vec::default();
             let mut removes_random = Vec::default();
-            let disk = self.storage.disk.as_ref().unwrap();
+            let disk = self.bucket.as_ref().unwrap();
             let mut updates = Vec::default();
             let m = Measure::start("flush_scan");
@@ -706,11 +714,8 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
                         continue; // marked dirty after we grabbed it above, so handle this the next time this bucket is flushed
                     }
                     flush_entries_updated_on_disk += 1;
-                    disk_resize = disk.try_insert(
-                        self.bin,
-                        &k,
-                        (&v.slot_list.read().unwrap(), v.ref_count()),
-                    );
+                    disk_resize =
+                        disk.try_write(&k, (&v.slot_list.read().unwrap(), v.ref_count()));
                 }
                 if disk_resize.is_err() {
                     // disk needs to resize, so mark all unprocessed items as dirty again so we pick them up after the resize
@@ -745,7 +750,7 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
                 Err(err) => {
                     // grow the bucket, outside of all in-mem locks.
                     // then, loop to try again
-                    disk.grow(self.bin, err);
+                    disk.grow(err);
                 }
             }
         }
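
The flush path above is the resize protocol in miniature: try_write returns Err
when the bucket must grow, the caller releases its in-mem locks, grows via the
same BucketApi handle, and retries. A minimal sketch of that loop (the helper
name is hypothetical; RefCount is assumed to be exported at the crate root,
as bucket_api.rs imports it via crate::RefCount):

    use solana_bucket_map::{bucket_api::BucketApi, RefCount};
    use solana_sdk::pubkey::Pubkey;

    // Hypothetical helper showing the grow-and-retry shape of the flush loop.
    fn write_with_resize<T: Clone + Copy>(
        bucket: &BucketApi<T>,
        pubkey: &Pubkey,
        value: (&[T], RefCount),
    ) {
        loop {
            match bucket.try_write(pubkey, value) {
                Ok(()) => break,
                // grow the bucket, outside of all in-mem locks; then retry
                Err(err) => bucket.grow(err),
            }
        }
    }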