AcctIdx: stats for buckets on disk, add median (#20528)

Jeff Washington (jwash)
2021-10-08 13:58:38 -05:00
committed by GitHub
parent 2c3d52b4cc
commit 4f6a0b2650
4 changed files with 65 additions and 23 deletions

View File

@@ -7,6 +7,7 @@ use solana_sdk::pubkey::Pubkey;
 use std::ops::RangeBounds;
 use std::path::PathBuf;
+use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Arc;
 use std::sync::{RwLock, RwLockWriteGuard};
@@ -18,6 +19,7 @@ pub struct BucketApi<T: Clone + Copy> {
     pub stats: Arc<BucketMapStats>,
     bucket: LockedBucket<T>,
+    count: Arc<AtomicU64>,
 }

 impl<T: Clone + Copy> BucketApi<T> {
@@ -25,12 +27,14 @@ impl<T: Clone + Copy> BucketApi<T> {
         drives: Arc<Vec<PathBuf>>,
         max_search: MaxSearch,
         stats: Arc<BucketMapStats>,
+        count: Arc<AtomicU64>,
     ) -> Self {
         Self {
             drives,
             max_search,
             stats,
             bucket: RwLock::default(),
+            count,
         }
     }
@@ -90,7 +94,9 @@
                 Arc::clone(&self.stats),
             ));
         } else {
-            bucket.as_mut().unwrap().handle_delayed_grows();
+            let write = bucket.as_mut().unwrap();
+            write.handle_delayed_grows();
+            self.count.store(write.bucket_len(), Ordering::Relaxed);
         }
         bucket
     }
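The change above gives each BucketApi handle an Arc<AtomicU64> entry counter that the writer refreshes from bucket_len() on the write path, so a stats reporter can read per-bucket sizes without taking the bucket's RwLock. A minimal sketch of that pattern, using a hypothetical Bucket type rather than the real BucketApi/LockedBucket machinery:

```rust
use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

// Hypothetical, simplified stand-in for BucketApi: the writer owns the data,
// but the entry count is mirrored into a shared atomic so a stats thread can
// read it without locking the bucket itself.
struct Bucket {
    items: Vec<u64>,
    count: Arc<AtomicU64>, // shared with the stats reporter
}

impl Bucket {
    fn insert(&mut self, item: u64) {
        self.items.push(item);
        // Relaxed is enough: the value only feeds metrics and never guards other data.
        self.count.store(self.items.len() as u64, Ordering::Relaxed);
    }
}

fn main() {
    let count = Arc::new(AtomicU64::new(0));
    let mut bucket = Bucket {
        items: Vec::new(),
        count: Arc::clone(&count),
    };
    bucket.insert(42);
    bucket.insert(43);
    // A reporter holding only the Arc sees the up-to-date length.
    assert_eq!(count.load(Ordering::Relaxed), 2);
}
```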

View File

@@ -69,7 +69,6 @@ impl<T: Clone + Copy + Debug> BucketMap<T> {
             config.max_buckets.is_power_of_two(),
             "Max number of buckets must be a power of two"
         );
-        let stats = Arc::new(BucketMapStats::default());
         // this should be <= 1 << DEFAULT_CAPACITY or we end up searching the same items over and over - probably not a big deal since it is so small anyway
         const MAX_SEARCH: MaxSearch = 32;
         let max_search = config.max_search.unwrap_or(MAX_SEARCH);
@@ -84,14 +83,24 @@
         });
         let drives = Arc::new(drives);

-        let mut buckets = Vec::with_capacity(config.max_buckets);
-        buckets.resize_with(config.max_buckets, || {
-            Arc::new(BucketApi::new(
-                Arc::clone(&drives),
-                max_search,
-                Arc::clone(&stats),
-            ))
-        });
+        let mut per_bucket_count = Vec::with_capacity(config.max_buckets);
+        per_bucket_count.resize_with(config.max_buckets, Arc::default);
+        let stats = Arc::new(BucketMapStats {
+            per_bucket_count,
+            ..BucketMapStats::default()
+        });
+        let buckets = stats
+            .per_bucket_count
+            .iter()
+            .map(|per_bucket_count| {
+                Arc::new(BucketApi::new(
+                    Arc::clone(&drives),
+                    max_search,
+                    Arc::clone(&stats),
+                    Arc::clone(per_bucket_count),
+                ))
+            })
+            .collect();

         // A simple log2 function that is correct if x is a power of two
         let log2 = |x: usize| usize::BITS - x.leading_zeros() - 1;
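The constructor now builds the per-bucket counters first, moves the Vec into BucketMapStats via struct-update syntax, and then hands each bucket a clone of its own Arc, so the same atomic is reachable both from the bucket that updates it and from the aggregated stats that report it. A minimal sketch of that wiring with simplified stand-in types (Stats, BucketHandle, and new_buckets are illustrative, not the real API):

```rust
use std::sync::{atomic::AtomicU64, Arc};

// Simplified stand-ins for BucketMapStats / BucketApi, showing only the wiring.
#[derive(Default)]
struct Stats {
    per_bucket_count: Vec<Arc<AtomicU64>>,
}

struct BucketHandle {
    stats: Arc<Stats>,       // shared aggregate stats
    count: Arc<AtomicU64>,   // this handle's slot in stats.per_bucket_count
}

fn new_buckets(num_buckets: usize) -> (Arc<Stats>, Vec<BucketHandle>) {
    // Build one counter per bucket, then move the Vec into the shared stats.
    let mut per_bucket_count = Vec::with_capacity(num_buckets);
    per_bucket_count.resize_with(num_buckets, Arc::default);
    let stats = Arc::new(Stats { per_bucket_count });

    // Each handle gets the shared stats plus a clone of its own counter,
    // so updating `count` is visible through `stats.per_bucket_count[i]`.
    let buckets = stats
        .per_bucket_count
        .iter()
        .map(|count| BucketHandle {
            stats: Arc::clone(&stats),
            count: Arc::clone(count),
        })
        .collect();
    (stats, buckets)
}

fn main() {
    use std::sync::atomic::Ordering;
    let (stats, buckets) = new_buckets(4);
    // A write through bucket 2's handle is visible via the shared stats vector.
    buckets[2].count.store(9, Ordering::Relaxed);
    assert_eq!(stats.per_bucket_count[2].load(Ordering::Relaxed), 9);
    let _ = &buckets[0].stats; // each handle also carries the shared stats
}
```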

View File

@@ -11,8 +11,9 @@ pub struct BucketStats {
     pub mmap_us: AtomicU64,
 }

-#[derive(Debug, Default, Clone)]
+#[derive(Debug, Default)]
 pub struct BucketMapStats {
     pub index: Arc<BucketStats>,
     pub data: Arc<BucketStats>,
+    pub per_bucket_count: Vec<Arc<AtomicU64>>,
 }

View File

@@ -104,6 +104,21 @@ impl BucketMapHolderStats {
             .remaining_until_next_interval(STATS_INTERVAL_MS)
     }

+    /// return min, max, sum, median of data
+    fn get_stats(mut data: Vec<u64>) -> (u64, u64, u64, u64) {
+        if data.is_empty() {
+            (0, 0, 0, 0)
+        } else {
+            data.sort_unstable();
+            (
+                *data.first().unwrap(),
+                *data.last().unwrap(),
+                data.iter().sum(),
+                data[data.len() / 2],
+            )
+        }
+    }
+
     pub fn report_stats<T: IndexValue>(&self, storage: &BucketMapHolder<T>) {
         if !self.last_time.should_update(STATS_INTERVAL_MS) {
             return;
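The new get_stats helper sorts its input and returns (min, max, sum, median); for even-length input the index len / 2 selects the upper of the two middle values, and an empty vector reports zeros instead of panicking. A standalone copy with a few worked examples:

```rust
// Standalone copy of the helper above, to illustrate its behavior.
fn get_stats(mut data: Vec<u64>) -> (u64, u64, u64, u64) {
    if data.is_empty() {
        (0, 0, 0, 0)
    } else {
        data.sort_unstable();
        (
            *data.first().unwrap(), // min
            *data.last().unwrap(),  // max
            data.iter().sum(),      // sum
            data[data.len() / 2],   // median (upper middle for even lengths)
        )
    }
}

fn main() {
    // Odd length: median is the true middle element after sorting.
    assert_eq!(get_stats(vec![5, 1, 3]), (1, 5, 9, 3));
    // Even length: index len / 2 = 2 picks the upper of the two middle values.
    assert_eq!(get_stats(vec![4, 1, 3, 2]), (1, 4, 10, 3));
    // Empty input reports zeros rather than panicking.
    assert_eq!(get_stats(vec![]), (0, 0, 0, 0));
}
```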
@@ -111,17 +126,23 @@
         let ms_per_age = self.ms_per_age(storage);

-        let mut ct = 0;
-        let mut min = usize::MAX;
-        let mut max = 0;
-        for d in &self.per_bucket_count {
-            let d = d.load(Ordering::Relaxed) as usize;
-            ct += d;
-            min = std::cmp::min(min, d);
-            max = std::cmp::max(max, d);
-        }
+        let in_mem_per_bucket_counts = self
+            .per_bucket_count
+            .iter()
+            .map(|count| count.load(Ordering::Relaxed))
+            .collect::<Vec<_>>();
         let disk = storage.disk.as_ref();
+        let disk_per_bucket_counts = disk
+            .map(|disk| {
+                disk.stats
+                    .per_bucket_count
+                    .iter()
+                    .map(|count| count.load(Ordering::Relaxed))
+                    .collect::<Vec<_>>()
+            })
+            .unwrap_or_default();
+        let in_mem_stats = Self::get_stats(in_mem_per_bucket_counts);
+        let disk_stats = Self::get_stats(disk_per_bucket_counts);
         datapoint_info!(
             "accounts_index",
@@ -141,9 +162,14 @@
                 self.bg_throttling_wait_us.swap(0, Ordering::Relaxed),
                 i64
             ),
-            ("min_in_bin", min, i64),
-            ("max_in_bin", max, i64),
-            ("count_from_bins", ct, i64),
+            ("min_in_bin_mem", in_mem_stats.0, i64),
+            ("max_in_bin_mem", in_mem_stats.1, i64),
+            ("count_from_bins_mem", in_mem_stats.2, i64),
+            ("median_from_bins_mem", in_mem_stats.3, i64),
+            ("min_in_bin_disk", disk_stats.0, i64),
+            ("max_in_bin_disk", disk_stats.1, i64),
+            ("count_from_bins_disk", disk_stats.2, i64),
+            ("median_from_bins_disk", disk_stats.3, i64),
             (
                 "gets_from_mem",
                 self.gets_from_mem.swap(0, Ordering::Relaxed),
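report_stats now snapshots the in-memory and, when present, on-disk per-bucket counters into plain Vec<u64>s and reduces each with Self::get_stats; the resulting tuples feed the *_mem and *_disk fields above. Because storage.disk is an Option, unwrap_or_default() yields an empty vector when the disk index is disabled, so the disk metrics simply read zero. A hypothetical miniature of that snapshot step (the snapshot function below is illustrative, not the real API):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Hypothetical miniature of the disk-count snapshot: `disk` may be None when the
// on-disk index is disabled, in which case the summary falls back to all zeros.
fn snapshot(disk: Option<&[AtomicU64]>) -> Vec<u64> {
    disk.map(|counts| {
        counts
            .iter()
            .map(|count| count.load(Ordering::Relaxed))
            .collect::<Vec<_>>()
    })
    .unwrap_or_default() // no disk index => empty Vec => get_stats returns (0, 0, 0, 0)
}

fn main() {
    let counts = [AtomicU64::new(7), AtomicU64::new(2)];
    assert_eq!(snapshot(Some(&counts[..])), vec![7, 2]);
    assert!(snapshot(None).is_empty());
}
```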