Add limit and shrink policy for recycler (#15320)

carllin
2021-02-24 00:15:58 -08:00
committed by GitHub
parent 2f46da346d
commit c2e8814dce
24 changed files with 515 additions and 127 deletions

View File

@@ -76,6 +76,9 @@ impl<T: Default + Clone + Sized> Reset for PinnedVec<T> {
fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
self.recycler = Some(recycler);
}
fn unset_recycler(&mut self) {
self.recycler = None;
}
}
impl<T: Clone + Default + Sized> Default for PinnedVec<T> {
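For context on why `unset_recycler` is needed: a `PinnedVec` hands itself back to its pool through this weak reference when dropped, so objects evicted by a shrink must have the link severed first, or they would simply re-enter the pool. A minimal sketch of that drop-recycle pattern; the actual `Drop` impl is outside this diff, so the body below is an assumption:

impl<T: Clone + Default + Sized> Drop for PinnedVec<T> {
    fn drop(&mut self) {
        // Assumed pattern: return the object to the pool if the recycler is
        // still alive. After unset_recycler() this is a no-op and the memory
        // is actually freed.
        if let Some(weak) = self.recycler.take() {
            if let Some(recycler) = weak.upgrade() {
                recycler.recycle(std::mem::take(self));
            }
        }
    }
}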

View File

@@ -29,19 +29,21 @@ impl Packets {
Packets { packets }
}
pub fn new_with_recycler(recycler: PacketsRecycler, size: usize, name: &'static str) -> Self {
let mut packets = recycler.allocate(name);
packets.reserve_and_pin(size);
Packets { packets }
pub fn new_with_recycler(recycler: PacketsRecycler, size: usize) -> Option<Self> {
let maybe_packets = recycler.allocate();
maybe_packets.map(|mut packets| {
packets.reserve_and_pin(size);
Packets { packets }
})
}
pub fn new_with_recycler_data(
recycler: &PacketsRecycler,
name: &'static str,
mut packets: Vec<Packet>,
) -> Self {
let mut vec = Self::new_with_recycler(recycler.clone(), packets.len(), name);
vec.packets.append(&mut packets);
vec
) -> Option<Self> {
Self::new_with_recycler(recycler.clone(), packets.len()).map(|mut vec| {
vec.packets.append(&mut packets);
vec
})
}
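Because a limited recycler can refuse the allocation, both constructors now return Option. A hypothetical call site; the fallback to an unpooled Packets::new is an assumption for illustration, not part of this commit:

let packets = Packets::new_with_recycler(recycler.clone(), 1024)
    .unwrap_or_else(|| Packets::new(Vec::with_capacity(1024)));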
pub fn set_addr(&mut self, addr: &SocketAddr) {
@@ -77,11 +79,7 @@ pub fn to_packets_with_destination<T: Serialize>(
recycler: PacketsRecycler,
dests_and_data: &[(SocketAddr, T)],
) -> Packets {
let mut out = Packets::new_with_recycler(
recycler,
dests_and_data.len(),
"to_packets_with_destination",
);
let mut out = Packets::new_with_recycler(recycler, dests_and_data.len()).unwrap();
out.packets.resize(dests_and_data.len(), Packet::default());
for (dest_and_data, o) in dests_and_data.iter().zip(out.packets.iter_mut()) {
if !dest_and_data.0.ip().is_unspecified() && dest_and_data.0.port() != 0 {
@@ -139,9 +137,9 @@ mod tests {
#[test]
fn test_to_packets_pinning() {
let recycler = PacketsRecycler::default();
let recycler = PacketsRecycler::new_without_limit("");
for i in 0..2 {
let _first_packets = Packets::new_with_recycler(recycler.clone(), i + 1, "first one");
let _first_packets = Packets::new_with_recycler(recycler.clone(), i + 1);
}
}
}

View File

@@ -1,7 +1,23 @@
use rand::{thread_rng, Rng};
use std::sync::atomic::AtomicBool;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex, Weak};
use solana_measure::measure::Measure;
use std::{
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc, Mutex, Weak,
},
time::Instant,
};
pub const DEFAULT_MINIMUM_OBJECT_COUNT: u32 = 1000;
pub const DEFAULT_SHRINK_PCT: u32 = 80;
pub const DEFAULT_MAX_ABOVE_SHRINK_PCT_COUNT: u32 = 10;
pub const DEFAULT_CHECK_SHRINK_INTERVAL_MS: u32 = 10000;
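Read together with the shrink logic below, these defaults mean a pool is shrunk only after more than 80% of its allocated objects have sat idle across 10 consecutive checks spaced at least 10 seconds apart, and it is never shrunk below 1000 objects.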
enum AllocationDecision<T> {
Reuse(T),
Allocate(u32, usize),
AllocationLimitReached,
}
#[derive(Debug, Default)]
struct RecyclerStats {
@@ -11,36 +27,219 @@ struct RecyclerStats {
max_gc: AtomicUsize,
}
#[derive(Clone, Default)]
pub struct Recycler<T> {
#[derive(Debug, Default)]
struct RecyclerShrinkStats {
resulting_size: u32,
target_size: u32,
ideal_num_to_remove: u32,
shrink_elapsed: u64,
drop_elapsed: u64,
}
impl RecyclerShrinkStats {
fn report(&self, shrink_metric_name: &'static str) {
datapoint_info!(
shrink_metric_name,
("target_size", self.target_size as i64, i64),
("resulting_size", self.resulting_size as i64, i64),
("ideal_num_to_remove", self.ideal_num_to_remove as i64, i64),
("recycler_shrink_elapsed", self.shrink_elapsed as i64, i64),
("drop_elapsed", self.drop_elapsed as i64, i64)
);
}
}
#[derive(Clone)]
pub struct Recycler<T: Reset> {
recycler: Arc<RecyclerX<T>>,
shrink_metric_name: &'static str,
}
impl<T: Default + Reset> Recycler<T> {
pub fn new_without_limit(shrink_metric_name: &'static str) -> Self {
Self {
recycler: Arc::new(RecyclerX::default()),
shrink_metric_name,
}
}
pub fn new_with_limit(shrink_metric_name: &'static str, limit: u32) -> Self {
Self {
recycler: Arc::new(RecyclerX::new(Some(limit))),
shrink_metric_name,
}
}
}
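A minimal usage sketch of the two constructors; the metric name and limit are hypothetical:

// Unbounded pool: allocate() always succeeds; shrink stats are reported
// under the given metric name.
let unbounded: Recycler<PinnedVec<u8>> = Recycler::new_without_limit("example_shrink_stats");
// Bounded pool: allocate() returns None once 4096 objects are live.
let bounded: Recycler<PinnedVec<u8>> = Recycler::new_with_limit("example_shrink_stats", 4096);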
#[derive(Debug)]
pub struct RecyclerX<T> {
gc: Mutex<Vec<T>>,
pub struct ObjectPool<T: Reset> {
objects: Vec<T>,
shrink_pct: u32,
minimum_object_count: u32,
above_shrink_pct_count: u32,
max_above_shrink_pct_count: u32,
check_shrink_interval_ms: u32,
last_shrink_check_time: Instant,
pub total_allocated_count: u32,
limit: Option<u32>,
}
impl<T: Default + Reset> Default for ObjectPool<T> {
fn default() -> Self {
ObjectPool {
objects: vec![],
shrink_pct: DEFAULT_SHRINK_PCT,
minimum_object_count: DEFAULT_MINIMUM_OBJECT_COUNT,
above_shrink_pct_count: 0,
max_above_shrink_pct_count: DEFAULT_MAX_ABOVE_SHRINK_PCT_COUNT,
check_shrink_interval_ms: DEFAULT_CHECK_SHRINK_INTERVAL_MS,
last_shrink_check_time: Instant::now(),
total_allocated_count: 0,
limit: None,
}
}
}
impl<T: Default + Reset> ObjectPool<T> {
fn new(limit: Option<u32>) -> Self {
Self {
limit,
..Self::default()
}
}
fn len(&self) -> usize {
self.objects.len()
}
fn get_shrink_target(shrink_pct: u32, current_size: u32) -> u32 {
((shrink_pct * current_size) + 99) / 100
}
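The `+ 99` makes the integer division round up, so the target never undershoots `shrink_pct` percent of the current allocation. Worked arithmetic:

// shrink_pct = 80, current_size = 2000: ((80 * 2000) + 99) / 100 = 160099 / 100 = 1600
// shrink_pct = 80, current_size =  999: ((80 *  999) + 99) / 100 =  80019 / 100 =  800 (ceil of 799.2)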
fn shrink_if_necessary(
&mut self,
recycler_name: &'static str,
) -> Option<(RecyclerShrinkStats, Vec<T>)> {
let is_consistent = self.total_allocated_count as usize >= self.len();
assert!(
is_consistent,
"Object pool inconsistent: {} {} {}",
self.total_allocated_count,
self.len(),
recycler_name
);
if self.last_shrink_check_time.elapsed().as_millis() > self.check_shrink_interval_ms as u128
{
self.last_shrink_check_time = Instant::now();
let shrink_threshold_count =
Self::get_shrink_target(self.shrink_pct, self.total_allocated_count);
// If more than `shrink_threshold_count` of all allocated objects are sitting
// idle in the pool, increment the `above_shrink_pct_count`.
if self.len() > self.minimum_object_count as usize
&& self.len() > shrink_threshold_count as usize
{
self.above_shrink_pct_count += 1;
} else {
self.above_shrink_pct_count = 0;
}
if self.above_shrink_pct_count as usize >= self.max_above_shrink_pct_count as usize {
let mut recycler_shrink_elapsed = Measure::start("recycler_shrink");
// Do the shrink
let target_size = std::cmp::max(self.minimum_object_count, shrink_threshold_count);
let ideal_num_to_remove = self.total_allocated_count - target_size;
let mut shrink_removed_objects = Vec::with_capacity(ideal_num_to_remove as usize);
for _ in 0..ideal_num_to_remove {
if let Some(mut expired_object) = self.objects.pop() {
expired_object.unset_recycler();
// Drop these outside of the lock because the `Drop` implementation for
// certain objects like `PinnedVec` can be expensive
shrink_removed_objects.push(expired_object);
// May not be able to shrink exactly `ideal_num_to_remove` objects, since
// for new allocations `total_allocated_count` is incremented before the
// object is allocated (see the `make_allocation_decision` logic below).
// This race allows a difference of up to the number of threads allocating
// with this recycler.
self.total_allocated_count -= 1;
} else {
break;
}
}
recycler_shrink_elapsed.stop();
self.above_shrink_pct_count = 0;
Some((
RecyclerShrinkStats {
resulting_size: self.total_allocated_count,
target_size,
ideal_num_to_remove,
shrink_elapsed: recycler_shrink_elapsed.as_us(),
// Filled in later
drop_elapsed: 0,
},
shrink_removed_objects,
))
} else {
None
}
} else {
None
}
}
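Concretely, with the values used in the shrink test below (limit = 2000, shrink_pct = 80, minimum = 1000): once all 2000 allocated objects sit idle, shrink_threshold_count is 1600, so after max_above_shrink_pct_count consecutive checks the pool is trimmed to 1600 objects, then 1280, then 1024, and finally pinned at the 1000-object floor.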
fn make_allocation_decision(&mut self) -> AllocationDecision<T> {
if let Some(reused_object) = self.objects.pop() {
AllocationDecision::Reuse(reused_object)
} else if let Some(limit) = self.limit {
if self.total_allocated_count < limit {
self.total_allocated_count += 1;
AllocationDecision::Allocate(self.total_allocated_count, self.len())
} else {
AllocationDecision::AllocationLimitReached
}
} else {
self.total_allocated_count += 1;
AllocationDecision::Allocate(self.total_allocated_count, self.len())
}
}
}
#[derive(Debug)]
pub struct RecyclerX<T: Reset> {
gc: Mutex<ObjectPool<T>>,
stats: RecyclerStats,
id: usize,
}
impl<T: Default> Default for RecyclerX<T> {
impl<T: Default + Reset> Default for RecyclerX<T> {
fn default() -> RecyclerX<T> {
let id = thread_rng().gen_range(0, 1000);
trace!("new recycler..{}", id);
RecyclerX {
gc: Mutex::new(vec![]),
gc: Mutex::new(ObjectPool::default()),
stats: RecyclerStats::default(),
id,
}
}
}
impl<T: Default + Reset> RecyclerX<T> {
fn new(limit: Option<u32>) -> Self {
RecyclerX {
gc: Mutex::new(ObjectPool::new(limit)),
..Self::default()
}
}
}
pub trait Reset {
fn reset(&mut self);
fn warm(&mut self, size_hint: usize);
fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>)
where
Self: std::marker::Sized;
fn unset_recycler(&mut self)
where
Self: std::marker::Sized;
}
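Every pooled type must implement all four methods. A minimal hypothetical implementor (Buffer is invented for illustration; compare the impl for u64 in the tests below):

struct Buffer {
    data: Vec<u8>,
    recycler: Option<Weak<RecyclerX<Buffer>>>,
}

impl Reset for Buffer {
    fn reset(&mut self) {
        // Clear contents but keep capacity so reuse avoids reallocation.
        self.data.clear();
    }
    fn warm(&mut self, size_hint: usize) {
        self.data.reserve(size_hint);
    }
    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
        self.recycler = Some(recycler);
    }
    fn unset_recycler(&mut self) {
        self.recycler = None;
    }
}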
lazy_static! {
@@ -56,12 +255,21 @@ fn warm_recyclers() -> bool {
}
impl<T: Default + Reset + Sized> Recycler<T> {
pub fn warmed(num: usize, size_hint: usize) -> Self {
let new = Self::default();
pub fn warmed(
num: u32,
size_hint: usize,
limit: Option<u32>,
shrink_metric_name: &'static str,
) -> Self {
assert!(num <= limit.unwrap_or(std::u32::MAX));
let new = Self {
recycler: Arc::new(RecyclerX::new(limit)),
shrink_metric_name,
};
if warm_recyclers() {
let warmed_items: Vec<_> = (0..num)
.map(|_| {
let mut item = new.allocate("warming");
let mut item = new.allocate().unwrap();
item.warm(size_hint);
item
})
@@ -73,33 +281,55 @@ impl<T: Default + Reset + Sized> Recycler<T> {
new
}
pub fn allocate(&self, name: &'static str) -> T {
let new = self
.recycler
.gc
.lock()
.expect("recycler lock in pb fn allocate")
.pop();
pub fn allocate(&self) -> Option<T> {
let (allocation_decision, shrink_output) = {
let mut object_pool = self
.recycler
.gc
.lock()
.expect("recycler lock in pb fn allocate");
if let Some(mut x) = new {
self.recycler.stats.reuse.fetch_add(1, Ordering::Relaxed);
x.reset();
return x;
let shrink_output = object_pool.shrink_if_necessary(self.shrink_metric_name);
// Grab the allocation decision and shrinking stats, do the expensive
// allocations/deallocations outside of the lock.
(object_pool.make_allocation_decision(), shrink_output)
};
if let Some((mut shrink_stats, shrink_removed_objects)) = shrink_output {
let mut shrink_removed_object_elapsed = Measure::start("shrink_removed_object_elapsed");
drop(shrink_removed_objects);
shrink_removed_object_elapsed.stop();
shrink_stats.drop_elapsed = shrink_removed_object_elapsed.as_us();
shrink_stats.report(self.shrink_metric_name);
}
let total = self.recycler.stats.total.fetch_add(1, Ordering::Relaxed);
trace!(
"allocating new: total {} {:?} id: {} reuse: {} max_gc: {}",
total,
name,
self.recycler.id,
self.recycler.stats.reuse.load(Ordering::Relaxed),
self.recycler.stats.max_gc.load(Ordering::Relaxed),
);
match allocation_decision {
AllocationDecision::Reuse(mut reused_object) => {
self.recycler.stats.reuse.fetch_add(1, Ordering::Relaxed);
reused_object.reset();
Some(reused_object)
}
AllocationDecision::Allocate(total_allocated_count, recycled_len) => {
let mut t = T::default();
t.set_recycler(Arc::downgrade(&self.recycler));
if total_allocated_count % 1000 == 0 {
datapoint_info!(
"recycler_total_allocated_count",
("name", self.shrink_metric_name, String),
("count", total_allocated_count as i64, i64),
("recycled_len", recycled_len as i64, i64),
)
}
Some(t)
}
let mut t = T::default();
t.set_recycler(Arc::downgrade(&self.recycler));
t
AllocationDecision::AllocationLimitReached => None,
}
}
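With the limit in place, the caller decides what pool exhaustion means; a hypothetical call site:

match recycler.allocate() {
    Some(object) => {
        // Use the pooled object; dropping it returns it to the pool.
    }
    None => {
        // Allocation limit reached: back off, shed load, or fall back to
        // a plain (unpooled) allocation.
    }
}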
pub fn recycle_for_test(&self, x: T) {
self.recycler.recycle(x);
}
}
@@ -107,7 +337,7 @@ impl<T: Default + Reset> RecyclerX<T> {
pub fn recycle(&self, x: T) {
let len = {
let mut gc = self.gc.lock().expect("recycler lock in pub fn recycle");
gc.push(x);
gc.objects.push(x);
gc.len()
};
@@ -137,6 +367,8 @@ impl<T: Default + Reset> RecyclerX<T> {
#[cfg(test)]
mod tests {
use super::*;
use crate::packet::PacketsRecycler;
use std::{thread::sleep, time::Duration};
impl Reset for u64 {
fn reset(&mut self) {
@@ -144,19 +376,115 @@ mod tests {
}
fn warm(&mut self, _size_hint: usize) {}
fn set_recycler(&mut self, _recycler: Weak<RecyclerX<Self>>) {}
fn unset_recycler(&mut self) {}
}
#[test]
fn test_recycler() {
let recycler = Recycler::default();
let mut y: u64 = recycler.allocate("test_recycler1");
let recycler = Recycler::new_without_limit("");
let mut y: u64 = recycler.allocate().unwrap();
assert_eq!(y, 0);
y = 20;
let recycler2 = recycler.clone();
recycler2.recycler.recycle(y);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), 1);
let z = recycler.allocate("test_recycler2");
let z = recycler.allocate().unwrap();
assert_eq!(z, 10);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), 0);
}
#[test]
fn test_recycler_limit() {
let limit = 10;
assert!(limit <= DEFAULT_MINIMUM_OBJECT_COUNT);
// Use PacketsRecycler so that dropping the allocated object
// actually recycles
let recycler = PacketsRecycler::new_with_limit("", limit);
let mut allocated_items = vec![];
for i in 0..limit * 2 {
let x = recycler.allocate();
if i < limit {
allocated_items.push(x.unwrap());
} else {
assert!(x.is_none());
}
}
assert_eq!(
recycler.recycler.gc.lock().unwrap().total_allocated_count,
limit
);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), 0_usize);
drop(allocated_items);
assert_eq!(
recycler.recycler.gc.lock().unwrap().total_allocated_count,
limit
);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), limit as usize);
}
#[test]
fn test_recycler_shrink() {
let limit = DEFAULT_MINIMUM_OBJECT_COUNT * 2;
let max_above_shrink_pct_count = 2;
let shrink_pct = 80;
let recycler = PacketsRecycler::new_with_limit("", limit);
{
let mut locked_recycler = recycler.recycler.gc.lock().unwrap();
// Make the shrink interval a long time so shrinking doesn't happen yet
locked_recycler.check_shrink_interval_ms = std::u32::MAX;
// Set the count to two so that we shrink on every other allocation later.
locked_recycler.max_above_shrink_pct_count = max_above_shrink_pct_count;
locked_recycler.shrink_pct = shrink_pct;
}
let mut allocated_items = vec![];
for _ in 0..limit {
allocated_items.push(recycler.allocate().unwrap());
}
assert_eq!(
recycler.recycler.gc.lock().unwrap().total_allocated_count,
limit
);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), 0);
drop(allocated_items);
assert_eq!(recycler.recycler.gc.lock().unwrap().len(), limit as usize);
let shrink_interval = 10;
{
let mut locked_recycler = recycler.recycler.gc.lock().unwrap();
locked_recycler.check_shrink_interval_ms = shrink_interval;
}
let mut current_total_allocated_count =
recycler.recycler.gc.lock().unwrap().total_allocated_count;
// Shrink the recycler until it hits the minimum
let mut i = 0;
while current_total_allocated_count != DEFAULT_MINIMUM_OBJECT_COUNT {
sleep(Duration::from_millis(shrink_interval as u64 * 2));
recycler.allocate().unwrap();
let expected_above_shrink_pct_count = (i + 1) % max_above_shrink_pct_count;
assert_eq!(
recycler.recycler.gc.lock().unwrap().above_shrink_pct_count,
expected_above_shrink_pct_count
);
if expected_above_shrink_pct_count == 0 {
// Shrink happened, update the expected `current_total_allocated_count`.
current_total_allocated_count = std::cmp::max(
ObjectPool::<u64>::get_shrink_target(shrink_pct, current_total_allocated_count),
DEFAULT_MINIMUM_OBJECT_COUNT,
);
assert_eq!(
recycler.recycler.gc.lock().unwrap().total_allocated_count,
current_total_allocated_count
);
assert_eq!(
recycler.recycler.gc.lock().unwrap().len(),
current_total_allocated_count as usize
);
}
i += 1;
}
}
}

View File

@@ -2,17 +2,24 @@ use crate::cuda_runtime::PinnedVec;
use crate::recycler::Recycler;
use crate::sigverify::TxOffset;
#[derive(Default, Clone)]
#[derive(Clone)]
pub struct RecyclerCache {
recycler_offsets: Recycler<TxOffset>,
recycler_buffer: Recycler<PinnedVec<u8>>,
}
impl RecyclerCache {
pub fn warmed() -> Self {
pub fn new(offsets_shrink_name: &'static str, buffer_shrink_name: &'static str) -> Self {
Self {
recycler_offsets: Recycler::warmed(50, 4096),
recycler_buffer: Recycler::warmed(50, 4096),
recycler_offsets: Recycler::new_without_limit(offsets_shrink_name),
recycler_buffer: Recycler::new_without_limit(buffer_shrink_name),
}
}
pub fn warmed(offsets_shrink_name: &'static str, buffer_shrink_name: &'static str) -> Self {
Self {
recycler_offsets: Recycler::warmed(50, 4096, None, offsets_shrink_name),
recycler_buffer: Recycler::warmed(50, 4096, None, buffer_shrink_name),
}
}
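A hypothetical construction; the two metric names are invented, and kept distinct so each pool's shrink stats are distinguishable:

let cache = RecyclerCache::warmed("offsets_shrink_stats", "buffer_shrink_stats");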
pub fn offsets(&self) -> &Recycler<TxOffset> {

View File

@@ -194,13 +194,13 @@ fn get_packet_offsets(packet: &Packet, current_offset: u32) -> PacketOffsets {
pub fn generate_offsets(batches: &[Packets], recycler: &Recycler<TxOffset>) -> TxOffsets {
debug!("allocating..");
let mut signature_offsets: PinnedVec<_> = recycler.allocate("sig_offsets");
let mut signature_offsets: PinnedVec<_> = recycler.allocate().unwrap();
signature_offsets.set_pinnable();
let mut pubkey_offsets: PinnedVec<_> = recycler.allocate("pubkey_offsets");
let mut pubkey_offsets: PinnedVec<_> = recycler.allocate().unwrap();
pubkey_offsets.set_pinnable();
let mut msg_start_offsets: PinnedVec<_> = recycler.allocate("msg_start_offsets");
let mut msg_start_offsets: PinnedVec<_> = recycler.allocate().unwrap();
msg_start_offsets.set_pinnable();
let mut msg_sizes: PinnedVec<_> = recycler.allocate("msg_size_offsets");
let mut msg_sizes: PinnedVec<_> = recycler.allocate().unwrap();
msg_sizes.set_pinnable();
let mut current_packet = 0;
let mut v_sig_lens = Vec::new();
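The unwrap()s here are sound as long as the recyclers passed in were built without a limit (as with RecyclerCache above); a limited recycler could make allocate() return None on these paths.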
@@ -347,7 +347,7 @@ pub fn ed25519_verify(
debug!("CUDA ECDSA for {}", batch_size(batches));
debug!("allocating out..");
let mut out = recycler_out.allocate("out_buffer");
let mut out = recycler_out.allocate().unwrap();
out.set_pinnable();
let mut elems = Vec::new();
let mut rvs = Vec::new();
@@ -678,8 +678,8 @@ mod tests {
let batches = generate_packet_vec(&packet, n, 2);
let recycler = Recycler::default();
let recycler_out = Recycler::default();
let recycler = Recycler::new_without_limit("");
let recycler_out = Recycler::new_without_limit("");
// verify packets
let ans = sigverify::ed25519_verify(&batches, &recycler, &recycler_out);
@@ -697,8 +697,8 @@ mod tests {
let batches = generate_packet_vec(&packet, 1, 1);
let recycler = Recycler::default();
let recycler_out = Recycler::default();
let recycler = Recycler::new_without_limit("");
let recycler_out = Recycler::new_without_limit("");
// verify packets
let ans = sigverify::ed25519_verify(&batches, &recycler, &recycler_out);
@@ -735,8 +735,8 @@ mod tests {
batches[0].packets.push(packet);
let recycler = Recycler::default();
let recycler_out = Recycler::default();
let recycler = Recycler::new_without_limit("");
let recycler_out = Recycler::new_without_limit("");
// verify packets
let ans = sigverify::ed25519_verify(&batches, &recycler, &recycler_out);
@@ -755,8 +755,8 @@ mod tests {
let tx = test_multisig_tx();
let packet = sigverify::make_packet_from_transaction(tx);
let recycler = Recycler::default();
let recycler_out = Recycler::default();
let recycler = Recycler::new_without_limit("");
let recycler_out = Recycler::new_without_limit("");
for _ in 0..50 {
let n = thread_rng().gen_range(1, 30);
let num_batches = thread_rng().gen_range(2, 30);