allows only one thread to update cluster-nodes cache entry for an epoch

If two threads simultaneously call into ClusterNodesCache::get for the
same epoch while the cache entry is outdated, both threads recompute the
cluster-nodes for that epoch and redundantly overwrite each other's result.
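
A minimal stand-alone sketch of that race (none of this is the actual validator code: the HashMap, the recomputations counter, and the barrier are stand-ins, and the barrier only makes the overlap deterministic):

```rust
use std::{
    collections::HashMap,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc, Barrier, Mutex,
    },
    thread,
    time::{Duration, Instant},
};

fn main() {
    // Pre-patch layout: one lock around the whole map, entries stored inline.
    let cache: Arc<Mutex<HashMap<u64, (Instant, Arc<String>)>>> =
        Arc::new(Mutex::new(HashMap::new()));
    let recomputations = Arc::new(AtomicUsize::new(0));
    let barrier = Arc::new(Barrier::new(2));
    let ttl = Duration::from_secs(5);

    let handles: Vec<_> = (0..2)
        .map(|_| {
            let cache = Arc::clone(&cache);
            let recomputations = Arc::clone(&recomputations);
            let barrier = Arc::clone(&barrier);
            thread::spawn(move || {
                // Check the cache under its lock; the entry for epoch 7 is
                // missing (an outdated entry behaves the same way), so the
                // lock is released and the thread falls through to recompute.
                let outdated = match cache.lock().unwrap().get(&7) {
                    Some((asof, _)) if asof.elapsed() < ttl => false,
                    _ => true,
                };
                // Both threads finish the check before either one recomputes.
                barrier.wait();
                if outdated {
                    recomputations.fetch_add(1, Ordering::SeqCst);
                    let nodes = Arc::new("cluster nodes for epoch 7".to_string());
                    cache
                        .lock()
                        .unwrap()
                        .insert(7, (Instant::now(), Arc::clone(&nodes)));
                }
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
    // Prints 2: the work was done twice and one result simply replaced the other.
    println!("recomputations: {}", recomputations.load(Ordering::SeqCst));
}
```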

This commit wraps ClusterNodesCache entries in Arc<Mutex<...>>, so that
when an entry needs to be recomputed, only one thread does the work to
update it.
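
A minimal sketch of that scheme, again with stand-ins: a HashMap and a String value in place of the real LruCache and ClusterNodes, plus an illustrative recomputations counter and sleep:

```rust
use std::{
    collections::HashMap,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc, Mutex,
    },
    thread,
    time::{Duration, Instant},
};

// Per-epoch entry: None until the value has been computed once.
type Entry = Option<(Instant, Arc<String>)>;

struct SharedCache {
    // The outer lock guards only the map; each entry carries its own lock.
    cache: Mutex<HashMap<u64, Arc<Mutex<Entry>>>>,
    ttl: Duration,
    recomputations: AtomicUsize,
}

impl SharedCache {
    // Lock the map just long enough to fetch or create the entry for the epoch.
    fn get_entry(&self, epoch: u64) -> Arc<Mutex<Entry>> {
        let mut cache = self.cache.lock().unwrap();
        Arc::clone(cache.entry(epoch).or_default())
    }

    fn get(&self, epoch: u64) -> Arc<String> {
        let entry = self.get_entry(epoch);
        // The entry lock is held across the recomputation, so a second thread
        // blocks here and then finds a fresh value instead of redoing the work.
        let mut entry = entry.lock().unwrap();
        if let Some((asof, value)) = entry.as_ref() {
            if asof.elapsed() < self.ttl {
                return Arc::clone(value);
            }
        }
        self.recomputations.fetch_add(1, Ordering::SeqCst);
        thread::sleep(Duration::from_millis(50)); // stand-in for the expensive work
        let value = Arc::new(format!("cluster nodes for epoch {}", epoch));
        *entry = Some((Instant::now(), Arc::clone(&value)));
        value
    }
}

fn main() {
    let cache = Arc::new(SharedCache {
        cache: Mutex::new(HashMap::new()),
        ttl: Duration::from_secs(5),
        recomputations: AtomicUsize::new(0),
    });
    let handles: Vec<_> = (0..2)
        .map(|_| {
            let cache = Arc::clone(&cache);
            thread::spawn(move || cache.get(7))
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
    // Prints 1: the second thread waited on the entry lock and reused the result.
    println!("recomputations: {}", cache.recomputations.load(Ordering::SeqCst));
}
```

The diff below does the same thing with an LruCache, creating missing entries with Arc::default() and put rather than HashMap::entry, but the locking structure is the same.
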
behzad nouri
2021-08-01 13:46:03 -04:00
parent fb69f45f14
commit eaf927cf49


@@ -18,6 +18,7 @@ use {
         cmp::Reverse,
         collections::HashMap,
         marker::PhantomData,
+        ops::Deref,
         sync::{Arc, Mutex},
         time::{Duration, Instant},
     },
@@ -47,9 +48,12 @@ pub struct ClusterNodes<T> {
     _phantom: PhantomData<T>,
 }
 
+type CacheEntry<T> = Option<(/*as of:*/ Instant, Arc<ClusterNodes<T>>)>;
+
 pub(crate) struct ClusterNodesCache<T> {
-    #[allow(clippy::type_complexity)]
-    cache: Mutex<LruCache<Epoch, (Instant, Arc<ClusterNodes<T>>)>>,
+    // Cache entries are wrapped in Arc<Mutex<...>>, so that, when needed, only
+    // one thread does the computations to update the entry for the epoch.
+    cache: Mutex<LruCache<Epoch, Arc<Mutex<CacheEntry<T>>>>>,
     ttl: Duration, // Time to live.
 }
 
@@ -241,6 +245,18 @@ impl<T> ClusterNodesCache<T> {
 }
 
 impl<T: 'static> ClusterNodesCache<T> {
+    fn get_cache_entry(&self, epoch: Epoch) -> Arc<Mutex<CacheEntry<T>>> {
+        let mut cache = self.cache.lock().unwrap();
+        match cache.get(&epoch) {
+            Some(entry) => Arc::clone(entry),
+            None => {
+                let entry = Arc::default();
+                cache.put(epoch, Arc::clone(&entry));
+                entry
+            }
+        }
+    }
+
     pub(crate) fn get(
         &self,
         shred_slot: Slot,
@@ -248,13 +264,13 @@ impl<T: 'static> ClusterNodesCache<T> {
         cluster_info: &ClusterInfo,
     ) -> Arc<ClusterNodes<T>> {
         let epoch = root_bank.get_leader_schedule_epoch(shred_slot);
-        {
-            let mut cache = self.cache.lock().unwrap();
-            if let Some((asof, nodes)) = cache.get(&epoch) {
-                if asof.elapsed() < self.ttl {
-                    return Arc::clone(nodes);
-                }
-                cache.pop(&epoch);
-            }
-        }
+        let entry = self.get_cache_entry(epoch);
+        // Hold the lock on the entry here so that, if needed, only
+        // one thread recomputes cluster-nodes for this epoch.
+        let mut entry = entry.lock().unwrap();
+        if let Some((asof, nodes)) = entry.deref() {
+            if asof.elapsed() < self.ttl {
+                return Arc::clone(nodes);
+            }
+        }
         let epoch_staked_nodes = root_bank.epoch_staked_nodes(epoch);
@@ -269,10 +285,7 @@ impl<T: 'static> ClusterNodesCache<T> {
             cluster_info,
             &epoch_staked_nodes.unwrap_or_default(),
         ));
-        {
-            let mut cache = self.cache.lock().unwrap();
-            cache.put(epoch, (Instant::now(), Arc::clone(&nodes)));
-        }
+        *entry = Some((Instant::now(), Arc::clone(&nodes)));
         nodes
     }
 }
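
One small detail of get_cache_entry above: Arc::default() yields an entry that starts out empty, because CacheEntry<T> is an Option (whose default is None) and both Mutex and Arc forward Default to their inner type. A tiny stand-alone check, with a placeholder Nodes type in place of ClusterNodes<T>:

```rust
use std::{
    sync::{Arc, Mutex},
    time::Instant,
};

// Placeholder for Arc<ClusterNodes<T>> in the real code.
#[allow(dead_code)]
struct Nodes;

type CacheEntry = Option<(/*as of:*/ Instant, Arc<Nodes>)>;

fn main() {
    // Default bubbles up: Option -> None, then Mutex::default(), then Arc::default().
    let entry: Arc<Mutex<CacheEntry>> = Arc::default();
    assert!(entry.lock().unwrap().is_none());
    println!("a freshly created cache entry is None");
}
```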