makes CrdsGossip thread-safe (#18615)

This commit is contained in:
behzad nouri
2021-07-14 22:27:17 +00:00
committed by GitHub
parent 9ed1f24188
commit cf31afdd6a
11 changed files with 477 additions and 471 deletions

View File

@@ -33,24 +33,20 @@ use {
struct Node {
keypair: Arc<Keypair>,
contact_info: ContactInfo,
gossip: Arc<Mutex<CrdsGossip>>,
gossip: Arc<CrdsGossip>,
ping_cache: Arc<Mutex<PingCache>>,
stake: u64,
}
impl Node {
fn new(
keypair: Arc<Keypair>,
contact_info: ContactInfo,
gossip: Arc<Mutex<CrdsGossip>>,
) -> Self {
fn new(keypair: Arc<Keypair>, contact_info: ContactInfo, gossip: Arc<CrdsGossip>) -> Self {
Self::staked(keypair, contact_info, gossip, 0)
}
fn staked(
keypair: Arc<Keypair>,
contact_info: ContactInfo,
gossip: Arc<Mutex<CrdsGossip>>,
gossip: Arc<CrdsGossip>,
stake: u64,
) -> Self {
let ping_cache = Arc::new(Mutex::new(PingCache::new(
@@ -67,14 +63,6 @@ impl Node {
}
}
impl Deref for Node {
type Target = Arc<Mutex<CrdsGossip>>;
fn deref(&self) -> &Self::Target {
&self.gossip
}
}
struct Network {
nodes: HashMap<Pubkey, Node>,
stake_pruned: u64,
@@ -116,17 +104,24 @@ fn star_network_create(num: usize) -> Network {
let node_keypair = Arc::new(Keypair::new());
let contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), 0);
let new = CrdsValue::new_unsigned(CrdsData::ContactInfo(contact_info.clone()));
let mut node = CrdsGossip::default();
node.crds.insert(new.clone(), timestamp()).unwrap();
node.crds.insert(entry.clone(), timestamp()).unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(Mutex::new(node)));
let node = CrdsGossip::default();
{
let mut node_crds = node.crds.write().unwrap();
node_crds.insert(new.clone(), timestamp()).unwrap();
node_crds.insert(entry.clone(), timestamp()).unwrap();
}
let node = Node::new(node_keypair, contact_info, Arc::new(node));
(new.label().pubkey(), node)
})
.collect();
let mut node = CrdsGossip::default();
let node = CrdsGossip::default();
let id = entry.label().pubkey();
node.crds.insert(entry, timestamp()).unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(Mutex::new(node)));
node.crds
.write()
.unwrap()
.insert(entry, timestamp())
.unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(node));
network.insert(id, node);
Network::new(network)
}
@@ -135,22 +130,36 @@ fn rstar_network_create(num: usize) -> Network {
let node_keypair = Arc::new(Keypair::new());
let contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), 0);
let entry = CrdsValue::new_unsigned(CrdsData::ContactInfo(contact_info.clone()));
let mut origin = CrdsGossip::default();
let origin = CrdsGossip::default();
let id = entry.label().pubkey();
origin.crds.insert(entry, timestamp()).unwrap();
origin
.crds
.write()
.unwrap()
.insert(entry, timestamp())
.unwrap();
let mut network: HashMap<_, _> = (1..num)
.map(|_| {
let node_keypair = Arc::new(Keypair::new());
let contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), 0);
let new = CrdsValue::new_unsigned(CrdsData::ContactInfo(contact_info.clone()));
let mut node = CrdsGossip::default();
node.crds.insert(new.clone(), timestamp()).unwrap();
origin.crds.insert(new.clone(), timestamp()).unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(Mutex::new(node)));
let node = CrdsGossip::default();
node.crds
.write()
.unwrap()
.insert(new.clone(), timestamp())
.unwrap();
origin
.crds
.write()
.unwrap()
.insert(new.clone(), timestamp())
.unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(node));
(new.label().pubkey(), node)
})
.collect();
let node = Node::new(node_keypair, contact_info, Arc::new(Mutex::new(origin)));
let node = Node::new(node_keypair, contact_info, Arc::new(origin));
network.insert(id, node);
Network::new(network)
}
@@ -161,9 +170,13 @@ fn ring_network_create(num: usize) -> Network {
let node_keypair = Arc::new(Keypair::new());
let contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), 0);
let new = CrdsValue::new_unsigned(CrdsData::ContactInfo(contact_info.clone()));
let mut node = CrdsGossip::default();
node.crds.insert(new.clone(), timestamp()).unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(Mutex::new(node)));
let node = CrdsGossip::default();
node.crds
.write()
.unwrap()
.insert(new.clone(), timestamp())
.unwrap();
let node = Node::new(node_keypair, contact_info, Arc::new(node));
(new.label().pubkey(), node)
})
.collect();
@@ -173,15 +186,12 @@ fn ring_network_create(num: usize) -> Network {
let start = &network[&keys[k]];
let start_id = keys[k];
let label = CrdsValueLabel::ContactInfo(start_id);
let gossip = start.gossip.lock().unwrap();
gossip.crds.get(&label).unwrap().value.clone()
let gossip_crds = start.gossip.crds.read().unwrap();
gossip_crds.get(&label).unwrap().value.clone()
};
let end = network.get_mut(&keys[(k + 1) % keys.len()]).unwrap();
end.lock()
.unwrap()
.crds
.insert(start_info, timestamp())
.unwrap();
let mut end_crds = end.gossip.crds.write().unwrap();
end_crds.insert(start_info, timestamp()).unwrap();
}
Network::new(network)
}
@@ -193,14 +203,13 @@ fn connected_staked_network_create(stakes: &[u64]) -> Network {
let node_keypair = Arc::new(Keypair::new());
let contact_info = ContactInfo::new_localhost(&node_keypair.pubkey(), 0);
let new = CrdsValue::new_unsigned(CrdsData::ContactInfo(contact_info.clone()));
let mut node = CrdsGossip::default();
node.crds.insert(new.clone(), timestamp()).unwrap();
let node = Node::staked(
node_keypair,
contact_info,
Arc::new(Mutex::new(node)),
stakes[n],
);
let node = CrdsGossip::default();
node.crds
.write()
.unwrap()
.insert(new.clone(), timestamp())
.unwrap();
let node = Node::staked(node_keypair, contact_info, Arc::new(node), stakes[n]);
(new.label().pubkey(), node)
})
.collect();
@@ -209,17 +218,18 @@ fn connected_staked_network_create(stakes: &[u64]) -> Network {
let start_entries: Vec<_> = keys
.iter()
.map(|k| {
let start = &network[k].lock().unwrap();
let start = &network[k];
let start_label = CrdsValueLabel::ContactInfo(*k);
start.crds.get(&start_label).unwrap().value.clone()
let gossip_crds = start.gossip.crds.read().unwrap();
gossip_crds.get(&start_label).unwrap().value.clone()
})
.collect();
for (end_pubkey, end) in network.iter_mut() {
let mut end_crds = end.gossip.crds.write().unwrap();
for k in 0..keys.len() {
let mut end = end.lock().unwrap();
if keys[k] != *end_pubkey {
let start_info = start_entries[k].clone();
end.crds.insert(start_info, timestamp()).unwrap();
end_crds.insert(start_info, timestamp()).unwrap();
}
}
}
@@ -247,7 +257,7 @@ fn network_simulator(thread_pool: &ThreadPool, network: &mut Network, max_conver
let network_values: Vec<Node> = network.values().cloned().collect();
network_values.par_iter().for_each(|node| {
let node_pubkey = node.keypair.pubkey();
node.lock().unwrap().refresh_push_active_set(
node.gossip.refresh_push_active_set(
&node_pubkey,
0, // shred version
&HashMap::new(), // stakes
@@ -262,14 +272,14 @@ fn network_simulator(thread_pool: &ThreadPool, network: &mut Network, max_conver
let now = (start * 100) as u64;
ts += 1000;
// push a message to the network
network_values.par_iter().for_each(|locked_node| {
let node_pubkey = locked_node.keypair.pubkey();
let node = &mut locked_node.lock().unwrap();
let label = CrdsValueLabel::ContactInfo(node_pubkey);
let entry = node.crds.get(&label).unwrap();
let mut m = entry.value.contact_info().cloned().unwrap();
network_values.par_iter().for_each(|node| {
let node_pubkey = node.keypair.pubkey();
let mut m = {
let node_crds = node.gossip.crds.read().unwrap();
node_crds.get_contact_info(node_pubkey).cloned().unwrap()
};
m.wallclock = now;
node.process_push_message(
node.gossip.process_push_message(
&Pubkey::default(),
vec![CrdsValue::new_unsigned(CrdsData::ContactInfo(m))],
now,
@@ -321,14 +331,13 @@ fn network_run_push(
.par_iter()
.map(|node| {
let node_pubkey = node.keypair.pubkey();
let mut node_lock = node.lock().unwrap();
let timeouts = node_lock.make_timeouts(
let timeouts = node.gossip.make_timeouts(
node_pubkey,
&HashMap::default(), // stakes
Duration::from_millis(node_lock.pull.crds_timeout),
Duration::from_millis(node.gossip.pull.crds_timeout),
);
node_lock.purge(&node_pubkey, thread_pool, now, &timeouts);
(node_pubkey, node_lock.new_push_messages(vec![], now))
node.gossip.purge(&node_pubkey, thread_pool, now, &timeouts);
(node_pubkey, node.gossip.new_push_messages(vec![], now))
})
.collect();
let transfered: Vec<_> = requests
@@ -344,8 +353,7 @@ fn network_run_push(
let origins: HashSet<_> = network
.get(&to)
.unwrap()
.lock()
.unwrap()
.gossip
.process_push_message(&from, msgs.clone(), now)
.into_iter()
.collect();
@@ -353,11 +361,8 @@ fn network_run_push(
.get(&to)
.map(|node| {
let node_pubkey = node.keypair.pubkey();
node.lock().unwrap().prune_received_cache(
&node_pubkey,
origins,
&stakes,
)
node.gossip
.prune_received_cache(&node_pubkey, origins, &stakes)
})
.unwrap();
@@ -374,18 +379,18 @@ fn network_run_push(
.get(&from)
.map(|node| {
let node_pubkey = node.keypair.pubkey();
let node = node.lock().unwrap();
let destination = node_pubkey;
let now = timestamp();
node.process_prune_msg(
&node_pubkey,
&to,
&destination,
&prune_keys,
now,
now,
)
.unwrap()
node.gossip
.process_prune_msg(
&node_pubkey,
&to,
&destination,
&prune_keys,
now,
now,
)
.unwrap()
})
.unwrap();
}
@@ -410,7 +415,7 @@ fn network_run_push(
if now % CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS == 0 && now > 0 {
network_values.par_iter().for_each(|node| {
let node_pubkey = node.keypair.pubkey();
node.lock().unwrap().refresh_push_active_set(
node.gossip.refresh_push_active_set(
&node_pubkey,
0, // shred version
&HashMap::new(), // stakes
@@ -420,10 +425,7 @@ fn network_run_push(
}
total = network_values
.par_iter()
.map(|node| {
let gossip = node.gossip.lock().unwrap();
gossip.push.num_pending(&gossip.crds)
})
.map(|node| node.gossip.push.num_pending(&node.gossip.crds))
.sum();
trace!(
"network_run_push_{}: now: {} queue: {} bytes: {} num_msgs: {} prunes: {} stake_pruned: {} delivered: {}",
@@ -477,8 +479,7 @@ fn network_run_pull(
.filter_map(|from| {
let mut pings = Vec::new();
let (peer, filters) = from
.lock()
.unwrap()
.gossip
.new_pull_request(
thread_pool,
from.keypair.deref(),
@@ -492,9 +493,9 @@ fn network_run_pull(
)
.ok()?;
let from_pubkey = from.keypair.pubkey();
let gossip = from.gossip.lock().unwrap();
let label = CrdsValueLabel::ContactInfo(from_pubkey);
let self_info = gossip.crds.get(&label).unwrap().value.clone();
let gossip_crds = from.gossip.crds.read().unwrap();
let self_info = gossip_crds.get(&label).unwrap().value.clone();
Some((peer.id, filters, self_info))
})
.collect()
@@ -520,8 +521,7 @@ fn network_run_pull(
.get(&to)
.map(|node| {
let rsp = node
.lock()
.unwrap()
.gossip
.generate_pull_responses(
&filters,
/*output_size_limit=*/ usize::MAX,
@@ -530,7 +530,7 @@ fn network_run_pull(
.into_iter()
.flatten()
.collect();
node.lock().unwrap().process_pull_requests(
node.gossip.process_pull_requests(
filters.into_iter().map(|(caller, _)| caller),
now,
);
@@ -540,12 +540,12 @@ fn network_run_pull(
bytes += serialized_size(&rsp).unwrap() as usize;
msgs += rsp.len();
if let Some(node) = network.get(&from) {
let mut node = node.lock().unwrap();
node.mark_pull_request_creation_time(from, now);
node.gossip.mark_pull_request_creation_time(from, now);
let mut stats = ProcessPullStats::default();
let (vers, vers_expired_timeout, failed_inserts) =
node.filter_pull_responses(&timeouts, rsp, now, &mut stats);
node.process_pull_responses(
let (vers, vers_expired_timeout, failed_inserts) = node
.gossip
.filter_pull_responses(&timeouts, rsp, now, &mut stats);
node.gossip.process_pull_responses(
&from,
vers,
vers_expired_timeout,
@@ -566,7 +566,7 @@ fn network_run_pull(
}
let total: usize = network_values
.par_iter()
.map(|v| v.lock().unwrap().crds.len())
.map(|v| v.gossip.crds.read().unwrap().len())
.sum();
convergance = total as f64 / ((num * num) as f64);
if convergance > max_convergance {
@@ -692,12 +692,14 @@ fn test_star_network_large_push() {
}
#[test]
fn test_prune_errors() {
let mut crds_gossip = CrdsGossip::default();
let crds_gossip = CrdsGossip::default();
let id = Pubkey::new(&[0; 32]);
let ci = ContactInfo::new_localhost(&Pubkey::new(&[1; 32]), 0);
let prune_pubkey = Pubkey::new(&[2; 32]);
crds_gossip
.crds
.write()
.unwrap()
.insert(
CrdsValue::new_unsigned(CrdsData::ContactInfo(ci.clone())),
0,

View File

@@ -321,11 +321,10 @@ pub fn cluster_info_scale() {
.iter()
.filter(|v| v.message.account_keys == tx.message.account_keys)
.count();
let gossip = node.gossip.read().unwrap();
num_old += gossip.push.num_old.load(Ordering::Relaxed);
num_push_total += gossip.push.num_total.load(Ordering::Relaxed);
num_pushes += gossip.push.num_pushes.load(Ordering::Relaxed);
num_pulls += gossip.pull.num_pulls.load(Ordering::Relaxed);
num_old += node.gossip.push.num_old.load(Ordering::Relaxed);
num_push_total += node.gossip.push.num_total.load(Ordering::Relaxed);
num_pushes += node.gossip.push.num_pushes.load(Ordering::Relaxed);
num_pulls += node.gossip.pull.num_pulls.load(Ordering::Relaxed);
if has_tx == 0 {
not_done += 1;
}
@@ -348,11 +347,10 @@ pub fn cluster_info_scale() {
);
sleep(Duration::from_millis(200));
for (node, _, _) in nodes.iter() {
let gossip = node.gossip.read().unwrap();
gossip.push.num_old.store(0, Ordering::Relaxed);
gossip.push.num_total.store(0, Ordering::Relaxed);
gossip.push.num_pushes.store(0, Ordering::Relaxed);
gossip.pull.num_pulls.store(0, Ordering::Relaxed);
node.gossip.push.num_old.store(0, Ordering::Relaxed);
node.gossip.push.num_total.store(0, Ordering::Relaxed);
node.gossip.push.num_pushes.store(0, Ordering::Relaxed);
node.gossip.pull.num_pulls.store(0, Ordering::Relaxed);
}
}