Remove unnecessary locking in retransmit stage (#6276)

* Add more detailed metrics to retransmit

* Remove unnecessary locking and add more metrics
Author:    Sagar Dhawan
Committer: GitHub
Date:      2019-10-08 14:41:16 -07:00
Parent:    baf4e767e1
Commit:    723f9a9b81

4 changed files with 34 additions and 27 deletions


@@ -532,7 +532,7 @@ impl ClusterInfo {
     /// Return sorted Retransmit peers and index of `Self.id()` as if it were in that list
     pub fn shuffle_peers_and_index(
-        &self,
+        id: &Pubkey,
         peers: &[ContactInfo],
         stakes_and_index: &[(u64, usize)],
         rng: ChaChaRng,
@@ -543,7 +543,7 @@ impl ClusterInfo {
            .iter()
            .enumerate()
            .for_each(|(i, (_stake, index))| {
-                if peers[*index].id == self.id() {
+                if &peers[*index].id == id {
                    self_index = i;
                }
            });
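The seeded `ChaChaRng` argument is what keeps this shuffle reproducible across the cluster: any validator that seeds the rng with the same bytes computes the same peer ordering, so each node can locate its own index in a shared shuffle without coordination. A self-contained sketch of that property (toy peers and the `rand`/`rand_chacha` crates, not the real `ContactInfo` type):

    use rand::seq::SliceRandom;
    use rand::SeedableRng;
    use rand_chacha::ChaChaRng;

    fn main() {
        // Stand-in for the shared per-packet seed (packet.meta.seed elsewhere in this commit).
        let seed = [42u8; 32];
        let peers = ["a", "b", "c", "d"];

        // Two independent "nodes" shuffle the same index list with the same seed.
        let mut order_node1: Vec<usize> = (0..peers.len()).collect();
        order_node1.shuffle(&mut ChaChaRng::from_seed(seed));
        let mut order_node2: Vec<usize> = (0..peers.len()).collect();
        order_node2.shuffle(&mut ChaChaRng::from_seed(seed));

        // Same seed => same ordering on every node.
        assert_eq!(order_node1, order_node2);
    }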
@@ -737,25 +737,20 @@ impl ClusterInfo {
     /// # Remarks
     /// We need to avoid having obj locked while doing a io, such as the `send_to`
     pub fn retransmit_to(
-        obj: &Arc<RwLock<Self>>,
+        id: &Pubkey,
         peers: &[&ContactInfo],
         packet: &Packet,
         slot_leader_pubkey: Option<Pubkey>,
         s: &UdpSocket,
         forwarded: bool,
     ) -> Result<()> {
-        let (me, orders): (ContactInfo, &[&ContactInfo]) = {
-            // copy to avoid locking during IO
-            let s = obj.read().unwrap();
-            (s.my_data().clone(), peers)
-        };
-        trace!("retransmit orders {}", orders.len());
-        let errs: Vec<_> = orders
+        trace!("retransmit orders {}", peers.len());
+        let errs: Vec<_> = peers
            .par_iter()
            .filter(|v| v.id != slot_leader_pubkey.unwrap_or_default())
            .map(|v| {
                let dest = if forwarded { &v.tvu_forwards } else { &v.tvu };
-                debug!("{}: retransmit packet to {} {}", me.id, v.id, *dest,);
+                debug!("{}: retransmit packet to {} {}", id, v.id, *dest,);
                s.send_to(&packet.data, dest)
            })
            .collect();
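The deleted block is exactly the pattern the `# Remarks` note warns about: the old code took the read lock just long enough to clone `my_data()` before the parallel `send_to` loop. With the caller passing `id` in directly, this function no longer touches the lock at all. A minimal sketch of the refactor shape, using a stand-in `Node` type rather than the real `ClusterInfo`:

    use std::sync::{Arc, RwLock};

    struct Node {
        id: u64,
    }

    impl Node {
        // Before: a &self method forces callers to hold the RwLock guard
        // for the whole call, including any IO done inside it.
        fn index_of_self(&self, peers: &[u64]) -> Option<usize> {
            peers.iter().position(|p| *p == self.id)
        }

        // After: an associated function taking the id; callers copy the id
        // out under a short-lived lock and call this with no lock held.
        fn index_of(id: u64, peers: &[u64]) -> Option<usize> {
            peers.iter().position(|p| *p == id)
        }
    }

    fn main() {
        let node = Arc::new(RwLock::new(Node { id: 3 }));
        let peers = [1u64, 2, 3, 4];

        // The lock is held only long enough to copy the id out.
        let id = node.read().unwrap().id;
        assert_eq!(Node::index_of(id, &peers), Some(2));
    }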


@@ -41,8 +41,6 @@ pub fn retransmit(
        packet_v.push(nq);
    }

-    datapoint_debug!("retransmit-stage", ("count", total_packets, i64));
-
    let r_bank = bank_forks.read().unwrap().working_bank();
    let bank_epoch = r_bank.get_stakers_epoch(r_bank.slot());
    let mut peers_len = 0;
@@ -51,15 +49,18 @@ pub fn retransmit(
        .read()
        .unwrap()
        .sorted_retransmit_peers_and_stakes(stakes.as_ref());
+    let me = cluster_info.read().unwrap().my_data().clone();
    let mut retransmit_total = 0;
+    let mut compute_turbine_peers_total = 0;
    for packets in packet_v {
        for packet in &packets.packets {
-            let (my_index, mut shuffled_stakes_and_index) =
-                cluster_info.read().unwrap().shuffle_peers_and_index(
-                    &peers,
-                    &stakes_and_index,
-                    ChaChaRng::from_seed(packet.meta.seed),
-                );
+            let mut compute_turbine_peers = Measure::start("turbine_start");
+            let (my_index, mut shuffled_stakes_and_index) = ClusterInfo::shuffle_peers_and_index(
+                &me.id,
+                &peers,
+                &stakes_and_index,
+                ChaChaRng::from_seed(packet.meta.seed),
+            );
            peers_len = cmp::max(peers_len, shuffled_stakes_and_index.len());
            shuffled_stakes_and_index.remove(my_index);
            // split off the indexes, we don't need the stakes anymore
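The trailing comment refers to the step just after this hunk, where the stake weights are dropped and only the shuffled indexes feed `compute_retransmit_peers` below. A one-liner sketch of that split, assuming the `(stake, index)` tuple shape used above:

    fn main() {
        // (stake, index) pairs after the weighted shuffle; only the
        // indexes matter for computing retransmit peers.
        let shuffled_stakes_and_index = vec![(700u64, 2usize), (300, 0), (100, 1)];
        let indexes: Vec<usize> = shuffled_stakes_and_index
            .into_iter()
            .map(|(_stake, index)| index)
            .collect();
        assert_eq!(indexes, vec![2, 0, 1]);
    }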
@@ -72,28 +73,37 @@ pub fn retransmit(
                compute_retransmit_peers(DATA_PLANE_FANOUT, my_index, indexes);
            let neighbors: Vec<_> = neighbors.into_iter().map(|index| &peers[index]).collect();
            let children: Vec<_> = children.into_iter().map(|index| &peers[index]).collect();
+            compute_turbine_peers.stop();
+            compute_turbine_peers_total += compute_turbine_peers.as_ms();

            let leader =
                leader_schedule_cache.slot_leader_at(packet.meta.slot, Some(r_bank.as_ref()));
            let mut retransmit_time = Measure::start("retransmit_to");
            if !packet.meta.forward {
-                ClusterInfo::retransmit_to(&cluster_info, &neighbors, packet, leader, sock, true)?;
-                ClusterInfo::retransmit_to(&cluster_info, &children, packet, leader, sock, false)?;
+                ClusterInfo::retransmit_to(&me.id, &neighbors, packet, leader, sock, true)?;
+                ClusterInfo::retransmit_to(&me.id, &children, packet, leader, sock, false)?;
            } else {
-                ClusterInfo::retransmit_to(&cluster_info, &children, packet, leader, sock, true)?;
+                ClusterInfo::retransmit_to(&me.id, &children, packet, leader, sock, true)?;
            }
            retransmit_time.stop();
-            retransmit_total += retransmit_time.as_us();
+            retransmit_total += retransmit_time.as_ms();
        }
    }
    timer_start.stop();
    debug!(
-        "retransmitted {} packets in {}us retransmit_time: {}us",
+        "retransmitted {} packets in {}ms retransmit_time: {}ms",
        total_packets,
-        timer_start.as_us(),
+        timer_start.as_ms(),
        retransmit_total
    );
-    datapoint_debug!("cluster_info-num_nodes", ("count", peers_len, i64));
+    datapoint_debug!(
+        "retransmit-stage",
+        ("total_time", timer_start.as_ms() as i64, i64),
+        ("total_packets", total_packets as i64, i64),
+        ("retransmit_total", retransmit_total as i64, i64),
+        ("compute_turbine", compute_turbine_peers_total as i64, i64),
+    );
    Ok(())
}
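The reworked metrics all follow one pattern: start a `Measure` per phase, accumulate the elapsed time into a batch total, and emit a single consolidated `datapoint_debug!` per loop instead of one datapoint per packet. `Measure` lives in Solana's `solana_measure` crate; the sketch below only mirrors its start/stop/elapsed flow using `std::time::Instant`:

    use std::time::Instant;

    fn main() {
        let mut compute_turbine_total_ms = 0u128;
        let mut retransmit_total_ms = 0u128;

        for _packet in 0..3 {
            // Phase 1: compute the turbine peers for this packet.
            let compute_start = Instant::now();
            // ... shuffle peers, derive neighbors/children ...
            compute_turbine_total_ms += compute_start.elapsed().as_millis();

            // Phase 2: retransmit the packet.
            let retransmit_start = Instant::now();
            // ... send to neighbors/children ...
            retransmit_total_ms += retransmit_start.elapsed().as_millis();
        }

        // One consolidated report per batch keeps metrics overhead low.
        println!(
            "retransmit-stage compute_turbine={}ms retransmit_total={}ms",
            compute_turbine_total_ms, retransmit_total_ms
        );
    }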


@@ -116,7 +116,8 @@ fn run_simulation(stakes: &[u64], fanout: usize) {
        seed[0..4].copy_from_slice(&i.to_le_bytes());
        let (peers, stakes_and_index) =
            cluster_info.sorted_retransmit_peers_and_stakes(Some(&staked_nodes));
-        let (_, shuffled_stakes_and_indexes) = cluster_info.shuffle_peers_and_index(
+        let (_, shuffled_stakes_and_indexes) = ClusterInfo::shuffle_peers_and_index(
+            &cluster_info.id(),
            &peers,
            &stakes_and_index,
            ChaChaRng::from_seed(seed),


@@ -177,8 +177,9 @@ pub fn cluster_info_retransmit() -> result::Result<()> {
    let mut p = Packet::default();
    p.meta.size = 10;
    let peers = c1.read().unwrap().retransmit_peers();
+    let self_id = c1.read().unwrap().id();
    let retransmit_peers: Vec<_> = peers.iter().collect();
-    ClusterInfo::retransmit_to(&c1, &retransmit_peers, &p, None, &tn1, false)?;
+    ClusterInfo::retransmit_to(&self_id, &retransmit_peers, &p, None, &tn1, false)?;
    let res: Vec<_> = [tn1, tn2, tn3]
        .into_par_iter()
        .map(|s| {