Optimize broadcast cluster_info critical section (#9327)
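
The hunks below shrink the `ClusterInfo` critical section in the broadcast path: peer enumeration moves into `get_broadcast_peers`, which holds the read lock only for the `tvu_peers()` call, while stake weighting, packet assembly, the `send_mmsg` loop, and the node-count datapoint now run as free functions in broadcast_stage.rs with the lock released. The commit also times the `send_mmsg` loop and reports it as `send_mmsg_elapsed`. A minimal, self-contained sketch of the locking pattern (stand-in types, not the real `ClusterInfo` API):

```rust
use std::sync::{Arc, RwLock};
use std::time::Duration;

// Stand-in for ClusterInfo: shared gossip state behind a RwLock.
struct ClusterInfo {
    peers: Vec<String>,
}

impl ClusterInfo {
    // Cheap snapshot taken while the read lock is held.
    fn tvu_peers(&self) -> Vec<String> {
        self.peers.clone()
    }
}

fn broadcast(cluster_info: &Arc<RwLock<ClusterInfo>>) {
    // Hold the read lock only long enough to copy the peer list out...
    let peers = cluster_info.read().unwrap().tvu_peers();

    // ...then do the slow work (weighting, serialization, socket I/O)
    // without blocking writers that need the gossip state.
    for peer in &peers {
        let _ = peer; // a send_to(peer) would go here
        std::thread::sleep(Duration::from_millis(1)); // simulate I/O
    }
}

fn main() {
    let ci = Arc::new(RwLock::new(ClusterInfo {
        peers: vec!["127.0.0.1:8001".into(), "127.0.0.1:8002".into()],
    }));
    broadcast(&ci);
}
```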
core/src/broadcast_stage.rs
@@ -4,6 +4,9 @@ use self::{
     fail_entry_verification_broadcast_run::FailEntryVerificationBroadcastRun,
     standard_broadcast_run::StandardBroadcastRun,
 };
+use crate::contact_info::ContactInfo;
+use crate::crds_gossip_pull::CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS;
+use crate::weighted_shuffle::weighted_best;
 use crate::{
     cluster_info::{ClusterInfo, ClusterInfoError},
     poh_recorder::WorkingBankEntry,
@@ -14,9 +17,13 @@ use crossbeam_channel::{
     Sender as CrossbeamSender,
 };
 use solana_ledger::{blockstore::Blockstore, shred::Shred, staking_utils};
+use solana_measure::measure::Measure;
 use solana_metrics::{inc_new_counter_error, inc_new_counter_info};
 use solana_runtime::bank::Bank;
+use solana_sdk::timing::duration_as_s;
+use solana_sdk::timing::timestamp;
 use solana_sdk::{clock::Slot, pubkey::Pubkey};
+use solana_streamer::sendmmsg::send_mmsg;
 use std::{
     collections::HashMap,
     net::UdpSocket,
@@ -328,6 +335,84 @@ impl BroadcastStage {
     }
 }
 
+fn update_peer_stats(num_live_peers: i64, broadcast_len: i64, last_datapoint_submit: &mut Instant) {
+    if duration_as_s(&Instant::now().duration_since(*last_datapoint_submit)) >= 1.0 {
+        datapoint_info!(
+            "cluster_info-num_nodes",
+            ("live_count", num_live_peers, i64),
+            ("broadcast_count", broadcast_len, i64)
+        );
+        *last_datapoint_submit = Instant::now();
+    }
+}
+
+pub fn get_broadcast_peers<S: std::hash::BuildHasher>(
+    cluster_info: &Arc<RwLock<ClusterInfo>>,
+    stakes: Option<Arc<HashMap<Pubkey, u64, S>>>,
+) -> (Vec<ContactInfo>, Vec<(u64, usize)>) {
+    use crate::cluster_info;
+    let mut peers = cluster_info.read().unwrap().tvu_peers();
+    let peers_and_stakes = cluster_info::stake_weight_peers(&mut peers, stakes);
+    (peers, peers_and_stakes)
+}
+
+/// broadcast messages from the leader to layer 1 nodes
+/// # Remarks
+pub fn broadcast_shreds(
+    s: &UdpSocket,
+    shreds: &Arc<Vec<Shred>>,
+    peers_and_stakes: &[(u64, usize)],
+    peers: &[ContactInfo],
+    last_datapoint_submit: &mut Instant,
+    send_mmsg_total: &mut u64,
+) -> Result<()> {
+    let broadcast_len = peers_and_stakes.len();
+    if broadcast_len == 0 {
+        update_peer_stats(1, 1, last_datapoint_submit);
+        return Ok(());
+    }
+    let packets: Vec<_> = shreds
+        .iter()
+        .map(|shred| {
+            let broadcast_index = weighted_best(&peers_and_stakes, shred.seed());
+
+            (&shred.payload, &peers[broadcast_index].tvu)
+        })
+        .collect();
+
+    let mut sent = 0;
+    let mut send_mmsg_time = Measure::start("send_mmsg");
+    while sent < packets.len() {
+        match send_mmsg(s, &packets[sent..]) {
+            Ok(n) => sent += n,
+            Err(e) => {
+                return Err(Error::IO(e));
+            }
+        }
+    }
+    send_mmsg_time.stop();
+    *send_mmsg_total += send_mmsg_time.as_us();
+
+    let num_live_peers = num_live_peers(&peers);
+    update_peer_stats(
+        num_live_peers,
+        broadcast_len as i64 + 1,
+        last_datapoint_submit,
+    );
+    Ok(())
+}
+
+fn num_live_peers(peers: &[ContactInfo]) -> i64 {
+    let mut num_live_peers = 1i64;
+    peers.iter().for_each(|p| {
+        // A peer is considered live if they generated their contact info recently
+        if timestamp() - p.wallclock <= CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS {
+            num_live_peers += 1;
+        }
+    });
+    num_live_peers
+}
+
 #[cfg(test)]
 pub mod test {
     use super::*;
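
Inside `broadcast_shreds` above, each shred is routed to one peer chosen by `weighted_best` from the stake-weighted `(stake, index)` list, keyed deterministically by the shred's seed. The sketch below illustrates the idea with a simple cumulative-weight pick; it is only an illustration, the real selection lives in `crate::weighted_shuffle::weighted_best`:

```rust
// Stand-in for weighted_best: deterministically map a shred's 32-byte seed
// to one peer index, with probability proportional to stake.
fn pick_weighted(stakes_and_index: &[(u64, usize)], seed: [u8; 32]) -> usize {
    let total: u64 = stakes_and_index.iter().map(|(w, _)| *w).sum();
    // Fold the seed into a u64, then reduce it into the total stake range.
    let mut x = 0u64;
    for chunk in seed.chunks(8) {
        let mut buf = [0u8; 8];
        buf[..chunk.len()].copy_from_slice(chunk);
        x ^= u64::from_le_bytes(buf);
    }
    let mut point = x % total.max(1);
    for (weight, index) in stakes_and_index {
        if point < *weight {
            return *index;
        }
        point -= weight;
    }
    stakes_and_index.last().map(|(_, i)| *i).unwrap_or(0)
}

fn main() {
    // (stake, index) pairs in the shape produced by stake_weight_peers.
    let peers_and_stakes = vec![(75u64, 0usize), (25, 1)];
    let idx = pick_weighted(&peers_and_stakes, [7u8; 32]);
    assert!(idx == 0 || idx == 1);
}
```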
core/src/broadcast_stage/fail_entry_verification_broadcast_run.rs
@@ -78,16 +78,19 @@ impl BroadcastRun for FailEntryVerificationBroadcastRun {
         sock: &UdpSocket,
     ) -> Result<()> {
         let (stakes, shreds) = receiver.lock().unwrap().recv()?;
-        let all_seeds: Vec<[u8; 32]> = shreds.iter().map(|s| s.seed()).collect();
         // Broadcast data
-        let all_shred_bufs: Vec<Vec<u8>> = shreds.to_vec().into_iter().map(|s| s.payload).collect();
-        cluster_info.read().unwrap().broadcast_shreds(
+        let (peers, peers_and_stakes) = get_broadcast_peers(cluster_info, stakes);
+
+        let mut send_mmsg_total = 0;
+        broadcast_shreds(
             sock,
-            all_shred_bufs,
-            &all_seeds,
-            stakes,
+            &shreds,
+            &peers_and_stakes,
+            &peers,
             &mut Instant::now(),
+            &mut send_mmsg_total,
         )?;
+
         Ok(())
     }
     fn record(
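
Note the `&mut Instant::now()` argument above: this run hands `broadcast_shreds` a freshly created timestamp, so the once-per-second gate inside `update_peer_stats` stays closed (unless the send loop itself takes a second) and this fail-entry test run emits no `cluster_info-num_nodes` datapoint. A self-contained sketch of that gate, simplified to `Duration` where the diff uses `duration_as_s`:

```rust
use std::time::{Duration, Instant};

// Mirrors the once-per-second gate in update_peer_stats.
fn maybe_report(last_submit: &mut Instant) -> bool {
    if last_submit.elapsed() >= Duration::from_secs(1) {
        *last_submit = Instant::now();
        return true; // a datapoint would be submitted here
    }
    false
}

fn main() {
    // A persistent timestamp that is already stale fires immediately...
    let mut persistent = Instant::now() - Duration::from_secs(2);
    assert!(maybe_report(&mut persistent));
    // ...while a throwaway Instant::now() is never a second old.
    assert!(!maybe_report(&mut Instant::now()));
}
```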
core/src/broadcast_stage/standard_broadcast_run.rs
@@ -17,20 +17,17 @@ struct BroadcastStats {
     broadcast_elapsed: u64,
     receive_elapsed: u64,
     seed_elapsed: u64,
+    send_mmsg_elapsed: u64,
 }
 
 impl BroadcastStats {
     fn reset(&mut self) {
-        self.insert_shreds_elapsed = 0;
-        self.shredding_elapsed = 0;
-        self.broadcast_elapsed = 0;
-        self.receive_elapsed = 0;
-        self.seed_elapsed = 0;
+        *self = Self::default();
     }
 }
 
 #[derive(Clone)]
-pub(super) struct StandardBroadcastRun {
+pub struct StandardBroadcastRun {
     stats: Arc<RwLock<BroadcastStats>>,
     unfinished_slot: Option<UnfinishedSlotInfo>,
     current_slot_and_parent: Option<(u64, u64)>,
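
The `reset` rewrite above replaces field-by-field zeroing with `*self = Self::default()`, which stays correct as fields are added: the new `send_mmsg_elapsed` is covered automatically. A minimal illustration with a stand-in struct of the same name:

```rust
#[derive(Default, Debug, PartialEq)]
struct BroadcastStats {
    broadcast_elapsed: u64,
    send_mmsg_elapsed: u64, // newly added fields are reset for free
}

impl BroadcastStats {
    fn reset(&mut self) {
        // Overwrite every field with its Default in one statement.
        *self = Self::default();
    }
}

fn main() {
    let mut stats = BroadcastStats { broadcast_elapsed: 120, send_mmsg_elapsed: 45 };
    stats.reset();
    assert_eq!(stats, BroadcastStats::default());
}
```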
@@ -258,20 +255,22 @@ impl StandardBroadcastRun {
         shreds: Arc<Vec<Shred>>,
     ) -> Result<()> {
         let seed_start = Instant::now();
-        let seeds: Vec<[u8; 32]> = shreds.iter().map(|s| s.seed()).collect();
         let seed_elapsed = seed_start.elapsed();
 
         // Broadcast the shreds
         let broadcast_start = Instant::now();
-        let shred_bufs: Vec<Vec<u8>> = shreds.to_vec().into_iter().map(|s| s.payload).collect();
-        trace!("Broadcasting {:?} shreds", shred_bufs.len());
+        trace!("Broadcasting {:?} shreds", shreds.len());
 
-        cluster_info.read().unwrap().broadcast_shreds(
+        let (peers, peers_and_stakes) = get_broadcast_peers(cluster_info, stakes);
+
+        let mut send_mmsg_total = 0;
+        broadcast_shreds(
             sock,
-            shred_bufs,
-            &seeds,
-            stakes,
+            &shreds,
+            &peers_and_stakes,
+            &peers,
             &mut self.last_datapoint_submit,
+            &mut send_mmsg_total,
         )?;
 
         let broadcast_elapsed = broadcast_start.elapsed();
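
Both updated call sites thread a `send_mmsg_total` accumulator into `broadcast_shreds`, which measures each send loop with `solana_measure::Measure` and adds the elapsed microseconds to the total. A simplified stand-in for that start/stop/accumulate pattern (the real `Measure::start` takes a name argument):

```rust
use std::time::Instant;

// Measure-like helper in the spirit of solana_measure::Measure.
struct Measure {
    start: Instant,
    us: u64,
}

impl Measure {
    fn start() -> Self {
        Measure { start: Instant::now(), us: 0 }
    }
    fn stop(&mut self) {
        self.us = self.start.elapsed().as_micros() as u64;
    }
    fn as_us(&self) -> u64 {
        self.us
    }
}

fn main() {
    let mut send_mmsg_total = 0u64;
    for _ in 0..3 {
        let mut t = Measure::start();
        std::thread::sleep(std::time::Duration::from_millis(2)); // stand-in for the send loop
        t.stop();
        send_mmsg_total += t.as_us(); // same accumulation as broadcast_shreds
    }
    assert!(send_mmsg_total >= 6_000); // at least ~6ms, in microseconds
}
```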
@@ -279,6 +278,7 @@ impl StandardBroadcastRun {
         self.update_broadcast_stats(BroadcastStats {
             broadcast_elapsed: duration_as_us(&broadcast_elapsed),
             seed_elapsed: duration_as_us(&seed_elapsed),
+            send_mmsg_elapsed: send_mmsg_total,
             ..BroadcastStats::default()
         });
         Ok(())
@@ -291,6 +291,7 @@ impl StandardBroadcastRun {
         wstats.insert_shreds_elapsed += stats.insert_shreds_elapsed;
         wstats.broadcast_elapsed += stats.broadcast_elapsed;
         wstats.seed_elapsed += stats.seed_elapsed;
+        wstats.send_mmsg_elapsed += stats.send_mmsg_elapsed;
     }
 
     fn report_and_reset_stats(&mut self) {
@@ -303,6 +304,7 @@ impl StandardBroadcastRun {
             ("insertion_time", stats.insert_shreds_elapsed as i64, i64),
             ("broadcast_time", stats.broadcast_elapsed as i64, i64),
             ("receive_time", stats.receive_elapsed as i64, i64),
+            ("send_mmsg", stats.send_mmsg_elapsed as i64, i64),
             ("seed", stats.seed_elapsed as i64, i64),
             (
                 "num_shreds",
core/src/cluster_info.rs
@@ -22,7 +22,7 @@ use crate::{
     },
     epoch_slots::EpochSlots,
     result::{Error, Result},
-    weighted_shuffle::{weighted_best, weighted_shuffle},
+    weighted_shuffle::weighted_shuffle,
 };
 use bincode::{serialize, serialized_size};
 use core::cmp;
@@ -43,7 +43,6 @@ use solana_perf::packet::{
 };
 use solana_rayon_threadlimit::get_thread_count;
 use solana_sdk::hash::Hash;
-use solana_sdk::timing::duration_as_s;
 use solana_sdk::{
     clock::{Slot, DEFAULT_MS_PER_SLOT, DEFAULT_SLOTS_PER_EPOCH},
     pubkey::Pubkey,
@@ -51,7 +50,7 @@ use solana_sdk::{
     timing::{duration_as_ms, timestamp},
     transaction::Transaction,
 };
-use solana_streamer::sendmmsg::{multicast, send_mmsg};
+use solana_streamer::sendmmsg::multicast;
 use solana_streamer::streamer::{PacketReceiver, PacketSender};
 use std::{
     borrow::Cow,
@@ -934,77 +933,6 @@ impl ClusterInfo {
             .collect()
     }
 
-    fn sorted_tvu_peers_and_stakes(
-        &self,
-        stakes: Option<Arc<HashMap<Pubkey, u64>>>,
-    ) -> (Vec<ContactInfo>, Vec<(u64, usize)>) {
-        let mut peers = self.tvu_peers();
-        peers.dedup();
-        let stakes_and_index = ClusterInfo::sorted_stakes_with_index(&peers, stakes);
-        (peers, stakes_and_index)
-    }
-
-    /// broadcast messages from the leader to layer 1 nodes
-    /// # Remarks
-    pub fn broadcast_shreds(
-        &self,
-        s: &UdpSocket,
-        shreds: Vec<Vec<u8>>,
-        seeds: &[[u8; 32]],
-        stakes: Option<Arc<HashMap<Pubkey, u64>>>,
-        last_datapoint_submit: &mut Instant,
-    ) -> Result<()> {
-        let (peers, peers_and_stakes) = self.sorted_tvu_peers_and_stakes(stakes);
-        let broadcast_len = peers_and_stakes.len();
-        if broadcast_len == 0 {
-            if duration_as_s(&Instant::now().duration_since(*last_datapoint_submit)) >= 1.0 {
-                datapoint_info!(
-                    "cluster_info-num_nodes",
-                    ("live_count", 1, i64),
-                    ("broadcast_count", 1, i64)
-                );
-                *last_datapoint_submit = Instant::now();
-            }
-            return Ok(());
-        }
-        let mut packets: Vec<_> = shreds
-            .into_iter()
-            .zip(seeds)
-            .map(|(shred, seed)| {
-                let broadcast_index = weighted_best(&peers_and_stakes, *seed);
-
-                (shred, &peers[broadcast_index].tvu)
-            })
-            .collect();
-
-        let mut sent = 0;
-        while sent < packets.len() {
-            match send_mmsg(s, &mut packets[sent..]) {
-                Ok(n) => sent += n,
-                Err(e) => {
-                    return Err(Error::IO(e));
-                }
-            }
-        }
-
-        let mut num_live_peers = 1i64;
-        peers.iter().for_each(|p| {
-            // A peer is considered live if they generated their contact info recently
-            if timestamp() - p.wallclock <= CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS {
-                num_live_peers += 1;
-            }
-        });
-        if duration_as_s(&Instant::now().duration_since(*last_datapoint_submit)) >= 1.0 {
-            datapoint_info!(
-                "cluster_info-num_nodes",
-                ("live_count", num_live_peers, i64),
-                ("broadcast_count", broadcast_len + 1, i64)
-            );
-            *last_datapoint_submit = Instant::now();
-        }
-        Ok(())
-    }
-
     /// retransmit messages to a list of nodes
     /// # Remarks
     /// We need to avoid having obj locked while doing a io, such as the `send_to`
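
The deleted method above and its replacement in broadcast_stage.rs share the same send-loop shape: `send_mmsg` packs many packets into one syscall and returns how many the kernel accepted, so the loop resumes from `sent` rather than restarting. A self-contained sketch with a hypothetical `send_batch` standing in for `send_mmsg`:

```rust
// Pretend the OS accepts at most 3 packets per call; a real batched
// sender may likewise transmit only a prefix of the slice.
fn send_batch(packets: &[Vec<u8>]) -> std::io::Result<usize> {
    Ok(packets.len().min(3))
}

fn send_all(packets: &[Vec<u8>]) -> std::io::Result<()> {
    let mut sent = 0;
    while sent < packets.len() {
        match send_batch(&packets[sent..]) {
            Ok(n) => sent += n, // advance past what was accepted
            Err(e) => return Err(e),
        }
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    let packets: Vec<Vec<u8>> = (0..10).map(|i| vec![i as u8; 8]).collect();
    send_all(&packets)
}
```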
@@ -1942,6 +1870,14 @@ fn report_time_spent(label: &str, time: &Duration, extra: &str) {
     }
 }
 
+pub fn stake_weight_peers<S: std::hash::BuildHasher>(
+    peers: &mut Vec<ContactInfo>,
+    stakes: Option<Arc<HashMap<Pubkey, u64, S>>>,
+) -> Vec<(u64, usize)> {
+    peers.dedup();
+    ClusterInfo::sorted_stakes_with_index(peers, stakes)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
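
`stake_weight_peers` dedups before weighting so a peer listed twice does not get twice the chance of selection; note that `Vec::dedup` only removes consecutive repeats. A sketch of the dedup-then-stake-index step, with string peer names and a sort by stake standing in for the real `ContactInfo` and `sorted_stakes_with_index`:

```rust
use std::collections::HashMap;

// Stand-in for the dedup + stake-indexing in stake_weight_peers above.
fn stake_weight_peers(
    peers: &mut Vec<String>,
    stakes: &HashMap<String, u64>,
) -> Vec<(u64, usize)> {
    peers.dedup(); // removes *consecutive* duplicates only
    let mut weighted: Vec<(u64, usize)> = peers
        .iter()
        .enumerate()
        .map(|(i, p)| (*stakes.get(p).unwrap_or(&1), i))
        .collect();
    // Highest stake first, mirroring the intent of sorted_stakes_with_index.
    weighted.sort_by(|a, b| b.0.cmp(&a.0));
    weighted
}

fn main() {
    let mut peers = vec!["a".to_string(), "a".into(), "b".into()];
    let stakes: HashMap<String, u64> = [("a".into(), 10), ("b".into(), 30)].into();
    let weighted = stake_weight_peers(&mut peers, &stakes);
    assert_eq!(peers.len(), 2); // duplicate "a" collapsed
    assert_eq!(weighted[0], (30, 1)); // "b" carries the most stake
}
```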
@@ -2480,7 +2416,8 @@ mod tests {
         stakes.insert(id4, 10);
 
         let stakes = Arc::new(stakes);
-        let (peers, peers_and_stakes) = cluster_info.sorted_tvu_peers_and_stakes(Some(stakes));
+        let mut peers = cluster_info.tvu_peers();
+        let peers_and_stakes = stake_weight_peers(&mut peers, Some(stakes));
         assert_eq!(peers.len(), 2);
         assert_eq!(peers[0].id, id);
         assert_eq!(peers[1].id, id2);