sendmmsg cleanup #18589
Rationalize usage of sendmmsg(2): skip over packets that fail to send instead of bailing out on the first I/O error, and track the failures in broadcast stats.
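Before this change, send_mmsg returned how many packets it managed to send and the caller looped over the remainder, returning on the first I/O error. The new batch_send attempts the whole slice and, on failure, returns SendPktsError::IoError(err, num_failed) so the caller can account for the packets that were dropped. A minimal caller-side sketch of the new pattern (the send_batch helper and its exact signature are illustrative assumptions; batch_send and SendPktsError are the items this diff imports from solana_streamer::sendmmsg):

    use solana_streamer::sendmmsg::{batch_send, SendPktsError};
    use std::net::{SocketAddr, UdpSocket};

    // Hypothetical helper mirroring the pattern used in broadcast_shreds below:
    // attempt the whole batch once, count the failures, and surface the first
    // I/O error only after the bookkeeping is done.
    fn send_batch(
        sock: &UdpSocket,
        packets: &[(&[u8], &SocketAddr)],
        dropped_packets: &mut usize,
    ) -> std::io::Result<()> {
        let mut result = Ok(());
        if let Err(SendPktsError::IoError(ioerr, num_failed)) = batch_send(sock, packets) {
            // batch_send keeps going past packets that fail to send; num_failed
            // reports how many could not be handed to the socket.
            *dropped_packets += num_failed;
            result = Err(ioerr);
        }
        result
    }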
@@ -23,7 +23,7 @@ use solana_poh::poh_recorder::WorkingBankEntry;
 use solana_runtime::{bank::Bank, bank_forks::BankForks};
 use solana_sdk::timing::timestamp;
 use solana_sdk::{clock::Slot, pubkey::Pubkey, signature::Keypair};
-use solana_streamer::sendmmsg::send_mmsg;
+use solana_streamer::sendmmsg::{batch_send, SendPktsError};
 use std::sync::atomic::AtomicU64;
 use std::{
     collections::HashMap,
@@ -402,10 +402,11 @@ pub fn broadcast_shreds(
     self_pubkey: Pubkey,
     bank_forks: &Arc<RwLock<BankForks>>,
 ) -> Result<()> {
+    let mut result = Ok(());
     let broadcast_len = cluster_nodes.num_peers();
     if broadcast_len == 0 {
         update_peer_stats(1, 1, last_datapoint_submit);
-        return Ok(());
+        return result;
     }
     let mut shred_select = Measure::start("shred_select");
     let root_bank = bank_forks.read().unwrap().root_bank();
@@ -414,24 +415,20 @@ pub fn broadcast_shreds(
         .filter_map(|shred| {
             let seed = shred.seed(Some(self_pubkey), &root_bank);
             let node = cluster_nodes.get_broadcast_peer(seed)?;
-            Some((&shred.payload, &node.tvu))
+            Some((&shred.payload[..], &node.tvu))
         })
         .collect();
     shred_select.stop();
     transmit_stats.shred_select += shred_select.as_us();

-    let mut sent = 0;
     let mut send_mmsg_time = Measure::start("send_mmsg");
-    while sent < packets.len() {
-        match send_mmsg(s, &packets[sent..]) {
-            Ok(n) => sent += n,
-            Err(e) => {
-                return Err(Error::Io(e));
-            }
-        }
-    }
+    if let Err(SendPktsError::IoError(ioerr, num_failed)) = batch_send(s, &packets[..]) {
+        transmit_stats.dropped_packets += num_failed;
+        result = Err(Error::Io(ioerr));
+    }
     send_mmsg_time.stop();
     transmit_stats.send_mmsg_elapsed += send_mmsg_time.as_us();
+    transmit_stats.total_packets += packets.len();

     let num_live_peers = cluster_nodes.num_peers_live(timestamp()) as i64;
     update_peer_stats(
@@ -439,7 +436,7 @@ pub fn broadcast_shreds(
         broadcast_len as i64 + 1,
         last_datapoint_submit,
     );
-    Ok(())
+    result
 }

 #[cfg(test)]
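Note the control flow in broadcast_shreds above: instead of returning as soon as a send fails, the I/O error is parked in result while transmit_stats still picks up dropped_packets, total_packets, and the timing counters, and the peer stats are still submitted; the error is only surfaced when the function returns.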
@@ -19,6 +19,8 @@ pub struct TransmitShredsStats {
     pub get_peers_elapsed: u64,
     pub shred_select: u64,
     pub num_shreds: usize,
+    pub total_packets: usize,
+    pub dropped_packets: usize,
 }

 impl BroadcastStats for TransmitShredsStats {
@@ -28,6 +30,8 @@ impl BroadcastStats for TransmitShredsStats {
         self.get_peers_elapsed += new_stats.get_peers_elapsed;
         self.num_shreds += new_stats.num_shreds;
         self.shred_select += new_stats.shred_select;
+        self.total_packets += new_stats.total_packets;
+        self.dropped_packets += new_stats.dropped_packets;
     }
     fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
         datapoint_info!(
@@ -45,6 +49,8 @@ impl BroadcastStats for TransmitShredsStats {
             ("get_peers_elapsed", self.get_peers_elapsed as i64, i64),
             ("num_shreds", self.num_shreds as i64, i64),
             ("shred_select", self.shred_select as i64, i64),
+            ("total_packets", self.total_packets as i64, i64),
+            ("dropped_packets", self.dropped_packets as i64, i64),
         );
     }
 }
@@ -173,6 +179,8 @@ mod test {
                 send_mmsg_elapsed: 3,
                 shred_select: 4,
                 num_shreds: 5,
+                total_packets: 6,
+                dropped_packets: 7,
             },
             &Some(BroadcastShredBatchInfo {
                 slot: 0,
@@ -190,14 +198,18 @@ mod test {
         assert_eq!(slot_0_stats.broadcast_shred_stats.send_mmsg_elapsed, 3);
         assert_eq!(slot_0_stats.broadcast_shred_stats.shred_select, 4);
         assert_eq!(slot_0_stats.broadcast_shred_stats.num_shreds, 5);
+        assert_eq!(slot_0_stats.broadcast_shred_stats.total_packets, 6);
+        assert_eq!(slot_0_stats.broadcast_shred_stats.dropped_packets, 7);

         slot_broadcast_stats.update(
             &TransmitShredsStats {
-                transmit_elapsed: 7,
-                get_peers_elapsed: 8,
-                send_mmsg_elapsed: 9,
-                shred_select: 10,
-                num_shreds: 11,
+                transmit_elapsed: 11,
+                get_peers_elapsed: 12,
+                send_mmsg_elapsed: 13,
+                shred_select: 14,
+                num_shreds: 15,
+                total_packets: 16,
+                dropped_packets: 17,
             },
             &None,
         );
@@ -211,6 +223,8 @@ mod test {
         assert_eq!(slot_0_stats.broadcast_shred_stats.send_mmsg_elapsed, 3);
         assert_eq!(slot_0_stats.broadcast_shred_stats.shred_select, 4);
         assert_eq!(slot_0_stats.broadcast_shred_stats.num_shreds, 5);
+        assert_eq!(slot_0_stats.broadcast_shred_stats.total_packets, 6);
+        assert_eq!(slot_0_stats.broadcast_shred_stats.dropped_packets, 7);

         // If another batch is given, then total number of batches == num_expected_batches == 2,
         // so the batch should be purged from the HashMap
@@ -221,6 +235,8 @@ mod test {
                 send_mmsg_elapsed: 1,
                 shred_select: 1,
                 num_shreds: 1,
+                total_packets: 1,
+                dropped_packets: 1,
             },
             &Some(BroadcastShredBatchInfo {
                 slot: 0,
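With the two new TransmitShredsStats fields, the datapoint emitted by report_stats carries total_packets alongside dropped_packets, so a per-slot drop rate (dropped_packets / total_packets) can be read straight off the metrics.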