Separate out interrupted slots broadcast metrics (#20537)
This commit is contained in:
@ -89,6 +89,7 @@ impl BroadcastRun for BroadcastFakeShredsRun {
|
|||||||
slot,
|
slot,
|
||||||
num_expected_batches: None,
|
num_expected_batches: None,
|
||||||
slot_start_ts: Instant::now(),
|
slot_start_ts: Instant::now(),
|
||||||
|
was_interrupted: false,
|
||||||
};
|
};
|
||||||
// 3) Start broadcast step
|
// 3) Start broadcast step
|
||||||
//some indicates fake shreds
|
//some indicates fake shreds
|
||||||
|
@ -2,7 +2,7 @@ use super::*;
|
|||||||
|
|
||||||
pub(crate) trait BroadcastStats {
|
pub(crate) trait BroadcastStats {
|
||||||
fn update(&mut self, new_stats: &Self);
|
fn update(&mut self, new_stats: &Self);
|
||||||
fn report_stats(&mut self, slot: Slot, slot_start: Instant);
|
fn report_stats(&mut self, slot: Slot, slot_start: Instant, was_interrupted: bool);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -10,6 +10,7 @@ pub(crate) struct BroadcastShredBatchInfo {
|
|||||||
pub(crate) slot: Slot,
|
pub(crate) slot: Slot,
|
||||||
pub(crate) num_expected_batches: Option<usize>,
|
pub(crate) num_expected_batches: Option<usize>,
|
||||||
pub(crate) slot_start_ts: Instant,
|
pub(crate) slot_start_ts: Instant,
|
||||||
|
pub(crate) was_interrupted: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone)]
|
#[derive(Default, Clone)]
|
||||||
@ -33,7 +34,20 @@ impl BroadcastStats for TransmitShredsStats {
|
|||||||
self.total_packets += new_stats.total_packets;
|
self.total_packets += new_stats.total_packets;
|
||||||
self.dropped_packets += new_stats.dropped_packets;
|
self.dropped_packets += new_stats.dropped_packets;
|
||||||
}
|
}
|
||||||
fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
|
fn report_stats(&mut self, slot: Slot, slot_start: Instant, was_interrupted: bool) {
|
||||||
|
if was_interrupted {
|
||||||
|
datapoint_info!(
|
||||||
|
"broadcast-transmit-shreds-interrupted-stats",
|
||||||
|
("slot", slot as i64, i64),
|
||||||
|
("transmit_elapsed", self.transmit_elapsed as i64, i64),
|
||||||
|
("send_mmsg_elapsed", self.send_mmsg_elapsed as i64, i64),
|
||||||
|
("get_peers_elapsed", self.get_peers_elapsed as i64, i64),
|
||||||
|
("num_shreds", self.num_shreds as i64, i64),
|
||||||
|
("shred_select", self.shred_select as i64, i64),
|
||||||
|
("total_packets", self.total_packets as i64, i64),
|
||||||
|
("dropped_packets", self.dropped_packets as i64, i64),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"broadcast-transmit-shreds-stats",
|
"broadcast-transmit-shreds-stats",
|
||||||
("slot", slot as i64, i64),
|
("slot", slot as i64, i64),
|
||||||
@ -53,6 +67,7 @@ impl BroadcastStats for TransmitShredsStats {
|
|||||||
("dropped_packets", self.dropped_packets as i64, i64),
|
("dropped_packets", self.dropped_packets as i64, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone)]
|
#[derive(Default, Clone)]
|
||||||
@ -65,7 +80,19 @@ impl BroadcastStats for InsertShredsStats {
|
|||||||
self.insert_shreds_elapsed += new_stats.insert_shreds_elapsed;
|
self.insert_shreds_elapsed += new_stats.insert_shreds_elapsed;
|
||||||
self.num_shreds += new_stats.num_shreds;
|
self.num_shreds += new_stats.num_shreds;
|
||||||
}
|
}
|
||||||
fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
|
fn report_stats(&mut self, slot: Slot, slot_start: Instant, was_interrupted: bool) {
|
||||||
|
if was_interrupted {
|
||||||
|
datapoint_info!(
|
||||||
|
"broadcast-insert-shreds-interrupted-stats",
|
||||||
|
("slot", slot as i64, i64),
|
||||||
|
(
|
||||||
|
"insert_shreds_elapsed",
|
||||||
|
self.insert_shreds_elapsed as i64,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
("num_shreds", self.num_shreds as i64, i64),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"broadcast-insert-shreds-stats",
|
"broadcast-insert-shreds-stats",
|
||||||
("slot", slot as i64, i64),
|
("slot", slot as i64, i64),
|
||||||
@ -84,6 +111,7 @@ impl BroadcastStats for InsertShredsStats {
|
|||||||
("num_shreds", self.num_shreds as i64, i64),
|
("num_shreds", self.num_shreds as i64, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tracks metrics of type `T` across multiple threads
|
// Tracks metrics of type `T` across multiple threads
|
||||||
@ -128,9 +156,11 @@ impl<T: BroadcastStats + Default> SlotBroadcastStats<T> {
|
|||||||
}
|
}
|
||||||
if let Some(num_expected_batches) = slot_batch_counter.num_expected_batches {
|
if let Some(num_expected_batches) = slot_batch_counter.num_expected_batches {
|
||||||
if slot_batch_counter.num_batches == num_expected_batches {
|
if slot_batch_counter.num_batches == num_expected_batches {
|
||||||
slot_batch_counter
|
slot_batch_counter.broadcast_shred_stats.report_stats(
|
||||||
.broadcast_shred_stats
|
batch_info.slot,
|
||||||
.report_stats(batch_info.slot, batch_info.slot_start_ts);
|
batch_info.slot_start_ts,
|
||||||
|
batch_info.was_interrupted,
|
||||||
|
);
|
||||||
should_delete = true;
|
should_delete = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -159,7 +189,7 @@ mod test {
|
|||||||
self.count += new_stats.count;
|
self.count += new_stats.count;
|
||||||
self.sender = new_stats.sender.clone();
|
self.sender = new_stats.sender.clone();
|
||||||
}
|
}
|
||||||
fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
|
fn report_stats(&mut self, slot: Slot, slot_start: Instant, _was_interrupted: bool) {
|
||||||
self.sender
|
self.sender
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
@ -186,6 +216,7 @@ mod test {
|
|||||||
slot: 0,
|
slot: 0,
|
||||||
num_expected_batches: Some(2),
|
num_expected_batches: Some(2),
|
||||||
slot_start_ts: start,
|
slot_start_ts: start,
|
||||||
|
was_interrupted: false,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -242,6 +273,7 @@ mod test {
|
|||||||
slot: 0,
|
slot: 0,
|
||||||
num_expected_batches: None,
|
num_expected_batches: None,
|
||||||
slot_start_ts: start,
|
slot_start_ts: start,
|
||||||
|
was_interrupted: false,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -265,6 +297,7 @@ mod test {
|
|||||||
slot,
|
slot,
|
||||||
num_expected_batches: None,
|
num_expected_batches: None,
|
||||||
slot_start_ts: start,
|
slot_start_ts: start,
|
||||||
|
was_interrupted: false,
|
||||||
};
|
};
|
||||||
if i == round % num_threads {
|
if i == round % num_threads {
|
||||||
broadcast_batch_info.num_expected_batches = Some(num_threads);
|
broadcast_batch_info.num_expected_batches = Some(num_threads);
|
||||||
|
@ -92,7 +92,7 @@ impl StandardBroadcastRun {
|
|||||||
stats,
|
stats,
|
||||||
);
|
);
|
||||||
shreds.insert(0, shred);
|
shreds.insert(0, shred);
|
||||||
self.report_and_reset_stats();
|
self.report_and_reset_stats(true);
|
||||||
self.unfinished_slot = None;
|
self.unfinished_slot = None;
|
||||||
shreds
|
shreds
|
||||||
}
|
}
|
||||||
@ -240,6 +240,7 @@ impl StandardBroadcastRun {
|
|||||||
"Old broadcast start time for previous slot must exist if the previous slot
|
"Old broadcast start time for previous slot must exist if the previous slot
|
||||||
was interrupted",
|
was interrupted",
|
||||||
),
|
),
|
||||||
|
was_interrupted: true,
|
||||||
});
|
});
|
||||||
let shreds = Arc::new(prev_slot_shreds);
|
let shreds = Arc::new(prev_slot_shreds);
|
||||||
debug_assert!(shreds.iter().all(|shred| shred.slot() == slot));
|
debug_assert!(shreds.iter().all(|shred| shred.slot() == slot));
|
||||||
@ -262,6 +263,7 @@ impl StandardBroadcastRun {
|
|||||||
slot_start_ts: self
|
slot_start_ts: self
|
||||||
.slot_broadcast_start
|
.slot_broadcast_start
|
||||||
.expect("Start timestamp must exist for a slot if we're broadcasting the slot"),
|
.expect("Start timestamp must exist for a slot if we're broadcasting the slot"),
|
||||||
|
was_interrupted: false,
|
||||||
});
|
});
|
||||||
get_leader_schedule_time.stop();
|
get_leader_schedule_time.stop();
|
||||||
|
|
||||||
@ -297,7 +299,7 @@ impl StandardBroadcastRun {
|
|||||||
self.process_shreds_stats.update(&process_stats);
|
self.process_shreds_stats.update(&process_stats);
|
||||||
|
|
||||||
if last_tick_height == bank.max_tick_height() {
|
if last_tick_height == bank.max_tick_height() {
|
||||||
self.report_and_reset_stats();
|
self.report_and_reset_stats(false);
|
||||||
self.unfinished_slot = None;
|
self.unfinished_slot = None;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -380,9 +382,32 @@ impl StandardBroadcastRun {
|
|||||||
transmit_shreds_stats.update(new_transmit_shreds_stats, broadcast_shred_batch_info);
|
transmit_shreds_stats.update(new_transmit_shreds_stats, broadcast_shred_batch_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn report_and_reset_stats(&mut self) {
|
fn report_and_reset_stats(&mut self, was_interrupted: bool) {
|
||||||
let stats = &self.process_shreds_stats;
|
let stats = &self.process_shreds_stats;
|
||||||
let unfinished_slot = self.unfinished_slot.as_ref().unwrap();
|
let unfinished_slot = self.unfinished_slot.as_ref().unwrap();
|
||||||
|
if was_interrupted {
|
||||||
|
datapoint_info!(
|
||||||
|
"broadcast-process-shreds-interrupted-stats",
|
||||||
|
("slot", unfinished_slot.slot as i64, i64),
|
||||||
|
("shredding_time", stats.shredding_elapsed, i64),
|
||||||
|
("receive_time", stats.receive_elapsed, i64),
|
||||||
|
(
|
||||||
|
"num_data_shreds",
|
||||||
|
unfinished_slot.next_shred_index as i64,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"get_leader_schedule_time",
|
||||||
|
stats.get_leader_schedule_elapsed,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
("serialize_shreds_time", stats.serialize_elapsed, i64),
|
||||||
|
("gen_data_time", stats.gen_data_elapsed, i64),
|
||||||
|
("gen_coding_time", stats.gen_coding_elapsed, i64),
|
||||||
|
("sign_coding_time", stats.sign_coding_elapsed, i64),
|
||||||
|
("coding_send_time", stats.coding_send_elapsed, i64),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"broadcast-process-shreds-stats",
|
"broadcast-process-shreds-stats",
|
||||||
("slot", unfinished_slot.slot as i64, i64),
|
("slot", unfinished_slot.slot as i64, i64),
|
||||||
@ -409,6 +434,7 @@ impl StandardBroadcastRun {
|
|||||||
("sign_coding_time", stats.sign_coding_elapsed, i64),
|
("sign_coding_time", stats.sign_coding_elapsed, i64),
|
||||||
("coding_send_time", stats.coding_send_elapsed, i64),
|
("coding_send_time", stats.coding_send_elapsed, i64),
|
||||||
);
|
);
|
||||||
|
}
|
||||||
self.process_shreds_stats.reset();
|
self.process_shreds_stats.reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user