Handle already discarded packets in gpu sigverify path (#22680)

This commit is contained in:
sakridge
2022-01-24 05:35:47 -08:00
committed by GitHub
parent 7569f282c6
commit 2e56c59bcb
8 changed files with 193 additions and 65 deletions

View File

@ -82,16 +82,16 @@ impl PacketBatch {
}
/// Splits `xs` into consecutive chunks of at most `chunks` items and builds one
/// `PacketBatch` per chunk, with one packet per item.
///
/// Each packet is filled in by `Packet::populate_packet(packet, None, item)`.
///
/// # Panics
///
/// * if `chunks` is 0 (`slice::chunks` rejects a zero chunk size);
/// * if `Packet::populate_packet` returns an error for any item
///   (message: "serialize request").
pub fn to_packet_batches<T: Serialize>(xs: &[T], chunks: usize) -> Vec<PacketBatch> {
    xs.chunks(chunks)
        .map(|x| {
            // Size the batch up front so every item has a packet slot to fill.
            let mut batch = PacketBatch::with_capacity(x.len());
            batch.packets.resize(x.len(), Packet::default());
            for (i, packet) in x.iter().zip(batch.packets.iter_mut()) {
                Packet::populate_packet(packet, None, i).expect("serialize request");
            }
            batch
        })
        .collect()
}
#[cfg(test)]

View File

@ -385,41 +385,45 @@ pub fn generate_offsets(
let mut msg_sizes: PinnedVec<_> = recycler.allocate("msg_size_offsets");
msg_sizes.set_pinnable();
let mut current_offset: usize = 0;
let mut v_sig_lens = Vec::new();
batches.iter_mut().for_each(|batch| {
let mut sig_lens = Vec::new();
batch.packets.iter_mut().for_each(|packet| {
let packet_offsets = get_packet_offsets(packet, current_offset, reject_non_vote);
let offsets = batches
.iter_mut()
.map(|batch| {
batch
.packets
.iter_mut()
.map(|packet| {
let packet_offsets =
get_packet_offsets(packet, current_offset, reject_non_vote);
sig_lens.push(packet_offsets.sig_len);
trace!("pubkey_offset: {}", packet_offsets.pubkey_start);
trace!("pubkey_offset: {}", packet_offsets.pubkey_start);
let mut pubkey_offset = packet_offsets.pubkey_start;
let mut sig_offset = packet_offsets.sig_start;
let msg_size = current_offset.saturating_add(packet.meta.size) as u32;
for _ in 0..packet_offsets.sig_len {
signature_offsets.push(sig_offset);
sig_offset = sig_offset.saturating_add(size_of::<Signature>() as u32);
let mut pubkey_offset = packet_offsets.pubkey_start;
let mut sig_offset = packet_offsets.sig_start;
let msg_size = current_offset.saturating_add(packet.meta.size) as u32;
for _ in 0..packet_offsets.sig_len {
signature_offsets.push(sig_offset);
sig_offset = sig_offset.saturating_add(size_of::<Signature>() as u32);
pubkey_offsets.push(pubkey_offset);
pubkey_offset = pubkey_offset.saturating_add(size_of::<Pubkey>() as u32);
pubkey_offsets.push(pubkey_offset);
pubkey_offset = pubkey_offset.saturating_add(size_of::<Pubkey>() as u32);
msg_start_offsets.push(packet_offsets.msg_start);
msg_start_offsets.push(packet_offsets.msg_start);
let msg_size = msg_size.saturating_sub(packet_offsets.msg_start);
msg_sizes.push(msg_size);
}
current_offset = current_offset.saturating_add(size_of::<Packet>());
});
v_sig_lens.push(sig_lens);
});
let msg_size = msg_size.saturating_sub(packet_offsets.msg_start);
msg_sizes.push(msg_size);
}
current_offset = current_offset.saturating_add(size_of::<Packet>());
packet_offsets.sig_len
})
.collect()
})
.collect();
(
signature_offsets,
pubkey_offsets,
msg_start_offsets,
msg_sizes,
v_sig_lens,
offsets,
)
}
@ -492,9 +496,8 @@ impl Deduper {
}
}
pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) {
pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool, packet_count: usize) {
use rayon::prelude::*;
let packet_count = count_packets_in_batches(batches);
debug!("CPU ECDSA for {}", packet_count);
PAR_THREAD_POOL.install(|| {
batches.into_par_iter().for_each(|batch| {
@ -574,7 +577,9 @@ pub fn get_checked_scalar(scalar: &[u8; 32]) -> Result<[u8; 32], PacketError> {
/// Applies per-packet verification results to the discard flags in `batches`.
///
/// `r` holds one result vector per batch and one byte per packet; a byte of 0
/// marks the packet as discarded. Batches and packets are paired positionally
/// with `zip`, so anything beyond the shorter side is left untouched.
///
/// A packet that is already discarded stays discarded: its flag is never
/// cleared here, even when the matching result byte is non-zero.
pub fn mark_disabled(batches: &mut [PacketBatch], r: &[Vec<u8>]) {
    for (batch, v) in batches.iter_mut().zip(r) {
        for (pkt, f) in batch.packets.iter_mut().zip(v) {
            // Only touch packets that are still live; an earlier discard wins.
            if !pkt.meta.discard() {
                pkt.meta.set_discard(*f == 0);
            }
        }
    }
}
@ -584,29 +589,35 @@ pub fn ed25519_verify(
recycler: &Recycler<TxOffset>,
recycler_out: &Recycler<PinnedVec<u8>>,
reject_non_vote: bool,
valid_packet_count: usize,
) {
let api = perf_libs::api();
if api.is_none() {
return ed25519_verify_cpu(batches, reject_non_vote);
return ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
}
let api = api.unwrap();
use crate::packet::PACKET_DATA_SIZE;
let packet_count = count_packets_in_batches(batches);
let total_packet_count = count_packets_in_batches(batches);
// micro-benchmarks show GPU time for smallest batch around 15-20ms
// and CPU speed for 64-128 sigverifies around 10-20ms. 64 is a nice
// power-of-two number around that accounting for the fact that the CPU
// may be busy doing other things while being a real validator
// TODO: dynamically adjust this crossover
if packet_count < 64 {
return ed25519_verify_cpu(batches, reject_non_vote);
if valid_packet_count < 64
|| 100usize
.wrapping_mul(valid_packet_count)
.wrapping_div(total_packet_count)
< 90
{
return ed25519_verify_cpu(batches, reject_non_vote, valid_packet_count);
}
let (signature_offsets, pubkey_offsets, msg_start_offsets, msg_sizes, sig_lens) =
generate_offsets(batches, recycler, reject_non_vote);
debug!("CUDA ECDSA for {}", packet_count);
debug!("CUDA ECDSA for {}", valid_packet_count);
debug!("allocating out..");
let mut out = recycler_out.allocate("out_buffer");
out.set_pinnable();
@ -619,8 +630,7 @@ pub fn ed25519_verify(
elems: batch.packets.as_ptr(),
num: batch.packets.len() as u32,
});
let mut v = Vec::new();
v.resize(batch.packets.len(), 0);
let v = vec![0u8; batch.packets.len()];
rvs.push(v);
num_packets = num_packets.saturating_add(batch.packets.len());
}
@ -651,7 +661,7 @@ pub fn ed25519_verify(
trace!("done verify");
copy_return_values(&sig_lens, &out, &mut rvs);
mark_disabled(batches, &rvs);
inc_new_counter_debug!("ed25519_verify_gpu", packet_count);
inc_new_counter_debug!("ed25519_verify_gpu", valid_packet_count);
}
#[cfg(test)]
@ -704,6 +714,7 @@ mod tests {
let mut batches: Vec<PacketBatch> = vec![batch];
mark_disabled(&mut batches, &[vec![0]]);
assert!(batches[0].packets[0].meta.discard());
batches[0].packets[0].meta.set_discard(false);
mark_disabled(&mut batches, &[vec![1]]);
assert!(!batches[0].packets[0].meta.discard());
}
@ -1005,6 +1016,29 @@ mod tests {
);
}
/// Builds `num_batches` packet batches, each filled with clones of `packet`.
///
/// The length of every batch is drawn independently with
/// `thread_rng().gen_range(1, max_packets_per_batch)`.
fn generate_packet_batches_random_size(
    packet: &Packet,
    max_packets_per_batch: usize,
    num_batches: usize,
) -> Vec<PacketBatch> {
    let mut batches = Vec::with_capacity(num_batches);
    for _ in 0..num_batches {
        let mut batch = PacketBatch::default();
        // Start from an empty packet list before filling it.
        batch.packets.resize(0, Packet::default());
        let target_len = thread_rng().gen_range(1, max_packets_per_batch);
        (0..target_len).for_each(|_| batch.packets.push(packet.clone()));
        assert_eq!(batch.packets.len(), target_len);
        batches.push(batch);
    }
    assert_eq!(batches.len(), num_batches);
    batches
}
fn generate_packet_batches(
packet: &Packet,
num_packets_per_batch: usize,
@ -1052,7 +1086,8 @@ mod tests {
/// Test helper: runs `sigverify::ed25519_verify` over `batches` with fresh
/// recyclers and `reject_non_vote = false`, passing the total number of
/// packets in `batches` as the valid-packet count.
fn ed25519_verify(batches: &mut [PacketBatch]) {
    let recycler = Recycler::default();
    let recycler_out = Recycler::default();
    // The packet count is computed from the batches themselves, so every
    // packet is reported as valid.
    let packet_count = sigverify::count_packets_in_batches(batches);
    sigverify::ed25519_verify(batches, &recycler, &recycler_out, false, packet_count);
}
#[test]
@ -1133,9 +1168,8 @@ mod tests {
let recycler = Recycler::default();
let recycler_out = Recycler::default();
for _ in 0..50 {
let n = thread_rng().gen_range(1, 30);
let num_batches = thread_rng().gen_range(2, 30);
let mut batches = generate_packet_batches(&packet, n, num_batches);
let mut batches = generate_packet_batches_random_size(&packet, 128, num_batches);
let num_modifications = thread_rng().gen_range(0, 5);
for _ in 0..num_modifications {
@ -1147,11 +1181,17 @@ mod tests {
batches[batch].packets[packet].data[offset].wrapping_add(add);
}
let batch_to_disable = thread_rng().gen_range(0, batches.len());
for p in batches[batch_to_disable].packets.iter_mut() {
p.meta.set_discard(true);
}
// verify that the results of the GPU verification pipeline (when GPU verification is
// enabled) are equivalent to those of the CPU verification pipeline.
let mut batches_cpu = batches.clone();
sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false);
ed25519_verify_cpu(&mut batches_cpu, false);
let packet_count = sigverify::count_packets_in_batches(&batches);
sigverify::ed25519_verify(&mut batches, &recycler, &recycler_out, false, packet_count);
ed25519_verify_cpu(&mut batches_cpu, false, packet_count);
// check result
batches