Faster dedup v1.9 (#22638)

Faster dedup port of #22607
This commit is contained in:
anatoly yakovenko
2022-01-21 11:21:28 -08:00
committed by GitHub
parent 2ddb5b27c1
commit bf45f5b88e
6 changed files with 775 additions and 823 deletions

1
Cargo.lock generated
View File

@ -5325,6 +5325,7 @@ dependencies = [
name = "solana-perf" name = "solana-perf"
version = "1.9.5" version = "1.9.5"
dependencies = [ dependencies = [
"ahash 0.7.6",
"bincode", "bincode",
"bv", "bv",
"caps", "caps",

View File

@ -7,13 +7,11 @@
use { use {
crate::sigverify, crate::sigverify,
core::time::Duration,
crossbeam_channel::{SendError, Sender as CrossbeamSender}, crossbeam_channel::{SendError, Sender as CrossbeamSender},
itertools::Itertools, itertools::Itertools,
solana_bloom::bloom::{AtomicBloom, Bloom},
solana_measure::measure::Measure, solana_measure::measure::Measure,
solana_perf::packet::PacketBatch, solana_perf::packet::PacketBatch,
solana_perf::sigverify::dedup_packets, solana_perf::sigverify::Deduper,
solana_sdk::timing, solana_sdk::timing,
solana_streamer::streamer::{self, PacketBatchReceiver, StreamerError}, solana_streamer::streamer::{self, PacketBatchReceiver, StreamerError},
std::{ std::{
@ -215,7 +213,7 @@ impl SigVerifyStage {
} }
fn verifier<T: SigVerifier>( fn verifier<T: SigVerifier>(
bloom: &AtomicBloom<&[u8]>, deduper: &Deduper,
recvr: &PacketBatchReceiver, recvr: &PacketBatchReceiver,
sendr: &CrossbeamSender<Vec<PacketBatch>>, sendr: &CrossbeamSender<Vec<PacketBatch>>,
verifier: &T, verifier: &T,
@ -231,15 +229,15 @@ impl SigVerifyStage {
); );
let mut dedup_time = Measure::start("sigverify_dedup_time"); let mut dedup_time = Measure::start("sigverify_dedup_time");
let dedup_fail = dedup_packets(bloom, &mut batches) as usize; let dedup_fail = deduper.dedup_packets(&mut batches) as usize;
dedup_time.stop(); dedup_time.stop();
let valid_packets = num_packets.saturating_sub(dedup_fail); let num_unique = num_packets.saturating_sub(dedup_fail);
let mut discard_time = Measure::start("sigverify_discard_time"); let mut discard_time = Measure::start("sigverify_discard_time");
if valid_packets > MAX_SIGVERIFY_BATCH { if num_unique > MAX_SIGVERIFY_BATCH {
Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH) Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH)
}; };
let excess_fail = valid_packets.saturating_sub(MAX_SIGVERIFY_BATCH); let excess_fail = num_unique.saturating_sub(MAX_SIGVERIFY_BATCH);
discard_time.stop(); discard_time.stop();
let mut verify_batch_time = Measure::start("sigverify_batch_time"); let mut verify_batch_time = Measure::start("sigverify_batch_time");
@ -290,25 +288,16 @@ impl SigVerifyStage {
let verifier = verifier.clone(); let verifier = verifier.clone();
let mut stats = SigVerifierStats::default(); let mut stats = SigVerifierStats::default();
let mut last_print = Instant::now(); let mut last_print = Instant::now();
const MAX_BLOOM_AGE: Duration = Duration::from_millis(2_000); const MAX_DEDUPER_AGE_MS: u64 = 2_000;
const MAX_BLOOM_ITEMS: usize = 1_000_000; const MAX_DEDUPER_ITEMS: u32 = 1_000_000;
const MAX_BLOOM_FAIL: f64 = 0.0001;
const MAX_BLOOM_BITS: usize = 8 << 22;
Builder::new() Builder::new()
.name("solana-verifier".to_string()) .name("solana-verifier".to_string())
.spawn(move || { .spawn(move || {
let mut bloom = let mut deduper = Deduper::new(MAX_DEDUPER_ITEMS, MAX_DEDUPER_AGE_MS);
Bloom::random(MAX_BLOOM_ITEMS, MAX_BLOOM_FAIL, MAX_BLOOM_BITS).into();
let mut bloom_age = Instant::now();
loop { loop {
let now = Instant::now(); deduper.reset();
if now.duration_since(bloom_age) > MAX_BLOOM_AGE {
bloom =
Bloom::random(MAX_BLOOM_ITEMS, MAX_BLOOM_FAIL, MAX_BLOOM_BITS).into();
bloom_age = now;
}
if let Err(e) = Self::verifier( if let Err(e) = Self::verifier(
&bloom, &deduper,
&packet_receiver, &packet_receiver,
&verified_sender, &verified_sender,
&verifier, &verifier,

View File

@ -12,6 +12,7 @@ edition = "2021"
[dependencies] [dependencies]
bincode = "1.3.3" bincode = "1.3.3"
curve25519-dalek = { version = "3" } curve25519-dalek = { version = "3" }
ahash = "0.7.6"
dlopen = "0.1.8" dlopen = "0.1.8"
dlopen_derive = "0.1.4" dlopen_derive = "0.1.4"
lazy_static = "1.4.0" lazy_static = "1.4.0"

View File

@ -5,7 +5,6 @@ extern crate test;
use { use {
rand::prelude::*, rand::prelude::*,
solana_bloom::bloom::{AtomicBloom, Bloom},
solana_perf::{ solana_perf::{
packet::{to_packet_batches, PacketBatch}, packet::{to_packet_batches, PacketBatch},
sigverify, sigverify,
@ -13,6 +12,8 @@ use {
test::Bencher, test::Bencher,
}; };
const NUM: usize = 4096;
fn test_packet_with_size(size: usize, rng: &mut ThreadRng) -> Vec<u8> { fn test_packet_with_size(size: usize, rng: &mut ThreadRng) -> Vec<u8> {
// subtract 8 bytes because the length will get serialized as well // subtract 8 bytes because the length will get serialized as well
(0..size.checked_sub(8).unwrap()) (0..size.checked_sub(8).unwrap())
@ -22,20 +23,14 @@ fn test_packet_with_size(size: usize, rng: &mut ThreadRng) -> Vec<u8> {
fn do_bench_dedup_packets(bencher: &mut Bencher, mut batches: Vec<PacketBatch>) { fn do_bench_dedup_packets(bencher: &mut Bencher, mut batches: Vec<PacketBatch>) {
// verify packets // verify packets
let mut bloom: AtomicBloom<&[u8]> = Bloom::random(1_000_000, 0.0001, 8 << 22).into(); let mut deduper = sigverify::Deduper::new(1_000_000, 2_000);
bencher.iter(|| { bencher.iter(|| {
// bench let _ans = deduper.dedup_packets(&mut batches);
sigverify::dedup_packets(&bloom, &mut batches); deduper.reset();
batches
// reset
bloom.clear_for_tests();
batches.iter_mut().for_each(|batch| {
batch
.packets
.iter_mut() .iter_mut()
.for_each(|p| p.meta.set_discard(false)) .for_each(|b| b.packets.iter_mut().for_each(|p| p.meta.set_discard(false)));
}); });
})
} }
#[bench] #[bench]
@ -46,7 +41,7 @@ fn bench_dedup_same_small_packets(bencher: &mut Bencher) {
let batches = to_packet_batches( let batches = to_packet_batches(
&std::iter::repeat(small_packet) &std::iter::repeat(small_packet)
.take(4096) .take(NUM)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
128, 128,
); );
@ -61,7 +56,7 @@ fn bench_dedup_same_big_packets(bencher: &mut Bencher) {
let big_packet = test_packet_with_size(1024, &mut rng); let big_packet = test_packet_with_size(1024, &mut rng);
let batches = to_packet_batches( let batches = to_packet_batches(
&std::iter::repeat(big_packet).take(4096).collect::<Vec<_>>(), &std::iter::repeat(big_packet).take(NUM).collect::<Vec<_>>(),
128, 128,
); );
@ -74,7 +69,7 @@ fn bench_dedup_diff_small_packets(bencher: &mut Bencher) {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let batches = to_packet_batches( let batches = to_packet_batches(
&(0..4096) &(0..NUM)
.map(|_| test_packet_with_size(128, &mut rng)) .map(|_| test_packet_with_size(128, &mut rng))
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
128, 128,
@ -89,7 +84,7 @@ fn bench_dedup_diff_big_packets(bencher: &mut Bencher) {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let batches = to_packet_batches( let batches = to_packet_batches(
&(0..4096) &(0..NUM)
.map(|_| test_packet_with_size(1024, &mut rng)) .map(|_| test_packet_with_size(1024, &mut rng))
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
128, 128,
@ -97,3 +92,27 @@ fn bench_dedup_diff_big_packets(bencher: &mut Bencher) {
do_bench_dedup_packets(bencher, batches); do_bench_dedup_packets(bencher, batches);
} }
#[bench]
#[ignore]
fn bench_dedup_baseline(bencher: &mut Bencher) {
let mut rng = rand::thread_rng();
let batches = to_packet_batches(
&(0..0)
.map(|_| test_packet_with_size(128, &mut rng))
.collect::<Vec<_>>(),
128,
);
do_bench_dedup_packets(bencher, batches);
}
#[bench]
#[ignore]
fn bench_dedup_reset(bencher: &mut Bencher) {
let mut deduper = sigverify::Deduper::new(1_000_000, 0);
bencher.iter(|| {
deduper.reset();
});
}

View File

@ -4,7 +4,6 @@
//! to the GPU. //! to the GPU.
//! //!
use solana_bloom::bloom::AtomicBloom;
#[cfg(test)] #[cfg(test)]
use solana_sdk::transaction::Transaction; use solana_sdk::transaction::Transaction;
use { use {
@ -14,6 +13,8 @@ use {
perf_libs, perf_libs,
recycler::Recycler, recycler::Recycler,
}, },
ahash::AHasher,
rand::{thread_rng, Rng},
rayon::ThreadPool, rayon::ThreadPool,
solana_metrics::inc_new_counter_debug, solana_metrics::inc_new_counter_debug,
solana_rayon_threadlimit::get_thread_count, solana_rayon_threadlimit::get_thread_count,
@ -24,7 +25,9 @@ use {
short_vec::decode_shortu16_len, short_vec::decode_shortu16_len,
signature::Signature, signature::Signature,
}, },
std::sync::atomic::{AtomicU64, Ordering}, std::hash::Hasher,
std::sync::atomic::{AtomicBool, AtomicU64, Ordering},
std::time::{Duration, Instant},
std::{convert::TryFrom, mem::size_of}, std::{convert::TryFrom, mem::size_of},
}; };
@ -420,34 +423,76 @@ pub fn generate_offsets(
) )
} }
fn dedup_packet(count: &AtomicU64, packet: &mut Packet, bloom: &AtomicBloom<&[u8]>) { pub struct Deduper {
filter: Vec<AtomicU64>,
seed: (u128, u128),
age: Instant,
max_age: Duration,
pub saturated: AtomicBool,
}
impl Deduper {
pub fn new(size: u32, max_age_ms: u64) -> Self {
let mut filter: Vec<AtomicU64> = Vec::with_capacity(size as usize);
filter.resize_with(size as usize, Default::default);
let seed = thread_rng().gen();
Self {
filter,
seed,
age: Instant::now(),
max_age: Duration::from_millis(max_age_ms),
saturated: AtomicBool::new(false),
}
}
pub fn reset(&mut self) {
let now = Instant::now();
//this should reset every 500k unique packets per 1m sized deduper
//false positive rate is 1/1000 at that point
let saturated = self.saturated.load(Ordering::Relaxed);
if saturated || now.duration_since(self.age) > self.max_age {
for i in &self.filter {
i.store(0, Ordering::Relaxed);
}
self.seed = thread_rng().gen();
self.age = now;
self.saturated.store(false, Ordering::Relaxed);
}
}
fn dedup_packet(&self, count: &AtomicU64, packet: &mut Packet) {
// If this packet was already marked as discard, drop it // If this packet was already marked as discard, drop it
if packet.meta.discard() { if packet.meta.discard() {
return; return;
} }
let mut hasher = AHasher::new_with_keys(self.seed.0, self.seed.1);
// If this packet was not newly added, it's a dup and should be discarded hasher.write(&packet.data[0..packet.meta.size]);
if !bloom.add(&&packet.data.as_slice()[0..packet.meta.size]) { let hash = hasher.finish();
let len = self.filter.len();
let pos = (usize::try_from(hash).unwrap()).wrapping_rem(len);
// saturate each position with or
let prev = self.filter[pos].fetch_or(hash, Ordering::Relaxed);
if prev == u64::MAX {
self.saturated.store(true, Ordering::Relaxed);
//reset this value
self.filter[pos].store(hash, Ordering::Relaxed);
}
if hash == prev & hash {
packet.meta.set_discard(true); packet.meta.set_discard(true);
count.fetch_add(1, Ordering::Relaxed); count.fetch_add(1, Ordering::Relaxed);
} }
} }
pub fn dedup_packets(bloom: &AtomicBloom<&[u8]>, batches: &mut [PacketBatch]) -> u64 { pub fn dedup_packets(&self, batches: &mut [PacketBatch]) -> u64 {
use rayon::prelude::*;
let packet_count = count_packets_in_batches(batches);
// machine specific random offset to read the u64 from the packet signature
let count = AtomicU64::new(0); let count = AtomicU64::new(0);
PAR_THREAD_POOL.install(|| { batches.iter_mut().for_each(|batch| {
batches.into_par_iter().for_each(|batch| {
batch batch
.packets .packets
.par_iter_mut() .iter_mut()
.for_each(|p| dedup_packet(&count, p, bloom)) .for_each(|p| self.dedup_packet(&count, p))
})
}); });
inc_new_counter_debug!("dedup_packets_total", packet_count);
count.load(Ordering::Relaxed) count.load(Ordering::Relaxed)
}
} }
pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) { pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) {
@ -460,7 +505,7 @@ pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) {
.packets .packets
.par_iter_mut() .par_iter_mut()
.for_each(|p| verify_packet(p, reject_non_vote)) .for_each(|p| verify_packet(p, reject_non_vote))
}) });
}); });
inc_new_counter_debug!("ed25519_verify_cpu", packet_count); inc_new_counter_debug!("ed25519_verify_cpu", packet_count);
} }
@ -634,7 +679,6 @@ mod tests {
test_tx::{new_test_vote_tx, test_multisig_tx, test_tx}, test_tx::{new_test_vote_tx, test_multisig_tx, test_tx},
}, },
bincode::{deserialize, serialize}, bincode::{deserialize, serialize},
solana_bloom::bloom::{AtomicBloom, Bloom},
solana_sdk::{ solana_sdk::{
instruction::CompiledInstruction, instruction::CompiledInstruction,
message::{Message, MessageHeader}, message::{Message, MessageHeader},
@ -1299,26 +1343,57 @@ mod tests {
fn test_dedup_same() { fn test_dedup_same() {
let tx = test_tx(); let tx = test_tx();
// generate packet vector
let mut batches = let mut batches =
to_packet_batches(&std::iter::repeat(tx).take(1024).collect::<Vec<_>>(), 128); to_packet_batches(&std::iter::repeat(tx).take(1024).collect::<Vec<_>>(), 128);
let packet_count = sigverify::count_packets_in_batches(&batches); let packet_count = sigverify::count_packets_in_batches(&batches);
let bloom: AtomicBloom<&[u8]> = Bloom::random(1_000_000, 0.0001, 8 << 20).into(); let filter = Deduper::new(1_000_000, 0);
let discard = sigverify::dedup_packets(&bloom, &mut batches) as usize; let discard = filter.dedup_packets(&mut batches) as usize;
// because dedup uses a threadpool, there may be up to N threads of txs that go through assert_eq!(packet_count, discard + 1);
let n = get_thread_count();
assert!(packet_count < discard + n * 2);
} }
#[test] #[test]
fn test_dedup_diff() { fn test_dedup_diff() {
// generate packet vector let mut filter = Deduper::new(1_000_000, 0);
let mut batches = to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128); let mut batches = to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128);
let bloom: AtomicBloom<&[u8]> = Bloom::random(1_000_000, 0.0001, 8 << 20).into(); let discard = filter.dedup_packets(&mut batches) as usize;
let discard = sigverify::dedup_packets(&bloom, &mut batches) as usize;
// because dedup uses a threadpool, there may be up to N threads of txs that go through // because dedup uses a threadpool, there may be up to N threads of txs that go through
let n = get_thread_count(); assert_eq!(discard, 0);
assert!(discard < n * 2); filter.reset();
for i in filter.filter {
assert_eq!(i.load(Ordering::Relaxed), 0);
}
}
#[test]
#[ignore]
fn test_dedup_saturated() {
let filter = Deduper::new(1_000_000, 0);
let mut discard = 0;
assert!(!filter.saturated.load(Ordering::Relaxed));
for i in 0..1000 {
let mut batches =
to_packet_batches(&(0..1000).map(|_| test_tx()).collect::<Vec<_>>(), 128);
discard += filter.dedup_packets(&mut batches) as usize;
println!("{} {}", i, discard);
if filter.saturated.load(Ordering::Relaxed) {
break;
}
}
assert!(filter.saturated.load(Ordering::Relaxed));
}
#[test]
fn test_dedup_false_positive() {
let filter = Deduper::new(1_000_000, 0);
let mut discard = 0;
for i in 0..10 {
let mut batches =
to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128);
discard += filter.dedup_packets(&mut batches) as usize;
println!("false positive rate: {}/{}", discard, i * 1024);
}
//allow for 1 false positive even if extremely unlikely
assert!(discard < 2);
} }
} }

1355
programs/bpf/Cargo.lock generated

File diff suppressed because it is too large Load Diff