Optimize packet dedup (#22571) (#22585)

* Use bloom filter to dedup packets

* dedup first

* Update bloom/src/bloom.rs

Co-authored-by: Trent Nelson <trent.a.b.nelson@gmail.com>

* Update core/src/sigverify_stage.rs

Co-authored-by: Trent Nelson <trent.a.b.nelson@gmail.com>

* Update core/src/sigverify_stage.rs

Co-authored-by: Trent Nelson <trent.a.b.nelson@gmail.com>

* Update core/src/sigverify_stage.rs

Co-authored-by: Trent Nelson <trent.a.b.nelson@gmail.com>

* fixup

* fixup

* fixup

Co-authored-by: Trent Nelson <trent.a.b.nelson@gmail.com>
(cherry picked from commit d343713f61)

# Conflicts:
#	Cargo.lock
#	core/Cargo.toml
#	core/src/banking_stage.rs
#	core/src/sigverify_stage.rs
#	gossip/Cargo.toml
#	perf/Cargo.toml
#	programs/bpf/Cargo.lock
#	runtime/Cargo.toml

Co-authored-by: anatoly yakovenko <anatoly@solana.com>
Author: mergify[bot]
Date: 2022-01-20 02:51:49 +00:00
Committed by: GitHub
Parent: 37e9076db0
Commit: dbf9a32883
20 changed files with 303 additions and 117 deletions
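
The change replaces banking_stage's per-packet LruCache + PacketHasher dedup with a bloom filter that the sigverify stage consults before doing any signature work, so duplicates are dropped earlier and more cheaply. Conceptually the dedup pass behaves like the self-contained Rust sketch below; ToyBloom, test_and_add and dedup are illustrative names only (a single-hash stand-in for solana_bloom::bloom::AtomicBloom), while the real solana_perf::sigverify::dedup_packets shown in the diffs operates on whole PacketBatches through an AtomicBloom.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Toy stand-in for a bloom filter: one hash function over a fixed bit array.
struct ToyBloom {
    bits: Vec<u64>,
    mask: u64,
}

impl ToyBloom {
    fn new(num_bits: usize) -> Self {
        assert!(num_bits.is_power_of_two() && num_bits >= 64);
        Self { bits: vec![0; num_bits / 64], mask: (num_bits - 1) as u64 }
    }

    // Returns true if the payload was (probably) seen before; records it either way.
    fn test_and_add(&mut self, payload: &[u8]) -> bool {
        let mut hasher = DefaultHasher::new();
        payload.hash(&mut hasher);
        let bit = hasher.finish() & self.mask;
        let (word, shift) = ((bit / 64) as usize, bit % 64);
        let seen = self.bits[word] & (1 << shift) != 0;
        self.bits[word] |= 1 << shift;
        seen
    }
}

// Drop duplicate payloads in place and return how many were removed,
// mirroring how dedup_packets reports the number of packets it rejected.
fn dedup(bloom: &mut ToyBloom, packets: &mut Vec<Vec<u8>>) -> usize {
    let before = packets.len();
    packets.retain(|p| !bloom.test_and_add(p));
    before - packets.len()
}

Because a bloom filter only ever accumulates entries, the sigverify thread also rebuilds it every two seconds (MAX_BLOOM_AGE in the sigverify_stage.rs diff) to keep the false-positive rate bounded.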

core/Cargo.toml

@@ -35,6 +35,7 @@ retain_mut = "0.1.5"
serde = "1.0.130"
serde_derive = "1.0.103"
solana-accountsdb-plugin-manager = { path = "../accountsdb-plugin-manager", version = "=1.9.5" }
solana-bloom = { path = "../bloom", version = "=1.9.5" }
solana-client = { path = "../client", version = "=1.9.5" }
solana-entry = { path = "../entry", version = "=1.9.5" }
solana-gossip = { path = "../gossip", version = "=1.9.5" }

core/src/banking_stage.rs

@@ -2,11 +2,10 @@
//! to contruct a software pipeline. The stage uses all available CPU cores and
//! can do its processing in parallel with signature verification on the GPU.
use {
crate::{packet_hasher::PacketHasher, qos_service::QosService},
crate::qos_service::QosService,
crossbeam_channel::{Receiver as CrossbeamReceiver, RecvTimeoutError},
histogram::Histogram,
itertools::Itertools,
lru::LruCache,
retain_mut::RetainMut,
solana_entry::entry::hash_transactions,
solana_gossip::{cluster_info::ClusterInfo, contact_info::ContactInfo},
@@ -55,7 +54,6 @@ use {
env,
mem::size_of,
net::{SocketAddr, UdpSocket},
ops::DerefMut,
sync::{
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc, Mutex, RwLock,
@@ -82,8 +80,6 @@ const TOTAL_BUFFERED_PACKETS: usize = 500_000;
const MAX_NUM_TRANSACTIONS_PER_BATCH: usize = 128;
const DEFAULT_LRU_SIZE: usize = 200_000;
const NUM_VOTE_PROCESSING_THREADS: u32 = 2;
const MIN_THREADS_BANKING: u32 = 1;
@@ -354,10 +350,6 @@ impl BankingStage {
// Single thread to generate entries from many banks.
// This thread talks to poh_service and broadcasts the entries once they have been recorded.
// Once an entry has been recorded, its blockhash is registered with the bank.
let duplicates = Arc::new(Mutex::new((
LruCache::new(DEFAULT_LRU_SIZE),
PacketHasher::default(),
)));
let data_budget = Arc::new(DataBudget::default());
let qos_service = Arc::new(QosService::new(cost_model));
// Many banks that process transactions in parallel.
@@ -382,7 +374,6 @@ impl BankingStage {
let mut recv_start = Instant::now();
let transaction_status_sender = transaction_status_sender.clone();
let gossip_vote_sender = gossip_vote_sender.clone();
let duplicates = duplicates.clone();
let data_budget = data_budget.clone();
let qos_service = qos_service.clone();
Builder::new()
@@ -398,7 +389,6 @@ impl BankingStage {
batch_limit,
transaction_status_sender,
gossip_vote_sender,
&duplicates,
&data_budget,
qos_service,
);
@@ -752,7 +742,6 @@ impl BankingStage {
batch_limit: usize,
transaction_status_sender: Option<TransactionStatusSender>,
gossip_vote_sender: ReplayVoteSender,
duplicates: &Arc<Mutex<(LruCache<u64, ()>, PacketHasher)>>,
data_budget: &DataBudget,
qos_service: Arc<QosService>,
) {
@@ -808,7 +797,6 @@ impl BankingStage {
&gossip_vote_sender,
&mut buffered_packet_batches,
&mut banking_stage_stats,
duplicates,
&recorder,
&qos_service,
) {
@@ -1360,7 +1348,6 @@ impl BankingStage {
gossip_vote_sender: &ReplayVoteSender,
buffered_packet_batches: &mut UnprocessedPacketBatches,
banking_stage_stats: &mut BankingStageStats,
duplicates: &Arc<Mutex<(LruCache<u64, ()>, PacketHasher)>>,
recorder: &TransactionRecorder,
qos_service: &Arc<QosService>,
) -> Result<(), RecvTimeoutError> {
@@ -1398,7 +1385,6 @@ impl BankingStage {
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
duplicates,
banking_stage_stats,
);
continue;
@@ -1434,7 +1420,6 @@ impl BankingStage {
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
duplicates,
banking_stage_stats,
);
@@ -1462,7 +1447,6 @@ impl BankingStage {
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
duplicates,
banking_stage_stats,
);
}
@@ -1519,40 +1503,13 @@ impl BankingStage {
fn push_unprocessed(
unprocessed_packet_batches: &mut UnprocessedPacketBatches,
packet_batch: PacketBatch,
mut packet_indexes: Vec<usize>,
packet_indexes: Vec<usize>,
dropped_packet_batches_count: &mut usize,
dropped_packets_count: &mut usize,
newly_buffered_packets_count: &mut usize,
batch_limit: usize,
duplicates: &Arc<Mutex<(LruCache<u64, ()>, PacketHasher)>>,
banking_stage_stats: &mut BankingStageStats,
) {
{
let original_packets_count = packet_indexes.len();
let mut packet_duplicate_check_time = Measure::start("packet_duplicate_check");
let mut duplicates = duplicates.lock().unwrap();
let (cache, hasher) = duplicates.deref_mut();
packet_indexes.retain(|i| {
let packet_hash = hasher.hash_packet(&packet_batch.packets[*i]);
match cache.get_mut(&packet_hash) {
Some(_hash) => false,
None => {
cache.put(packet_hash, ());
true
}
}
});
packet_duplicate_check_time.stop();
banking_stage_stats
.packet_duplicate_check_elapsed
.fetch_add(packet_duplicate_check_time.as_us(), Ordering::Relaxed);
banking_stage_stats
.dropped_duplicated_packets_count
.fetch_add(
original_packets_count.saturating_sub(packet_indexes.len()),
Ordering::Relaxed,
);
}
if Self::packet_has_more_unprocessed_transactions(&packet_indexes) {
if unprocessed_packet_batches.len() >= batch_limit {
*dropped_packet_batches_count += 1;
@@ -3126,10 +3083,6 @@ mod tests {
let new_packet_batch = PacketBatch::new(vec![Packet::default()]);
let packet_indexes = vec![];
let duplicates = Arc::new(Mutex::new((
LruCache::new(DEFAULT_LRU_SIZE),
PacketHasher::default(),
)));
let mut dropped_packet_batches_count = 0;
let mut dropped_packets_count = 0;
let mut newly_buffered_packets_count = 0;
@@ -3144,7 +3097,6 @@ mod tests {
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
&duplicates,
&mut banking_stage_stats,
);
assert_eq!(unprocessed_packets.len(), 1);
@@ -3163,7 +3115,6 @@ mod tests {
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
&duplicates,
&mut banking_stage_stats,
);
assert_eq!(unprocessed_packets.len(), 2);
@@ -3179,27 +3130,6 @@ mod tests {
)
.unwrap()]);
assert_eq!(unprocessed_packets.len(), batch_limit);
BankingStage::push_unprocessed(
&mut unprocessed_packets,
new_packet_batch.clone(),
packet_indexes.clone(),
&mut dropped_packet_batches_count,
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
batch_limit,
&duplicates,
&mut banking_stage_stats,
);
assert_eq!(unprocessed_packets.len(), 2);
assert_eq!(
unprocessed_packets[1].0.packets[0],
new_packet_batch.packets[0]
);
assert_eq!(dropped_packet_batches_count, 1);
assert_eq!(dropped_packets_count, 2);
assert_eq!(newly_buffered_packets_count, 2);
// Check duplicates are dropped (newly buffered shouldn't change)
BankingStage::push_unprocessed(
&mut unprocessed_packets,
new_packet_batch.clone(),
@@ -3207,8 +3137,7 @@ mod tests {
&mut dropped_packet_batches_count,
&mut dropped_packets_count,
&mut newly_buffered_packets_count,
3,
&duplicates,
batch_limit,
&mut banking_stage_stats,
);
assert_eq!(unprocessed_packets.len(), 2);
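
With dedup handled upstream in sigverify, the -1519,40 +1503,13 hunk above removes the duplicates argument and the entire in-line LRU check from push_unprocessed. Because the diff interleaves removed and kept lines, this is how the signature reads after the change (reconstructed from that hunk, body elided):

fn push_unprocessed(
    unprocessed_packet_batches: &mut UnprocessedPacketBatches,
    packet_batch: PacketBatch,
    packet_indexes: Vec<usize>,
    dropped_packet_batches_count: &mut usize,
    dropped_packets_count: &mut usize,
    newly_buffered_packets_count: &mut usize,
    batch_limit: usize,
    banking_stage_stats: &mut BankingStageStats,
)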

core/src/sigverify_stage.rs

@@ -7,10 +7,13 @@
use {
crate::sigverify,
core::time::Duration,
crossbeam_channel::{SendError, Sender as CrossbeamSender},
itertools::Itertools,
solana_bloom::bloom::{AtomicBloom, Bloom},
solana_measure::measure::Measure,
solana_perf::packet::PacketBatch,
solana_perf::sigverify::dedup_packets,
solana_sdk::timing,
solana_streamer::streamer::{self, PacketBatchReceiver, StreamerError},
std::{
@@ -50,10 +53,13 @@ struct SigVerifierStats {
recv_batches_us_hist: histogram::Histogram, // time to call recv_batch
verify_batches_pp_us_hist: histogram::Histogram, // per-packet time to call verify_batch
discard_packets_pp_us_hist: histogram::Histogram, // per-packet time to call verify_batch
dedup_packets_pp_us_hist: histogram::Histogram, // per-packet time to call verify_batch
batches_hist: histogram::Histogram, // number of packet batches per verify call
packets_hist: histogram::Histogram, // number of packets per verify call
total_batches: usize,
total_packets: usize,
total_dedup: usize,
total_excess_fail: usize,
}
impl SigVerifierStats {
@@ -122,6 +128,26 @@ impl SigVerifierStats {
self.discard_packets_pp_us_hist.mean().unwrap_or(0),
i64
),
(
"dedup_packets_pp_us_90pct",
self.dedup_packets_pp_us_hist.percentile(90.0).unwrap_or(0),
i64
),
(
"dedup_packets_pp_us_min",
self.dedup_packets_pp_us_hist.minimum().unwrap_or(0),
i64
),
(
"dedup_packets_pp_us_max",
self.dedup_packets_pp_us_hist.maximum().unwrap_or(0),
i64
),
(
"dedup_packets_pp_us_mean",
self.dedup_packets_pp_us_hist.mean().unwrap_or(0),
i64
),
(
"batches_90pct",
self.batches_hist.percentile(90.0).unwrap_or(0),
@@ -140,6 +166,8 @@ impl SigVerifierStats {
("packets_mean", self.packets_hist.mean().unwrap_or(0), i64),
("total_batches", self.total_batches, i64),
("total_packets", self.total_packets, i64),
("total_dedup", self.total_dedup, i64),
("total_excess_fail", self.total_excess_fail, i64),
);
}
}
@@ -187,6 +215,7 @@ impl SigVerifyStage {
}
fn verifier<T: SigVerifier>(
bloom: &AtomicBloom<&[u8]>,
recvr: &PacketBatchReceiver,
sendr: &CrossbeamSender<Vec<PacketBatch>>,
verifier: &T,
@@ -200,13 +229,22 @@
timing::timestamp(),
num_packets,
);
let mut dedup_time = Measure::start("sigverify_dedup_time");
let dedup_fail = dedup_packets(bloom, &mut batches) as usize;
dedup_time.stop();
let valid_packets = num_packets.saturating_sub(dedup_fail);
let mut discard_time = Measure::start("sigverify_discard_time");
if num_packets > MAX_SIGVERIFY_BATCH {
Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH);
}
if valid_packets > MAX_SIGVERIFY_BATCH {
Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH)
};
let excess_fail = valid_packets.saturating_sub(MAX_SIGVERIFY_BATCH);
discard_time.stop();
let mut verify_batch_time = Measure::start("sigverify_batch_time");
sendr.send(verifier.verify_batches(batches))?;
let batches = verifier.verify_batches(batches);
sendr.send(batches)?;
verify_batch_time.stop();
debug!(
@@ -230,10 +268,16 @@
.discard_packets_pp_us_hist
.increment(discard_time.as_us() / (num_packets as u64))
.unwrap();
stats
.dedup_packets_pp_us_hist
.increment(dedup_time.as_us() / (num_packets as u64))
.unwrap();
stats.batches_hist.increment(batches_len as u64).unwrap();
stats.packets_hist.increment(num_packets as u64).unwrap();
stats.total_batches += batches_len;
stats.total_packets += num_packets;
stats.total_dedup += dedup_fail;
stats.total_excess_fail += excess_fail;
Ok(())
}
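
Since the two hunks above mix removed and added lines, this is the per-batch flow of verifier() after the change, reconstructed from the added lines: dedup runs first, the excess-packet discard is then computed against the post-dedup count, and only the remainder reaches verify_batches.

let mut dedup_time = Measure::start("sigverify_dedup_time");
let dedup_fail = dedup_packets(bloom, &mut batches) as usize;
dedup_time.stop();
let valid_packets = num_packets.saturating_sub(dedup_fail);

let mut discard_time = Measure::start("sigverify_discard_time");
if valid_packets > MAX_SIGVERIFY_BATCH {
    Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH)
};
let excess_fail = valid_packets.saturating_sub(MAX_SIGVERIFY_BATCH);
discard_time.stop();

let mut verify_batch_time = Measure::start("sigverify_batch_time");
let batches = verifier.verify_batches(batches);
sendr.send(batches)?;
verify_batch_time.stop();

dedup_fail and excess_fail then feed the new total_dedup and total_excess_fail counters (and the dedup_packets_pp_us histogram) in SigVerifierStats.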
@@ -246,29 +290,48 @@
let verifier = verifier.clone();
let mut stats = SigVerifierStats::default();
let mut last_print = Instant::now();
const MAX_BLOOM_AGE: Duration = Duration::from_millis(2_000);
const MAX_BLOOM_ITEMS: usize = 1_000_000;
const MAX_BLOOM_FAIL: f64 = 0.0001;
const MAX_BLOOM_BITS: usize = 8 << 22;
Builder::new()
.name("solana-verifier".to_string())
.spawn(move || loop {
if let Err(e) =
Self::verifier(&packet_receiver, &verified_sender, &verifier, &mut stats)
{
match e {
SigVerifyServiceError::Streamer(StreamerError::RecvTimeout(
RecvTimeoutError::Disconnected,
)) => break,
SigVerifyServiceError::Streamer(StreamerError::RecvTimeout(
RecvTimeoutError::Timeout,
)) => (),
SigVerifyServiceError::Send(_) => {
break;
}
_ => error!("{:?}", e),
.spawn(move || {
let mut bloom =
Bloom::random(MAX_BLOOM_ITEMS, MAX_BLOOM_FAIL, MAX_BLOOM_BITS).into();
let mut bloom_age = Instant::now();
loop {
let now = Instant::now();
if now.duration_since(bloom_age) > MAX_BLOOM_AGE {
bloom =
Bloom::random(MAX_BLOOM_ITEMS, MAX_BLOOM_FAIL, MAX_BLOOM_BITS).into();
bloom_age = now;
}
if let Err(e) = Self::verifier(
&bloom,
&packet_receiver,
&verified_sender,
&verifier,
&mut stats,
) {
match e {
SigVerifyServiceError::Streamer(StreamerError::RecvTimeout(
RecvTimeoutError::Disconnected,
)) => break,
SigVerifyServiceError::Streamer(StreamerError::RecvTimeout(
RecvTimeoutError::Timeout,
)) => (),
SigVerifyServiceError::Send(_) => {
break;
}
_ => error!("{:?}", e),
}
}
if last_print.elapsed().as_secs() > 2 {
stats.report();
stats = SigVerifierStats::default();
last_print = Instant::now();
}
}
if last_print.elapsed().as_secs() > 2 {
stats.report();
stats = SigVerifierStats::default();
last_print = Instant::now();
}
})
.unwrap()
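
The bloom is rebuilt from scratch every MAX_BLOOM_AGE (two seconds) so it cannot silently fill up and push the false-positive rate past the target; the cost is that a packet replayed after a reset is no longer recognized as a duplicate. Assuming Bloom::random follows the standard bloom-filter sizing formulas (its internals are not part of this diff), the constants above work out roughly as in this sketch:

// Standard bloom sizing: m = -n * ln(p) / ln(2)^2 bits, k = (m / n) * ln(2) hash keys.
// Assumption: solana_bloom's Bloom::random(num_items, false_rate, max_bits)
// computes something equivalent, capped at max_bits.
fn bloom_params(num_items: f64, false_rate: f64, max_bits: usize) -> (usize, usize) {
    let ln2 = std::f64::consts::LN_2;
    let m = ((-num_items * false_rate.ln()) / (ln2 * ln2)).ceil() as usize;
    let m = m.min(max_bits);
    let num_keys = ((m as f64 / num_items) * ln2).round().max(1.0) as usize;
    (m, num_keys)
}

fn main() {
    // MAX_BLOOM_ITEMS = 1_000_000, MAX_BLOOM_FAIL = 0.0001, MAX_BLOOM_BITS = 8 << 22
    let (bits, keys) = bloom_params(1_000_000.0, 0.0001, 8 << 22);
    // Roughly 19.2 million bits (about 2.4 MB) and 13 hash keys,
    // comfortably under the 8 << 22 = 33_554_432-bit cap.
    println!("bits = {}, keys = {}", bits, keys);
}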