master: Add nonce to shreds repairs, add shred data size to header (#10109)

* Add nonce to shreds/repairs

* Add data shred size to header

Co-authored-by: Carl <carl@solana.com>
This commit is contained in:
carllin
2020-05-19 12:38:18 -07:00
committed by GitHub
parent 427c78d891
commit 97f2bcff69
15 changed files with 598 additions and 256 deletions

View File

@ -4,8 +4,10 @@
use crate::{
cluster_info::ClusterInfo,
cluster_slots::ClusterSlots,
repair_response,
repair_service::{RepairInfo, RepairService},
result::{Error, Result},
serve_repair::DEFAULT_NONCE,
};
use crossbeam_channel::{
unbounded, Receiver as CrossbeamReceiver, RecvTimeoutError, Sender as CrossbeamSender,
@ -13,24 +15,25 @@ use crossbeam_channel::{
use rayon::iter::IntoParallelRefMutIterator;
use rayon::iter::ParallelIterator;
use rayon::ThreadPool;
use solana_ledger::bank_forks::BankForks;
use solana_ledger::blockstore::{
self, Blockstore, BlockstoreInsertionMetrics, MAX_DATA_SHREDS_PER_SLOT,
use solana_ledger::{
bank_forks::BankForks,
blockstore::{self, Blockstore, BlockstoreInsertionMetrics, MAX_DATA_SHREDS_PER_SLOT},
leader_schedule_cache::LeaderScheduleCache,
shred::{Nonce, Shred},
};
use solana_ledger::leader_schedule_cache::LeaderScheduleCache;
use solana_ledger::shred::Shred;
use solana_metrics::{inc_new_counter_debug, inc_new_counter_error};
use solana_perf::packet::Packets;
use solana_rayon_threadlimit::get_thread_count;
use solana_runtime::bank::Bank;
use solana_sdk::pubkey::Pubkey;
use solana_sdk::timing::duration_as_ms;
use solana_sdk::{packet::PACKET_DATA_SIZE, pubkey::Pubkey, timing::duration_as_ms};
use solana_streamer::streamer::PacketSender;
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::{self, Builder, JoinHandle};
use std::time::{Duration, Instant};
use std::{
net::{SocketAddr, UdpSocket},
sync::atomic::{AtomicBool, Ordering},
sync::{Arc, RwLock},
thread::{self, Builder, JoinHandle},
time::{Duration, Instant},
};
fn verify_shred_slot(shred: &Shred, root: u64) -> bool {
if shred.is_data() {
@ -107,8 +110,15 @@ fn run_check_duplicate(
Ok(())
}
fn verify_repair(_shred: &Shred, repair_info: &Option<RepairMeta>) -> bool {
repair_info
.as_ref()
.map(|repair_info| repair_info.nonce == DEFAULT_NONCE)
.unwrap_or(true)
}
fn run_insert<F>(
shred_receiver: &CrossbeamReceiver<Vec<Shred>>,
shred_receiver: &CrossbeamReceiver<(Vec<Shred>, Vec<Option<RepairMeta>>)>,
blockstore: &Arc<Blockstore>,
leader_schedule_cache: &Arc<LeaderScheduleCache>,
handle_duplicate: F,
@ -118,12 +128,16 @@ where
F: Fn(Shred) -> (),
{
let timer = Duration::from_millis(200);
let mut shreds = shred_receiver.recv_timeout(timer)?;
while let Ok(mut more_shreds) = shred_receiver.try_recv() {
shreds.append(&mut more_shreds)
let (mut shreds, mut repair_infos) = shred_receiver.recv_timeout(timer)?;
while let Ok((more_shreds, more_repair_infos)) = shred_receiver.try_recv() {
shreds.extend(more_shreds);
repair_infos.extend(more_repair_infos);
}
assert_eq!(shreds.len(), repair_infos.len());
let mut i = 0;
shreds.retain(|shred| (verify_repair(&shred, &repair_infos[i]), i += 1).0);
blockstore.insert_shreds_handle_duplicate(
shreds,
Some(leader_schedule_cache),
@ -136,7 +150,7 @@ where
fn recv_window<F>(
blockstore: &Arc<Blockstore>,
insert_shred_sender: &CrossbeamSender<Vec<Shred>>,
insert_shred_sender: &CrossbeamSender<(Vec<Shred>, Vec<Option<RepairMeta>>)>,
my_pubkey: &Pubkey,
verified_receiver: &CrossbeamReceiver<Vec<Packets>>,
retransmit: &PacketSender,
@ -160,7 +174,7 @@ where
inc_new_counter_debug!("streamer-recv_window-recv", total_packets);
let last_root = blockstore.last_root();
let shreds: Vec<_> = thread_pool.install(|| {
let (shreds, repair_infos): (Vec<_>, Vec<_>) = thread_pool.install(|| {
packets
.par_iter_mut()
.flat_map(|packets| {
@ -169,34 +183,58 @@ where
.iter_mut()
.filter_map(|packet| {
if packet.meta.discard {
inc_new_counter_debug!("streamer-recv_window-invalid_signature", 1);
inc_new_counter_debug!(
"streamer-recv_window-invalid_or_unnecessary_packet",
1
);
None
} else if let Ok(shred) =
Shred::new_from_serialized_shred(packet.data.to_vec())
{
if shred_filter(&shred, last_root) {
// Mark slot as dead if the current shred is on the boundary
// of max shreds per slot. However, let the current shred
// get retransmitted. It'll allow peer nodes to see this shred
// and trigger them to mark the slot as dead.
if shred.index() >= (MAX_DATA_SHREDS_PER_SLOT - 1) as u32 {
let _ = blockstore.set_dead_slot(shred.slot());
} else {
// shred fetch stage should be sending packets
// with sufficiently large buffers. Needed to ensure
// call to `new_from_serialized_shred` is safe.
assert_eq!(packet.data.len(), PACKET_DATA_SIZE);
let serialized_shred = packet.data.to_vec();
if let Ok(shred) = Shred::new_from_serialized_shred(serialized_shred) {
let repair_info = {
if packet.meta.repair {
if let Some(nonce) = repair_response::nonce(&packet.data) {
let repair_info = RepairMeta {
_from_addr: packet.meta.addr(),
nonce,
};
Some(repair_info)
} else {
// If can't parse the nonce, dump the packet
return None;
}
} else {
None
}
};
if shred_filter(&shred, last_root) {
// Mark slot as dead if the current shred is on the boundary
// of max shreds per slot. However, let the current shred
// get retransmitted. It'll allow peer nodes to see this shred
// and trigger them to mark the slot as dead.
if shred.index() >= (MAX_DATA_SHREDS_PER_SLOT - 1) as u32 {
let _ = blockstore.set_dead_slot(shred.slot());
}
packet.meta.slot = shred.slot();
packet.meta.seed = shred.seed();
Some((shred, repair_info))
} else {
packet.meta.discard = true;
None
}
packet.meta.slot = shred.slot();
packet.meta.seed = shred.seed();
Some(shred)
} else {
packet.meta.discard = true;
None
}
} else {
packet.meta.discard = true;
None
}
})
.collect::<Vec<_>>()
})
.collect()
.unzip()
});
trace!("{:?} shreds from packets", shreds.len());
@ -210,7 +248,7 @@ where
}
}
insert_shred_sender.send(shreds)?;
insert_shred_sender.send((shreds, repair_infos))?;
trace!(
"Elapsed processing time in recv_window(): {}",
@ -220,6 +258,11 @@ where
Ok(())
}
struct RepairMeta {
_from_addr: SocketAddr,
nonce: Nonce,
}
// Implement a destructor for the window_service thread to signal it exited
// even on panics
struct Finalizer {
@ -340,7 +383,7 @@ impl WindowService {
exit: &Arc<AtomicBool>,
blockstore: &Arc<Blockstore>,
leader_schedule_cache: &Arc<LeaderScheduleCache>,
insert_receiver: CrossbeamReceiver<Vec<Shred>>,
insert_receiver: CrossbeamReceiver<(Vec<Shred>, Vec<Option<RepairMeta>>)>,
duplicate_sender: CrossbeamSender<Shred>,
) -> JoinHandle<()> {
let exit = exit.clone();
@ -390,7 +433,7 @@ impl WindowService {
id: Pubkey,
exit: &Arc<AtomicBool>,
blockstore: &Arc<Blockstore>,
insert_sender: CrossbeamSender<Vec<Shred>>,
insert_sender: CrossbeamSender<(Vec<Shred>, Vec<Option<RepairMeta>>)>,
verified_receiver: CrossbeamReceiver<Vec<Packets>>,
shred_filter: F,
bank_forks: Option<Arc<RwLock<BankForks>>>,
@ -488,13 +531,12 @@ impl WindowService {
#[cfg(test)]
mod test {
use super::*;
use solana_ledger::shred::DataShredHeader;
use solana_ledger::{
blockstore::{make_many_slot_entries, Blockstore},
entry::{create_ticks, Entry},
genesis_utils::create_genesis_config_with_leader,
get_tmp_ledger_path,
shred::Shredder,
shred::{DataShredHeader, Shredder},
};
use solana_sdk::{
clock::Slot,