2018-09-07 16:00:26 -06:00
|
|
|
//! The `window_service` provides a thread for maintaining a window (tail of the ledger).
|
|
|
|
//!
|
2019-02-07 20:52:39 -08:00
|
|
|
use crate::blocktree::Blocktree;
|
2018-12-07 20:16:27 -07:00
|
|
|
use crate::cluster_info::ClusterInfo;
|
|
|
|
use crate::db_window::*;
|
2019-02-07 15:10:54 -08:00
|
|
|
use crate::repair_service::RepairService;
|
2018-12-07 20:16:27 -07:00
|
|
|
use crate::result::{Error, Result};
|
2019-02-07 15:10:54 -08:00
|
|
|
use crate::service::Service;
|
2018-12-07 20:16:27 -07:00
|
|
|
use crate::streamer::{BlobReceiver, BlobSender};
|
2019-02-18 23:26:22 -07:00
|
|
|
use solana_metrics::counter::Counter;
|
2018-11-16 08:45:59 -08:00
|
|
|
use solana_metrics::{influxdb, submit};
|
2018-10-25 11:13:08 -07:00
|
|
|
use solana_sdk::pubkey::Pubkey;
|
2018-11-16 08:45:59 -08:00
|
|
|
use solana_sdk::timing::duration_as_ms;
|
2018-09-07 16:00:26 -06:00
|
|
|
use std::net::UdpSocket;
|
2019-02-13 20:04:20 -08:00
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2018-09-07 16:00:26 -06:00
|
|
|
use std::sync::mpsc::RecvTimeoutError;
|
|
|
|
use std::sync::{Arc, RwLock};
|
2019-02-07 15:10:54 -08:00
|
|
|
use std::thread::{self, Builder, JoinHandle};
|
2018-09-07 16:00:26 -06:00
|
|
|
use std::time::{Duration, Instant};
|
|
|
|
|
|
|
|
/// NOTE(review): not referenced anywhere in this file; presumably an upper
/// bound for a repair retry backoff used by other modules -- confirm at the
/// use sites before changing the value.
pub const MAX_REPAIR_BACKOFF: usize = 128;
|
|
|
|
|
2018-09-25 15:41:29 -07:00
|
|
|
/// Return-type enum associated with the window service.
///
/// NOTE(review): nothing in this file constructs or matches on this type, so
/// its exact role has to be confirmed at the use sites.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum WindowServiceReturnType {
    /// Leader rotation, carrying a `u64` (presumably a height or slot --
    /// TODO confirm against callers).
    LeaderRotation(u64),
}
|
|
|
|
|
2018-12-07 20:01:28 -07:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2018-09-07 16:00:26 -06:00
|
|
|
fn recv_window(
|
2019-02-07 20:52:39 -08:00
|
|
|
blocktree: &Arc<Blocktree>,
|
2018-09-07 16:00:26 -06:00
|
|
|
id: &Pubkey,
|
|
|
|
r: &BlobReceiver,
|
|
|
|
retransmit: &BlobSender,
|
|
|
|
) -> Result<()> {
|
|
|
|
let timer = Duration::from_millis(200);
|
|
|
|
let mut dq = r.recv_timeout(timer)?;
|
2018-11-24 19:32:33 -08:00
|
|
|
|
2018-09-07 16:00:26 -06:00
|
|
|
while let Ok(mut nq) = r.try_recv() {
|
|
|
|
dq.append(&mut nq)
|
|
|
|
}
|
|
|
|
let now = Instant::now();
|
|
|
|
inc_new_counter_info!("streamer-recv_window-recv", dq.len(), 100);
|
2018-10-16 12:54:23 -07:00
|
|
|
|
2018-11-16 08:45:59 -08:00
|
|
|
submit(
|
2018-10-16 12:54:23 -07:00
|
|
|
influxdb::Point::new("recv-window")
|
2018-10-20 06:38:20 -07:00
|
|
|
.add_field("count", influxdb::Value::Integer(dq.len() as i64))
|
2018-10-16 12:54:23 -07:00
|
|
|
.to_owned(),
|
|
|
|
);
|
|
|
|
|
2019-02-27 13:37:08 -08:00
|
|
|
retransmit_blobs(&dq, retransmit, id)?;
|
2018-09-07 16:00:26 -06:00
|
|
|
|
|
|
|
//send a contiguous set of blocks
|
2018-11-24 19:32:33 -08:00
|
|
|
trace!("{} num blobs received: {}", id, dq.len());
|
|
|
|
|
2018-09-07 16:00:26 -06:00
|
|
|
for b in dq {
|
|
|
|
let (pix, meta_size) = {
|
2018-09-26 16:50:12 +00:00
|
|
|
let p = b.read().unwrap();
|
2019-01-30 20:18:28 -08:00
|
|
|
(p.index(), p.meta.size)
|
2018-09-07 16:00:26 -06:00
|
|
|
};
|
|
|
|
|
|
|
|
trace!("{} window pix: {} size: {}", id, pix, meta_size);
|
|
|
|
|
2019-02-27 13:37:08 -08:00
|
|
|
let _ = process_blob(blocktree, &b);
|
2018-09-07 16:00:26 -06:00
|
|
|
}
|
2018-11-24 19:32:33 -08:00
|
|
|
|
|
|
|
trace!(
|
|
|
|
"Elapsed processing time in recv_window(): {}",
|
|
|
|
duration_as_ms(&now.elapsed())
|
|
|
|
);
|
|
|
|
|
2018-09-07 16:00:26 -06:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2019-02-07 15:10:54 -08:00
|
|
|
/// Guard that raises a shared exit flag when it is dropped.
///
/// The window thread keeps one alive for its whole body, so the flag is set
/// whenever that thread ends, including when it unwinds from a panic.
struct Finalizer {
    /// Flag that is set to `true` on drop.
    exit_sender: Arc<AtomicBool>,
}

impl Finalizer {
    /// Wraps `exit_sender`, the flag to raise once this guard goes out of scope.
    fn new(exit_sender: Arc<AtomicBool>) -> Self {
        Finalizer { exit_sender }
    }
}

// Destructor for `Finalizer`.
impl Drop for Finalizer {
    /// Stores `true` into the shared flag.
    fn drop(&mut self) {
        // `Arc` dereferences to the atomic, so the handle does not need to be
        // cloned just to perform the store.
        self.exit_sender.store(true, Ordering::Relaxed);
    }
}
|
2018-11-24 19:32:33 -08:00
|
|
|
|
2019-02-07 15:10:54 -08:00
|
|
|
pub struct WindowService {
|
|
|
|
t_window: JoinHandle<()>,
|
|
|
|
repair_service: RepairService,
|
|
|
|
}
|
2018-09-07 16:00:26 -06:00
|
|
|
|
2019-02-07 15:10:54 -08:00
|
|
|
impl WindowService {
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
|
|
pub fn new(
|
2019-02-07 20:52:39 -08:00
|
|
|
blocktree: Arc<Blocktree>,
|
2019-02-07 15:10:54 -08:00
|
|
|
cluster_info: Arc<RwLock<ClusterInfo>>,
|
|
|
|
r: BlobReceiver,
|
|
|
|
retransmit: BlobSender,
|
|
|
|
repair_socket: Arc<UdpSocket>,
|
|
|
|
exit: Arc<AtomicBool>,
|
|
|
|
) -> WindowService {
|
|
|
|
let exit_ = exit.clone();
|
|
|
|
let repair_service = RepairService::new(
|
2019-02-07 20:52:39 -08:00
|
|
|
blocktree.clone(),
|
2019-02-07 15:10:54 -08:00
|
|
|
exit.clone(),
|
|
|
|
repair_socket,
|
|
|
|
cluster_info.clone(),
|
|
|
|
);
|
|
|
|
let t_window = Builder::new()
|
|
|
|
.name("solana-window".to_string())
|
|
|
|
.spawn(move || {
|
|
|
|
let _exit = Finalizer::new(exit_);
|
|
|
|
let id = cluster_info.read().unwrap().id();
|
|
|
|
trace!("{}: RECV_WINDOW started", id);
|
|
|
|
loop {
|
|
|
|
if exit.load(Ordering::Relaxed) {
|
|
|
|
break;
|
2018-11-24 19:32:33 -08:00
|
|
|
}
|
2019-02-27 13:37:08 -08:00
|
|
|
if let Err(e) = recv_window(&blocktree, &id, &r, &retransmit) {
|
2019-02-07 15:10:54 -08:00
|
|
|
match e {
|
|
|
|
Error::RecvTimeoutError(RecvTimeoutError::Disconnected) => break,
|
|
|
|
Error::RecvTimeoutError(RecvTimeoutError::Timeout) => (),
|
|
|
|
_ => {
|
|
|
|
inc_new_counter_info!("streamer-window-error", 1, 1);
|
|
|
|
error!("window error: {:?}", e);
|
|
|
|
}
|
2018-11-24 19:32:33 -08:00
|
|
|
}
|
|
|
|
}
|
2018-09-07 16:00:26 -06:00
|
|
|
}
|
2019-02-07 15:10:54 -08:00
|
|
|
})
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
WindowService {
|
|
|
|
t_window,
|
|
|
|
repair_service,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Service for WindowService {
|
|
|
|
type JoinReturnType = ();
|
|
|
|
|
|
|
|
fn join(self) -> thread::Result<()> {
|
|
|
|
self.t_window.join()?;
|
|
|
|
self.repair_service.join()
|
|
|
|
}
|
2018-09-07 16:00:26 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod test {
|
2019-02-07 20:52:39 -08:00
|
|
|
use crate::blocktree::get_tmp_ledger_path;
|
|
|
|
use crate::blocktree::Blocktree;
|
2018-12-07 20:16:27 -07:00
|
|
|
use crate::cluster_info::{ClusterInfo, Node};
|
2019-02-04 15:33:43 -08:00
|
|
|
use crate::entry::make_consecutive_blobs;
|
2019-02-07 15:10:54 -08:00
|
|
|
use crate::service::Service;
|
2018-12-07 20:16:27 -07:00
|
|
|
use crate::streamer::{blob_receiver, responder};
|
2019-02-07 15:10:54 -08:00
|
|
|
use crate::window_service::WindowService;
|
2018-11-16 08:04:46 -08:00
|
|
|
use solana_sdk::hash::Hash;
|
2018-11-24 19:32:33 -08:00
|
|
|
use std::fs::remove_dir_all;
|
2018-09-25 15:41:29 -07:00
|
|
|
use std::net::UdpSocket;
|
2018-09-07 16:00:26 -06:00
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2019-02-04 15:33:43 -08:00
|
|
|
use std::sync::mpsc::channel;
|
2018-09-07 16:00:26 -06:00
|
|
|
use std::sync::{Arc, RwLock};
|
|
|
|
use std::time::Duration;
|
2018-09-21 16:01:24 -07:00
|
|
|
|
2018-09-07 16:00:26 -06:00
|
|
|
/// Sends ten consecutive blobs (in reverse order) to a window service and
/// checks that all of them show up on the retransmit channel.
#[test]
pub fn window_send_test() {
    solana_logger::setup();
    // Set up a leader whose id is used to generate blobs, and a validator
    // node whose window service will retransmit the leader's blobs.
    let leader_node = Node::new_localhost();
    let validator_node = Node::new_localhost();
    let exit = Arc::new(AtomicBool::new(false));
    let mut cluster_info_me = ClusterInfo::new(validator_node.info.clone());
    let me_id = leader_node.info.id;
    cluster_info_me.set_leader(me_id);
    let subs = Arc::new(RwLock::new(cluster_info_me));

    let (s_reader, r_reader) = channel();
    let t_receiver =
        blob_receiver(Arc::new(leader_node.sockets.gossip), exit.clone(), s_reader);
    let (s_retransmit, r_retransmit) = channel();
    let blocktree_path = get_tmp_ledger_path!();
    let blocktree = Arc::new(
        Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
    );
    let t_window = WindowService::new(
        blocktree,
        subs,
        r_reader,
        s_retransmit,
        Arc::new(leader_node.sockets.repair),
        exit.clone(),
    );

    // Shared by the sender below and the final count check, so the two
    // cannot drift apart.
    let num_blobs_to_make = 10;
    let t_responder = {
        let (s_responder, r_responder) = channel();
        let blob_sockets: Vec<Arc<UdpSocket>> =
            leader_node.sockets.tvu.into_iter().map(Arc::new).collect();

        let t_responder = responder("window_send_test", blob_sockets[0].clone(), r_responder);
        let gossip_address = &leader_node.info.gossip;
        // Deliver the blobs in reverse index order.
        let msgs = make_consecutive_blobs(
            &me_id,
            num_blobs_to_make,
            0,
            Hash::default(),
            gossip_address,
        )
        .into_iter()
        .rev()
        .collect();
        s_responder.send(msgs).expect("send");
        t_responder
    };

    let max_attempts = 10;
    let mut num_attempts = 0;
    let mut q = Vec::new();
    loop {
        // Give up once the retry budget is exhausted.
        assert!(num_attempts != max_attempts);
        // Keep collecting until the channel stays quiet for 500ms.
        while let Ok(mut nq) = r_retransmit.recv_timeout(Duration::from_millis(500)) {
            q.append(&mut nq);
        }
        if q.len() as u64 == num_blobs_to_make {
            break;
        }
        num_attempts += 1;
    }

    exit.store(true, Ordering::Relaxed);
    t_receiver.join().expect("join");
    t_responder.join().expect("join");
    t_window.join().expect("join");
    Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
    let _ignored = remove_dir_all(&blocktree_path);
}
|
|
|
|
|
|
|
|
/// Sends blobs 9..=0 before the leader is set and blobs 10..=13 afterwards,
/// then checks that more than ten blobs reach the retransmit channel.
#[test]
pub fn window_send_leader_test2() {
    solana_logger::setup();
    // Set up a leader whose id is used to generate blobs, and a validator
    // node whose window service will retransmit the leader's blobs.
    let leader_node = Node::new_localhost();
    let validator_node = Node::new_localhost();
    let exit = Arc::new(AtomicBool::new(false));
    let cluster_info_me = ClusterInfo::new(validator_node.info.clone());
    let me_id = leader_node.info.id;
    let subs = Arc::new(RwLock::new(cluster_info_me));

    let (s_reader, r_reader) = channel();
    let t_receiver =
        blob_receiver(Arc::new(leader_node.sockets.gossip), exit.clone(), s_reader);
    let (s_retransmit, r_retransmit) = channel();
    let blocktree_path = get_tmp_ledger_path!();
    let blocktree = Arc::new(
        Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
    );
    let t_window = WindowService::new(
        blocktree,
        subs.clone(),
        r_reader,
        s_retransmit,
        Arc::new(leader_node.sockets.repair),
        exit.clone(),
    );
    let t_responder = {
        let (s_responder, r_responder) = channel();
        let blob_sockets: Vec<Arc<UdpSocket>> =
            leader_node.sockets.tvu.into_iter().map(Arc::new).collect();
        let t_responder = responder("window_send_test", blob_sockets[0].clone(), r_responder);
        let blobs =
            make_consecutive_blobs(&me_id, 14u64, 0, Hash::default(), &leader_node.info.gossip);

        // First batch: the first ten blobs, highest index first.
        let first_batch: Vec<_> = blobs[..10].iter().rev().cloned().collect();
        s_responder.send(first_batch).expect("send");

        subs.write().unwrap().set_leader(me_id);

        // Second batch: blobs 10 through 13 in ascending order.
        let second_batch: Vec<_> = blobs[10..14].iter().cloned().collect();
        s_responder.send(second_batch).expect("send");
        t_responder
    };

    // Collect retransmitted blobs until the channel stays quiet for 500ms.
    let mut q = Vec::new();
    loop {
        match r_retransmit.recv_timeout(Duration::from_millis(500)) {
            Ok(mut nq) => q.append(&mut nq),
            Err(_) => break,
        }
    }
    assert!(q.len() > 10);

    exit.store(true, Ordering::Relaxed);
    t_receiver.join().expect("join");
    t_responder.join().expect("join");
    t_window.join().expect("join");
    Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
    let _ignored = remove_dir_all(&blocktree_path);
}
|
|
|
|
}
|