Changed transition to restart Rpu rather than modify bank to prevent lock contention
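The core of the change: rather than handing the request thread a lock-wrapped processor and swapping its bank in place (which contends with the thread serving requests), the Fullnode now owns its Rpu as an Option, closes the old instance at a role transition, and builds a fresh one against the new bank. Below is a minimal standalone sketch of that restart pattern; the Bank, Rpu, and Fullnode types here are illustrative stand-ins, not the actual Solana definitions.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread::{self, JoinHandle};
use std::time::Duration;

// Illustrative stand-in, not the real Bank.
struct Bank {
    id: u64,
}

struct Rpu {
    exit: Arc<AtomicBool>,
    thread_hdl: JoinHandle<()>,
}

impl Rpu {
    // The service owns its own exit flag, so shutting it down never takes a
    // lock that the request loop might also be holding.
    fn new(bank: Arc<Bank>) -> Self {
        let exit = Arc::new(AtomicBool::new(false));
        let thread_exit = exit.clone();
        let thread_hdl = thread::spawn(move || {
            while !thread_exit.load(Ordering::Relaxed) {
                // Serve requests against `bank` (read-only here).
                let _ = bank.id;
                thread::sleep(Duration::from_millis(10));
            }
        });
        Rpu { exit, thread_hdl }
    }

    // close() = signal the loop to stop, then join its thread.
    fn close(self) -> thread::Result<()> {
        self.exit.store(true, Ordering::Relaxed);
        self.thread_hdl.join()
    }
}

struct Fullnode {
    bank: Arc<Bank>,
    rpu: Option<Rpu>,
}

impl Fullnode {
    // On a role transition, stop the old service and start a new one against
    // the new bank instead of mutating shared state behind a RwLock.
    fn restart_rpu(&mut self, new_bank: Arc<Bank>) -> thread::Result<()> {
        self.bank = new_bank;
        if let Some(old_rpu) = self.rpu.take() {
            old_rpu.close()?;
        }
        self.rpu = Some(Rpu::new(self.bank.clone()));
        Ok(())
    }
}

fn main() -> thread::Result<()> {
    let genesis_bank = Arc::new(Bank { id: 0 });
    let mut node = Fullnode {
        bank: genesis_bank.clone(),
        rpu: Some(Rpu::new(genesis_bank)),
    };
    node.restart_rpu(Arc::new(Bank { id: 1 }))?;
    if let Some(rpu) = node.rpu.take() {
        rpu.close()?;
    }
    Ok(())
}

Because each Rpu instance owns its own exit flag and thread handle, shutting it down is an atomic store plus a join, and no request-serving thread ever blocks on a writer replacing the bank behind a RwLock.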
@@ -76,7 +76,7 @@ pub struct Fullnode {
     pub node_role: Option<NodeRole>,
     keypair: Arc<Keypair>,
     exit: Arc<AtomicBool>,
-    rpu: Rpu,
+    rpu: Option<Rpu>,
     rpc_service: JsonRpcService,
     ncp: Ncp,
     bank: Arc<Bank>,
@@ -89,6 +89,8 @@ pub struct Fullnode {
     retransmit_socket: UdpSocket,
     transaction_sockets: Vec<UdpSocket>,
     broadcast_socket: UdpSocket,
+    requests_socket: UdpSocket,
+    respond_socket: UdpSocket,
     blob_recycler: BlobRecycler,
 }
 
@@ -236,13 +238,18 @@ impl Fullnode {
         let mut blob_recycler = BlobRecycler::default();
         blob_recycler.set_name("fullnode::Blob");
 
-        let rpu = Rpu::new(
+        let rpu = Some(Rpu::new(
             &bank,
-            node.sockets.requests,
-            node.sockets.respond,
+            node.sockets
+                .requests
+                .try_clone()
+                .expect("Failed to clone requests socket"),
+            node.sockets
+                .respond
+                .try_clone()
+                .expect("Failed to clone respond socket"),
             &blob_recycler,
-            exit.clone(),
-        );
+        ));
 
         // TODO: this code assumes this node is the leader
         let mut drone_addr = node.info.contact_info.tpu;
@@ -364,10 +371,12 @@ impl Fullnode {
             retransmit_socket: node.sockets.retransmit,
             transaction_sockets: node.sockets.transaction,
             broadcast_socket: node.sockets.broadcast,
+            requests_socket: node.sockets.requests,
+            respond_socket: node.sockets.respond,
         }
     }
 
-    fn leader_to_validator(&mut self) {
+    fn leader_to_validator(&mut self) -> Result<()> {
         // TODO: We can avoid building the bank again once RecordStage is
         // integrated with BankingStage
         let (bank, entry_height, _) = Self::new_bank_from_ledger(&self.ledger_path);
@@ -384,9 +393,23 @@ impl Fullnode {
             }
         }
 
-        // Tell the RPU to serve requests out of the new bank we've created
+        // Make a new RPU to serve requests out of the new bank we've created
         // instead of the old one
-        self.rpu.set_new_bank(self.bank.clone());
+        if !self.rpu.is_none() {
+            let old_rpu = self.rpu.take().unwrap();
+            old_rpu.close()?;
+            self.rpu = Some(Rpu::new(
+                &self.bank,
+                self.requests_socket
+                    .try_clone()
+                    .expect("Failed to clone requests socket"),
+                self.respond_socket
+                    .try_clone()
+                    .expect("Failed to clone respond socket"),
+                &self.blob_recycler,
+            ));
+        }
 
         let tvu = Tvu::new(
             self.keypair.clone(),
             &self.bank,
@@ -409,6 +432,7 @@ impl Fullnode {
         );
         let validator_state = ValidatorServices::new(tvu);
         self.node_role = Some(NodeRole::Validator(validator_state));
+        Ok(())
     }
 
     pub fn handle_role_transition(&mut self) -> Result<Option<FullnodeReturnType>> {
@@ -416,7 +440,7 @@ impl Fullnode {
         match node_role {
             Some(NodeRole::Leader(leader_services)) => match leader_services.join()? {
                 Some(TpuReturnType::LeaderRotation) => {
-                    self.leader_to_validator();
+                    self.leader_to_validator()?;
                     Ok(Some(FullnodeReturnType::LeaderRotation))
                 }
                 _ => Ok(None),
@@ -431,7 +455,9 @@ impl Fullnode {
     //used for notifying many nodes in parallel to exit
     pub fn exit(&self) {
         self.exit.store(true, Ordering::Relaxed);
+        if let Some(ref rpu) = self.rpu {
+            rpu.exit();
+        }
         match self.node_role {
             Some(NodeRole::Leader(ref leader_services)) => leader_services.exit(),
             Some(NodeRole::Validator(ref validator_services)) => validator_services.exit(),
@@ -471,7 +497,9 @@ impl Service for Fullnode {
     type JoinReturnType = Option<FullnodeReturnType>;
 
     fn join(self) -> Result<Option<FullnodeReturnType>> {
-        self.rpu.join()?;
+        if let Some(rpu) = self.rpu {
+            rpu.join()?;
+        }
         self.ncp.join()?;
         self.rpc_service.join()?;
 
@@ -9,7 +9,7 @@ use result::{Error, Result};
 use service::Service;
 use std::net::SocketAddr;
 use std::sync::mpsc::{channel, Receiver, RecvTimeoutError};
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::thread::{self, Builder, JoinHandle};
 use std::time::Instant;
 use streamer::{self, BlobReceiver, BlobSender};
@@ -17,7 +17,7 @@ use timing;
 
 pub struct RequestStage {
     thread_hdl: JoinHandle<()>,
-    pub request_processor: Arc<RwLock<RequestProcessor>>,
+    pub request_processor: Arc<RequestProcessor>,
 }
 
 impl RequestStage {
@@ -78,18 +78,19 @@ impl RequestStage {
         Ok(())
     }
     pub fn new(
-        request_processor: Arc<RwLock<RequestProcessor>>,
+        request_processor: RequestProcessor,
         packet_receiver: Receiver<SharedPackets>,
         packet_recycler: PacketRecycler,
         blob_recycler: BlobRecycler,
     ) -> (Self, BlobReceiver) {
+        let request_processor = Arc::new(request_processor);
         let request_processor_ = request_processor.clone();
         let (blob_sender, blob_receiver) = channel();
         let thread_hdl = Builder::new()
             .name("solana-request-stage".to_string())
             .spawn(move || loop {
                 if let Err(e) = Self::process_request_packets(
-                    &request_processor_.read().unwrap(),
+                    &request_processor_,
                     &packet_receiver,
                     &blob_sender,
                     &packet_recycler,
src/rpu.rs
@@ -28,18 +28,17 @@ use packet::{BlobRecycler, PacketRecycler};
 use request_processor::RequestProcessor;
 use request_stage::RequestStage;
 use service::Service;
-use std::mem;
 use std::net::UdpSocket;
-use std::sync::atomic::AtomicBool;
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::mpsc::channel;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::thread::{self, JoinHandle};
 use streamer;
 
 pub struct Rpu {
     request_stage: RequestStage,
     thread_hdls: Vec<JoinHandle<()>>,
-    request_processor: Arc<RwLock<RequestProcessor>>,
+    exit: Arc<AtomicBool>,
 }
 
 impl Rpu {
@@ -48,21 +47,21 @@ impl Rpu {
         requests_socket: UdpSocket,
         respond_socket: UdpSocket,
         blob_recycler: &BlobRecycler,
-        exit: Arc<AtomicBool>,
     ) -> Self {
+        let exit = Arc::new(AtomicBool::new(false));
         let mut packet_recycler = PacketRecycler::default();
         packet_recycler.set_name("rpu::Packet");
         let (packet_sender, packet_receiver) = channel();
         let t_receiver = streamer::receiver(
             Arc::new(requests_socket),
-            exit,
+            exit.clone(),
             packet_recycler.clone(),
             packet_sender,
         );
 
-        let request_processor = Arc::new(RwLock::new(RequestProcessor::new(bank.clone())));
+        let request_processor = RequestProcessor::new(bank.clone());
         let (request_stage, blob_receiver) = RequestStage::new(
-            request_processor.clone(),
+            request_processor,
             packet_receiver,
             packet_recycler.clone(),
             blob_recycler.clone(),
@@ -80,13 +79,17 @@ impl Rpu {
         Rpu {
             thread_hdls,
             request_stage,
-            request_processor,
+            exit,
         }
     }
 
-    pub fn set_new_bank(&self, new_bank: Arc<Bank>) {
-        let mut w_request_procesor = self.request_processor.write().unwrap();
-        mem::replace(&mut *w_request_procesor, RequestProcessor::new(new_bank));
+    pub fn exit(&self) {
+        self.exit.store(true, Ordering::Relaxed);
+    }
+
+    pub fn close(self) -> thread::Result<()> {
+        self.exit();
+        self.join()
     }
 }
 
@@ -810,18 +810,29 @@ fn test_leader_to_validator_transition() {
     // make sure the leader stops.
     assert!(genesis_height < leader_rotation_interval);
     for i in genesis_height..(leader_rotation_interval + extra_transactions) {
-        let expected_balance = std::cmp::min(
-            leader_rotation_interval - genesis_height,
-            i - genesis_height,
-        );
+        if i < leader_rotation_interval {
+            // Poll to see that the bank state is updated after every transaction
+            // to ensure that each transaction is packaged as a single entry,
+            // so that we can be sure leader rotation is triggered
+            let expected_balance = std::cmp::min(
+                leader_rotation_interval - genesis_height,
+                i - genesis_height + 1,
+            );
+            let result = send_tx_and_retry_get_balance(
+                &leader_info,
+                &mint,
+                &bob_pubkey,
+                1,
+                Some(expected_balance as i64),
+            );
 
-        send_tx_and_retry_get_balance(
-            &leader_info,
-            &mint,
-            &bob_pubkey,
-            1,
-            Some(expected_balance as i64),
-        );
+            assert_eq!(result, Some(expected_balance as i64))
+        } else {
+            // After leader_rotation_interval entries, we don't care about the
+            // number of entries generated by these transactions. These are just
+            // here for testing to make sure the leader stopped at the correct point.
+            send_tx_and_retry_get_balance(&leader_info, &mint, &bob_pubkey, 1, None);
+        }
     }
 
     // Wait for leader to shut down tpu and restart tvu