From 5fb8da9b35bb047940d96f2b12e9c99997d42143 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 28 May 2020 21:46:38 -0700 Subject: [PATCH] Feign RPC health while in a `--wait-for-supermajority` holding pattern (#10295) (#10301) (cherry picked from commit 0442c45d5b7d42066981e3db116c0b4e4a6ca426) Co-authored-by: Michael Vines --- core/src/rpc_service.rs | 20 ++++++++++++++++++-- core/src/validator.rs | 16 ++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/core/src/rpc_service.rs b/core/src/rpc_service.rs index fe4f4f7246..706b4ef244 100644 --- a/core/src/rpc_service.rs +++ b/core/src/rpc_service.rs @@ -19,6 +19,7 @@ use std::{ collections::HashSet, net::SocketAddr, path::{Path, PathBuf}, + sync::atomic::{AtomicBool, Ordering}, sync::{mpsc::channel, Arc, RwLock}, thread::{self, Builder, JoinHandle}, }; @@ -44,6 +45,7 @@ struct RpcRequestMiddleware { cluster_info: Arc, trusted_validators: Option>, bank_forks: Arc>, + override_health_check: Arc, } impl RpcRequestMiddleware { @@ -53,6 +55,7 @@ impl RpcRequestMiddleware { cluster_info: Arc, trusted_validators: Option>, bank_forks: Arc>, + override_health_check: Arc, ) -> Self { Self { ledger_path, @@ -64,6 +67,7 @@ impl RpcRequestMiddleware { cluster_info, trusted_validators, bank_forks, + override_health_check, } } @@ -134,7 +138,9 @@ impl RpcRequestMiddleware { } fn health_check(&self) -> &'static str { - let response = if let Some(trusted_validators) = &self.trusted_validators { + let response = if self.override_health_check.load(Ordering::Relaxed) { + "ok" + } else if let Some(trusted_validators) = &self.trusted_validators { let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = { ( self.cluster_info @@ -290,6 +296,7 @@ impl JsonRpcService { ledger_path: &Path, validator_exit: Arc>>, trusted_validators: Option>, + override_health_check: Arc, ) -> Self { info!("rpc bound to {:?}", rpc_addr); info!("rpc configuration: {:?}", config); @@ -320,6 +327,7 @@ impl JsonRpcService { cluster_info.clone(), trusted_validators, bank_forks.clone(), + override_health_check, ); let server = ServerBuilder::with_meta_extractor( io, @@ -395,7 +403,6 @@ mod tests { use solana_runtime::bank::Bank; use solana_sdk::signature::Signer; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; - use std::sync::atomic::AtomicBool; #[test] fn test_rpc_new() { @@ -431,6 +438,7 @@ mod tests { &PathBuf::from("farf"), validator_exit, None, + Arc::new(AtomicBool::new(false)), ); let thread = rpc_service.thread_hdl.thread(); assert_eq!(thread.name().unwrap(), "solana-jsonrpc"); @@ -481,6 +489,7 @@ mod tests { cluster_info.clone(), None, bank_forks.clone(), + Arc::new(AtomicBool::new(false)), ); let rrm_with_snapshot_config = RpcRequestMiddleware::new( PathBuf::from("/"), @@ -493,6 +502,7 @@ mod tests { cluster_info, None, bank_forks, + Arc::new(AtomicBool::new(false)), ); assert!(rrm.is_file_get_path("/genesis.tar.bz2")); @@ -526,6 +536,7 @@ mod tests { cluster_info, None, create_bank_forks(), + Arc::new(AtomicBool::new(false)), ); assert_eq!(rm.health_check(), "ok"); } @@ -534,6 +545,7 @@ mod tests { fn test_health_check_with_trusted_validators() { let cluster_info = Arc::new(ClusterInfo::new_with_invalid_keypair(ContactInfo::default())); + let override_health_check = Arc::new(AtomicBool::new(false)); let trusted_validators = vec![Pubkey::new_rand(), Pubkey::new_rand(), Pubkey::new_rand()]; let rm = RpcRequestMiddleware::new( PathBuf::from("/"), @@ -541,6 +553,7 @@ mod tests { cluster_info.clone(), Some(trusted_validators.clone().into_iter().collect()), create_bank_forks(), + override_health_check.clone(), ); // No account hashes for this node or any trusted validators == "behind" @@ -549,6 +562,9 @@ mod tests { // No account hashes for any trusted validators == "behind" cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]); assert_eq!(rm.health_check(), "behind"); + override_health_check.store(true, Ordering::Relaxed); + assert_eq!(rm.health_check(), "ok"); + override_health_check.store(false, Ordering::Relaxed); // This node is ahead of the trusted validators == "ok" cluster_info diff --git a/core/src/validator.rs b/core/src/validator.rs index d22830d15b..5e4e1466fd 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -234,6 +234,7 @@ impl Validator { block_commitment_cache.clone(), )); + let rpc_override_health_check = Arc::new(AtomicBool::new(false)); let rpc_service = config.rpc_ports.map(|(rpc_port, rpc_pubsub_port)| { if ContactInfo::is_valid_address(&node.info.rpc) { assert!(ContactInfo::is_valid_address(&node.info.rpc_pubsub)); @@ -255,6 +256,7 @@ impl Validator { ledger_path, validator_exit.clone(), config.trusted_validators.clone(), + rpc_override_health_check.clone(), ), PubSubService::new( &subscriptions, @@ -374,7 +376,7 @@ impl Validator { (None, None) }; - wait_for_supermajority(config, &bank, &cluster_info); + wait_for_supermajority(config, &bank, &cluster_info, rpc_override_health_check); let poh_service = PohService::new(poh_recorder.clone(), &poh_config, &exit); assert_eq!( @@ -620,7 +622,12 @@ fn new_banks_from_blockstore( ) } -fn wait_for_supermajority(config: &ValidatorConfig, bank: &Bank, cluster_info: &ClusterInfo) { +fn wait_for_supermajority( + config: &ValidatorConfig, + bank: &Bank, + cluster_info: &ClusterInfo, + rpc_override_health_check: Arc, +) { if config.wait_for_supermajority != Some(bank.slot()) { return; } @@ -635,8 +642,13 @@ fn wait_for_supermajority(config: &ValidatorConfig, bank: &Bank, cluster_info: & if gossip_stake_percent >= 80 { break; } + // The normal RPC health checks don't apply as the node is waiting, so feign health to + // prevent load balancers from removing the node from their list of candidates during a + // manual restart. + rpc_override_health_check.store(true, Ordering::Relaxed); sleep(Duration::new(1, 0)); } + rpc_override_health_check.store(false, Ordering::Relaxed); } pub struct TestValidator {