From 40737e9efa4834300df17383f4479fa90b5d8260 Mon Sep 17 00:00:00 2001 From: Michael Vines Date: Tue, 14 Apr 2020 13:22:58 -0700 Subject: [PATCH] Add health check URI --- core/src/rpc_service.rs | 226 ++++++++++++++++++++++++++++++++--- core/src/validator.rs | 1 + docs/src/apps/jsonrpc-api.md | 10 ++ 3 files changed, 221 insertions(+), 16 deletions(-) diff --git a/core/src/rpc_service.rs b/core/src/rpc_service.rs index f3887f012f..030a90baa3 100644 --- a/core/src/rpc_service.rs +++ b/core/src/rpc_service.rs @@ -15,8 +15,9 @@ use solana_ledger::{ blockstore::Blockstore, snapshot_utils, }; -use solana_sdk::hash::Hash; +use solana_sdk::{hash::Hash, pubkey::Pubkey}; use std::{ + collections::HashSet, net::SocketAddr, path::{Path, PathBuf}, sync::{mpsc::channel, Arc, RwLock}, @@ -24,6 +25,10 @@ use std::{ }; use tokio::prelude::Future; +// If trusted validators are specified, consider this validator healthy if its latest account hash +// is no further behind than this distance from the latest trusted validator account hash +const HEALTH_CHECK_SLOT_DISTANCE: u64 = 150; + pub struct JsonRpcService { thread_hdl: JoinHandle<()>, @@ -37,10 +42,17 @@ struct RpcRequestMiddleware { ledger_path: PathBuf, snapshot_archive_path_regex: Regex, snapshot_config: Option, + cluster_info: Arc>, + trusted_validators: Option>, } impl RpcRequestMiddleware { - pub fn new(ledger_path: PathBuf, snapshot_config: Option) -> Self { + pub fn new( + ledger_path: PathBuf, + snapshot_config: Option, + cluster_info: Arc>, + trusted_validators: Option>, + ) -> Self { Self { ledger_path, snapshot_archive_path_regex: Regex::new( @@ -48,6 +60,8 @@ impl RpcRequestMiddleware { ) .unwrap(), snapshot_config, + cluster_info, + trusted_validators, } } @@ -116,6 +130,58 @@ impl RpcRequestMiddleware { ), } } + + fn health_check(&self) -> &'static str { + let response = if let Some(trusted_validators) = &self.trusted_validators { + let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = { + let cluster_info = self.cluster_info.read().unwrap(); + ( + cluster_info + .get_accounts_hash_for_node(&cluster_info.id()) + .map(|hashes| hashes.iter().max_by(|a, b| a.0.cmp(&b.0))) + .flatten() + .map(|slot_hash| slot_hash.0) + .unwrap_or(0), + trusted_validators + .iter() + .map(|trusted_validator| { + cluster_info + .get_accounts_hash_for_node(&trusted_validator) + .map(|hashes| hashes.iter().max_by(|a, b| a.0.cmp(&b.0))) + .flatten() + .map(|slot_hash| slot_hash.0) + .unwrap_or(0) + }) + .max() + .unwrap_or(0), + ) + }; + + // This validator is considered healthy if its latest account hash slot is within + // `HEALTH_CHECK_SLOT_DISTANCE` of the latest trusted validator's account hash slot + if latest_account_hash_slot > 0 + && latest_trusted_validator_account_hash_slot > 0 + && latest_account_hash_slot + > latest_trusted_validator_account_hash_slot + .saturating_sub(HEALTH_CHECK_SLOT_DISTANCE) + { + "ok" + } else { + warn!( + "health check: me={}, latest trusted_validator={}", + latest_account_hash_slot, latest_trusted_validator_account_hash_slot + ); + "behind" + } + } else { + // No trusted validator point of reference available, so this validator is healthy + // because it's running + "ok" + }; + + info!("health check: {}", response); + response + } } impl RequestMiddleware for RpcRequestMiddleware { @@ -150,6 +216,16 @@ impl RequestMiddleware for RpcRequestMiddleware { } if self.is_get_path(request.uri().path()) { self.get(request.uri().path()) + } else if request.uri().path() == "/health" { + RequestMiddlewareAction::Respond { + should_validate_hosts: true, + response: Box::new(jsonrpc_core::futures::future::ok( + hyper::Response::builder() + .status(hyper::StatusCode::OK) + .body(hyper::Body::from(self.health_check())) + .unwrap(), + )), + } } else { RequestMiddlewareAction::Proceed { should_continue_on_invalid_cors: false, @@ -173,6 +249,7 @@ impl JsonRpcService { ledger_path: &Path, storage_state: StorageState, validator_exit: Arc>>, + trusted_validators: Option>, ) -> Self { info!("rpc bound to {:?}", rpc_addr); info!("rpc configuration: {:?}", config); @@ -198,20 +275,35 @@ impl JsonRpcService { let rpc = RpcSolImpl; io.extend_with(rpc.to_delegate()); - let server = - ServerBuilder::with_meta_extractor(io, move |_req: &hyper::Request| Meta { + let request_middleware = RpcRequestMiddleware::new( + ledger_path, + snapshot_config, + cluster_info.clone(), + trusted_validators, + ); + let server = ServerBuilder::with_meta_extractor( + io, + move |_req: &hyper::Request| Meta { request_processor: request_processor.clone(), cluster_info: cluster_info.clone(), - genesis_hash - }).threads(4) - .cors(DomainsValidation::AllowOnly(vec![ - AccessControlAllowOrigin::Any, - ])) - .cors_max_age(86400) - .request_middleware(RpcRequestMiddleware::new(ledger_path, snapshot_config)) - .start_http(&rpc_addr); + genesis_hash, + }, + ) + .threads(4) + .cors(DomainsValidation::AllowOnly(vec![ + AccessControlAllowOrigin::Any, + ])) + .cors_max_age(86400) + .request_middleware(request_middleware) + .start_http(&rpc_addr); + if let Err(e) = server { - warn!("JSON RPC service unavailable error: {:?}. \nAlso, check that port {} is not already in use by another application", e, rpc_addr.port()); + warn!( + "JSON RPC service unavailable error: {:?}. \n\ + Also, check that port {} is not already in use by another application", + e, + rpc_addr.port() + ); return; } @@ -250,9 +342,13 @@ impl JsonRpcService { #[cfg(test)] mod tests { use super::*; - use crate::{contact_info::ContactInfo, rpc::tests::create_validator_exit}; - use solana_ledger::bank_forks::CompressionType; + use crate::{ + contact_info::ContactInfo, + crds_value::{CrdsData, CrdsValue, SnapshotHash}, + rpc::tests::create_validator_exit, + }; use solana_ledger::{ + bank_forks::CompressionType, genesis_utils::{create_genesis_config, GenesisConfigInfo}, get_tmp_ledger_path, }; @@ -295,6 +391,7 @@ mod tests { &PathBuf::from("farf"), StorageState::default(), validator_exit, + None, ); let thread = rpc_service.thread_hdl.thread(); assert_eq!(thread.name().unwrap(), "solana-jsonrpc"); @@ -315,7 +412,11 @@ mod tests { #[test] fn test_is_get_path() { - let rrm = RpcRequestMiddleware::new(PathBuf::from("/"), None); + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new_with_invalid_keypair( + ContactInfo::default(), + ))); + + let rrm = RpcRequestMiddleware::new(PathBuf::from("/"), None, cluster_info.clone(), None); let rrm_with_snapshot_config = RpcRequestMiddleware::new( PathBuf::from("/"), Some(SnapshotConfig { @@ -324,6 +425,8 @@ mod tests { snapshot_path: PathBuf::from("/"), compression: CompressionType::Bzip2, }), + cluster_info, + None, ); assert!(rrm.is_get_path("/genesis.tar.bz2")); @@ -345,4 +448,95 @@ mod tests { assert!(!rrm.is_get_path("..")); assert!(!rrm.is_get_path("🎣")); } + + #[test] + fn test_health_check_with_no_trusted_validators() { + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new_with_invalid_keypair( + ContactInfo::default(), + ))); + + let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, cluster_info.clone(), None); + assert_eq!(rm.health_check(), "ok"); + } + + #[test] + fn test_health_check_with_trusted_validators() { + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new_with_invalid_keypair( + ContactInfo::default(), + ))); + + let trusted_validators = vec![Pubkey::new_rand(), Pubkey::new_rand(), Pubkey::new_rand()]; + let rm = RpcRequestMiddleware::new( + PathBuf::from("/"), + None, + cluster_info.clone(), + Some(trusted_validators.clone().into_iter().collect()), + ); + + // No account hashes for this node or any trusted validators == "behind" + assert_eq!(rm.health_check(), "behind"); + + // No account hashes for any trusted validators == "behind" + { + let mut cluster_info = cluster_info.write().unwrap(); + cluster_info + .push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]); + } + assert_eq!(rm.health_check(), "behind"); + + // This node is ahead of the trusted validators == "ok" + { + let mut cluster_info = cluster_info.write().unwrap(); + cluster_info + .gossip + .crds + .insert( + CrdsValue::new_unsigned(CrdsData::AccountsHashes(SnapshotHash::new( + trusted_validators[0].clone(), + vec![ + (1, Hash::default()), + (1001, Hash::default()), + (2, Hash::default()), + ], + ))), + 1, + ) + .unwrap(); + } + assert_eq!(rm.health_check(), "ok"); + + // Node is slightly behind the trusted validators == "ok" + { + let mut cluster_info = cluster_info.write().unwrap(); + cluster_info + .gossip + .crds + .insert( + CrdsValue::new_unsigned(CrdsData::AccountsHashes(SnapshotHash::new( + trusted_validators[1].clone(), + vec![(1000 + HEALTH_CHECK_SLOT_DISTANCE - 1, Hash::default())], + ))), + 1, + ) + .unwrap(); + } + assert_eq!(rm.health_check(), "ok"); + + // Node is far behind the trusted validators == "behind" + { + let mut cluster_info = cluster_info.write().unwrap(); + cluster_info + .gossip + .crds + .insert( + CrdsValue::new_unsigned(CrdsData::AccountsHashes(SnapshotHash::new( + trusted_validators[2].clone(), + vec![(1000 + HEALTH_CHECK_SLOT_DISTANCE, Hash::default())], + ))), + 1, + ) + .unwrap(); + } + assert_eq!(rm.health_check(), "behind"); + } } diff --git a/core/src/validator.rs b/core/src/validator.rs index c12de5fc34..1a6e232b42 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -265,6 +265,7 @@ impl Validator { ledger_path, storage_state.clone(), validator_exit.clone(), + config.trusted_validators.clone(), ), PubSubService::new( &subscriptions, diff --git a/docs/src/apps/jsonrpc-api.md b/docs/src/apps/jsonrpc-api.md index 43a0ae8f84..e5b7b7a9b1 100644 --- a/docs/src/apps/jsonrpc-api.md +++ b/docs/src/apps/jsonrpc-api.md @@ -115,6 +115,16 @@ Many methods that take a commitment parameter return an RpcResponse JSON object * `value` : The value returned by the operation itself. +## Health Check +Although not a JSON RPC API, a `GET /heath` at the RPC HTTP Endpoint provides a +health-check mechanism for use by load balancers or other network +infrastructure. This request will always return a HTTP 200 OK response with a body of +"ok" or "behind" based on the following conditions: +1. If one or more `--trusted-validator` arguments are provided to `solana-validator`, "ok" is returned + when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator, + otherwise "behind" is returned. +2. "ok" is always returned if no trusted validators are provided. + ## JSON RPC API Reference ### getAccountInfo