diff --git a/client/src/http_sender.rs b/client/src/http_sender.rs index bc200afbda..9d54eb22cc 100644 --- a/client/src/http_sender.rs +++ b/client/src/http_sender.rs @@ -85,6 +85,14 @@ impl RpcSender for HttpSender { } } }, + rpc_custom_error::JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY => { + match serde_json::from_value::(json["error"]["data"].clone()) { + Ok(rpc_custom_error::RpcNodeUnhealthyErrorData { num_slots_behind}) => RpcResponseErrorData::NodeUnhealthy {num_slots_behind}, + Err(_err) => { + RpcResponseErrorData::Empty + } + } + }, _ => RpcResponseErrorData::Empty }; diff --git a/client/src/rpc_client.rs b/client/src/rpc_client.rs index 6987d69f4c..3fc3197c30 100644 --- a/client/src/rpc_client.rs +++ b/client/src/rpc_client.rs @@ -928,6 +928,11 @@ impl RpcClient { Ok(hash) } + pub fn get_health(&self) -> ClientResult<()> { + self.send::(RpcRequest::GetHealth, Value::Null) + .map(|_| ()) + } + pub fn get_token_account(&self, pubkey: &Pubkey) -> ClientResult> { Ok(self .get_token_account_with_commitment(pubkey, self.commitment_config)? 
diff --git a/client/src/rpc_custom_error.rs b/client/src/rpc_custom_error.rs index ed4607cacc..db47e496ae 100644 --- a/client/src/rpc_custom_error.rs +++ b/client/src/rpc_custom_error.rs @@ -25,13 +25,21 @@ pub enum RpcCustomError { BlockNotAvailable { slot: Slot, }, - RpcNodeUnhealthy, + RpcNodeUnhealthy { + num_slots_behind: Slot, + }, TransactionPrecompileVerificationFailure(solana_sdk::transaction::TransactionError), SlotSkipped { slot: Slot, }, } +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RpcNodeUnhealthyErrorData { + pub num_slots_behind: Slot, +} + impl From for Error { fn from(e: RpcCustomError) -> Self { match e { @@ -65,10 +73,12 @@ impl From for Error { message: format!("Block not available for slot {}", slot), data: None, }, - RpcCustomError::RpcNodeUnhealthy => Self { + RpcCustomError::RpcNodeUnhealthy { num_slots_behind } => Self { code: ErrorCode::ServerError(JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY), - message: "RPC node is unhealthy".to_string(), - data: None, + message: format!("RPC node is behind by {} slots", num_slots_behind), + data: Some(serde_json::json!(RpcNodeUnhealthyErrorData { + num_slots_behind + })), }, RpcCustomError::TransactionPrecompileVerificationFailure(e) => Self { code: ErrorCode::ServerError( diff --git a/client/src/rpc_request.rs b/client/src/rpc_request.rs index 731d80ca67..437649490b 100644 --- a/client/src/rpc_request.rs +++ b/client/src/rpc_request.rs @@ -1,6 +1,6 @@ use crate::rpc_response::RpcSimulateTransactionResult; use serde_json::{json, Value}; -use solana_sdk::pubkey::Pubkey; +use solana_sdk::{clock::Slot, pubkey::Pubkey}; use std::fmt; use thiserror::Error; @@ -25,6 +25,7 @@ pub enum RpcRequest { GetFees, GetFirstAvailableBlock, GetGenesisHash, + GetHealth, GetIdentity, GetInflationGovernor, GetInflationRate, @@ -80,6 +81,7 @@ impl fmt::Display for RpcRequest { RpcRequest::GetFees => "getFees", RpcRequest::GetFirstAvailableBlock => "getFirstAvailableBlock", 
RpcRequest::GetGenesisHash => "getGenesisHash", + RpcRequest::GetHealth => "getHealth", RpcRequest::GetIdentity => "getIdentity", RpcRequest::GetInflationGovernor => "getInflationGovernor", RpcRequest::GetInflationRate => "getInflationRate", @@ -143,6 +145,7 @@ impl RpcRequest { pub enum RpcResponseErrorData { Empty, SendTransactionPreflightFailure(RpcSimulateTransactionResult), + NodeUnhealthy { num_slots_behind: Slot }, } impl fmt::Display for RpcResponseErrorData { diff --git a/core/src/rpc.rs b/core/src/rpc.rs index 45b31f5d04..b4c9910c7f 100644 --- a/core/src/rpc.rs +++ b/core/src/rpc.rs @@ -1760,6 +1760,9 @@ pub trait RpcSol { #[rpc(meta, name = "getGenesisHash")] fn get_genesis_hash(&self, meta: Self::Metadata) -> Result; + #[rpc(meta, name = "getHealth")] + fn get_health(&self, meta: Self::Metadata) -> Result; + #[rpc(meta, name = "getLeaderSchedule")] fn get_leader_schedule( &self, @@ -2247,6 +2250,15 @@ impl RpcSol for RpcSolImpl { Ok(meta.genesis_hash.to_string()) } + fn get_health(&self, meta: Self::Metadata) -> Result { + match meta.health.check() { + RpcHealthStatus::Ok => Ok("ok".to_string()), + RpcHealthStatus::Behind { + num_slots: num_slots_behind, + } => Err(RpcCustomError::RpcNodeUnhealthy { num_slots_behind }.into()), + } + } + fn get_leader_schedule( &self, meta: Self::Metadata, @@ -2486,9 +2498,15 @@ impl RpcSol for RpcSolImpl { return Err(e); } - if meta.health.check() != RpcHealthStatus::Ok { - return Err(RpcCustomError::RpcNodeUnhealthy.into()); + match meta.health.check() { + RpcHealthStatus::Ok => (), + RpcHealthStatus::Behind { + num_slots: num_slots_behind, + } => { + return Err(RpcCustomError::RpcNodeUnhealthy { num_slots_behind }.into()); + } } + if let (Err(err), logs) = preflight_bank.simulate_transaction(transaction.clone()) { return Err(RpcCustomError::SendTransactionPreflightFailure { message: format!("Transaction simulation failed: {}", err), @@ -4518,7 +4536,7 @@ pub mod tests { ); // sendTransaction will fail due to poor node 
health - health.stub_set_health_status(Some(RpcHealthStatus::Behind)); + health.stub_set_health_status(Some(RpcHealthStatus::Behind { num_slots: 42 })); let req = format!( r#"{{"jsonrpc":"2.0","id":1,"method":"sendTransaction","params":["{}"]}}"#, bs58::encode(serialize(&bad_transaction).unwrap()).into_string() @@ -4527,7 +4545,7 @@ pub mod tests { assert_eq!( res, Some( - r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is unhealthy"},"id":1}"#.to_string(), + r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is behind by 42 slots","data":{"numSlotsBehind":42}},"id":1}"#.to_string(), ) ); health.stub_set_health_status(None); diff --git a/core/src/rpc_health.rs b/core/src/rpc_health.rs index d4995dbf3b..51e35fca16 100644 --- a/core/src/rpc_health.rs +++ b/core/src/rpc_health.rs @@ -1,15 +1,17 @@ -use crate::cluster_info::ClusterInfo; -use solana_sdk::pubkey::Pubkey; -use std::{ - collections::HashSet, - sync::atomic::{AtomicBool, Ordering}, - sync::Arc, +use { + crate::cluster_info::ClusterInfo, + solana_sdk::{clock::Slot, pubkey::Pubkey}, + std::{ + collections::HashSet, + sync::atomic::{AtomicBool, Ordering}, + sync::Arc, + }, }; #[derive(PartialEq, Clone, Copy)] pub enum RpcHealthStatus { Ok, - Behind, // Validator is behind its trusted validators + Behind { num_slots: Slot }, // Validator is behind its trusted validators } pub struct RpcHealth { @@ -88,11 +90,13 @@ impl RpcHealth { { RpcHealthStatus::Ok } else { + let num_slots = latest_trusted_validator_account_hash_slot + .saturating_sub(latest_account_hash_slot); warn!( - "health check: me={}, latest trusted_validator={}", - latest_account_hash_slot, latest_trusted_validator_account_hash_slot + "health check: behind by {} slots: me={}, latest trusted_validator={}", + num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot ); - RpcHealthStatus::Behind + RpcHealthStatus::Behind { num_slots } } } else { // No trusted validator point of reference available, so 
this validator is healthy diff --git a/core/src/rpc_service.rs b/core/src/rpc_service.rs index d75e9c2604..1308032af2 100644 --- a/core/src/rpc_service.rs +++ b/core/src/rpc_service.rs @@ -159,7 +159,7 @@ impl RpcRequestMiddleware { fn health_check(&self) -> &'static str { let response = match self.health.check() { RpcHealthStatus::Ok => "ok", - RpcHealthStatus::Behind => "behind", + RpcHealthStatus::Behind { num_slots: _ } => "behind", }; info!("health check: {}", response); response diff --git a/docs/src/developing/clients/jsonrpc-api.md b/docs/src/developing/clients/jsonrpc-api.md index e07f449464..4c126b9772 100644 --- a/docs/src/developing/clients/jsonrpc-api.md +++ b/docs/src/developing/clients/jsonrpc-api.md @@ -36,6 +36,7 @@ gives a convenient interface for the RPC methods. - [getFees](jsonrpc-api.md#getfees) - [getFirstAvailableBlock](jsonrpc-api.md#getfirstavailableblock) - [getGenesisHash](jsonrpc-api.md#getgenesishash) +- [getHealth](jsonrpc-api.md#gethealth) - [getIdentity](jsonrpc-api.md#getidentity) - [getInflationGovernor](jsonrpc-api.md#getinflationgovernor) - [getInflationRate](jsonrpc-api.md#getinflationrate) @@ -1276,6 +1277,54 @@ Result: {"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1} ``` +### getHealth + +Returns the current health of the node. + +If one or more `--trusted-validator` arguments are provided to +`solana-validator`, "ok" is returned when the node is within +`HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator, otherwise +an error is returned. "ok" is always returned if no trusted validators are +provided. + +#### Parameters: + +None + +#### Results: + +If the node is healthy: "ok" +If the node is unhealthy, a JSON RPC error response is returned indicating how far behind the node is. 
+ +#### Example: + +Request: +```bash +curl http://localhost:8899 -X POST -H "Content-Type: application/json" -d ' + {"jsonrpc":"2.0","id":1, "method":"getHealth"} +' +``` + +Healthy Result: +```json +{"jsonrpc":"2.0","result": "ok","id":1} +``` + +Unhealthy Result: +```json +{ + "jsonrpc": "2.0", + "error": { + "code": -32005, + "message": "RPC node is behind by 42 slots", + "data": { + "numSlotsBehind": 42 + } + }, + "id": 1 +} +``` + ### getIdentity Returns the identity pubkey for the current node diff --git a/validator/src/bin/solana-test-validator.rs b/validator/src/bin/solana-test-validator.rs index d9e703a00e..2b02ed1035 100644 --- a/validator/src/bin/solana-test-validator.rs +++ b/validator/src/bin/solana-test-validator.rs @@ -4,7 +4,7 @@ use { fd_lock::FdLock, indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}, solana_clap_utils::{input_parsers::pubkey_of, input_validators::is_pubkey}, - solana_client::{client_error, rpc_client::RpcClient}, + solana_client::{client_error, rpc_client::RpcClient, rpc_request}, solana_core::rpc::JsonRpcConfig, solana_faucet::faucet::{run_local_faucet_with_port, FAUCET_PORT}, solana_sdk::{ @@ -339,7 +339,7 @@ fn main() { fn get_validator_stats( rpc_client: &RpcClient, identity: &Pubkey, - ) -> client_error::Result<(Slot, Slot, Slot, u64, Sol)> { + ) -> client_error::Result<(Slot, Slot, Slot, u64, Sol, String)> { let processed_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::recent())?; let confirmed_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::single_gossip())?; @@ -350,12 +350,32 @@ fn main() { .get_balance_with_commitment(identity, CommitmentConfig::single_gossip())? 
.value; + let health = match rpc_client.get_health() { + Ok(()) => "ok".to_string(), + Err(err) => { + if let client_error::ClientErrorKind::RpcError( + rpc_request::RpcError::RpcResponseError { + code: _, + message: _, + data: + rpc_request::RpcResponseErrorData::NodeUnhealthy { num_slots_behind }, + }, + ) = &err.kind + { + format!("{} slots behind", num_slots_behind) + } else { + "unhealthy".to_string() + } + } + }; + Ok(( processed_slot, confirmed_slot, finalized_slot, transaction_count, Sol(identity_balance), + health, )) } @@ -373,13 +393,21 @@ fn main() { finalized_slot, transaction_count, identity_balance, + health, )) => { let uptime = chrono::Duration::from_std(validator_start.elapsed()).unwrap(); + progress_bar.set_message(&format!( - "{:02}:{:02}:{:02} | \ + "{:02}:{:02}:{:02} \ + {}| \ Processed Slot: {} | Confirmed Slot: {} | Finalized Slot: {} | Snapshot Slot: {} | \ Transactions: {} | {}", uptime.num_hours(), uptime.num_minutes() % 60, uptime.num_seconds() % 60, + if health == "ok" { + "".to_string() + } else { + format!("| {} ", style(health).bold().red()) + }, processed_slot, confirmed_slot, finalized_slot, snapshot_slot, transaction_count, identity_balance ));