Add 'unknown' health check state
This commit is contained in:
committed by
mergify[bot]
parent
ee621878b0
commit
66b781eec3
@ -1866,6 +1866,10 @@ pub mod rpc_minimal {
|
|||||||
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
|
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
|
||||||
match meta.health.check() {
|
match meta.health.check() {
|
||||||
RpcHealthStatus::Ok => Ok("ok".to_string()),
|
RpcHealthStatus::Ok => Ok("ok".to_string()),
|
||||||
|
RpcHealthStatus::Unknown => Err(RpcCustomError::NodeUnhealthy {
|
||||||
|
num_slots_behind: None,
|
||||||
|
}
|
||||||
|
.into()),
|
||||||
RpcHealthStatus::Behind { num_slots } => Err(RpcCustomError::NodeUnhealthy {
|
RpcHealthStatus::Behind { num_slots } => Err(RpcCustomError::NodeUnhealthy {
|
||||||
num_slots_behind: Some(num_slots),
|
num_slots_behind: Some(num_slots),
|
||||||
}
|
}
|
||||||
@ -2700,6 +2704,12 @@ pub mod rpc_full {
|
|||||||
|
|
||||||
match meta.health.check() {
|
match meta.health.check() {
|
||||||
RpcHealthStatus::Ok => (),
|
RpcHealthStatus::Ok => (),
|
||||||
|
RpcHealthStatus::Unknown => {
|
||||||
|
return Err(RpcCustomError::NodeUnhealthy {
|
||||||
|
num_slots_behind: None,
|
||||||
|
}
|
||||||
|
.into());
|
||||||
|
}
|
||||||
RpcHealthStatus::Behind { num_slots } => {
|
RpcHealthStatus::Behind { num_slots } => {
|
||||||
return Err(RpcCustomError::NodeUnhealthy {
|
return Err(RpcCustomError::NodeUnhealthy {
|
||||||
num_slots_behind: Some(num_slots),
|
num_slots_behind: Some(num_slots),
|
||||||
|
@ -8,10 +8,11 @@ use {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Copy)]
|
#[derive(PartialEq, Clone, Copy, Debug)]
|
||||||
pub enum RpcHealthStatus {
|
pub enum RpcHealthStatus {
|
||||||
Ok,
|
Ok,
|
||||||
Behind { num_slots: Slot }, // Validator is behind its trusted validators
|
Behind { num_slots: Slot }, // Validator is behind its trusted validators
|
||||||
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct RpcHealth {
|
pub struct RpcHealth {
|
||||||
@ -51,8 +52,7 @@ impl RpcHealth {
|
|||||||
if self.override_health_check.load(Ordering::Relaxed) {
|
if self.override_health_check.load(Ordering::Relaxed) {
|
||||||
RpcHealthStatus::Ok
|
RpcHealthStatus::Ok
|
||||||
} else if let Some(trusted_validators) = &self.trusted_validators {
|
} else if let Some(trusted_validators) = &self.trusted_validators {
|
||||||
let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = {
|
match (
|
||||||
(
|
|
||||||
self.cluster_info
|
self.cluster_info
|
||||||
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
|
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
|
||||||
hashes
|
hashes
|
||||||
@ -60,11 +60,10 @@ impl RpcHealth {
|
|||||||
.max_by(|a, b| a.0.cmp(&b.0))
|
.max_by(|a, b| a.0.cmp(&b.0))
|
||||||
.map(|slot_hash| slot_hash.0)
|
.map(|slot_hash| slot_hash.0)
|
||||||
})
|
})
|
||||||
.flatten()
|
.flatten(),
|
||||||
.unwrap_or(0),
|
|
||||||
trusted_validators
|
trusted_validators
|
||||||
.iter()
|
.iter()
|
||||||
.map(|trusted_validator| {
|
.filter_map(|trusted_validator| {
|
||||||
self.cluster_info
|
self.cluster_info
|
||||||
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
|
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
|
||||||
hashes
|
hashes
|
||||||
@ -73,18 +72,16 @@ impl RpcHealth {
|
|||||||
.map(|slot_hash| slot_hash.0)
|
.map(|slot_hash| slot_hash.0)
|
||||||
})
|
})
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or(0)
|
|
||||||
})
|
})
|
||||||
.max()
|
.max(),
|
||||||
.unwrap_or(0),
|
) {
|
||||||
)
|
(
|
||||||
};
|
Some(latest_account_hash_slot),
|
||||||
|
Some(latest_trusted_validator_account_hash_slot),
|
||||||
// This validator is considered healthy if its latest account hash slot is within
|
) => {
|
||||||
|
// The validator is considered healthy if its latest account hash slot is within
|
||||||
// `health_check_slot_distance` of the latest trusted validator's account hash slot
|
// `health_check_slot_distance` of the latest trusted validator's account hash slot
|
||||||
if latest_account_hash_slot > 0
|
if latest_account_hash_slot
|
||||||
&& latest_trusted_validator_account_hash_slot > 0
|
|
||||||
&& latest_account_hash_slot
|
|
||||||
> latest_trusted_validator_account_hash_slot
|
> latest_trusted_validator_account_hash_slot
|
||||||
.saturating_sub(self.health_check_slot_distance)
|
.saturating_sub(self.health_check_slot_distance)
|
||||||
{
|
{
|
||||||
@ -94,10 +91,15 @@ impl RpcHealth {
|
|||||||
.saturating_sub(latest_account_hash_slot);
|
.saturating_sub(latest_account_hash_slot);
|
||||||
warn!(
|
warn!(
|
||||||
"health check: behind by {} slots: me={}, latest trusted_validator={}",
|
"health check: behind by {} slots: me={}, latest trusted_validator={}",
|
||||||
num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
|
num_slots,
|
||||||
|
latest_account_hash_slot,
|
||||||
|
latest_trusted_validator_account_hash_slot
|
||||||
);
|
);
|
||||||
RpcHealthStatus::Behind { num_slots }
|
RpcHealthStatus::Behind { num_slots }
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
_ => RpcHealthStatus::Unknown,
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// No trusted validator point of reference available, so this validator is healthy
|
// No trusted validator point of reference available, so this validator is healthy
|
||||||
// because it's running
|
// because it's running
|
||||||
|
@ -178,7 +178,8 @@ impl RpcRequestMiddleware {
|
|||||||
fn health_check(&self) -> &'static str {
|
fn health_check(&self) -> &'static str {
|
||||||
let response = match self.health.check() {
|
let response = match self.health.check() {
|
||||||
RpcHealthStatus::Ok => "ok",
|
RpcHealthStatus::Ok => "ok",
|
||||||
RpcHealthStatus::Behind { num_slots: _ } => "behind",
|
RpcHealthStatus::Behind { .. } => "behind",
|
||||||
|
RpcHealthStatus::Unknown => "unknown",
|
||||||
};
|
};
|
||||||
info!("health check: {}", response);
|
info!("health check: {}", response);
|
||||||
response
|
response
|
||||||
@ -696,18 +697,20 @@ mod tests {
|
|||||||
|
|
||||||
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
|
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
|
||||||
|
|
||||||
// No account hashes for this node or any trusted validators == "behind"
|
// No account hashes for this node or any trusted validators
|
||||||
assert_eq!(rm.health_check(), "behind");
|
assert_eq!(rm.health_check(), "unknown");
|
||||||
|
|
||||||
// No account hashes for any trusted validators == "behind"
|
// No account hashes for any trusted validators
|
||||||
cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]);
|
cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]);
|
||||||
cluster_info.flush_push_queue();
|
cluster_info.flush_push_queue();
|
||||||
assert_eq!(rm.health_check(), "behind");
|
assert_eq!(rm.health_check(), "unknown");
|
||||||
|
|
||||||
|
// Override health check
|
||||||
override_health_check.store(true, Ordering::Relaxed);
|
override_health_check.store(true, Ordering::Relaxed);
|
||||||
assert_eq!(rm.health_check(), "ok");
|
assert_eq!(rm.health_check(), "ok");
|
||||||
override_health_check.store(false, Ordering::Relaxed);
|
override_health_check.store(false, Ordering::Relaxed);
|
||||||
|
|
||||||
// This node is ahead of the trusted validators == "ok"
|
// This node is ahead of the trusted validators
|
||||||
cluster_info
|
cluster_info
|
||||||
.gossip
|
.gossip
|
||||||
.write()
|
.write()
|
||||||
@ -727,7 +730,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(rm.health_check(), "ok");
|
assert_eq!(rm.health_check(), "ok");
|
||||||
|
|
||||||
// Node is slightly behind the trusted validators == "ok"
|
// Node is slightly behind the trusted validators
|
||||||
cluster_info
|
cluster_info
|
||||||
.gossip
|
.gossip
|
||||||
.write()
|
.write()
|
||||||
@ -743,7 +746,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(rm.health_check(), "ok");
|
assert_eq!(rm.health_check(), "ok");
|
||||||
|
|
||||||
// Node is far behind the trusted validators == "behind"
|
// Node is far behind the trusted validators
|
||||||
cluster_info
|
cluster_info
|
||||||
.gossip
|
.gossip
|
||||||
.write()
|
.write()
|
||||||
|
@ -187,11 +187,12 @@ Many methods that take a commitment parameter return an RpcResponse JSON object
|
|||||||
Although not a JSON RPC API, a `GET /health` at the RPC HTTP Endpoint provides a
|
Although not a JSON RPC API, a `GET /health` at the RPC HTTP Endpoint provides a
|
||||||
health-check mechanism for use by load balancers or other network
|
health-check mechanism for use by load balancers or other network
|
||||||
infrastructure. This request will always return a HTTP 200 OK response with a body of
|
infrastructure. This request will always return a HTTP 200 OK response with a body of
|
||||||
"ok" or "behind" based on the following conditions:
|
"ok", "behind" or "unknown" based on the following conditions:
|
||||||
|
|
||||||
1. If one or more `--trusted-validator` arguments are provided to `solana-validator`, "ok" is returned
|
1. If one or more `--trusted-validator` arguments are provided to `solana-validator`, "ok" is returned
|
||||||
when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator,
|
when the node is within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest
|
||||||
otherwise "behind" is returned.
|
trusted validator, otherwise "behind". "unknown" is returned when no slot
|
||||||
|
information from trusted validators is yet available.
|
||||||
2. "ok" is always returned if no trusted validators are provided.
|
2. "ok" is always returned if no trusted validators are provided.
|
||||||
|
|
||||||
## JSON RPC API Reference
|
## JSON RPC API Reference
|
||||||
|
@ -266,7 +266,7 @@ fn get_validator_stats(
|
|||||||
{
|
{
|
||||||
format!("{} slots behind", num_slots_behind)
|
format!("{} slots behind", num_slots_behind)
|
||||||
} else {
|
} else {
|
||||||
"unhealthy".to_string()
|
"health unknown".to_string()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user