Add 'unknown' health check state

This commit is contained in:
Michael Vines
2021-03-04 21:18:08 -08:00
committed by mergify[bot]
parent ee621878b0
commit 66b781eec3
5 changed files with 74 additions and 58 deletions

View File

@ -1866,6 +1866,10 @@ pub mod rpc_minimal {
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
match meta.health.check() {
RpcHealthStatus::Ok => Ok("ok".to_string()),
RpcHealthStatus::Unknown => Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: None,
}
.into()),
RpcHealthStatus::Behind { num_slots } => Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: Some(num_slots),
}
@ -2700,6 +2704,12 @@ pub mod rpc_full {
match meta.health.check() {
RpcHealthStatus::Ok => (),
RpcHealthStatus::Unknown => {
return Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: None,
}
.into());
}
RpcHealthStatus::Behind { num_slots } => {
return Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: Some(num_slots),

View File

@ -8,10 +8,11 @@ use {
},
};
/// Result of an RPC node health check.
// NOTE(review): two `derive` lines appear back to back below; they look like the
// before/after lines of the diff rendered together. Real source should carry only
// one of them (presumably the one that includes `Debug`) — confirm against the file.
#[derive(PartialEq, Clone, Copy)]
#[derive(PartialEq, Clone, Copy, Debug)]
pub enum RpcHealthStatus {
/// The node is considered healthy.
Ok,
/// The node trails the trusted validators; `num_slots` is the trusted validators'
/// latest account hash slot minus this node's latest account hash slot (saturating).
Behind { num_slots: Slot }, // Validator is behind its trusted validators
/// Health could not be determined because a latest account hash slot was not
/// available for this node, for the trusted validators, or for both.
Unknown,
}
pub struct RpcHealth {
@ -51,52 +52,53 @@ impl RpcHealth {
if self.override_health_check.load(Ordering::Relaxed) {
RpcHealthStatus::Ok
} else if let Some(trusted_validators) = &self.trusted_validators {
let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = {
match (
self.cluster_info
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten(),
trusted_validators
.iter()
.filter_map(|trusted_validator| {
self.cluster_info
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
})
.max(),
) {
(
self.cluster_info
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
.unwrap_or(0),
trusted_validators
.iter()
.map(|trusted_validator| {
self.cluster_info
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
.unwrap_or(0)
})
.max()
.unwrap_or(0),
)
};
// This validator is considered healthy if its latest account hash slot is within
// `health_check_slot_distance` of the latest trusted validator's account hash slot
if latest_account_hash_slot > 0
&& latest_trusted_validator_account_hash_slot > 0
&& latest_account_hash_slot
> latest_trusted_validator_account_hash_slot
.saturating_sub(self.health_check_slot_distance)
{
RpcHealthStatus::Ok
} else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!(
"health check: behind by {} slots: me={}, latest trusted_validator={}",
num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
);
RpcHealthStatus::Behind { num_slots }
Some(latest_account_hash_slot),
Some(latest_trusted_validator_account_hash_slot),
) => {
// The validator is considered healthy if its latest account hash slot is within
// `health_check_slot_distance` of the latest trusted validator's account hash slot
if latest_account_hash_slot
> latest_trusted_validator_account_hash_slot
.saturating_sub(self.health_check_slot_distance)
{
RpcHealthStatus::Ok
} else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!(
"health check: behind by {} slots: me={}, latest trusted_validator={}",
num_slots,
latest_account_hash_slot,
latest_trusted_validator_account_hash_slot
);
RpcHealthStatus::Behind { num_slots }
}
}
_ => RpcHealthStatus::Unknown,
}
} else {
// No trusted validator point of reference available, so this validator is healthy

View File

@ -178,7 +178,8 @@ impl RpcRequestMiddleware {
/// Runs the health check and returns its status as a static string:
/// `"ok"`, `"behind"`, or `"unknown"`. The chosen string is also logged at info level.
fn health_check(&self) -> &'static str {
let response = match self.health.check() {
RpcHealthStatus::Ok => "ok",
// NOTE(review): the next two arms match the same variant; they look like the
// before/after lines of the diff rendered together. If both were present in real
// source, the second arm would be unreachable — confirm only one exists.
RpcHealthStatus::Behind { num_slots: _ } => "behind",
RpcHealthStatus::Behind { .. } => "behind",
RpcHealthStatus::Unknown => "unknown",
};
info!("health check: {}", response);
response
@ -696,18 +697,20 @@ mod tests {
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
// No account hashes for this node or any trusted validators == "behind"
assert_eq!(rm.health_check(), "behind");
// No account hashes for this node or any trusted validators
assert_eq!(rm.health_check(), "unknown");
// No account hashes for any trusted validators == "behind"
// No account hashes for any trusted validators
cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]);
cluster_info.flush_push_queue();
assert_eq!(rm.health_check(), "behind");
assert_eq!(rm.health_check(), "unknown");
// Override health check
override_health_check.store(true, Ordering::Relaxed);
assert_eq!(rm.health_check(), "ok");
override_health_check.store(false, Ordering::Relaxed);
// This node is ahead of the trusted validators == "ok"
// This node is ahead of the trusted validators
cluster_info
.gossip
.write()
@ -727,7 +730,7 @@ mod tests {
.unwrap();
assert_eq!(rm.health_check(), "ok");
// Node is slightly behind the trusted validators == "ok"
// Node is slightly behind the trusted validators
cluster_info
.gossip
.write()
@ -743,7 +746,7 @@ mod tests {
.unwrap();
assert_eq!(rm.health_check(), "ok");
// Node is far behind the trusted validators == "behind"
// Node is far behind the trusted validators
cluster_info
.gossip
.write()