From 8b2ad776998ee7ec551de41cd2c4f2455290add4 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2019 11:25:34 -0800 Subject: [PATCH] Add validator-identity argument to support monitoring a specific validator only (#7501) automerge --- watchtower/README.md | 4 +++ watchtower/src/main.rs | 59 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/watchtower/README.md b/watchtower/README.md index 1779eb2528..fb19250769 100644 --- a/watchtower/README.md +++ b/watchtower/README.md @@ -3,6 +3,10 @@ periodically polls the cluster over an RPC API to confirm that the transaction count is advancing, new blockhashes are available, and no validators are delinquent. Results are reported as InfluxDB metrics. +If you only care about the health of one specific validator, the +`--validator-identity` command-line argument can be used to restrict failure +notifications to issues only affecting that validator. 
+ ### Metrics #### `watchtower-sanity` On every iteration this data point will be emitted indicating the overall result diff --git a/watchtower/src/main.rs b/watchtower/src/main.rs index 81aca44669..9bd91c4216 100644 --- a/watchtower/src/main.rs +++ b/watchtower/src/main.rs @@ -2,7 +2,10 @@ use clap::{crate_description, crate_name, value_t_or_exit, App, Arg}; use log::*; -use solana_clap_utils::input_validators::is_url; +use solana_clap_utils::{ + input_parsers::pubkey_of, + input_validators::{is_pubkey_or_keypair, is_url}, +}; use solana_client::rpc_client::RpcClient; use solana_metrics::{datapoint_error, datapoint_info}; use std::{error, io, thread::sleep, time::Duration}; @@ -28,10 +31,19 @@ fn main() -> Result<(), Box<dyn error::Error>> { .default_value("60") .help("Wait interval seconds between checking the cluster"), ) + .arg( + Arg::with_name("validator_identity") + .long("validator-identity") + .value_name("VALIDATOR IDENTITY PUBKEY") + .takes_value(true) + .validator(is_pubkey_or_keypair) + .help("Monitor a specific validator only instead of the entire cluster"), + ) .get_matches(); let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64)); let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String); + let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string()); solana_logger::setup_with_filter("solana=info"); solana_metrics::set_panic_hook("watchtower"); @@ -92,13 +104,44 @@ fn main() -> Result<(), Box<dyn error::Error>> { "Delinquent validator count: {}", vote_accounts.delinquent.len() ); - if vote_accounts.delinquent.is_empty() { - Ok(true) - } else { - Err(io::Error::new( - io::ErrorKind::Other, - format!("{} delinquent validators", vote_accounts.delinquent.len()), - )) + + match validator_identity.as_ref() { + Some(validator_identity) => { + if vote_accounts + .current + .iter() + .any(|vai| vai.node_pubkey == *validator_identity) + { + Ok(true) + } else if vote_accounts + .delinquent + .iter() + .any(|vai| vai.node_pubkey == 
*validator_identity) + { + Err(io::Error::new( + io::ErrorKind::Other, + format!("Validator {} is delinquent", validator_identity), + )) + } else { + Err(io::Error::new( + io::ErrorKind::Other, + format!("Validator {} is missing", validator_identity), + )) + } + } + None => { + if vote_accounts.delinquent.is_empty() { + Ok(true) + } else { + Err(io::Error::new( + io::ErrorKind::Other, + format!( + "{} delinquent validators", + vote_accounts.delinquent.len() + ), + )) + } + } } }) .unwrap_or_else(|err| {