watchtower: Add --monitor-active-stake flag (bp #8765) (#8769)

automerge
This commit is contained in:
mergify[bot]
2020-03-10 13:27:19 -07:00
committed by GitHub
parent 38e661d7ba
commit 1f80346d97

View File

@ -9,13 +9,20 @@ use solana_clap_utils::{
input_parsers::pubkeys_of, input_parsers::pubkeys_of,
input_validators::{is_pubkey_or_keypair, is_url}, input_validators::{is_pubkey_or_keypair, is_url},
}; };
use solana_cli_config::{Config, CONFIG_FILE}; use solana_client::{rpc_client::RpcClient, rpc_response::RpcVoteAccountStatus};
use solana_client::rpc_client::RpcClient;
use solana_metrics::{datapoint_error, datapoint_info}; use solana_metrics::{datapoint_error, datapoint_info};
use solana_sdk::native_token::lamports_to_sol; use solana_sdk::{hash::Hash, native_token::lamports_to_sol};
use std::{error, io, thread::sleep, time::Duration}; use std::{error, io, thread::sleep, time::Duration};
fn main() -> Result<(), Box<dyn error::Error>> { struct Config {
interval: Duration,
json_rpc_url: String,
validator_identity_pubkeys: Vec<String>,
no_duplicate_notifications: bool,
monitor_active_stake: bool,
}
fn get_config() -> Config {
let matches = App::new(crate_name!()) let matches = App::new(crate_name!())
.about(crate_description!()) .about(crate_description!())
.version(solana_clap_utils::version!()) .version(solana_clap_utils::version!())
@ -27,7 +34,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
.takes_value(true) .takes_value(true)
.global(true) .global(true)
.help("Configuration file to use"); .help("Configuration file to use");
if let Some(ref config_file) = *CONFIG_FILE { if let Some(ref config_file) = *solana_cli_config::CONFIG_FILE {
arg.default_value(&config_file) arg.default_value(&config_file)
} else { } else {
arg arg
@ -50,7 +57,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
.help("Wait interval seconds between checking the cluster"), .help("Wait interval seconds between checking the cluster"),
) )
.arg( .arg(
Arg::with_name("validator_identitys") Arg::with_name("validator_identities")
.long("validator-identity") .long("validator-identity")
.value_name("VALIDATOR IDENTITY PUBKEY") .value_name("VALIDATOR IDENTITY PUBKEY")
.takes_value(true) .takes_value(true)
@ -64,90 +71,81 @@ fn main() -> Result<(), Box<dyn error::Error>> {
.takes_value(false) .takes_value(false)
.help("Subsequent identical notifications will be suppressed"), .help("Subsequent identical notifications will be suppressed"),
) )
.arg(
Arg::with_name("monitor_active_stake")
.long("monitor-active-stake")
.takes_value(false)
.help("Alert when the current stake for the cluster drops below 80%"),
)
.get_matches(); .get_matches();
let config = if let Some(config_file) = matches.value_of("config_file") { let config = if let Some(config_file) = matches.value_of("config_file") {
Config::load(config_file).unwrap_or_default() solana_cli_config::Config::load(config_file).unwrap_or_default()
} else { } else {
Config::default() solana_cli_config::Config::default()
}; };
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64)); let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
let json_rpc_url = let json_rpc_url =
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url); value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
let validator_identity_pubkeys: Vec<_> = pubkeys_of(&matches, "validator_identitys") let validator_identity_pubkeys: Vec<_> = pubkeys_of(&matches, "validator_identities")
.unwrap_or_else(|| vec![]) .unwrap_or_else(|| vec![])
.into_iter() .into_iter()
.map(|i| i.to_string()) .map(|i| i.to_string())
.collect(); .collect();
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications"); let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
let monitor_active_stake = matches.is_present("monitor_active_stake");
Config {
interval,
json_rpc_url,
validator_identity_pubkeys,
no_duplicate_notifications,
monitor_active_stake,
}
}
fn get_cluster_info(rpc_client: &RpcClient) -> io::Result<(u64, Hash, RpcVoteAccountStatus)> {
let transaction_count = rpc_client.get_transaction_count()?;
let recent_blockhash = rpc_client.get_recent_blockhash()?.0;
let vote_accounts = rpc_client.get_vote_accounts()?;
Ok((transaction_count, recent_blockhash, vote_accounts))
}
fn main() -> Result<(), Box<dyn error::Error>> {
let config = get_config();
solana_logger::setup_with_default("solana=info"); solana_logger::setup_with_default("solana=info");
solana_metrics::set_panic_hook("watchtower"); solana_metrics::set_panic_hook("watchtower");
info!("RPC URL: {}", json_rpc_url); info!("RPC URL: {}", config.json_rpc_url);
if !validator_identity_pubkeys.is_empty() { if !config.validator_identity_pubkeys.is_empty() {
info!("Monitored validators: {:?}", validator_identity_pubkeys); info!(
"Monitored validators: {:?}",
config.validator_identity_pubkeys
);
} }
let rpc_client = RpcClient::new(json_rpc_url); let rpc_client = RpcClient::new(config.json_rpc_url);
let notifier = Notifier::new(); let notifier = Notifier::new();
let mut last_transaction_count = 0; let mut last_transaction_count = 0;
let mut last_check_notification_sent = false; let mut last_recent_blockhash = Hash::default();
let mut last_notification_msg = String::from(""); let mut last_notification_msg = "".into();
loop {
let mut notify_msg = String::from("solana-watchtower: undefined error");
let ok = rpc_client
.get_transaction_count()
.and_then(|transaction_count| {
info!("Current transaction count: {}", transaction_count);
if transaction_count > last_transaction_count { loop {
last_transaction_count = transaction_count; let failure = match get_cluster_info(&rpc_client) {
Ok(true) Ok((transaction_count, recent_blockhash, vote_accounts)) => {
} else { info!("Current transaction count: {}", transaction_count);
Err(io::Error::new( info!("Recent blockhash: {}", recent_blockhash);
io::ErrorKind::Other, info!("Current validator count: {}", vote_accounts.current.len());
format!( info!(
"Transaction count is not advancing: {} <= {}", "Delinquent validator count: {}",
transaction_count, last_transaction_count vote_accounts.delinquent.len()
),
))
}
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
datapoint_error!(
"watchtower-sanity-failure",
("test", "transaction-count", String),
("err", err.to_string(), String)
); );
false
}) let mut failures = vec![];
&& rpc_client
.get_recent_blockhash()
.and_then(|(blockhash, _fee_calculator)| {
info!("Current blockhash: {}", blockhash);
rpc_client.get_new_blockhash(&blockhash)
})
.and_then(|(blockhash, _fee_calculator)| {
info!("New blockhash: {}", blockhash);
Ok(true)
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
datapoint_error!(
"watchtower-sanity-failure",
("test", "blockhash", String),
("err", err.to_string(), String)
);
false
})
&& rpc_client
.get_vote_accounts()
.and_then(|vote_accounts| {
let total_current_stake = vote_accounts let total_current_stake = vote_accounts
.current .current
@ -168,24 +166,44 @@ fn main() -> Result<(), Box<dyn error::Error>> {
lamports_to_sol(total_delinquent_stake) lamports_to_sol(total_delinquent_stake)
); );
info!("Current validator count: {}", vote_accounts.current.len()); if transaction_count > last_transaction_count {
info!( last_transaction_count = transaction_count;
"Delinquent validator count: {}",
vote_accounts.delinquent.len()
);
if validator_identity_pubkeys.is_empty() {
if vote_accounts.delinquent.is_empty() {
Ok(true)
} else { } else {
Err(io::Error::new( failures.push((
io::ErrorKind::Other, "transaction-count",
format!(
"Transaction count is not advancing: {} <= {}",
transaction_count, last_transaction_count
),
));
}
if recent_blockhash != last_recent_blockhash {
last_recent_blockhash = recent_blockhash;
} else {
failures.push((
"recent-blockhash",
format!("Unable to get new blockhash: {}", recent_blockhash),
));
}
if config.monitor_active_stake && current_stake_percent < 80 {
failures.push((
"current-stake",
format!("Current stake is {}%", current_stake_percent),
));
}
if config.validator_identity_pubkeys.is_empty() {
if !vote_accounts.delinquent.is_empty() {
failures.push((
"delinquent",
format!("{} delinquent validators", vote_accounts.delinquent.len()), format!("{} delinquent validators", vote_accounts.delinquent.len()),
)) ));
} }
} else { } else {
let mut errors = vec![]; let mut errors = vec![];
for validator_identity in validator_identity_pubkeys.iter() { for validator_identity in config.validator_identity_pubkeys.iter() {
if vote_accounts if vote_accounts
.delinquent .delinquent
.iter() .iter()
@ -201,46 +219,38 @@ fn main() -> Result<(), Box<dyn error::Error>> {
} }
} }
if errors.is_empty() { if !errors.is_empty() {
Ok(true) failures.push(("delinquent", errors.join(",")));
} else {
Err(io::Error::new(io::ErrorKind::Other, errors.join(",")))
} }
} }
})
.unwrap_or_else(|err| { failures.into_iter().next() // Only report the first failure if any
notify_msg = format!("solana-watchtower: {}", err.to_string()); }
Err(err) => Some(("rpc", err.to_string())),
};
datapoint_info!("watchtower-sanity", ("ok", failure.is_none(), bool));
if let Some((failure_test_name, failure_error_message)) = &failure {
let notification_msg = format!(
"solana-watchtower: Error: {}: {}",
failure_test_name, failure_error_message
);
if !config.no_duplicate_notifications || last_notification_msg != notification_msg {
notifier.send(&notification_msg);
}
datapoint_error!( datapoint_error!(
"watchtower-sanity-failure", "watchtower-sanity-failure",
("test", "delinquent-validators", String), ("test", failure_test_name, String),
("err", err.to_string(), String) ("err", failure_error_message, String)
); );
false last_notification_msg = notification_msg;
});
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
if !ok {
last_check_notification_sent = true;
if no_duplicate_notifications {
if last_notification_msg != notify_msg {
notifier.send(&notify_msg);
last_notification_msg = notify_msg;
} else { } else {
datapoint_info!( if !last_notification_msg.is_empty() {
"watchtower-sanity", info!("All clear");
("Suppressing duplicate notification", ok, bool) notifier.send("solana-watchtower: All clear");
);
} }
} else { last_notification_msg = "".into();
notifier.send(&notify_msg);
} }
} else { sleep(config.interval);
if last_check_notification_sent {
notifier.send("solana-watchtower: All Clear");
}
last_check_notification_sent = false;
last_notification_msg = String::from("");
}
sleep(interval);
} }
} }