automerge
This commit is contained in:
@ -9,13 +9,20 @@ use solana_clap_utils::{
|
|||||||
input_parsers::pubkeys_of,
|
input_parsers::pubkeys_of,
|
||||||
input_validators::{is_pubkey_or_keypair, is_url},
|
input_validators::{is_pubkey_or_keypair, is_url},
|
||||||
};
|
};
|
||||||
use solana_cli_config::{Config, CONFIG_FILE};
|
use solana_client::{rpc_client::RpcClient, rpc_response::RpcVoteAccountStatus};
|
||||||
use solana_client::rpc_client::RpcClient;
|
|
||||||
use solana_metrics::{datapoint_error, datapoint_info};
|
use solana_metrics::{datapoint_error, datapoint_info};
|
||||||
use solana_sdk::native_token::lamports_to_sol;
|
use solana_sdk::{hash::Hash, native_token::lamports_to_sol};
|
||||||
use std::{error, io, thread::sleep, time::Duration};
|
use std::{error, io, thread::sleep, time::Duration};
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn error::Error>> {
|
struct Config {
|
||||||
|
interval: Duration,
|
||||||
|
json_rpc_url: String,
|
||||||
|
validator_identity_pubkeys: Vec<String>,
|
||||||
|
no_duplicate_notifications: bool,
|
||||||
|
monitor_active_stake: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_config() -> Config {
|
||||||
let matches = App::new(crate_name!())
|
let matches = App::new(crate_name!())
|
||||||
.about(crate_description!())
|
.about(crate_description!())
|
||||||
.version(solana_clap_utils::version!())
|
.version(solana_clap_utils::version!())
|
||||||
@ -27,7 +34,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
.global(true)
|
.global(true)
|
||||||
.help("Configuration file to use");
|
.help("Configuration file to use");
|
||||||
if let Some(ref config_file) = *CONFIG_FILE {
|
if let Some(ref config_file) = *solana_cli_config::CONFIG_FILE {
|
||||||
arg.default_value(&config_file)
|
arg.default_value(&config_file)
|
||||||
} else {
|
} else {
|
||||||
arg
|
arg
|
||||||
@ -50,7 +57,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
.help("Wait interval seconds between checking the cluster"),
|
.help("Wait interval seconds between checking the cluster"),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("validator_identitys")
|
Arg::with_name("validator_identities")
|
||||||
.long("validator-identity")
|
.long("validator-identity")
|
||||||
.value_name("VALIDATOR IDENTITY PUBKEY")
|
.value_name("VALIDATOR IDENTITY PUBKEY")
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
@ -64,90 +71,81 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
.takes_value(false)
|
.takes_value(false)
|
||||||
.help("Subsequent identical notifications will be suppressed"),
|
.help("Subsequent identical notifications will be suppressed"),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("monitor_active_stake")
|
||||||
|
.long("monitor-active-stake")
|
||||||
|
.takes_value(false)
|
||||||
|
.help("Alert when the current stake for the cluster drops below 80%"),
|
||||||
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
let config = if let Some(config_file) = matches.value_of("config_file") {
|
let config = if let Some(config_file) = matches.value_of("config_file") {
|
||||||
Config::load(config_file).unwrap_or_default()
|
solana_cli_config::Config::load(config_file).unwrap_or_default()
|
||||||
} else {
|
} else {
|
||||||
Config::default()
|
solana_cli_config::Config::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
||||||
let json_rpc_url =
|
let json_rpc_url =
|
||||||
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
|
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
|
||||||
let validator_identity_pubkeys: Vec<_> = pubkeys_of(&matches, "validator_identitys")
|
let validator_identity_pubkeys: Vec<_> = pubkeys_of(&matches, "validator_identities")
|
||||||
.unwrap_or_else(|| vec![])
|
.unwrap_or_else(|| vec![])
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|i| i.to_string())
|
.map(|i| i.to_string())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
|
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
|
||||||
|
let monitor_active_stake = matches.is_present("monitor_active_stake");
|
||||||
|
|
||||||
|
Config {
|
||||||
|
interval,
|
||||||
|
json_rpc_url,
|
||||||
|
validator_identity_pubkeys,
|
||||||
|
no_duplicate_notifications,
|
||||||
|
monitor_active_stake,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_cluster_info(rpc_client: &RpcClient) -> io::Result<(u64, Hash, RpcVoteAccountStatus)> {
|
||||||
|
let transaction_count = rpc_client.get_transaction_count()?;
|
||||||
|
let recent_blockhash = rpc_client.get_recent_blockhash()?.0;
|
||||||
|
let vote_accounts = rpc_client.get_vote_accounts()?;
|
||||||
|
Ok((transaction_count, recent_blockhash, vote_accounts))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
|
let config = get_config();
|
||||||
|
|
||||||
solana_logger::setup_with_default("solana=info");
|
solana_logger::setup_with_default("solana=info");
|
||||||
solana_metrics::set_panic_hook("watchtower");
|
solana_metrics::set_panic_hook("watchtower");
|
||||||
|
|
||||||
info!("RPC URL: {}", json_rpc_url);
|
info!("RPC URL: {}", config.json_rpc_url);
|
||||||
if !validator_identity_pubkeys.is_empty() {
|
if !config.validator_identity_pubkeys.is_empty() {
|
||||||
info!("Monitored validators: {:?}", validator_identity_pubkeys);
|
info!(
|
||||||
|
"Monitored validators: {:?}",
|
||||||
|
config.validator_identity_pubkeys
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let rpc_client = RpcClient::new(json_rpc_url);
|
let rpc_client = RpcClient::new(config.json_rpc_url);
|
||||||
|
|
||||||
let notifier = Notifier::new();
|
let notifier = Notifier::new();
|
||||||
let mut last_transaction_count = 0;
|
let mut last_transaction_count = 0;
|
||||||
let mut last_check_notification_sent = false;
|
let mut last_recent_blockhash = Hash::default();
|
||||||
let mut last_notification_msg = String::from("");
|
let mut last_notification_msg = "".into();
|
||||||
loop {
|
|
||||||
let mut notify_msg = String::from("solana-watchtower: undefined error");
|
|
||||||
let ok = rpc_client
|
|
||||||
.get_transaction_count()
|
|
||||||
.and_then(|transaction_count| {
|
|
||||||
info!("Current transaction count: {}", transaction_count);
|
|
||||||
|
|
||||||
if transaction_count > last_transaction_count {
|
loop {
|
||||||
last_transaction_count = transaction_count;
|
let failure = match get_cluster_info(&rpc_client) {
|
||||||
Ok(true)
|
Ok((transaction_count, recent_blockhash, vote_accounts)) => {
|
||||||
} else {
|
info!("Current transaction count: {}", transaction_count);
|
||||||
Err(io::Error::new(
|
info!("Recent blockhash: {}", recent_blockhash);
|
||||||
io::ErrorKind::Other,
|
info!("Current validator count: {}", vote_accounts.current.len());
|
||||||
format!(
|
info!(
|
||||||
"Transaction count is not advancing: {} <= {}",
|
"Delinquent validator count: {}",
|
||||||
transaction_count, last_transaction_count
|
vote_accounts.delinquent.len()
|
||||||
),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.unwrap_or_else(|err| {
|
|
||||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
|
||||||
datapoint_error!(
|
|
||||||
"watchtower-sanity-failure",
|
|
||||||
("test", "transaction-count", String),
|
|
||||||
("err", err.to_string(), String)
|
|
||||||
);
|
);
|
||||||
false
|
|
||||||
})
|
let mut failures = vec![];
|
||||||
&& rpc_client
|
|
||||||
.get_recent_blockhash()
|
|
||||||
.and_then(|(blockhash, _fee_calculator)| {
|
|
||||||
info!("Current blockhash: {}", blockhash);
|
|
||||||
rpc_client.get_new_blockhash(&blockhash)
|
|
||||||
})
|
|
||||||
.and_then(|(blockhash, _fee_calculator)| {
|
|
||||||
info!("New blockhash: {}", blockhash);
|
|
||||||
Ok(true)
|
|
||||||
})
|
|
||||||
.unwrap_or_else(|err| {
|
|
||||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
|
||||||
datapoint_error!(
|
|
||||||
"watchtower-sanity-failure",
|
|
||||||
("test", "blockhash", String),
|
|
||||||
("err", err.to_string(), String)
|
|
||||||
);
|
|
||||||
false
|
|
||||||
})
|
|
||||||
&& rpc_client
|
|
||||||
.get_vote_accounts()
|
|
||||||
.and_then(|vote_accounts| {
|
|
||||||
|
|
||||||
let total_current_stake = vote_accounts
|
let total_current_stake = vote_accounts
|
||||||
.current
|
.current
|
||||||
@ -168,24 +166,44 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
lamports_to_sol(total_delinquent_stake)
|
lamports_to_sol(total_delinquent_stake)
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("Current validator count: {}", vote_accounts.current.len());
|
if transaction_count > last_transaction_count {
|
||||||
info!(
|
last_transaction_count = transaction_count;
|
||||||
"Delinquent validator count: {}",
|
|
||||||
vote_accounts.delinquent.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
if validator_identity_pubkeys.is_empty() {
|
|
||||||
if vote_accounts.delinquent.is_empty() {
|
|
||||||
Ok(true)
|
|
||||||
} else {
|
} else {
|
||||||
Err(io::Error::new(
|
failures.push((
|
||||||
io::ErrorKind::Other,
|
"transaction-count",
|
||||||
|
format!(
|
||||||
|
"Transaction count is not advancing: {} <= {}",
|
||||||
|
transaction_count, last_transaction_count
|
||||||
|
),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if recent_blockhash != last_recent_blockhash {
|
||||||
|
last_recent_blockhash = recent_blockhash;
|
||||||
|
} else {
|
||||||
|
failures.push((
|
||||||
|
"recent-blockhash",
|
||||||
|
format!("Unable to get new blockhash: {}", recent_blockhash),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.monitor_active_stake && current_stake_percent < 80 {
|
||||||
|
failures.push((
|
||||||
|
"current-stake",
|
||||||
|
format!("Current stake is {}%", current_stake_percent),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.validator_identity_pubkeys.is_empty() {
|
||||||
|
if !vote_accounts.delinquent.is_empty() {
|
||||||
|
failures.push((
|
||||||
|
"delinquent",
|
||||||
format!("{} delinquent validators", vote_accounts.delinquent.len()),
|
format!("{} delinquent validators", vote_accounts.delinquent.len()),
|
||||||
))
|
));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut errors = vec![];
|
let mut errors = vec![];
|
||||||
for validator_identity in validator_identity_pubkeys.iter() {
|
for validator_identity in config.validator_identity_pubkeys.iter() {
|
||||||
if vote_accounts
|
if vote_accounts
|
||||||
.delinquent
|
.delinquent
|
||||||
.iter()
|
.iter()
|
||||||
@ -201,46 +219,38 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if errors.is_empty() {
|
if !errors.is_empty() {
|
||||||
Ok(true)
|
failures.push(("delinquent", errors.join(",")));
|
||||||
} else {
|
|
||||||
Err(io::Error::new(io::ErrorKind::Other, errors.join(",")))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.unwrap_or_else(|err| {
|
failures.into_iter().next() // Only report the first failure if any
|
||||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
}
|
||||||
|
Err(err) => Some(("rpc", err.to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
datapoint_info!("watchtower-sanity", ("ok", failure.is_none(), bool));
|
||||||
|
if let Some((failure_test_name, failure_error_message)) = &failure {
|
||||||
|
let notification_msg = format!(
|
||||||
|
"solana-watchtower: Error: {}: {}",
|
||||||
|
failure_test_name, failure_error_message
|
||||||
|
);
|
||||||
|
if !config.no_duplicate_notifications || last_notification_msg != notification_msg {
|
||||||
|
notifier.send(¬ification_msg);
|
||||||
|
}
|
||||||
datapoint_error!(
|
datapoint_error!(
|
||||||
"watchtower-sanity-failure",
|
"watchtower-sanity-failure",
|
||||||
("test", "delinquent-validators", String),
|
("test", failure_test_name, String),
|
||||||
("err", err.to_string(), String)
|
("err", failure_error_message, String)
|
||||||
);
|
);
|
||||||
false
|
last_notification_msg = notification_msg;
|
||||||
});
|
|
||||||
|
|
||||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
|
||||||
if !ok {
|
|
||||||
last_check_notification_sent = true;
|
|
||||||
if no_duplicate_notifications {
|
|
||||||
if last_notification_msg != notify_msg {
|
|
||||||
notifier.send(¬ify_msg);
|
|
||||||
last_notification_msg = notify_msg;
|
|
||||||
} else {
|
} else {
|
||||||
datapoint_info!(
|
if !last_notification_msg.is_empty() {
|
||||||
"watchtower-sanity",
|
info!("All clear");
|
||||||
("Suppressing duplicate notification", ok, bool)
|
notifier.send("solana-watchtower: All clear");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
} else {
|
last_notification_msg = "".into();
|
||||||
notifier.send(¬ify_msg);
|
|
||||||
}
|
}
|
||||||
} else {
|
sleep(config.interval);
|
||||||
if last_check_notification_sent {
|
|
||||||
notifier.send("solana-watchtower: All Clear");
|
|
||||||
}
|
|
||||||
last_check_notification_sent = false;
|
|
||||||
last_notification_msg = String::from("");
|
|
||||||
}
|
|
||||||
sleep(interval);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user