From 55dee2901ec47ff660c8ef3b9999eaadc0555a7d Mon Sep 17 00:00:00 2001 From: HM <43654523+HM999@users.noreply.github.com> Date: Tue, 3 Mar 2020 06:37:57 +0000 Subject: [PATCH] watchtower: flag to suppress duplicate notifications (#8549) * watchtower: send error message as notification * watchtower: send all clear notification when ok again * watchtower: add twilio sms notifications * watchtower: flag to suppress duplicate notifications * remove trailing space character * changes as per suggestion on PR * all changes together * cargo fmt --- watchtower/README.md | 12 +++++++ watchtower/src/main.rs | 34 +++++++++++++++++- watchtower/src/notifier.rs | 70 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/watchtower/README.md b/watchtower/README.md index 3b27ebee26..8bcd59ff27 100644 --- a/watchtower/README.md +++ b/watchtower/README.md @@ -8,6 +8,11 @@ If you only care about the health of one specific validator, the `--validator-identity` command-line argument can be used to restrict failure notifications to issues only affecting that validator. +If you do not want duplicate notifications, for example if you have elected to +receive notifications by SMS, the +`--no-duplicate-notifications` command-line argument will suppress identical +failure notifications. + ### Metrics #### `watchtower-sanity` On every iteration this data point will be emitted indicating the overall result @@ -33,3 +38,10 @@ Telegram requires the following two variables: export TELEGRAM_BOT_TOKEN=... export TELEGRAM_CHAT_ID=... 
``` + +To receive a Twilio SMS notification on failure, you need a Twilio account +and a sending number owned by that account. +Define the following environment variable before running `solana-watchtower`: +``` +export TWILIO_CONFIG='ACCOUNT=<account>,TOKEN=<securityToken>,TO=<receivingNumber>,FROM=<sendingNumber>' +``` diff --git a/watchtower/src/main.rs b/watchtower/src/main.rs index e466857739..f39e3b492e 100644 --- a/watchtower/src/main.rs +++ b/watchtower/src/main.rs @@ -56,6 +56,12 @@ fn main() -> Result<(), Box> { .validator(is_pubkey_or_keypair) .help("Monitor a specific validator only instead of the entire cluster"), ) + .arg( + Arg::with_name("no_duplicate_notifications") + .long("no-duplicate-notifications") + .takes_value(false) + .help("Subsequent identical notifications will be suppressed"), + ) .get_matches(); let config = if let Some(config_file) = matches.value_of("config_file") { @@ -68,6 +74,7 @@ fn main() -> Result<(), Box> { let json_rpc_url = value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url); let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string()); + let no_duplicate_notifications = matches.is_present("no_duplicate_notifications"); solana_logger::setup_with_default("solana=info"); solana_metrics::set_panic_hook("watchtower"); @@ -77,7 +84,10 @@ fn main() -> Result<(), Box> { let notifier = Notifier::new(); let mut last_transaction_count = 0; + let mut last_check_notification_sent = false; + let mut last_notification_msg = String::from(""); loop { + let mut notify_msg = String::from("solana-watchtower: undefined error"); let ok = rpc_client .get_transaction_count() .and_then(|transaction_count| { @@ -97,6 +107,7 @@ fn main() -> Result<(), Box> { } }) .unwrap_or_else(|err| { + notify_msg = format!("solana-watchtower: {}", err.to_string()); datapoint_error!( "watchtower-sanity-failure", ("test", "transaction-count", String), @@ -115,6 +126,7 @@ fn main() -> Result<(), Box> { Ok(true) }) .unwrap_or_else(|err| { + notify_msg = 
format!("solana-watchtower: {}", err.to_string()); datapoint_error!( "watchtower-sanity-failure", ("test", "blockhash", String), @@ -171,6 +183,7 @@ fn main() -> Result<(), Box> { } }) .unwrap_or_else(|err| { + notify_msg = format!("solana-watchtower: {}", err.to_string()); datapoint_error!( "watchtower-sanity-failure", ("test", "delinquent-validators", String), @@ -181,7 +194,26 @@ fn main() -> Result<(), Box> { datapoint_info!("watchtower-sanity", ("ok", ok, bool)); if !ok { - notifier.send("solana-watchtower sanity failure"); + last_check_notification_sent = true; + if no_duplicate_notifications { + if last_notification_msg != notify_msg { + notifier.send(&notify_msg); + last_notification_msg = notify_msg; + } else { + datapoint_info!( + "watchtower-sanity", + ("Suppressing duplicate notification", ok, bool) + ); + } + } else { + notifier.send(&notify_msg); + } + } else { + if last_check_notification_sent { + notifier.send("solana-watchtower: All Clear"); + } + last_check_notification_sent = false; + last_notification_msg = String::from(""); } sleep(interval); } diff --git a/watchtower/src/notifier.rs b/watchtower/src/notifier.rs index d88b067520..e0e414fba6 100644 --- a/watchtower/src/notifier.rs +++ b/watchtower/src/notifier.rs @@ -8,11 +8,60 @@ struct TelegramWebHook { chat_id: String, } +#[derive(Debug, Default)] +struct TwilioWebHook { + account: String, + token: String, + to: String, + from: String, +} + +impl TwilioWebHook { + fn complete(&self) -> bool { + !(self.account.is_empty() + || self.token.is_empty() + || self.to.is_empty() + || self.from.is_empty()) + } +} + +fn get_twilio_config() -> Result<Option<TwilioWebHook>, String> { + let config_var = env::var("TWILIO_CONFIG"); + + if config_var.is_err() { + info!("Twilio notifications disabled"); + return Ok(None); + } + + let mut config = TwilioWebHook::default(); + + for pair in config_var.unwrap().split(',') { + let nv: Vec<_> = pair.split('=').collect(); + if nv.len() != 2 { + return Err(format!("TWILIO_CONFIG is invalid: 
'{}'", pair)); + } + let v = nv[1].to_string(); + match nv[0] { + "ACCOUNT" => config.account = v, + "TOKEN" => config.token = v, + "TO" => config.to = v, + "FROM" => config.from = v, + _ => return Err(format!("TWILIO_CONFIG is invalid: '{}'", pair)), + } + } + + if !config.complete() { + return Err("TWILIO_CONFIG is incomplete".to_string()); + } + Ok(Some(config)) +} + pub struct Notifier { client: Client, discord_webhook: Option, slack_webhook: Option, telegram_webhook: Option, + twilio_webhook: Option<TwilioWebHook>, } impl Notifier { @@ -35,12 +84,16 @@ impl Notifier { info!("Telegram notifications disabled"); None }; + let twilio_webhook = get_twilio_config() + .map_err(|err| panic!("Twilio config error: {}", err)) + .unwrap(); Notifier { client: Client::new(), discord_webhook, slack_webhook, telegram_webhook, + twilio_webhook, } } @@ -67,5 +120,22 @@ impl Notifier { warn!("Failed to send Telegram message: {:?}", err); } } + + if let Some(TwilioWebHook { + account, + token, + to, + from, + }) = &self.twilio_webhook + { + let url = format!( + "https://{}:{}@api.twilio.com/2010-04-01/Accounts/{}/Messages.json", + account, token, account + ); + let params = [("To", to), ("From", from), ("Body", &msg.to_string())]; + if let Err(err) = self.client.post(&url).form(&params).send() { + warn!("Failed to send Twilio message: {:?}", err); + } + } } }