watchtower: flag to suppress duplicate notifications (#8549)

* watchtower: send error message as notification

* watchtower: send all clear notification when ok again

* watchtower: add twilio sms notifications

* watchtower: flag to suppress duplicate notifications

* remove trailing space character

* changes as per suggestion on PR

* all changes together

* cargo fmt
This commit is contained in:
HM
2020-03-03 06:37:57 +00:00
committed by Michael Vines
parent 2b0824d18b
commit 55dee2901e
3 changed files with 115 additions and 1 deletions

View File

@ -8,6 +8,11 @@ If you only care about the health of one specific validator, the
`--validator-identity` command-line argument can be used to restrict failure
notifications to issues only affecting that validator.
If you do not want duplicate notifications, for example if you have elected to
receive notifications by SMS, the
`--no-duplicate-notifications` command-line argument will suppress consecutive
identical failure notifications.
### Metrics
#### `watchtower-sanity`
On every iteration this data point will be emitted indicating the overall result
@ -33,3 +38,10 @@ Telegram requires the following two variables:
export TELEGRAM_BOT_TOKEN=...
export TELEGRAM_CHAT_ID=...
```
To receive a Twilio SMS notification on failure, you need a Twilio account
and a sending number owned by that account.
Define the following environment variable before running `solana-watchtower`:
```
export TWILIO_CONFIG='ACCOUNT=<account>,TOKEN=<securityToken>,TO=<receivingNumber>,FROM=<sendingNumber>'
```

View File

@ -56,6 +56,12 @@ fn main() -> Result<(), Box<dyn error::Error>> {
.validator(is_pubkey_or_keypair)
.help("Monitor a specific validator only instead of the entire cluster"),
)
.arg(
Arg::with_name("no_duplicate_notifications")
.long("no-duplicate-notifications")
.takes_value(false)
.help("Subsequent identical notifications will be suppressed"),
)
.get_matches();
let config = if let Some(config_file) = matches.value_of("config_file") {
@ -68,6 +74,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
let json_rpc_url =
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
solana_logger::setup_with_default("solana=info");
solana_metrics::set_panic_hook("watchtower");
@ -77,7 +84,10 @@ fn main() -> Result<(), Box<dyn error::Error>> {
let notifier = Notifier::new();
let mut last_transaction_count = 0;
let mut last_check_notification_sent = false;
let mut last_notification_msg = String::from("");
loop {
let mut notify_msg = String::from("solana-watchtower: undefined error");
let ok = rpc_client
.get_transaction_count()
.and_then(|transaction_count| {
@ -97,6 +107,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
}
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
datapoint_error!(
"watchtower-sanity-failure",
("test", "transaction-count", String),
@ -115,6 +126,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
Ok(true)
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
datapoint_error!(
"watchtower-sanity-failure",
("test", "blockhash", String),
@ -171,6 +183,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
}
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
datapoint_error!(
"watchtower-sanity-failure",
("test", "delinquent-validators", String),
@ -181,7 +194,26 @@ fn main() -> Result<(), Box<dyn error::Error>> {
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
if !ok {
notifier.send("solana-watchtower sanity failure");
last_check_notification_sent = true;
if no_duplicate_notifications {
if last_notification_msg != notify_msg {
notifier.send(&notify_msg);
last_notification_msg = notify_msg;
} else {
datapoint_info!(
"watchtower-sanity",
("Suppressing duplicate notification", ok, bool)
);
}
} else {
notifier.send(&notify_msg);
}
} else {
if last_check_notification_sent {
notifier.send("solana-watchtower: All Clear");
}
last_check_notification_sent = false;
last_notification_msg = String::from("");
}
sleep(interval);
}

View File

@ -8,11 +8,60 @@ struct TelegramWebHook {
chat_id: String,
}
/// Settings required to send an SMS through Twilio.
///
/// All fields default to the empty string, which [`TwilioWebHook::complete`]
/// treats as "not provided".
#[derive(Debug, Default)]
struct TwilioWebHook {
    /// Twilio account identifier (the `ACCOUNT` entry of `TWILIO_CONFIG`).
    account: String,
    /// Security token for the account (the `TOKEN` entry of `TWILIO_CONFIG`).
    token: String,
    /// Number that receives the message (the `TO` entry of `TWILIO_CONFIG`).
    to: String,
    /// Number the message is sent from (the `FROM` entry of `TWILIO_CONFIG`).
    from: String,
}

impl TwilioWebHook {
    /// Returns `true` only when every field holds a non-empty value.
    fn complete(&self) -> bool {
        [&self.account, &self.token, &self.to, &self.from]
            .iter()
            .all(|field| !field.is_empty())
    }
}
/// Builds the Twilio settings from the `TWILIO_CONFIG` environment variable.
///
/// The variable is a comma-separated list of `NAME=value` entries, where `NAME`
/// is one of `ACCOUNT`, `TOKEN`, `TO` or `FROM`.
///
/// Returns `Ok(None)` when the variable cannot be read (Twilio notifications are
/// then disabled), `Ok(Some(_))` when all four values are present, and `Err`
/// with a description when an entry is malformed, names an unknown key, or a
/// value is missing or empty.
fn get_twilio_config() -> Result<Option<TwilioWebHook>, String> {
    let raw = match env::var("TWILIO_CONFIG") {
        Ok(raw) => raw,
        Err(_) => {
            info!("Twilio notifications disabled");
            return Ok(None);
        }
    };

    let mut hook = TwilioWebHook::default();
    for entry in raw.split(',') {
        let invalid = || format!("TWILIO_CONFIG is invalid: '{}'", entry);

        // An entry must contain exactly one '=': a name and a value, nothing more.
        let mut pieces = entry.split('=');
        let (name, value) = match (pieces.next(), pieces.next(), pieces.next()) {
            (Some(name), Some(value), None) => (name, value.to_string()),
            _ => return Err(invalid()),
        };

        let slot = match name {
            "ACCOUNT" => &mut hook.account,
            "TOKEN" => &mut hook.token,
            "TO" => &mut hook.to,
            "FROM" => &mut hook.from,
            _ => return Err(invalid()),
        };
        *slot = value;
    }

    if hook.complete() {
        Ok(Some(hook))
    } else {
        Err("TWILIO_CONFIG is incomplete".to_string())
    }
}
/// Holds the HTTP client together with the optional settings of each
/// notification service.
///
/// The Telegram and Twilio entries are `None` when that service is not
/// configured. NOTE(review): Discord and Slack presumably follow the same
/// convention - confirm against `Notifier::new`.
pub struct Notifier {
// HTTP client used to post the outgoing notification requests.
client: Client,
// Presumably the Discord webhook URL - TODO confirm against `Notifier::new`.
discord_webhook: Option<String>,
// Presumably the Slack webhook URL - TODO confirm against `Notifier::new`.
slack_webhook: Option<String>,
// Telegram settings; `None` when Telegram notifications are disabled.
telegram_webhook: Option<TelegramWebHook>,
// Twilio SMS settings produced by `get_twilio_config`; `None` when disabled.
twilio_webhook: Option<TwilioWebHook>,
}
impl Notifier {
@ -35,12 +84,16 @@ impl Notifier {
info!("Telegram notifications disabled");
None
};
// A malformed or incomplete TWILIO_CONFIG aborts start-up with a panic; an
// unset variable simply yields `None`.
let twilio_webhook =
    get_twilio_config().unwrap_or_else(|err| panic!("Twilio config error: {}", err));
Notifier {
client: Client::new(),
discord_webhook,
slack_webhook,
telegram_webhook,
twilio_webhook,
}
}
@ -67,5 +120,22 @@ impl Notifier {
warn!("Failed to send Telegram message: {:?}", err);
}
}
// Twilio SMS: only attempted when a Twilio configuration is present.
if let Some(TwilioWebHook {
    account,
    token,
    to,
    from,
}) = &self.twilio_webhook
{
    // The account and token are sent as HTTP Basic credentials instead of
    // being embedded in the URL's userinfo section. This keeps the token out
    // of the request URL (and so out of anything that prints that URL) and
    // avoids depending on how URL userinfo is parsed and escaped.
    let url = format!(
        "https://api.twilio.com/2010-04-01/Accounts/{}/Messages.json",
        account
    );
    let body = msg.to_string();
    let params = [("To", to), ("From", from), ("Body", &body)];
    let response = self
        .client
        .post(&url)
        .basic_auth(account, Some(token))
        .form(&params)
        .send();
    // Best effort: a failed SMS is logged but does not stop the watchtower.
    if let Err(err) = response {
        warn!("Failed to send Twilio message: {:?}", err);
    }
}
}
}