watchtower: flag to suppress duplicate notifications (#8549)
* watchtower: send error message as notification * watchtower: send all clear notification when ok again * watchtower: add twilio sms notifications * watchtower: flag to suppress duplicate notifications * remove trailing space character * changes as per suggestion on PR * all changes together * cargo fmt
This commit is contained in:
@ -8,6 +8,11 @@ If you only care about the health of one specific validator, the
|
||||
`--validator-identity` command-line argument can be used to restrict failure
|
||||
notifications to issues only affecting that validator.
|
||||
|
||||
If you do not want duplicate notifications, for example if you have elected to
|
||||
receive notifications by SMS, the
|
||||
`--no-duplicate-notifications` command-line argument will suppress identical
|
||||
failure notifications.
|
||||
|
||||
### Metrics
|
||||
#### `watchtower-sanity`
|
||||
On every iteration this data point will be emitted indicating the overall result
|
||||
@ -33,3 +38,10 @@ Telegram requires the following two variables:
|
||||
export TELEGRAM_BOT_TOKEN=...
|
||||
export TELEGRAM_CHAT_ID=...
|
||||
```
|
||||
|
||||
To receive a Twilio SMS notification on failure, having a Twilio account,
|
||||
and a sending number owned by that account,
|
||||
define the `TWILIO_CONFIG` environment variable before running `solana-watchtower`:
|
||||
```
|
||||
export TWILIO_CONFIG='ACCOUNT=<account>,TOKEN=<securityToken>,TO=<receivingNumber>,FROM=<sendingNumber>'
|
||||
```
|
||||
|
@ -56,6 +56,12 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
.validator(is_pubkey_or_keypair)
|
||||
.help("Monitor a specific validator only instead of the entire cluster"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("no_duplicate_notifications")
|
||||
.long("no-duplicate-notifications")
|
||||
.takes_value(false)
|
||||
.help("Subsequent identical notifications will be suppressed"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let config = if let Some(config_file) = matches.value_of("config_file") {
|
||||
@ -68,6 +74,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
let json_rpc_url =
|
||||
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
|
||||
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
|
||||
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
|
||||
|
||||
solana_logger::setup_with_default("solana=info");
|
||||
solana_metrics::set_panic_hook("watchtower");
|
||||
@ -77,7 +84,10 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
|
||||
let notifier = Notifier::new();
|
||||
let mut last_transaction_count = 0;
|
||||
let mut last_check_notification_sent = false;
|
||||
let mut last_notification_msg = String::from("");
|
||||
loop {
|
||||
let mut notify_msg = String::from("solana-watchtower: undefined error");
|
||||
let ok = rpc_client
|
||||
.get_transaction_count()
|
||||
.and_then(|transaction_count| {
|
||||
@ -97,6 +107,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "transaction-count", String),
|
||||
@ -115,6 +126,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
Ok(true)
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "blockhash", String),
|
||||
@ -171,6 +183,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "delinquent-validators", String),
|
||||
@ -181,7 +194,26 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
|
||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||
if !ok {
|
||||
notifier.send("solana-watchtower sanity failure");
|
||||
last_check_notification_sent = true;
|
||||
if no_duplicate_notifications {
|
||||
if last_notification_msg != notify_msg {
|
||||
notifier.send(&notify_msg);
|
||||
last_notification_msg = notify_msg;
|
||||
} else {
|
||||
datapoint_info!(
|
||||
"watchtower-sanity",
|
||||
("Suppressing duplicate notification", ok, bool)
|
||||
);
|
||||
}
|
||||
} else {
|
||||
notifier.send(&notify_msg);
|
||||
}
|
||||
} else {
|
||||
if last_check_notification_sent {
|
||||
notifier.send("solana-watchtower: All Clear");
|
||||
}
|
||||
last_check_notification_sent = false;
|
||||
last_notification_msg = String::from("");
|
||||
}
|
||||
sleep(interval);
|
||||
}
|
||||
|
@ -8,11 +8,60 @@ struct TelegramWebHook {
|
||||
chat_id: String,
|
||||
}
|
||||
|
||||
/// Settings needed to send an SMS through Twilio.
///
/// `Default` yields a value with every field empty, which `complete` reports
/// as unusable.
#[derive(Debug, Default)]
struct TwilioWebHook {
    /// Twilio account identifier; used in the request URL.
    account: String,
    /// Security token paired with `account`; used in the request URL.
    token: String,
    /// Number the message is sent to (the `To` form field).
    to: String,
    /// Number the message is sent from (the `From` form field).
    from: String,
}

impl TwilioWebHook {
    /// Returns `true` only when all four fields are non-empty.
    fn complete(&self) -> bool {
        let required = [&self.account, &self.token, &self.to, &self.from];
        required.iter().all(|field| !field.is_empty())
    }
}
|
||||
|
||||
fn get_twilio_config() -> Result<Option<TwilioWebHook>, String> {
|
||||
let config_var = env::var("TWILIO_CONFIG");
|
||||
|
||||
if config_var.is_err() {
|
||||
info!("Twilio notifications disabled");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut config = TwilioWebHook::default();
|
||||
|
||||
for pair in config_var.unwrap().split(',') {
|
||||
let nv: Vec<_> = pair.split('=').collect();
|
||||
if nv.len() != 2 {
|
||||
return Err(format!("TWILIO_CONFIG is invalid: '{}'", pair));
|
||||
}
|
||||
let v = nv[1].to_string();
|
||||
match nv[0] {
|
||||
"ACCOUNT" => config.account = v,
|
||||
"TOKEN" => config.token = v,
|
||||
"TO" => config.to = v,
|
||||
"FROM" => config.from = v,
|
||||
_ => return Err(format!("TWILIO_CONFIG is invalid: '{}'", pair)),
|
||||
}
|
||||
}
|
||||
|
||||
if !config.complete() {
|
||||
return Err("TWILIO_CONFIG is incomplete".to_string());
|
||||
}
|
||||
Ok(Some(config))
|
||||
}
|
||||
|
||||
/// Sends watchtower notifications to whichever channels are configured.
///
/// `client` is the HTTP client used to post messages. Each `*_webhook` field
/// is `None` when that channel is disabled: `telegram_webhook` is set to
/// `None` when Telegram notifications are reported as disabled, and
/// `twilio_webhook` holds whatever `get_twilio_config()` returned.
/// NOTE(review): `discord_webhook` and `slack_webhook` presumably hold webhook
/// URLs; confirm against `Notifier::new`.
pub struct Notifier {
|
||||
client: Client,
|
||||
discord_webhook: Option<String>,
|
||||
slack_webhook: Option<String>,
|
||||
telegram_webhook: Option<TelegramWebHook>,
|
||||
twilio_webhook: Option<TwilioWebHook>,
|
||||
}
|
||||
|
||||
impl Notifier {
|
||||
@ -35,12 +84,16 @@ impl Notifier {
|
||||
info!("Telegram notifications disabled");
|
||||
None
|
||||
};
|
||||
let twilio_webhook = get_twilio_config()
|
||||
.map_err(|err| panic!("Twilio config error: {}", err))
|
||||
.unwrap();
|
||||
|
||||
Notifier {
|
||||
client: Client::new(),
|
||||
discord_webhook,
|
||||
slack_webhook,
|
||||
telegram_webhook,
|
||||
twilio_webhook,
|
||||
}
|
||||
}
|
||||
|
||||
@ -67,5 +120,22 @@ impl Notifier {
|
||||
warn!("Failed to send Telegram message: {:?}", err);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(TwilioWebHook {
|
||||
account,
|
||||
token,
|
||||
to,
|
||||
from,
|
||||
}) = &self.twilio_webhook
|
||||
{
|
||||
let url = format!(
|
||||
"https://{}:{}@api.twilio.com/2010-04-01/Accounts/{}/Messages.json",
|
||||
account, token, account
|
||||
);
|
||||
let params = [("To", to), ("From", from), ("Body", &msg.to_string())];
|
||||
if let Err(err) = self.client.post(&url).form(&params).send() {
|
||||
warn!("Failed to send Twilio message: {:?}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user