watchtower: Add Slack/Discord sanity failure notification (#7467)

automerge
This commit is contained in:
Michael Vines
2019-12-13 00:49:16 -07:00
parent 2ef7579b6c
commit 5a2a34b035
6 changed files with 81 additions and 14 deletions

22
Cargo.lock generated
View File

@ -294,7 +294,7 @@ version = "0.21.5"
dependencies = [ dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hex 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2700,7 +2700,7 @@ dependencies = [
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.9.22" version = "0.9.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3255,7 +3255,7 @@ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty-hex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "pretty-hex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3289,7 +3289,7 @@ dependencies = [
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3359,7 +3359,7 @@ dependencies = [
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3410,7 +3410,7 @@ dependencies = [
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.42 (registry+https://github.com/rust-lang/crates.io-index)", "syn 0.15.42 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "syn 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3537,7 +3537,7 @@ dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)", "serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3721,7 +3721,7 @@ dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serial_test 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serial_test 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serial_test_derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serial_test_derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"solana-sdk 0.21.5", "solana-sdk 0.21.5",
@ -4029,7 +4029,7 @@ dependencies = [
"gag 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "gag 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
"solana-clap-utils 0.21.5", "solana-clap-utils 0.21.5",
"solana-client 0.21.5", "solana-client 0.21.5",
@ -4102,6 +4102,8 @@ version = "0.21.5"
dependencies = [ dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
"solana-clap-utils 0.21.5", "solana-clap-utils 0.21.5",
"solana-client 0.21.5", "solana-client 0.21.5",
"solana-logger 0.21.5", "solana-logger 0.21.5",
@ -5733,7 +5735,7 @@ dependencies = [
"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" "checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
"checksum rental 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "01916ebd9fc2e81978a5dc9542a2fa47f5bb2ca3402e14c7cc42d6e3c5123e1f" "checksum rental 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "01916ebd9fc2e81978a5dc9542a2fa47f5bb2ca3402e14c7cc42d6e3c5123e1f"
"checksum rental-impl 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "82260d54cf2cbe9608df161f7e7c98e81fae702aa13af9e4d5d39dc2ffb25ab6" "checksum rental-impl 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "82260d54cf2cbe9608df161f7e7c98e81fae702aa13af9e4d5d39dc2ffb25ab6"
"checksum reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)" = "2c2064233e442ce85c77231ebd67d9eca395207dec2127fe0bbedde4bd29a650" "checksum reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)" = "f88643aea3c1343c804950d7bf983bd2067f5ab59db6d613a08e05572f2714ab"
"checksum rgb 0.8.13 (registry+https://github.com/rust-lang/crates.io-index)" = "4f089652ca87f5a82a62935ec6172a534066c7b97be003cc8f702ee9a7a59c92" "checksum rgb 0.8.13 (registry+https://github.com/rust-lang/crates.io-index)" = "4f089652ca87f5a82a62935ec6172a534066c7b97be003cc8f702ee9a7a59c92"
"checksum ring 0.16.7 (registry+https://github.com/rust-lang/crates.io-index)" = "796ae8317a07b04dffb1983bdc7045ccd02f741f0b411704f07fd35dbf99f757" "checksum ring 0.16.7 (registry+https://github.com/rust-lang/crates.io-index)" = "796ae8317a07b04dffb1983bdc7045ccd02f741f0b411704f07fd35dbf99f757"
"checksum rocksdb 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "12069b106981c6103d3eab7dd1c86751482d0779a520b7c14954c8b586c1e643" "checksum rocksdb 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "12069b106981c6103d3eab7dd1c86751482d0779a520b7c14954c8b586c1e643"

View File

@ -108,11 +108,12 @@ pub fn authorize(
} }
pub fn update_node( pub fn update_node(
vote_pubkey: &Pubkey, // vote account vote_pubkey: &Pubkey,
authorized_pubkey: &Pubkey, // currently authorized authorized_voter_pubkey: &Pubkey,
node_pubkey: &Pubkey, node_pubkey: &Pubkey,
) -> Instruction { ) -> Instruction {
let account_metas = vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_pubkey); let account_metas =
vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_voter_pubkey);
Instruction::new( Instruction::new(
id(), id(),

View File

@ -11,6 +11,8 @@ homepage = "https://solana.com/"
[dependencies] [dependencies]
clap = "2.33.0" clap = "2.33.0"
log = "0.4.8" log = "0.4.8"
reqwest = { version = "0.9.24", default-features = false, features = ["rustls-tls"] }
serde_json = "1.0"
solana-clap-utils = { path = "../clap-utils", version = "0.21.5" } solana-clap-utils = { path = "../clap-utils", version = "0.21.5" }
solana-client = { path = "../client", version = "0.21.5" } solana-client = { path = "../client", version = "0.21.5" }
solana-logger = { path = "../logger", version = "0.21.5" } solana-logger = { path = "../logger", version = "0.21.5" }

View File

@ -1,7 +1,8 @@
The `solana-watchtower` program is used to monitor the health of a cluster. It The `solana-watchtower` program is used to monitor the health of a cluster. It
periodically polls the cluster over an RPC API to confirm that the transaction periodically polls the cluster over an RPC API to confirm that the transaction
count is advancing, new blockhashes are available, and no validators are count is advancing, new blockhashes are available, and no validators are
delinquent. Results are reported as InfluxDB metrics. delinquent. Results are reported as InfluxDB metrics, with an optional
Slack/Discord push notification on sanity failure.
If you only care about the health of one specific validator, the If you only care about the health of one specific validator, the
`--validator-identity` command-line argument can be used to restrict failure `--validator-identity` command-line argument can be used to restrict failure
@ -18,3 +19,11 @@ the following fields:
* `test`: name of the sanity test that failed * `test`: name of the sanity test that failed
* `err`: exact sanity failure message * `err`: exact sanity failure message
### Sanity failure push notification
To receive a Slack and/or Discord notification on sanity failure, define one or
both of these environment variables before running `solana-watchtower`:
```
export SLACK_WEBHOOK=...
export DISCORD_WEBHOOK=...
```

View File

@ -1,5 +1,8 @@
//! A command-line executable for monitoring the health of a cluster //! A command-line executable for monitoring the health of a cluster
mod notifier;
use crate::notifier::Notifier;
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg}; use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
use log::*; use log::*;
use solana_clap_utils::{ use solana_clap_utils::{
@ -50,6 +53,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
let rpc_client = RpcClient::new(json_rpc_url.to_string()); let rpc_client = RpcClient::new(json_rpc_url.to_string());
let notifier = Notifier::new();
let mut last_transaction_count = 0; let mut last_transaction_count = 0;
loop { loop {
let ok = rpc_client let ok = rpc_client
@ -154,6 +158,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {
}); });
datapoint_info!("watchtower-sanity", ("ok", ok, bool)); datapoint_info!("watchtower-sanity", ("ok", ok, bool));
if !ok {
notifier.send("solana-watchtower sanity failure");
}
sleep(interval); sleep(interval);
} }
} }

View File

@ -0,0 +1,46 @@
use log::*;
use reqwest::Client;
use serde_json::json;
use std::env;
pub struct Notifier {
client: Client,
discord_webhook: Option<String>,
slack_webhook: Option<String>,
}
impl Notifier {
pub fn new() -> Self {
let discord_webhook = env::var("DISCORD_WEBHOOK")
.map_err(|_| {
warn!("Discord notifications disabled");
})
.ok();
let slack_webhook = env::var("SLACK_WEBHOOK")
.map_err(|_| {
warn!("Slack notifications disabled");
})
.ok();
Notifier {
client: Client::new(),
discord_webhook,
slack_webhook,
}
}
pub fn send(&self, msg: &str) {
if let Some(webhook) = &self.discord_webhook {
let data = json!({ "content": msg });
if let Err(err) = self.client.post(webhook).json(&data).send() {
warn!("Failed to send Discord message: {:?}", err);
}
}
if let Some(webhook) = &self.slack_webhook {
let data = json!({ "text": msg });
if let Err(err) = self.client.post(webhook).json(&data).send() {
warn!("Failed to send Slack message: {:?}", err);
}
}
}
}