watchtower: Add Slack/Discord sanity failure notification (#7467)
automerge
This commit is contained in:
22
Cargo.lock
generated
22
Cargo.lock
generated
@ -294,7 +294,7 @@ version = "0.21.5"
|
||||
dependencies = [
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hex 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -2700,7 +2700,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.9.22"
|
||||
version = "0.9.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3255,7 +3255,7 @@ dependencies = [
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num-traits 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pretty-hex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3289,7 +3289,7 @@ dependencies = [
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3359,7 +3359,7 @@ dependencies = [
|
||||
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3410,7 +3410,7 @@ dependencies = [
|
||||
"libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3537,7 +3537,7 @@ dependencies = [
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -3721,7 +3721,7 @@ dependencies = [
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serial_test 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serial_test_derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solana-sdk 0.21.5",
|
||||
@ -4029,7 +4029,7 @@ dependencies = [
|
||||
"gag 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solana-clap-utils 0.21.5",
|
||||
"solana-client 0.21.5",
|
||||
@ -4102,6 +4102,8 @@ version = "0.21.5"
|
||||
dependencies = [
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solana-clap-utils 0.21.5",
|
||||
"solana-client 0.21.5",
|
||||
"solana-logger 0.21.5",
|
||||
@ -5733,7 +5735,7 @@ dependencies = [
|
||||
"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
|
||||
"checksum rental 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "01916ebd9fc2e81978a5dc9542a2fa47f5bb2ca3402e14c7cc42d6e3c5123e1f"
|
||||
"checksum rental-impl 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "82260d54cf2cbe9608df161f7e7c98e81fae702aa13af9e4d5d39dc2ffb25ab6"
|
||||
"checksum reqwest 0.9.22 (registry+https://github.com/rust-lang/crates.io-index)" = "2c2064233e442ce85c77231ebd67d9eca395207dec2127fe0bbedde4bd29a650"
|
||||
"checksum reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)" = "f88643aea3c1343c804950d7bf983bd2067f5ab59db6d613a08e05572f2714ab"
|
||||
"checksum rgb 0.8.13 (registry+https://github.com/rust-lang/crates.io-index)" = "4f089652ca87f5a82a62935ec6172a534066c7b97be003cc8f702ee9a7a59c92"
|
||||
"checksum ring 0.16.7 (registry+https://github.com/rust-lang/crates.io-index)" = "796ae8317a07b04dffb1983bdc7045ccd02f741f0b411704f07fd35dbf99f757"
|
||||
"checksum rocksdb 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "12069b106981c6103d3eab7dd1c86751482d0779a520b7c14954c8b586c1e643"
|
||||
|
@ -108,11 +108,12 @@ pub fn authorize(
|
||||
}
|
||||
|
||||
pub fn update_node(
|
||||
vote_pubkey: &Pubkey, // vote account
|
||||
authorized_pubkey: &Pubkey, // currently authorized
|
||||
vote_pubkey: &Pubkey,
|
||||
authorized_voter_pubkey: &Pubkey,
|
||||
node_pubkey: &Pubkey,
|
||||
) -> Instruction {
|
||||
let account_metas = vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_pubkey);
|
||||
let account_metas =
|
||||
vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_voter_pubkey);
|
||||
|
||||
Instruction::new(
|
||||
id(),
|
||||
|
@ -11,6 +11,8 @@ homepage = "https://solana.com/"
|
||||
[dependencies]
|
||||
clap = "2.33.0"
|
||||
log = "0.4.8"
|
||||
reqwest = { version = "0.9.24", default-features = false, features = ["rustls-tls"] }
|
||||
serde_json = "1.0"
|
||||
solana-clap-utils = { path = "../clap-utils", version = "0.21.5" }
|
||||
solana-client = { path = "../client", version = "0.21.5" }
|
||||
solana-logger = { path = "../logger", version = "0.21.5" }
|
||||
|
@ -1,7 +1,8 @@
|
||||
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
||||
periodically polls the cluster over an RPC API to confirm that the transaction
|
||||
count is advancing, new blockhashes are available, and no validators are
|
||||
delinquent. Results are reported as InfluxDB metrics.
|
||||
delinquent. Results are reported as InfluxDB metrics, with an optional
|
||||
Slack/Discord push notification on sanity failure.
|
||||
|
||||
If you only care about the health of one specific validator, the
|
||||
`--validator-identity` command-line argument can be used to restrict failure
|
||||
@ -18,3 +19,11 @@ the following fields:
|
||||
* `test`: name of the sanity test that failed
|
||||
* `err`: exact sanity failure message
|
||||
|
||||
|
||||
### Sanity failure push notification
|
||||
To receive a Slack and/or Discord notification on sanity failure, define one or
|
||||
both of these environment variables before running `solana-watchtower`:
|
||||
```
|
||||
export SLACK_WEBHOOK=...
|
||||
export DISCORD_WEBHOOK=...
|
||||
```
|
||||
|
@ -1,5 +1,8 @@
|
||||
//! A command-line executable for monitoring the health of a cluster
|
||||
|
||||
mod notifier;
|
||||
|
||||
use crate::notifier::Notifier;
|
||||
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||
use log::*;
|
||||
use solana_clap_utils::{
|
||||
@ -50,6 +53,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
|
||||
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
||||
|
||||
let notifier = Notifier::new();
|
||||
let mut last_transaction_count = 0;
|
||||
loop {
|
||||
let ok = rpc_client
|
||||
@ -154,6 +158,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
});
|
||||
|
||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||
if !ok {
|
||||
notifier.send("solana-watchtower sanity failure");
|
||||
}
|
||||
sleep(interval);
|
||||
}
|
||||
}
|
||||
|
46
watchtower/src/notifier.rs
Normal file
46
watchtower/src/notifier.rs
Normal file
@ -0,0 +1,46 @@
|
||||
use log::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
use std::env;
|
||||
|
||||
pub struct Notifier {
|
||||
client: Client,
|
||||
discord_webhook: Option<String>,
|
||||
slack_webhook: Option<String>,
|
||||
}
|
||||
|
||||
impl Notifier {
|
||||
pub fn new() -> Self {
|
||||
let discord_webhook = env::var("DISCORD_WEBHOOK")
|
||||
.map_err(|_| {
|
||||
warn!("Discord notifications disabled");
|
||||
})
|
||||
.ok();
|
||||
let slack_webhook = env::var("SLACK_WEBHOOK")
|
||||
.map_err(|_| {
|
||||
warn!("Slack notifications disabled");
|
||||
})
|
||||
.ok();
|
||||
Notifier {
|
||||
client: Client::new(),
|
||||
discord_webhook,
|
||||
slack_webhook,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn send(&self, msg: &str) {
|
||||
if let Some(webhook) = &self.discord_webhook {
|
||||
let data = json!({ "content": msg });
|
||||
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||
warn!("Failed to send Discord message: {:?}", err);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(webhook) = &self.slack_webhook {
|
||||
let data = json!({ "text": msg });
|
||||
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||
warn!("Failed to send Slack message: {:?}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user