watchtower: Add Slack/Discord sanity failure notification (#7467)
automerge
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -4103,6 +4103,8 @@ version = "0.22.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"solana-clap-utils 0.22.0",
|
"solana-clap-utils 0.22.0",
|
||||||
"solana-client 0.22.0",
|
"solana-client 0.22.0",
|
||||||
"solana-logger 0.22.0",
|
"solana-logger 0.22.0",
|
||||||
|
@ -103,11 +103,12 @@ pub fn authorize(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_node(
|
pub fn update_node(
|
||||||
vote_pubkey: &Pubkey, // vote account
|
vote_pubkey: &Pubkey,
|
||||||
authorized_pubkey: &Pubkey, // currently authorized
|
authorized_voter_pubkey: &Pubkey,
|
||||||
node_pubkey: &Pubkey,
|
node_pubkey: &Pubkey,
|
||||||
) -> Instruction {
|
) -> Instruction {
|
||||||
let account_metas = vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_pubkey);
|
let account_metas =
|
||||||
|
vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_voter_pubkey);
|
||||||
|
|
||||||
Instruction::new(
|
Instruction::new(
|
||||||
id(),
|
id(),
|
||||||
|
@ -11,6 +11,8 @@ homepage = "https://solana.com/"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
clap = "2.33.0"
|
clap = "2.33.0"
|
||||||
log = "0.4.8"
|
log = "0.4.8"
|
||||||
|
reqwest = { version = "0.9.24", default-features = false, features = ["rustls-tls"] }
|
||||||
|
serde_json = "1.0"
|
||||||
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
|
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
|
||||||
solana-client = { path = "../client", version = "0.22.0" }
|
solana-client = { path = "../client", version = "0.22.0" }
|
||||||
solana-logger = { path = "../logger", version = "0.22.0" }
|
solana-logger = { path = "../logger", version = "0.22.0" }
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
||||||
periodically polls the cluster over an RPC API to confirm that the transaction
|
periodically polls the cluster over an RPC API to confirm that the transaction
|
||||||
count is advancing, new blockhashes are available, and no validators are
|
count is advancing, new blockhashes are available, and no validators are
|
||||||
delinquent. Results are reported as InfluxDB metrics.
|
delinquent. Results are reported as InfluxDB metrics, with an optional
|
||||||
|
Slack/Discord push notification on sanity failure.
|
||||||
|
|
||||||
### Metrics
|
### Metrics
|
||||||
#### `watchtower-sanity`
|
#### `watchtower-sanity`
|
||||||
@ -14,3 +15,11 @@ the following fields:
|
|||||||
* `test`: name of the sanity test that failed
|
* `test`: name of the sanity test that failed
|
||||||
* `err`: exact sanity failure message
|
* `err`: exact sanity failure message
|
||||||
|
|
||||||
|
|
||||||
|
### Sanity failure push notification
|
||||||
|
To receive a Slack and/or Discord notification on sanity failure, define one or
|
||||||
|
both of these environment variables before running `solana-watchtower`:
|
||||||
|
```
|
||||||
|
export SLACK_WEBHOOK=...
|
||||||
|
export DISCORD_WEBHOOK=...
|
||||||
|
```
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
//! A command-line executable for monitoring the health of a cluster
|
//! A command-line executable for monitoring the health of a cluster
|
||||||
|
|
||||||
|
mod notifier;
|
||||||
|
|
||||||
|
use crate::notifier::Notifier;
|
||||||
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||||
use log::*;
|
use log::*;
|
||||||
use solana_clap_utils::input_validators::is_url;
|
use solana_clap_utils::input_validators::is_url;
|
||||||
@ -38,6 +41,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
|
|
||||||
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
||||||
|
|
||||||
|
let notifier = Notifier::new();
|
||||||
let mut last_transaction_count = 0;
|
let mut last_transaction_count = 0;
|
||||||
loop {
|
loop {
|
||||||
let ok = rpc_client
|
let ok = rpc_client
|
||||||
@ -111,6 +115,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||||
|
if !ok {
|
||||||
|
notifier.send("solana-watchtower sanity failure");
|
||||||
|
}
|
||||||
sleep(interval);
|
sleep(interval);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
46
watchtower/src/notifier.rs
Normal file
46
watchtower/src/notifier.rs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
use log::*;
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::json;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
pub struct Notifier {
|
||||||
|
client: Client,
|
||||||
|
discord_webhook: Option<String>,
|
||||||
|
slack_webhook: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Notifier {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let discord_webhook = env::var("DISCORD_WEBHOOK")
|
||||||
|
.map_err(|_| {
|
||||||
|
warn!("Discord notifications disabled");
|
||||||
|
})
|
||||||
|
.ok();
|
||||||
|
let slack_webhook = env::var("SLACK_WEBHOOK")
|
||||||
|
.map_err(|_| {
|
||||||
|
warn!("Slack notifications disabled");
|
||||||
|
})
|
||||||
|
.ok();
|
||||||
|
Notifier {
|
||||||
|
client: Client::new(),
|
||||||
|
discord_webhook,
|
||||||
|
slack_webhook,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn send(&self, msg: &str) {
|
||||||
|
if let Some(webhook) = &self.discord_webhook {
|
||||||
|
let data = json!({ "content": msg });
|
||||||
|
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||||
|
warn!("Failed to send Discord message: {:?}", err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(webhook) = &self.slack_webhook {
|
||||||
|
let data = json!({ "text": msg });
|
||||||
|
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||||
|
warn!("Failed to send Slack message: {:?}", err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user