Add solana-watchtower program
This commit is contained in:
parent
dd54fff978
commit
2db28cae41
13
Cargo.lock
generated
13
Cargo.lock
generated
@ -4090,6 +4090,19 @@ dependencies = [
|
||||
"solana-sdk 0.22.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "solana-watchtower"
|
||||
version = "0.22.0"
|
||||
dependencies = [
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solana-clap-utils 0.22.0",
|
||||
"solana-client 0.22.0",
|
||||
"solana-logger 0.22.0",
|
||||
"solana-metrics 0.22.0",
|
||||
"solana-sdk 0.22.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "solana_libra_bytecode_verifier"
|
||||
version = "0.0.1-sol4"
|
||||
|
@ -49,6 +49,7 @@ members = [
|
||||
"vote-signer",
|
||||
"cli",
|
||||
"rayon-threadlimit",
|
||||
"watchtower",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
|
2
watchtower/.gitignore
vendored
Normal file
2
watchtower/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/target/
|
||||
/farf/
|
23
watchtower/Cargo.toml
Normal file
23
watchtower/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
||||
[package]
|
||||
authors = ["Solana Maintainers <maintainers@solana.com>"]
|
||||
edition = "2018"
|
||||
name = "solana-watchtower"
|
||||
description = "Blockchain, Rebuilt for Scale"
|
||||
version = "0.22.0"
|
||||
repository = "https://github.com/solana-labs/solana"
|
||||
license = "Apache-2.0"
|
||||
homepage = "https://solana.com/"
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33.0"
|
||||
log = "0.4.8"
|
||||
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
|
||||
solana-client = { path = "../client", version = "0.22.0" }
|
||||
solana-logger = { path = "../logger", version = "0.22.0" }
|
||||
solana-metrics = { path = "../metrics", version = "0.22.0" }
|
||||
solana-sdk = { path = "../sdk", version = "0.22.0" }
|
||||
|
||||
[[bin]]
|
||||
name = "solana-watchtower"
|
||||
path = "src/main.rs"
|
||||
|
16
watchtower/README.md
Normal file
16
watchtower/README.md
Normal file
@ -0,0 +1,16 @@
|
||||
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
||||
periodically polls the cluster over an RPC API to confirm that the transaction
|
||||
count is advancing, new blockhashes are available, and no validators are
|
||||
delinquent. Results are reported as InfluxDB metrics.
|
||||
|
||||
### Metrics
|
||||
#### `watchtower-sanity`
|
||||
On every iteration, this data point is emitted to indicate the overall result
|
||||
using a boolean `ok` field.
|
||||
|
||||
#### `watchtower-sanity-failure`
|
||||
On failure, this data point contains details about the specific test that failed via
|
||||
the following fields:
|
||||
* `test`: name of the sanity test that failed
|
||||
* `err`: exact sanity failure message
|
||||
|
116
watchtower/src/main.rs
Normal file
116
watchtower/src/main.rs
Normal file
@ -0,0 +1,116 @@
|
||||
//! A command-line executable for monitoring the health of a cluster
|
||||
|
||||
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||
use log::*;
|
||||
use solana_clap_utils::input_validators::is_url;
|
||||
use solana_client::rpc_client::RpcClient;
|
||||
use solana_metrics::{datapoint_error, datapoint_info};
|
||||
use std::{error, io, thread::sleep, time::Duration};
|
||||
|
||||
fn main() -> Result<(), Box<dyn error::Error>> {
|
||||
let matches = App::new(crate_name!())
|
||||
.about(crate_description!())
|
||||
.version(solana_clap_utils::version!())
|
||||
.arg(
|
||||
Arg::with_name("json_rpc_url")
|
||||
.long("url")
|
||||
.value_name("URL")
|
||||
.takes_value(true)
|
||||
.required(true)
|
||||
.validator(is_url)
|
||||
.help("JSON RPC URL for the cluster"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("interval")
|
||||
.long("interval")
|
||||
.value_name("SECONDS")
|
||||
.takes_value(true)
|
||||
.default_value("60")
|
||||
.help("Wait interval seconds between checking the cluster"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
||||
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
||||
|
||||
solana_logger::setup_with_filter("solana=info");
|
||||
solana_metrics::set_panic_hook("watchtower");
|
||||
|
||||
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
||||
|
||||
let mut last_transaction_count = 0;
|
||||
loop {
|
||||
let ok = rpc_client
|
||||
.get_transaction_count()
|
||||
.and_then(|transaction_count| {
|
||||
info!("Current transaction count: {}", transaction_count);
|
||||
|
||||
if transaction_count > last_transaction_count {
|
||||
last_transaction_count = transaction_count;
|
||||
Ok(true)
|
||||
} else {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
"Transaction count is not advancing: {} <= {}",
|
||||
transaction_count, last_transaction_count
|
||||
),
|
||||
))
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "transaction-count", String),
|
||||
("err", err.to_string(), String)
|
||||
);
|
||||
false
|
||||
})
|
||||
&& rpc_client
|
||||
.get_recent_blockhash()
|
||||
.and_then(|(blockhash, _fee_calculator)| {
|
||||
info!("Current blockhash: {}", blockhash);
|
||||
rpc_client.get_new_blockhash(&blockhash)
|
||||
})
|
||||
.and_then(|(blockhash, _fee_calculator)| {
|
||||
info!("New blockhash: {}", blockhash);
|
||||
Ok(true)
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "blockhash", String),
|
||||
("err", err.to_string(), String)
|
||||
);
|
||||
false
|
||||
})
|
||||
&& rpc_client
|
||||
.get_vote_accounts()
|
||||
.and_then(|vote_accounts| {
|
||||
info!("Current validator count: {}", vote_accounts.current.len());
|
||||
info!(
|
||||
"Delinquent validator count: {}",
|
||||
vote_accounts.delinquent.len()
|
||||
);
|
||||
if vote_accounts.delinquent.is_empty() {
|
||||
Ok(true)
|
||||
} else {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{} delinquent validators", vote_accounts.delinquent.len()),
|
||||
))
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|err| {
|
||||
datapoint_error!(
|
||||
"watchtower-sanity-failure",
|
||||
("test", "delinquent-validators", String),
|
||||
("err", err.to_string(), String)
|
||||
);
|
||||
false
|
||||
});
|
||||
|
||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||
sleep(interval);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user