Add solana-watchtower program
This commit is contained in:
parent
dd54fff978
commit
2db28cae41
13
Cargo.lock
generated
13
Cargo.lock
generated
@ -4090,6 +4090,19 @@ dependencies = [
|
|||||||
"solana-sdk 0.22.0",
|
"solana-sdk 0.22.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "solana-watchtower"
|
||||||
|
version = "0.22.0"
|
||||||
|
dependencies = [
|
||||||
|
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"solana-clap-utils 0.22.0",
|
||||||
|
"solana-client 0.22.0",
|
||||||
|
"solana-logger 0.22.0",
|
||||||
|
"solana-metrics 0.22.0",
|
||||||
|
"solana-sdk 0.22.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "solana_libra_bytecode_verifier"
|
name = "solana_libra_bytecode_verifier"
|
||||||
version = "0.0.1-sol4"
|
version = "0.0.1-sol4"
|
||||||
|
@ -49,6 +49,7 @@ members = [
|
|||||||
"vote-signer",
|
"vote-signer",
|
||||||
"cli",
|
"cli",
|
||||||
"rayon-threadlimit",
|
"rayon-threadlimit",
|
||||||
|
"watchtower",
|
||||||
]
|
]
|
||||||
|
|
||||||
exclude = [
|
exclude = [
|
||||||
|
2
watchtower/.gitignore
vendored
Normal file
2
watchtower/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/target/
|
||||||
|
/farf/
|
23
watchtower/Cargo.toml
Normal file
23
watchtower/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[package]
|
||||||
|
authors = ["Solana Maintainers <maintainers@solana.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
name = "solana-watchtower"
|
||||||
|
description = "Blockchain, Rebuilt for Scale"
|
||||||
|
version = "0.22.0"
|
||||||
|
repository = "https://github.com/solana-labs/solana"
|
||||||
|
license = "Apache-2.0"
|
||||||
|
homepage = "https://solana.com/"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
clap = "2.33.0"
|
||||||
|
log = "0.4.8"
|
||||||
|
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
|
||||||
|
solana-client = { path = "../client", version = "0.22.0" }
|
||||||
|
solana-logger = { path = "../logger", version = "0.22.0" }
|
||||||
|
solana-metrics = { path = "../metrics", version = "0.22.0" }
|
||||||
|
solana-sdk = { path = "../sdk", version = "0.22.0" }
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "solana-watchtower"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
16
watchtower/README.md
Normal file
16
watchtower/README.md
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
||||||
|
periodically polls the cluster over an RPC API to confirm that the transaction
|
||||||
|
count is advancing, new blockhashes are available, and no validators are
|
||||||
|
delinquent. Results are reported as InfluxDB metrics.
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
#### `watchtower-sanity`
|
||||||
|
On every iteration, this data point will be emitted, indicating the overall result
|
||||||
|
using a boolean `ok` field.
|
||||||
|
|
||||||
|
#### `watchtower-sanity-failure`
|
||||||
|
On failure, this data point contains details about the specific test that failed via
|
||||||
|
the following fields:
|
||||||
|
* `test`: name of the sanity test that failed
|
||||||
|
* `err`: exact sanity failure message
|
||||||
|
|
116
watchtower/src/main.rs
Normal file
116
watchtower/src/main.rs
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
//! A command-line executable for monitoring the health of a cluster
|
||||||
|
|
||||||
|
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||||
|
use log::*;
|
||||||
|
use solana_clap_utils::input_validators::is_url;
|
||||||
|
use solana_client::rpc_client::RpcClient;
|
||||||
|
use solana_metrics::{datapoint_error, datapoint_info};
|
||||||
|
use std::{error, io, thread::sleep, time::Duration};
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
|
let matches = App::new(crate_name!())
|
||||||
|
.about(crate_description!())
|
||||||
|
.version(solana_clap_utils::version!())
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("json_rpc_url")
|
||||||
|
.long("url")
|
||||||
|
.value_name("URL")
|
||||||
|
.takes_value(true)
|
||||||
|
.required(true)
|
||||||
|
.validator(is_url)
|
||||||
|
.help("JSON RPC URL for the cluster"),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("interval")
|
||||||
|
.long("interval")
|
||||||
|
.value_name("SECONDS")
|
||||||
|
.takes_value(true)
|
||||||
|
.default_value("60")
|
||||||
|
.help("Wait interval seconds between checking the cluster"),
|
||||||
|
)
|
||||||
|
.get_matches();
|
||||||
|
|
||||||
|
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
||||||
|
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
||||||
|
|
||||||
|
solana_logger::setup_with_filter("solana=info");
|
||||||
|
solana_metrics::set_panic_hook("watchtower");
|
||||||
|
|
||||||
|
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
||||||
|
|
||||||
|
let mut last_transaction_count = 0;
|
||||||
|
loop {
|
||||||
|
let ok = rpc_client
|
||||||
|
.get_transaction_count()
|
||||||
|
.and_then(|transaction_count| {
|
||||||
|
info!("Current transaction count: {}", transaction_count);
|
||||||
|
|
||||||
|
if transaction_count > last_transaction_count {
|
||||||
|
last_transaction_count = transaction_count;
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!(
|
||||||
|
"Transaction count is not advancing: {} <= {}",
|
||||||
|
transaction_count, last_transaction_count
|
||||||
|
),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
datapoint_error!(
|
||||||
|
"watchtower-sanity-failure",
|
||||||
|
("test", "transaction-count", String),
|
||||||
|
("err", err.to_string(), String)
|
||||||
|
);
|
||||||
|
false
|
||||||
|
})
|
||||||
|
&& rpc_client
|
||||||
|
.get_recent_blockhash()
|
||||||
|
.and_then(|(blockhash, _fee_calculator)| {
|
||||||
|
info!("Current blockhash: {}", blockhash);
|
||||||
|
rpc_client.get_new_blockhash(&blockhash)
|
||||||
|
})
|
||||||
|
.and_then(|(blockhash, _fee_calculator)| {
|
||||||
|
info!("New blockhash: {}", blockhash);
|
||||||
|
Ok(true)
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
datapoint_error!(
|
||||||
|
"watchtower-sanity-failure",
|
||||||
|
("test", "blockhash", String),
|
||||||
|
("err", err.to_string(), String)
|
||||||
|
);
|
||||||
|
false
|
||||||
|
})
|
||||||
|
&& rpc_client
|
||||||
|
.get_vote_accounts()
|
||||||
|
.and_then(|vote_accounts| {
|
||||||
|
info!("Current validator count: {}", vote_accounts.current.len());
|
||||||
|
info!(
|
||||||
|
"Delinquent validator count: {}",
|
||||||
|
vote_accounts.delinquent.len()
|
||||||
|
);
|
||||||
|
if vote_accounts.delinquent.is_empty() {
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!("{} delinquent validators", vote_accounts.delinquent.len()),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
datapoint_error!(
|
||||||
|
"watchtower-sanity-failure",
|
||||||
|
("test", "delinquent-validators", String),
|
||||||
|
("err", err.to_string(), String)
|
||||||
|
);
|
||||||
|
false
|
||||||
|
});
|
||||||
|
|
||||||
|
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||||
|
sleep(interval);
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user