Add solana-watchtower program
This commit is contained in:
		
							
								
								
									
										13
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										13
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -4090,6 +4090,19 @@ dependencies = [ | ||||
|  "solana-sdk 0.22.0", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "solana-watchtower" | ||||
| version = "0.22.0" | ||||
| dependencies = [ | ||||
|  "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
|  "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
|  "solana-clap-utils 0.22.0", | ||||
|  "solana-client 0.22.0", | ||||
|  "solana-logger 0.22.0", | ||||
|  "solana-metrics 0.22.0", | ||||
|  "solana-sdk 0.22.0", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "solana_libra_bytecode_verifier" | ||||
| version = "0.0.1-sol4" | ||||
|   | ||||
| @@ -49,6 +49,7 @@ members = [ | ||||
|     "vote-signer", | ||||
|     "cli", | ||||
|     "rayon-threadlimit", | ||||
|     "watchtower", | ||||
| ] | ||||
|  | ||||
| exclude = [ | ||||
|   | ||||
							
								
								
									
										2
									
								
								watchtower/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								watchtower/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| /target/ | ||||
| /farf/ | ||||
							
								
								
									
										23
									
								
								watchtower/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								watchtower/Cargo.toml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| [package] | ||||
| authors = ["Solana Maintainers <maintainers@solana.com>"] | ||||
| edition = "2018" | ||||
| name = "solana-watchtower" | ||||
| description = "Blockchain, Rebuilt for Scale" | ||||
| version = "0.22.0" | ||||
| repository = "https://github.com/solana-labs/solana" | ||||
| license = "Apache-2.0" | ||||
| homepage = "https://solana.com/" | ||||
|  | ||||
| [dependencies] | ||||
| clap = "2.33.0" | ||||
| log = "0.4.8" | ||||
| solana-clap-utils = { path = "../clap-utils", version = "0.22.0" } | ||||
| solana-client = { path = "../client", version = "0.22.0" } | ||||
| solana-logger = { path = "../logger", version = "0.22.0" } | ||||
| solana-metrics = { path = "../metrics", version = "0.22.0" } | ||||
| solana-sdk = { path = "../sdk", version = "0.22.0" } | ||||
|  | ||||
| [[bin]] | ||||
| name = "solana-watchtower" | ||||
| path = "src/main.rs" | ||||
|  | ||||
							
								
								
									
										16
									
								
								watchtower/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								watchtower/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| The `solana-watchtower` program is used to monitor the health of a cluster.  It | ||||
| periodically polls the cluster over an RPC API to confirm that the transaction | ||||
| count is advancing, new blockhashes are available, and no validators are | ||||
| delinquent.  Results are reported as InfluxDB metrics. | ||||
|  | ||||
| ### Metrics | ||||
| #### `watchtower-sanity` | ||||
| This data point is emitted on every iteration; its boolean `ok` field indicates | ||||
| the overall result. | ||||
|  | ||||
| #### `watchtower-sanity-failure` | ||||
| When a sanity test fails, this data point is emitted with details about that | ||||
| test in the following fields: | ||||
| * `test`: name of the sanity test that failed | ||||
| * `err`: exact sanity failure message | ||||
|  | ||||
							
								
								
									
										116
									
								
								watchtower/src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								watchtower/src/main.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| //! A command-line executable for monitoring the health of a cluster | ||||
|  | ||||
| use clap::{crate_description, crate_name, value_t_or_exit, App, Arg}; | ||||
| use log::*; | ||||
| use solana_clap_utils::input_validators::is_url; | ||||
| use solana_client::rpc_client::RpcClient; | ||||
| use solana_metrics::{datapoint_error, datapoint_info}; | ||||
| use std::{error, io, thread::sleep, time::Duration}; | ||||
|  | ||||
| /// Entry point: parses the command line, then checks the cluster forever. | ||||
| /// | ||||
| /// On each pass three sanity tests are run in order against the JSON RPC | ||||
| /// endpoint given by `--url`: the transaction count must be higher than the | ||||
| /// highest count seen so far, a new blockhash must be returned for the current | ||||
| /// one, and the delinquent validator list must be empty.  Evaluation stops at | ||||
| /// the first failing test, which emits a `watchtower-sanity-failure` data | ||||
| /// point.  The combined result is emitted as `watchtower-sanity`, after which | ||||
| /// the program sleeps for `--interval` seconds.  The loop never exits, so no | ||||
| /// `Ok`/`Err` value is ever produced by this function itself. | ||||
| fn main() -> Result<(), Box<dyn error::Error>> { | ||||
|     let url_arg = Arg::with_name("json_rpc_url") | ||||
|         .long("url") | ||||
|         .value_name("URL") | ||||
|         .takes_value(true) | ||||
|         .required(true) | ||||
|         .validator(is_url) | ||||
|         .help("JSON RPC URL for the cluster"); | ||||
|     let interval_arg = Arg::with_name("interval") | ||||
|         .long("interval") | ||||
|         .value_name("SECONDS") | ||||
|         .takes_value(true) | ||||
|         .default_value("60") | ||||
|         .help("Wait interval seconds between checking the cluster"); | ||||
|  | ||||
|     let matches = App::new(crate_name!()) | ||||
|         .about(crate_description!()) | ||||
|         .version(solana_clap_utils::version!()) | ||||
|         .arg(url_arg) | ||||
|         .arg(interval_arg) | ||||
|         .get_matches(); | ||||
|  | ||||
|     let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64)); | ||||
|     let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String); | ||||
|  | ||||
|     solana_logger::setup_with_filter("solana=info"); | ||||
|     solana_metrics::set_panic_hook("watchtower"); | ||||
|  | ||||
|     let rpc_client = RpcClient::new(json_rpc_url.to_string()); | ||||
|  | ||||
|     // Highest transaction count observed so far; only raised when a pass sees | ||||
|     // a strictly larger value. | ||||
|     let mut highest_transaction_count = 0; | ||||
|  | ||||
|     // Test 1: the transaction count must exceed the highest value seen so far. | ||||
|     let mut transaction_count_advanced = || { | ||||
|         let outcome = rpc_client.get_transaction_count().and_then(|current| { | ||||
|             info!("Current transaction count: {}", current); | ||||
|  | ||||
|             if current <= highest_transaction_count { | ||||
|                 return Err(io::Error::new( | ||||
|                     io::ErrorKind::Other, | ||||
|                     format!( | ||||
|                         "Transaction count is not advancing: {} <= {}", | ||||
|                         current, highest_transaction_count | ||||
|                     ), | ||||
|                 )); | ||||
|             } | ||||
|             highest_transaction_count = current; | ||||
|             Ok(()) | ||||
|         }); | ||||
|  | ||||
|         match outcome { | ||||
|             Ok(()) => true, | ||||
|             Err(err) => { | ||||
|                 datapoint_error!( | ||||
|                     "watchtower-sanity-failure", | ||||
|                     ("test", "transaction-count", String), | ||||
|                     ("err", err.to_string(), String) | ||||
|                 ); | ||||
|                 false | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // Test 2: fetch the current blockhash, then ask the RPC node for a new one. | ||||
|     let fresh_blockhash_available = || { | ||||
|         let outcome = rpc_client | ||||
|             .get_recent_blockhash() | ||||
|             .and_then(|(current, _fee_calculator)| { | ||||
|                 info!("Current blockhash: {}", current); | ||||
|                 rpc_client.get_new_blockhash(&current) | ||||
|             }); | ||||
|  | ||||
|         match outcome { | ||||
|             Ok((fresh, _fee_calculator)) => { | ||||
|                 info!("New blockhash: {}", fresh); | ||||
|                 true | ||||
|             } | ||||
|             Err(err) => { | ||||
|                 datapoint_error!( | ||||
|                     "watchtower-sanity-failure", | ||||
|                     ("test", "blockhash", String), | ||||
|                     ("err", err.to_string(), String) | ||||
|                 ); | ||||
|                 false | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // Test 3: the list of delinquent vote accounts must be empty. | ||||
|     let no_delinquent_validators = || { | ||||
|         let outcome = rpc_client.get_vote_accounts().and_then(|vote_accounts| { | ||||
|             let delinquent_count = vote_accounts.delinquent.len(); | ||||
|             info!("Current validator count: {}", vote_accounts.current.len()); | ||||
|             info!("Delinquent validator count: {}", delinquent_count); | ||||
|  | ||||
|             if delinquent_count == 0 { | ||||
|                 Ok(()) | ||||
|             } else { | ||||
|                 Err(io::Error::new( | ||||
|                     io::ErrorKind::Other, | ||||
|                     format!("{} delinquent validators", delinquent_count), | ||||
|                 )) | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         match outcome { | ||||
|             Ok(()) => true, | ||||
|             Err(err) => { | ||||
|                 datapoint_error!( | ||||
|                     "watchtower-sanity-failure", | ||||
|                     ("test", "delinquent-validators", String), | ||||
|                     ("err", err.to_string(), String) | ||||
|                 ); | ||||
|                 false | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     loop { | ||||
|         // `&&` short-circuits: a later test only runs when every earlier one | ||||
|         // passed, so at most one failure data point is emitted per pass. | ||||
|         let ok = transaction_count_advanced() | ||||
|             && fresh_blockhash_available() | ||||
|             && no_delinquent_validators(); | ||||
|  | ||||
|         datapoint_info!("watchtower-sanity", ("ok", ok, bool)); | ||||
|         sleep(interval); | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user