diff --git a/Cargo.lock b/Cargo.lock index f14227640b..2b6e8899e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1308,6 +1308,21 @@ dependencies = [ "termcolor", ] +[[package]] +name = "etcd-client" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d1f66c65d1b777fc92a5b57a32c35dcb28b644a8c2c5fbc363cc90e8b99e60" +dependencies = [ + "http", + "prost", + "tokio 1.9.0", + "tokio-stream", + "tonic", + "tonic-build", + "tower-service", +] + [[package]] name = "failure" version = "0.1.8" @@ -1833,11 +1848,11 @@ dependencies = [ [[package]] name = "http" -version = "0.2.1" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d569972648b2c512421b5f2a405ad6ac9666547189d0c5477a3f200f3e02f9" +checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" dependencies = [ - "bytes 0.5.4", + "bytes 1.0.1", "fnv", "itoa", ] @@ -4677,6 +4692,7 @@ dependencies = [ "crossbeam-channel", "dashmap", "ed25519-dalek", + "etcd-client", "flate2", "fs_extra", "indexmap", @@ -6785,9 +6801,9 @@ checksum = "343bc9466d3fe6b0f960ef45960509f84480bf4fd96f92901afe7ff3df9d3a62" [[package]] name = "tower-service" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860" +checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" diff --git a/core/Cargo.toml b/core/Cargo.toml index 96a1a88f0e..495435c580 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -25,6 +25,7 @@ chrono = { version = "0.4.11", features = ["serde"] } crossbeam-channel = "0.5" dashmap = { version = "4.0.2", features = ["rayon", "raw-api"] } ed25519-dalek = "=1.0.1" +etcd-client = { version = "0.7.1", features = ["tls"]} fs_extra = "1.2.0" flate2 = "1.0" indexmap = { version = "1.7", features = ["rayon"] } @@ -71,6 +72,7 @@ solana-vote-program = { path 
= "../programs/vote", version = "=1.8.0" } tempfile = "3.2.0" thiserror = "1.0" solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.8.0" } +tokio = { version = "1", features = ["full"] } trees = "0.4.2" [dev-dependencies] @@ -86,7 +88,6 @@ solana-version = { path = "../version", version = "=1.8.0" } static_assertions = "1.1.0" symlink = "0.1.0" systemstat = "0.1.8" -tokio = { version = "1", features = ["full"] } [build-dependencies] rustc_version = "0.4" diff --git a/core/src/tower_storage.rs b/core/src/tower_storage.rs index f2d748125b..d3779ea1a8 100644 --- a/core/src/tower_storage.rs +++ b/core/src/tower_storage.rs @@ -8,6 +8,7 @@ use { fs::{self, File}, io::{self, BufReader}, path::PathBuf, + sync::RwLock, }, }; @@ -127,3 +128,157 @@ impl TowerStorage for FileTowerStorage { Ok(()) } } + +pub struct EtcdTowerStorage { + client: RwLock, + instance_id: [u8; 8], + runtime: tokio::runtime::Runtime, +} + +pub struct EtcdTlsConfig { + pub domain_name: String, + pub ca_certificate: Vec, + pub identity_certificate: Vec, + pub identity_private_key: Vec, +} + +impl EtcdTowerStorage { + pub fn new, S: AsRef<[E]>>( + endpoints: S, + tls_config: Option, + ) -> Result { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_io() + .enable_time() + .build() + .unwrap(); + + let client = runtime + .block_on(async { + etcd_client::Client::connect( + endpoints, + tls_config.map(|tls_config| { + etcd_client::ConnectOptions::default().with_tls( + etcd_client::TlsOptions::new() + .domain_name(tls_config.domain_name) + .ca_certificate(etcd_client::Certificate::from_pem( + tls_config.ca_certificate, + )) + .identity(etcd_client::Identity::from_pem( + tls_config.identity_certificate, + tls_config.identity_private_key, + )), + ) + }), + ) + .await + }) + .map_err(Self::etdc_to_tower_error)?; + + Ok(Self { + client: RwLock::new(client), + instance_id: solana_sdk::timing::timestamp().to_le_bytes(), + runtime, + }) + } + + fn get_keys(node_pubkey: 
&Pubkey) -> (String, String) { + let instance_key = format!("{}/instance", node_pubkey); + let tower_key = format!("{}/tower", node_pubkey); + (instance_key, tower_key) + } + + fn etdc_to_tower_error(error: etcd_client::Error) -> TowerError { + TowerError::IoError(io::Error::new(io::ErrorKind::Other, error.to_string())) + } +} + +impl TowerStorage for EtcdTowerStorage { + fn load(&self, node_pubkey: &Pubkey) -> Result { + let (instance_key, tower_key) = Self::get_keys(node_pubkey); + let mut client = self.client.write().unwrap(); + + let txn = etcd_client::Txn::new().and_then(vec![etcd_client::TxnOp::put( + instance_key.clone(), + self.instance_id, + None, + )]); + self.runtime + .block_on(async { client.txn(txn).await }) + .map_err(|err| { + error!("Failed to acquire etcd instance lock: {}", err); + Self::etdc_to_tower_error(err) + })?; + + let txn = etcd_client::Txn::new() + .when(vec![etcd_client::Compare::value( + instance_key, + etcd_client::CompareOp::Equal, + self.instance_id, + )]) + .and_then(vec![etcd_client::TxnOp::get(tower_key, None)]); + + let response = self + .runtime + .block_on(async { client.txn(txn).await }) + .map_err(|err| { + error!("Failed to read etcd saved tower: {}", err); + Self::etdc_to_tower_error(err) + })?; + + if !response.succeeded() { + return Err(TowerError::IoError(io::Error::new( + io::ErrorKind::Other, + format!("Lost etcd instance lock for {}", node_pubkey), + ))); + } + + for op_response in response.op_responses() { + if let etcd_client::TxnOpResponse::Get(get_response) = op_response { + if let Some(kv) = get_response.kvs().get(0) { + return bincode::deserialize_from(kv.value()).map_err(|e| e.into()); + } + } + } + + // Should never happen... 
+ Err(TowerError::IoError(io::Error::new( + io::ErrorKind::Other, + "Saved tower response missing".to_string(), + ))) + } + + fn store(&self, saved_tower: &SavedTower) -> Result<()> { + let (instance_key, tower_key) = Self::get_keys(&saved_tower.node_pubkey); + let mut client = self.client.write().unwrap(); + + let txn = etcd_client::Txn::new() + .when(vec![etcd_client::Compare::value( + instance_key, + etcd_client::CompareOp::Equal, + self.instance_id, + )]) + .and_then(vec![etcd_client::TxnOp::put( + tower_key, + bincode::serialize(saved_tower)?, + None, + )]); + + let response = self + .runtime + .block_on(async { client.txn(txn).await }) + .map_err(|err| { + error!("Failed to write etcd saved tower: {}", err); + err + }) + .map_err(Self::etdc_to_tower_error)?; + + if !response.succeeded() { + return Err(TowerError::IoError(io::Error::new( + io::ErrorKind::Other, + format!("Lost etcd instance lock for {}", saved_tower.node_pubkey), + ))); + } + Ok(()) + } +} diff --git a/docs/sidebars.js b/docs/sidebars.js index 0106f9e937..cbdfbdd43e 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -101,6 +101,7 @@ module.exports = { "running-validator/validator-stake", "running-validator/validator-monitor", "running-validator/validator-info", + "running-validator/validator-failover", "running-validator/validator-troubleshoot", ], Clusters: [ diff --git a/docs/src/running-validator/validator-failover.md b/docs/src/running-validator/validator-failover.md new file mode 100644 index 0000000000..d6dfa2047b --- /dev/null +++ b/docs/src/running-validator/validator-failover.md @@ -0,0 +1,146 @@ +--- +title: Failover Setup +--- + +A simple two machine instance failover method is described here, which allows you to: +* upgrade your validator software with virtually no down time, and +* failover to the secondary instance when your monitoring detects a problem with + the primary instance +without any safety issues that would otherwise be associated with running two +instances of 
your validator. + +You will need two validator-class machines for your primary and secondary +validator. A third machine for running an [etcd](https://etcd.io/) cluster, +which is used to store the tower voting record for your validator. + +## Setup + +### etcd cluster setup + +There is ample documentation regarding etcd setup and configuration at +https://etcd.io/, please generally familiarize yourself with etcd before +continuing. + +It's recommended that etcd be installed on a separate machine from your primary +and secondary validator machines. This machine must be highly available, and +depending on your needs you may wish to configure etcd with more than just +one node. + +First install `etcd` as desired for your machine. Then TLS certificates must be +created for authentication between the etcd cluster and your validator. Here is +one way to do this: + +With [Golang](https://golang.org/) installed, run `go get +github.com/cloudflare/cfssl/cmd/cfssl`. The `cfssl` program should now be +available at `~/go/bin/cfssl`. Ensure `~/go/bin` is in your PATH by running +`PATH=$PATH:~/go/bin/`. 
+ +Now create a certificate directory and configuration file: +``` +mkdir -p certs/ +echo '{"CN":"etcd","hosts":["localhost", "127.0.0.1"],"key":{"algo":"rsa","size":2048}}' > certs/config.json +``` + +then create certificates for the etcd server and the validator: +``` +cfssl gencert -initca certs/config.json | cfssljson -bare certs/etcd-ca +cfssl gencert -ca certs/etcd-ca.pem -ca-key certs/etcd-ca-key.pem certs/config.json | cfssljson -bare certs/validator +cfssl gencert -ca certs/etcd-ca.pem -ca-key certs/etcd-ca-key.pem certs/config.json | cfssljson -bare certs/etcd +``` + +Copy these files to your primary and secondary validator machines: +* `certs/validator-key.pem` +* `certs/validator.pem` +* `certs/etcd-ca.pem` + +and these files to the machine running the etcd server: +* `certs/etcd.pem` +* `certs/etcd-key.pem` +* `certs/etcd-ca.pem` + +With this configuration, both the validator and etcd will share the same +TLS certificate authority and will each authenticate the other with it. + + +Start `etcd` with the following arguments: +```bash +etcd --auto-compaction-retention 2 --auto-compaction-mode revision \ + --cert-file=certs/etcd.pem --key-file=certs/etcd-key.pem \ + --client-cert-auth \ + --trusted-ca-file=certs/etcd-ca.pem \ + --listen-client-urls=https://127.0.0.1:2379 \ + --advertise-client-urls=https://127.0.0.1:2379 +``` + +and use `curl` to confirm the etcd TLS certificates are properly configured: +```bash +curl --cacert certs/etcd-ca.pem https://127.0.0.1:2379/ --cert certs/validator.pem --key certs/validator-key.pem +``` +On success, curl will return a 404 response. + +For more information on etcd TLS setup, please refer to +https://etcd.io/docs/v3.5/op-guide/security/#example-2-client-to-server-authentication-with-https-client-certificates + +### Primary Validator +The following additional `solana-validator` parameters are required to enable +tower storage into etcd: + +``` +solana-validator ... 
\ + --tower-storage etcd \ + --etcd-cacert-file certs/etcd-ca.pem \ + --etcd-cert-file certs/validator.pem \ + --etcd-key-file certs/validator-key.pem \ + --etcd-endpoint 127.0.0.1:2379 # <-- replace 127.0.0.1 with the actual IP address +``` + +Note that once running your validator *will terminate* if it's not able to write +its tower into etcd before submitting a vote transaction, so it's essential +that your etcd endpoint remain accessible at all times. + +### Secondary Validator +Configure the secondary validator like the primary with the exception of the +following `solana-validator` command-line argument changes: +* Generate and use a secondary validator identity: `--identity secondary-validator-keypair.json` +* Add `--no-check-vote-account` +* Add `--authorized-voter validator-keypair.json` (where + `validator-keypair.json` is the identity keypair for your primary validator) + +## Triggering a failover manually +When both validators are running normally and caught up to the cluster, a +failover from primary to secondary can be triggered by running the following +command on the secondary validator: +```bash +$ solana-validator wait-for-restart-window --identity validator-keypair.json \ + && solana-validator set-identity validator-keypair.json +``` + +The secondary validator will acquire a lock on the tower in etcd to ensure +voting and block production safely switches over from the primary validator. + +The primary validator will then terminate as soon as it detects the secondary +validator using its identity. + +Note: When the primary validator restarts (which may be immediate if you have +configured your primary validator to do so) it will reclaim its identity +from the secondary validator. This will in turn cause the secondary validator to +exit. However if/when the secondary validator restarts, it will do so using the +secondary validator identity and thus the restart cycle is broken. 
+ +## Triggering a failover via monitoring +Monitoring of your choosing can invoke the `solana-validator set-identity +validator-keypair.json` command mentioned in the previous section. + +It is not necessary to guarantee the primary validator has halted before failing +over to the secondary, as the failover process will prevent the primary +validator from voting and producing blocks even if it is in an unknown state. + +## Validator Software Upgrades +To perform a software upgrade using this failover method: +1. Install the new software version on your primary validator system but do not + restart it yet. +2. Trigger a manual failover to your secondary validator. This should cause your + primary validator to terminate. +3. When your primary validator restarts it will now be using the new software version. +4. Once the primary validator catches up upgrade the secondary validator at + your convenience. diff --git a/validator/src/main.rs b/validator/src/main.rs index 632051b825..10b16b6522 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -21,7 +21,7 @@ use { }, solana_core::{ ledger_cleanup_service::{DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS}, - tower_storage::FileTowerStorage, + tower_storage, tpu::DEFAULT_TPU_COALESCE_MS, validator::{ is_snapshot_config_invalid, Validator, ValidatorConfig, ValidatorStartProgress, @@ -1299,7 +1299,58 @@ pub fn main() { .long("tower") .value_name("DIR") .takes_value(true) - .help("Use DIR as tower location [default: --ledger value]"), + .help("Use DIR as file tower storage location [default: --ledger value]"), + ) + .arg( + Arg::with_name("tower_storage") + .long("tower-storage") + .possible_values(&["file", "etcd"]) + .default_value("file") + .takes_value(true) + .help("Where to store the tower"), + ) + .arg( + Arg::with_name("etcd_endpoint") + .long("etcd-endpoint") + .required_if("tower_storage", "etcd") + .value_name("HOST:PORT") + .takes_value(true) + .multiple(true) + 
.validator(solana_net_utils::is_host_port) + .help("etcd gRPC endpoint to connect with") + ) + .arg( + Arg::with_name("etcd_domain_name") + .long("etcd-domain-name") + .required_if("tower_storage", "etcd") + .value_name("DOMAIN") + .default_value("localhost") + .takes_value(true) + .help("domain name against which to verify the etcd server’s TLS certificate") + ) + .arg( + Arg::with_name("etcd_cacert_file") + .long("etcd-cacert-file") + .required_if("tower_storage", "etcd") + .value_name("FILE") + .takes_value(true) + .help("verify the TLS certificate of the etcd endpoint using this CA bundle") + ) + .arg( + Arg::with_name("etcd_key_file") + .long("etcd-key-file") + .required_if("tower_storage", "etcd") + .value_name("FILE") + .takes_value(true) + .help("TLS key file to use when establishing a connection to the etcd endpoint") + ) + .arg( + Arg::with_name("etcd_cert_file") + .long("etcd-cert-file") + .required_if("tower_storage", "etcd") + .value_name("FILE") + .takes_value(true) + .help("TLS certificate to use when establishing a connection to the etcd endpoint") ) .arg( Arg::with_name("gossip_port") @@ -1316,7 +1367,6 @@ pub fn main() { .validator(solana_net_utils::is_host) .help("Gossip DNS name or IP address for the validator to advertise in gossip \ [default: ask --entrypoint, or 127.0.0.1 when --entrypoint is not provided]"), - ) .arg( Arg::with_name("public_rpc_addr") @@ -2296,13 +2346,50 @@ pub fn main() { .ok() .or_else(|| get_cluster_shred_version(&entrypoint_addrs)); - let tower_path = value_t!(matches, "tower", PathBuf) - .ok() - .unwrap_or_else(|| ledger_path.clone()); + let tower_storage: Arc = + match value_t_or_exit!(matches, "tower_storage", String).as_str() { + "file" => { + let tower_path = value_t!(matches, "tower", PathBuf) + .ok() + .unwrap_or_else(|| ledger_path.clone()); + + Arc::new(tower_storage::FileTowerStorage::new(tower_path)) + } + "etcd" => { + let endpoints = values_t_or_exit!(matches, "etcd_endpoint", String); + let domain_name = 
value_t_or_exit!(matches, "etcd_domain_name", String); + let ca_certificate_file = value_t_or_exit!(matches, "etcd_cacert_file", String); + let identity_certificate_file = value_t_or_exit!(matches, "etcd_cert_file", String); + let identity_private_key_file = value_t_or_exit!(matches, "etcd_key_file", String); + + let read = |file| { + fs::read(&file).unwrap_or_else(|err| { + eprintln!("Unable to read {}: {}", file, err); + exit(1) + }) + }; + + let tls_config = tower_storage::EtcdTlsConfig { + domain_name, + ca_certificate: read(ca_certificate_file), + identity_certificate: read(identity_certificate_file), + identity_private_key: read(identity_private_key_file), + }; + + Arc::new( + tower_storage::EtcdTowerStorage::new(endpoints, Some(tls_config)) + .unwrap_or_else(|err| { + eprintln!("Failed to connect to etcd: {}", err); + exit(1); + }), + ) + } + _ => unreachable!(), + }; let mut validator_config = ValidatorConfig { require_tower: matches.is_present("require_tower"), - tower_storage: Arc::new(FileTowerStorage::new(tower_path)), + tower_storage, dev_halt_at_slot: value_t!(matches, "dev_halt_at_slot", Slot).ok(), expected_genesis_hash: matches .value_of("expected_genesis_hash")