From 7607800d47116bcaaa10ce882948123a9e0260d6 Mon Sep 17 00:00:00 2001 From: carllin Date: Sun, 8 Sep 2019 17:53:34 -0700 Subject: [PATCH] Refactor restart function in local cluster to support separate exit and restart functions (#5845) --- core/src/cluster.rs | 9 --- core/src/lib.rs | 1 - local_cluster/src/cluster.rs | 37 +++++++++++ local_cluster/src/lib.rs | 1 + local_cluster/src/local_cluster.rs | 70 +++++++++------------ local_cluster/tests/local_cluster.rs | 93 +++++++++++++++++++++++++++- 6 files changed, 158 insertions(+), 53 deletions(-) delete mode 100644 core/src/cluster.rs create mode 100644 local_cluster/src/cluster.rs diff --git a/core/src/cluster.rs b/core/src/cluster.rs deleted file mode 100644 index 08227689db..0000000000 --- a/core/src/cluster.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::validator::ValidatorConfig; -use solana_client::thin_client::ThinClient; -use solana_sdk::pubkey::Pubkey; - -pub trait Cluster { - fn get_node_pubkeys(&self) -> Vec; - fn get_validator_client(&self, pubkey: &Pubkey) -> Option; - fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig); -} diff --git a/core/src/lib.rs b/core/src/lib.rs index ad227ff545..2283cb32b8 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -28,7 +28,6 @@ pub mod blocktree; pub mod blockstream; pub mod blockstream_service; pub mod blocktree_processor; -pub mod cluster; pub mod cluster_info; pub mod cluster_info_repair_listener; pub mod consensus; diff --git a/local_cluster/src/cluster.rs b/local_cluster/src/cluster.rs new file mode 100644 index 0000000000..8078617cc2 --- /dev/null +++ b/local_cluster/src/cluster.rs @@ -0,0 +1,37 @@ +use solana_client::thin_client::ThinClient; +use solana_core::contact_info::ContactInfo; +use solana_core::validator::ValidatorConfig; +use solana_sdk::pubkey::Pubkey; +use solana_sdk::signature::Keypair; +use std::path::PathBuf; +use std::sync::Arc; + +pub struct ValidatorInfo { + pub keypair: Arc, + pub voting_keypair: Arc, + pub storage_keypair: Arc, + pub ledger_path: PathBuf, + pub contact_info: ContactInfo, +} + +pub struct ClusterValidatorInfo { + pub info: ValidatorInfo, + pub config: ValidatorConfig, +} + +impl ClusterValidatorInfo { + pub fn new(validator_info: ValidatorInfo, config: ValidatorConfig) -> Self { + Self { + info: validator_info, + config, + } + } +} + +pub trait Cluster { + fn get_node_pubkeys(&self) -> Vec; + fn get_validator_client(&self, pubkey: &Pubkey) -> Option; + fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo; + fn restart_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo); + fn exit_restart_node(&mut self, pubkey: &Pubkey, config: ValidatorConfig); +} diff --git a/local_cluster/src/lib.rs b/local_cluster/src/lib.rs index d2c114e479..0480f76147 100644 --- a/local_cluster/src/lib.rs +++ b/local_cluster/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cluster; pub mod cluster_tests; pub mod local_cluster; diff --git a/local_cluster/src/local_cluster.rs b/local_cluster/src/local_cluster.rs index 8b034e684b..fdd8ddef84 100644 --- a/local_cluster/src/local_cluster.rs +++ b/local_cluster/src/local_cluster.rs @@ -1,7 +1,7 @@ +use crate::cluster::{Cluster, ClusterValidatorInfo, ValidatorInfo}; use solana_client::thin_client::{create_client, ThinClient}; use solana_core::{ blocktree::create_new_tmp_ledger, - cluster::Cluster, cluster_info::{Node, FULLNODE_PORT_RANGE}, contact_info::ContactInfo, genesis_utils::{create_genesis_block_with_leader, GenesisBlockInfo}, @@ -33,14 +33,6 @@ use std::{ sync::Arc, }; -pub struct ValidatorInfo { - pub keypair: Arc, - pub voting_keypair: Arc, - pub storage_keypair: Arc, - pub ledger_path: PathBuf, - pub contact_info: ContactInfo, -} - pub struct ReplicatorInfo { pub replicator_storage_pubkey: Pubkey, pub ledger_path: PathBuf, @@ -55,20 +47,6 @@ impl ReplicatorInfo { } } -pub struct ClusterValidatorInfo { - pub info: ValidatorInfo, - pub config: ValidatorConfig, -} - -impl ClusterValidatorInfo { - pub fn new(validator_info: ValidatorInfo, config: ValidatorConfig) -> Self { - Self { - info: validator_info, - config, - } - } -} - #[derive(Clone, Debug)] pub struct ClusterConfig { /// The fullnode config that should be applied to every node in the cluster @@ -585,27 +563,32 @@ impl Cluster for LocalCluster { }) } - fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig) { - // Shut down the fullnode + fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo { let mut node = self.fullnodes.remove(&pubkey).unwrap(); + + // Shut down the fullnode node.exit(); node.join().unwrap(); + self.fullnode_infos.remove(&pubkey).unwrap() + } + + fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) { // Update the stored ContactInfo for this node - let node_pubkey = &self.fullnode_infos[&pubkey].info.keypair.pubkey(); - let node = Node::new_localhost_with_pubkey(&node_pubkey); - self.fullnode_infos - .get_mut(&pubkey) - .unwrap() - .info - .contact_info = node.info.clone(); - if pubkey == self.entry_point_info.id { - self.entry_point_info = node.info.clone(); - } + let node = Node::new_localhost_with_pubkey(&pubkey); + cluster_validator_info.info.contact_info = node.info.clone(); + + let entry_point_info = { + if *pubkey == self.entry_point_info.id { + self.entry_point_info = node.info.clone(); + None + } else { + Some(&self.entry_point_info) + } + }; // Restart the node - self.fullnode_infos.get_mut(&pubkey).unwrap().config = config.clone(); - let fullnode_info = &self.fullnode_infos[&pubkey].info; + let fullnode_info = &cluster_validator_info.info; let restarted_node = Validator::new( node, @@ -614,12 +597,19 @@ impl Cluster for LocalCluster { &fullnode_info.voting_keypair.pubkey(), &fullnode_info.voting_keypair, &fullnode_info.storage_keypair, - None, + entry_point_info, true, - config, + &cluster_validator_info.config, ); - self.fullnodes.insert(pubkey, restarted_node); + self.fullnodes.insert(*pubkey, restarted_node); + self.fullnode_infos.insert(*pubkey, cluster_validator_info); + } + + fn exit_restart_node(&mut self, pubkey: &Pubkey, validator_config: ValidatorConfig) { + let mut cluster_validator_info = self.exit_node(pubkey); + cluster_validator_info.config = validator_config; + self.restart_node(pubkey, cluster_validator_info); } } diff --git a/local_cluster/tests/local_cluster.rs b/local_cluster/tests/local_cluster.rs index 9e11160ae7..3cfb3f31e4 100644 --- a/local_cluster/tests/local_cluster.rs +++ b/local_cluster/tests/local_cluster.rs @@ -4,8 +4,9 @@ use log::*; use serial_test_derive::serial; use solana_core::{ bank_forks::SnapshotConfig, blocktree::Blocktree, broadcast_stage::BroadcastStageType, - cluster::Cluster, gossip_service::discover_cluster, snapshot_utils, validator::ValidatorConfig, + gossip_service::discover_cluster, snapshot_utils, validator::ValidatorConfig, }; +use solana_local_cluster::cluster::Cluster; use solana_local_cluster::{ cluster_tests, local_cluster::{ClusterConfig, LocalCluster}, @@ -270,7 +271,7 @@ fn test_restart_node() { clock::DEFAULT_TICKS_PER_SLOT, slots_per_epoch, ); - cluster.restart_node(nodes[0], &validator_config); + cluster.exit_restart_node(&nodes[0], validator_config); cluster_tests::sleep_n_epochs( 0.5, &cluster.genesis_block.poh_config, @@ -300,6 +301,92 @@ fn test_listener_startup() { assert_eq!(cluster_nodes.len(), 4); } +/*#[allow(unused_attributes)] +#[test] +#[serial] +fn test_snapshot_restart_locktower() { + // First set up the cluster with 2 nodes + let snapshot_interval_slots = 10; + let num_account_paths = 4; + + let leader_snapshot_test_config = + setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths); + let validator_snapshot_test_config = + setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths); + + let snapshot_package_output_path = &leader_snapshot_test_config + .validator_config + .snapshot_config + .as_ref() + .unwrap() + .snapshot_package_output_path; + + let config = ClusterConfig { + node_stakes: vec![10000], + cluster_lamports: 100000, + validator_configs: vec![leader_snapshot_test_config.validator_config.clone()], + ..ClusterConfig::default() + }; + + let mut cluster = LocalCluster::new(&config); + + // Let the nodes run for a while, then stop one of the validators + sleep(Duration::from_millis(5000)); + cluster_tests::fullnode_exit(&local.entry_point_info, num_nodes); + + trace!("Waiting for snapshot tar to be generated with slot",); + let tar = snapshot_utils::get_snapshot_tar_path(&snapshot_package_output_path); + loop { + if tar.exists() { + trace!("snapshot tar exists"); + break; + } + sleep(Duration::from_millis(5000)); + } + + // Copy tar to validator's snapshot output directory + let validator_tar_path = + snapshot_utils::get_snapshot_tar_path(&validator_snapshot_test_config.snapshot_output_path); + fs::hard_link(tar, &validator_tar_path).unwrap(); + let slot_floor = snapshot_utils::bank_slot_from_archive(&validator_tar_path).unwrap(); + + // Start up a new node from a snapshot + let validator_stake = 5; + cluster.add_validator( + &validator_snapshot_test_config.validator_config, + validator_stake, + ); + let all_pubkeys = cluster.get_node_pubkeys(); + let validator_id = all_pubkeys + .into_iter() + .find(|x| *x != cluster.entry_point_info.id) + .unwrap(); + let validator_client = cluster.get_validator_client(&validator_id).unwrap(); + let mut current_slot = 0; + + // Let this validator run a while with repair + let target_slot = slot_floor + 40; + while current_slot <= target_slot { + trace!("current_slot: {}", current_slot); + if let Ok(slot) = validator_client.get_slot() { + current_slot = slot; + } else { + continue; + } + sleep(Duration::from_secs(1)); + } + + // Check the validator ledger doesn't contain any slots < slot_floor + cluster.close_preserve_ledgers(); + let validator_ledger_path = &cluster.fullnode_infos[&validator_id]; + let blocktree = Blocktree::open(&validator_ledger_path.info.ledger_path).unwrap(); + + // Skip the zeroth slot in blocktree that the ledger is initialized with + let (first_slot, _) = blocktree.slot_meta_iterator(1).unwrap().next().unwrap(); + + assert_eq!(first_slot, slot_floor); +}*/ + #[allow(unused_attributes)] #[test] #[serial] @@ -467,7 +554,7 @@ fn test_snapshots_restart_validity() { // Restart a node trace!("Restarting cluster from snapshot"); let nodes = cluster.get_node_pubkeys(); - cluster.restart_node(nodes[0], &snapshot_test_config.validator_config); + cluster.exit_restart_node(&nodes[0], snapshot_test_config.validator_config.clone()); // Verify account balances on validator trace!("Verifying balances");