diff --git a/core/src/cluster_info.rs b/core/src/cluster_info.rs index c60f10d1f2..ff7583da51 100644 --- a/core/src/cluster_info.rs +++ b/core/src/cluster_info.rs @@ -36,6 +36,7 @@ use solana_sdk::sanitize::{Sanitize, SanitizeError}; use bincode::{serialize, serialized_size}; use core::cmp; use itertools::Itertools; +use rand::thread_rng; use rayon::prelude::*; use rayon::{ThreadPool, ThreadPoolBuilder}; use serde::ser::Serialize; @@ -291,8 +292,8 @@ pub struct ClusterInfo { pub gossip: RwLock, /// set the keypair that will be used to sign crds values generated. It is unset only in tests. pub(crate) keypair: Arc, - /// The network entrypoint - entrypoint: RwLock>, + /// Network entrypoints + entrypoints: RwLock>, outbound_budget: DataBudget, my_contact_info: RwLock, ping_cache: RwLock, @@ -546,7 +547,7 @@ impl ClusterInfo { let me = Self { gossip: RwLock::new(CrdsGossip::default()), keypair, - entrypoint: RwLock::new(None), + entrypoints: RwLock::new(vec![]), outbound_budget: DataBudget::default(), my_contact_info: RwLock::new(contact_info), ping_cache: RwLock::new(PingCache::new( @@ -558,7 +559,7 @@ impl ClusterInfo { socket: UdpSocket::bind("0.0.0.0:0").unwrap(), local_message_pending_push_queue: RwLock::new(vec![]), contact_debug_interval: DEFAULT_CONTACT_DEBUG_INTERVAL, - instance: NodeInstance::new(&mut rand::thread_rng(), id, timestamp()), + instance: NodeInstance::new(&mut thread_rng(), id, timestamp()), }; { let mut gossip = me.gossip.write().unwrap(); @@ -579,7 +580,7 @@ impl ClusterInfo { ClusterInfo { gossip: RwLock::new(gossip), keypair: self.keypair.clone(), - entrypoint: RwLock::new(self.entrypoint.read().unwrap().clone()), + entrypoints: RwLock::new(self.entrypoints.read().unwrap().clone()), outbound_budget: self.outbound_budget.clone_non_atomic(), my_contact_info: RwLock::new(my_contact_info), ping_cache: RwLock::new(self.ping_cache.read().unwrap().mock_clone()), @@ -593,7 +594,7 @@ impl ClusterInfo { .clone(), ), contact_debug_interval: self.contact_debug_interval, - instance: NodeInstance::new(&mut rand::thread_rng(), *new_id, timestamp()), + instance: NodeInstance::new(&mut thread_rng(), *new_id, timestamp()), } } @@ -645,7 +646,11 @@ impl ClusterInfo { } pub fn set_entrypoint(&self, entrypoint: ContactInfo) { - *self.entrypoint.write().unwrap() = Some(entrypoint) + self.set_entrypoints(vec![entrypoint]); + } + + pub fn set_entrypoints(&self, entrypoints: Vec) { + *self.entrypoints.write().unwrap() = entrypoints; } pub fn id(&self) -> Pubkey { @@ -1501,52 +1506,49 @@ impl ClusterInfo { thread_pool: &ThreadPool, pulls: &mut Vec<(Pubkey, CrdsFilter, SocketAddr, CrdsValue)>, ) { - let pull_from_entrypoint = { - let mut w_entrypoint = self.entrypoint.write().unwrap(); - if let Some(ref mut entrypoint) = &mut *w_entrypoint { + let entrypoint_id_and_gossip = { + let mut entrypoints = self.entrypoints.write().unwrap(); + if entrypoints.is_empty() { + None + } else { + let i = thread_rng().gen_range(0, entrypoints.len()); + let entrypoint = &mut entrypoints[i]; + if pulls.is_empty() { - // Nobody else to pull from, try the entrypoint - true + // Nobody else to pull from, try an entrypoint + Some((entrypoint.id, entrypoint.gossip)) } else { let now = timestamp(); - // Only consider pulling from the entrypoint periodically to avoid spamming it - if timestamp() - entrypoint.wallclock <= CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 2 { - false + if now - entrypoint.wallclock <= CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 2 { + None } else { entrypoint.wallclock = now; - let found_entrypoint = self + if self .time_gossip_read_lock("entrypoint", &self.stats.entrypoint) .crds .get_nodes_contact_info() - .any(|node| node.gossip == entrypoint.gossip); - !found_entrypoint + .any(|node| node.gossip == entrypoint.gossip) + { + None // Found the entrypoint, no need to pull from it + } else { + Some((entrypoint.id, entrypoint.gossip)) + } } } - } else { - false } }; - if pull_from_entrypoint { - let id_and_gossip = { - self.entrypoint - .read() - .unwrap() - .as_ref() - .map(|e| (e.id, e.gossip)) - }; - if let Some((id, gossip)) = id_and_gossip { - let r_gossip = self.time_gossip_read_lock("entrypoint", &self.stats.entrypoint2); - let self_info = r_gossip - .crds - .lookup(&CrdsValueLabel::ContactInfo(self.id())) - .unwrap_or_else(|| panic!("self_id invalid {}", self.id())); - r_gossip - .pull - .build_crds_filters(thread_pool, &r_gossip.crds, MAX_BLOOM_SIZE) - .into_iter() - .for_each(|filter| pulls.push((id, filter, gossip, self_info.clone()))); - } + if let Some((id, gossip)) = entrypoint_id_and_gossip { + let r_gossip = self.time_gossip_read_lock("entrypoint", &self.stats.entrypoint2); + let self_info = r_gossip + .crds + .lookup(&CrdsValueLabel::ContactInfo(self.id())) + .unwrap_or_else(|| panic!("self_id invalid {}", self.id())); + r_gossip + .pull + .build_crds_filters(thread_pool, &r_gossip.crds, MAX_BLOOM_SIZE) + .into_iter() + .for_each(|filter| pulls.push((id, filter, gossip, self_info.clone()))); } } @@ -1731,46 +1733,51 @@ impl ClusterInfo { Ok(()) } - fn process_entrypoint(&self, entrypoint_processed: &mut bool) { - if *entrypoint_processed { + fn process_entrypoints(&self, entrypoints_processed: &mut bool) { + if *entrypoints_processed { return; } - let gossip_addr = self.entrypoint.read().unwrap().as_ref().map(|e| e.gossip); - if let Some(gossip_addr) = gossip_addr { - // If a pull from the entrypoint was successful it should exist in the CRDS table - let entrypoint = self.lookup_contact_info_by_gossip_addr(&gossip_addr); - - if let Some(entrypoint) = entrypoint { - // Adopt the entrypoint's `shred_version` if ours is unset - if self.my_shred_version() == 0 { - if entrypoint.shred_version == 0 { - warn!("Unable to adopt entrypoint shred version of 0"); - } else { - info!( - "Setting shred version to {:?} from entrypoint {:?}", - entrypoint.shred_version, entrypoint.id - ); - self.my_contact_info.write().unwrap().shred_version = - entrypoint.shred_version; - self.gossip - .write() - .unwrap() - .set_shred_version(entrypoint.shred_version); - self.insert_self(); - *entrypoint_processed = true; - } - } else { - *entrypoint_processed = true; - } - - // Update the entrypoint's id so future entrypoint pulls correctly reference it - *self.entrypoint.write().unwrap() = Some(entrypoint); - } - } else { + let mut entrypoints = self.entrypoints.write().unwrap(); + if entrypoints.is_empty() { // No entrypoint specified. Nothing more to process - *entrypoint_processed = true; + *entrypoints_processed = true; + return; } + + for entrypoint in entrypoints.iter_mut() { + if entrypoint.id == Pubkey::default() { + // If a pull from the entrypoint was successful it should exist in the CRDS table + if let Some(entrypoint_from_gossip) = + self.lookup_contact_info_by_gossip_addr(&entrypoint.gossip) + { + // Update the entrypoint's id so future entrypoint pulls correctly reference it + *entrypoint = entrypoint_from_gossip; + } + } + } + // Adopt an entrypoint's `shred_version` if ours is unset + if self.my_shred_version() == 0 { + if let Some(entrypoint) = entrypoints + .iter() + .find(|entrypoint| entrypoint.shred_version != 0) + { + info!( + "Setting shred version to {:?} from entrypoint {:?}", + entrypoint.shred_version, entrypoint.id + ); + self.my_contact_info.write().unwrap().shred_version = entrypoint.shred_version; + self.gossip + .write() + .unwrap() + .set_shred_version(entrypoint.shred_version); + } + } + + *entrypoints_processed = self.my_shred_version() != 0 + && entrypoints + .iter() + .all(|entrypoint| entrypoint.id != Pubkey::default()); } fn handle_purge( @@ -1816,7 +1823,7 @@ impl ClusterInfo { .spawn(move || { let mut last_push = timestamp(); let mut last_contact_info_trace = timestamp(); - let mut entrypoint_processed = false; + let mut entrypoints_processed = false; let recycler = PacketsRecycler::default(); let crds_data = vec![ CrdsData::Version(Version::new(self.id())), @@ -1863,7 +1870,7 @@ impl ClusterInfo { self.handle_purge(&thread_pool, &bank_forks, &stakes); - self.process_entrypoint(&mut entrypoint_processed); + self.process_entrypoints(&mut entrypoints_processed); //TODO: possibly tune this parameter //we saw a deadlock passing an self.read().unwrap().timeout into sleep @@ -3920,7 +3927,7 @@ mod tests { ); let pulls = cluster_info.new_pull_requests(&thread_pool, None, &HashMap::new()); assert_eq!(1, pulls.len() as u64); - assert_eq!(*cluster_info.entrypoint.read().unwrap(), Some(entrypoint)); + assert_eq!(*cluster_info.entrypoints.read().unwrap(), vec![entrypoint]); } #[test] @@ -4106,13 +4113,7 @@ mod tests { // Pull request 2: pretend it's been a while since we've pulled from `entrypoint`. There should // now be two pull requests - cluster_info - .entrypoint - .write() - .unwrap() - .as_mut() - .unwrap() - .wallclock = 0; + cluster_info.entrypoints.write().unwrap()[0].wallclock = 0; let pulls = cluster_info.new_pull_requests(&thread_pool, None, &stakes); assert_eq!(2, pulls.len() as u64); assert_eq!(pulls.get(0).unwrap().0, other_node.gossip); @@ -4242,46 +4243,75 @@ mod tests { )); assert_eq!(cluster_info.my_shred_version(), 0); - // Simulating starting up with default entrypoint, no known id, only a gossip + // Simulating starting up with two entrypoints, no known id, only a gossip // address - let entrypoint_gossip_addr = socketaddr!("127.0.0.2:1234"); - let mut entrypoint = ContactInfo::new_localhost(&Pubkey::default(), timestamp()); - entrypoint.gossip = entrypoint_gossip_addr; - assert_eq!(entrypoint.shred_version, 0); - cluster_info.set_entrypoint(entrypoint); + let entrypoint1_gossip_addr = socketaddr!("127.0.0.2:1234"); + let mut entrypoint1 = ContactInfo::new_localhost(&Pubkey::default(), timestamp()); + entrypoint1.gossip = entrypoint1_gossip_addr; + assert_eq!(entrypoint1.shred_version, 0); - // Simulate getting entrypoint ContactInfo from gossip with an entrypoint shred version of + let entrypoint2_gossip_addr = socketaddr!("127.0.0.2:5678"); + let mut entrypoint2 = ContactInfo::new_localhost(&Pubkey::default(), timestamp()); + entrypoint2.gossip = entrypoint2_gossip_addr; + assert_eq!(entrypoint2.shred_version, 0); + cluster_info.set_entrypoints(vec![entrypoint1, entrypoint2]); + + // Simulate getting entrypoint ContactInfo from gossip with an entrypoint1 shred version of // 0 - let mut gossiped_entrypoint_info = + let mut gossiped_entrypoint1_info = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp()); - gossiped_entrypoint_info.gossip = entrypoint_gossip_addr; - gossiped_entrypoint_info.shred_version = 0; - cluster_info.insert_info(gossiped_entrypoint_info.clone()); + gossiped_entrypoint1_info.gossip = entrypoint1_gossip_addr; + gossiped_entrypoint1_info.shred_version = 0; + cluster_info.insert_info(gossiped_entrypoint1_info.clone()); + assert!(!cluster_info + .entrypoints + .read() + .unwrap() + .iter() + .any(|entrypoint| *entrypoint == gossiped_entrypoint1_info)); // Adopt the entrypoint's gossiped contact info and verify - let mut entrypoint_processed = false; - ClusterInfo::process_entrypoint(&cluster_info, &mut entrypoint_processed); - assert_eq!( - cluster_info.entrypoint.read().unwrap().as_ref().unwrap(), - &gossiped_entrypoint_info - ); - assert!(!entrypoint_processed); // <--- entrypoint processing incomplete because shred adoption still pending + let mut entrypoints_processed = false; + ClusterInfo::process_entrypoints(&cluster_info, &mut entrypoints_processed); + assert_eq!(cluster_info.entrypoints.read().unwrap().len(), 2); + assert!(cluster_info + .entrypoints + .read() + .unwrap() + .iter() + .any(|entrypoint| *entrypoint == gossiped_entrypoint1_info)); + + assert!(!entrypoints_processed); // <--- entrypoint processing incomplete because shred adoption still pending assert_eq!(cluster_info.my_shred_version(), 0); // <-- shred version still 0 - // Simulate getting entrypoint ContactInfo from gossip with an entrypoint shred version of + // Simulate getting entrypoint ContactInfo from gossip with an entrypoint2 shred version of // !0 - gossiped_entrypoint_info.shred_version = 1; - cluster_info.insert_info(gossiped_entrypoint_info.clone()); + let mut gossiped_entrypoint2_info = + ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp()); + gossiped_entrypoint2_info.gossip = entrypoint2_gossip_addr; + gossiped_entrypoint2_info.shred_version = 1; + cluster_info.insert_info(gossiped_entrypoint2_info.clone()); + assert!(!cluster_info + .entrypoints + .read() + .unwrap() + .iter() + .any(|entrypoint| *entrypoint == gossiped_entrypoint2_info)); // Adopt the entrypoint's gossiped contact info and verify - let mut entrypoint_processed = false; - ClusterInfo::process_entrypoint(&cluster_info, &mut entrypoint_processed); - assert_eq!( - cluster_info.entrypoint.read().unwrap().as_ref().unwrap(), - &gossiped_entrypoint_info - ); - assert!(entrypoint_processed); - assert_eq!(cluster_info.my_shred_version(), 1); // <-- shred version now adopted from entrypoint + error!("Adopt the entrypoint's gossiped contact info and verify"); + let mut entrypoints_processed = false; + ClusterInfo::process_entrypoints(&cluster_info, &mut entrypoints_processed); + assert_eq!(cluster_info.entrypoints.read().unwrap().len(), 2); + assert!(cluster_info + .entrypoints + .read() + .unwrap() + .iter() + .any(|entrypoint| *entrypoint == gossiped_entrypoint2_info)); + + assert!(entrypoints_processed); + assert_eq!(cluster_info.my_shred_version(), 1); // <-- shred version now adopted from entrypoint2 } #[test] @@ -4314,13 +4344,14 @@ mod tests { cluster_info.insert_info(gossiped_entrypoint_info.clone()); // Adopt the entrypoint's gossiped contact info and verify - let mut entrypoint_processed = false; - ClusterInfo::process_entrypoint(&cluster_info, &mut entrypoint_processed); + let mut entrypoints_processed = false; + ClusterInfo::process_entrypoints(&cluster_info, &mut entrypoints_processed); + assert_eq!(cluster_info.entrypoints.read().unwrap().len(), 1); assert_eq!( - cluster_info.entrypoint.read().unwrap().as_ref().unwrap(), - &gossiped_entrypoint_info + cluster_info.entrypoints.read().unwrap()[0], + gossiped_entrypoint_info ); - assert!(entrypoint_processed); + assert!(entrypoints_processed); assert_eq!(cluster_info.my_shred_version(), 2); // <--- No change to shred version } } diff --git a/core/src/test_validator.rs b/core/src/test_validator.rs index 313b68d87d..588f3dda47 100644 --- a/core/src/test_validator.rs +++ b/core/src/test_validator.rs @@ -356,7 +356,7 @@ impl TestValidator { &ledger_path, &validator_vote_account.pubkey(), vec![Arc::new(validator_vote_account)], - None, + vec![], &validator_config, )); diff --git a/core/src/validator.rs b/core/src/validator.rs index 1f5bae1654..7e1e94248a 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -222,7 +222,7 @@ impl Validator { ledger_path: &Path, vote_account: &Pubkey, mut authorized_voter_keypairs: Vec>, - cluster_entrypoint: Option<&ContactInfo>, + cluster_entrypoints: Vec, config: &ValidatorConfig, ) -> Self { let id = identity_keypair.pubkey(); @@ -241,7 +241,9 @@ impl Validator { } report_target_features(); - info!("entrypoint: {:?}", cluster_entrypoint); + for cluster_entrypoint in &cluster_entrypoints { + info!("entrypoint: {:?}", cluster_entrypoint); + } if solana_perf::perf_libs::api().is_some() { info!("Initializing sigverify, this could take a while..."); @@ -492,6 +494,7 @@ impl Validator { config.gossip_validators.clone(), &exit, ); + cluster_info.set_entrypoints(cluster_entrypoints); let serve_repair = Arc::new(RwLock::new(ServeRepair::new(cluster_info.clone()))); let serve_repair_service = ServeRepairService::new( @@ -501,12 +504,6 @@ impl Validator { &exit, ); - // Insert the entrypoint info, should only be None if this node - // is the bootstrap validator - if let Some(cluster_entrypoint) = cluster_entrypoint { - cluster_info.set_entrypoint(cluster_entrypoint.clone()); - } - let (snapshot_packager_service, snapshot_config_and_package_sender) = if let Some(snapshot_config) = config.snapshot_config.clone() { if is_snapshot_config_invalid( @@ -1287,7 +1284,7 @@ mod tests { &validator_ledger_path, &voting_keypair.pubkey(), vec![voting_keypair.clone()], - Some(&leader_node.info), + vec![leader_node.info], &config, ); validator.close(); @@ -1357,7 +1354,7 @@ mod tests { &validator_ledger_path, &vote_account_keypair.pubkey(), vec![Arc::new(vote_account_keypair)], - Some(&leader_node.info), + vec![leader_node.info.clone()], &config, ) }) diff --git a/docs/src/clusters.md b/docs/src/clusters.md index 3a3342550e..ddaa553245 100644 --- a/docs/src/clusters.md +++ b/docs/src/clusters.md @@ -119,7 +119,7 @@ Currently, rewards and inflation are disabled. - Note: If you are using a non-command-line wallet such as [Solflare](wallet-guide/solflare.md), the wallet will always be connecting to Mainnet Beta. -- Gossip entrypoint for Mainnet Beta: `mainnet-beta.solana.com:8001` +- Gossip entrypoint for Mainnet Beta: `entrypoint.mainnet-beta.solana.com:8001` - Metrics environment variable for Mainnet Beta: ```bash export SOLANA_METRICS_CONFIG="host=https://metrics.solana.com:8086,db=mainnet-beta,u=mainnet-beta_write,p=password" @@ -147,7 +147,11 @@ $ solana-validator \ --rpc-port 8899 \ --private-rpc \ --dynamic-port-range 8000-8010 \ - --entrypoint mainnet-beta.solana.com:8001 \ + --entrypoint entrypoint.mainnet-beta.solana.com:8001 \ + --entrypoint entrypoint2.mainnet-beta.solana.com:8001 \ + --entrypoint entrypoint3.mainnet-beta.solana.com:8001 \ + --entrypoint entrypoint4.mainnet-beta.solana.com:8001 \ + --entrypoint entrypoint5.mainnet-beta.solana.com:8001 \ --expected-genesis-hash 5eykt4UsFv8P8NJdTREpY1vzqKqZKvdpKuc147dw2N9d \ --wal-recovery-mode skip_any_corrupted_record \ --limit-ledger-size diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs index 8389c9890b..40b20b2146 100644 --- a/local-cluster/src/local_cluster.rs +++ b/local-cluster/src/local_cluster.rs @@ -205,7 +205,7 @@ impl LocalCluster { &leader_ledger_path, &leader_vote_keypair.pubkey(), vec![leader_vote_keypair.clone()], - None, + vec![], &leader_config, ); @@ -348,7 +348,7 @@ impl LocalCluster { &ledger_path, &voting_keypair.pubkey(), vec![voting_keypair.clone()], - Some(&self.entry_point_info), + vec![self.entry_point_info.clone()], &config, ); @@ -660,7 +660,9 @@ impl Cluster for LocalCluster { &validator_info.ledger_path, &validator_info.voting_keypair.pubkey(), vec![validator_info.voting_keypair.clone()], - entry_point_info.as_ref(), + entry_point_info + .map(|entry_point_info| vec![entry_point_info]) + .unwrap_or_default(), &cluster_validator_info.config, ); cluster_validator_info.validator = Some(restarted_node); diff --git a/net-utils/src/lib.rs b/net-utils/src/lib.rs index 4ad65268a1..739d1fda59 100644 --- a/net-utils/src/lib.rs +++ b/net-utils/src/lib.rs @@ -113,7 +113,7 @@ fn do_verify_reachable_ports( udp_retry_count: usize, ) -> bool { info!( - "Checking that tcp ports {:?} from {:?}", + "Checking that tcp ports {:?} are reachable from {:?}", tcp_listeners, ip_echo_server_addr ); @@ -334,7 +334,7 @@ pub fn is_host(string: String) -> Result<(), String> { pub fn parse_host_port(host_port: &str) -> Result { let addrs: Vec<_> = host_port .to_socket_addrs() - .map_err(|err| err.to_string())? + .map_err(|err| format!("Unable to resolve host {}: {}", host_port, err))? .collect(); if addrs.is_empty() { Err(format!("Unable to resolve host: {}", host_port)) diff --git a/validator/src/main.rs b/validator/src/main.rs index 591e6f414a..16eec941e9 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -3,7 +3,7 @@ use clap::{ ArgMatches, }; use log::*; -use rand::{thread_rng, Rng}; +use rand::{seq::SliceRandom, thread_rng, Rng}; use solana_clap_utils::{ input_parsers::{keypair_of, keypairs_of, pubkey_of}, input_validators::{ @@ -107,7 +107,7 @@ fn get_trusted_snapshot_hashes( fn start_gossip_node( identity_keypair: &Arc, - entrypoint_gossip: &SocketAddr, + cluster_entrypoints: &[ContactInfo], gossip_addr: &SocketAddr, gossip_socket: UdpSocket, expected_shred_version: Option, @@ -121,7 +121,7 @@ fn start_gossip_node( ), identity_keypair.clone(), ); - cluster_info.set_entrypoint(ContactInfo::new_gossip_entry_point(entrypoint_gossip)); + cluster_info.set_entrypoints(cluster_entrypoints.to_vec()); let cluster_info = Arc::new(cluster_info); let gossip_exit_flag = Arc::new(AtomicBool::new(false)); @@ -137,7 +137,7 @@ fn start_gossip_node( fn get_rpc_node( cluster_info: &ClusterInfo, - entrypoint_gossip: &SocketAddr, + cluster_entrypoints: &[ContactInfo], validator_config: &ValidatorConfig, blacklisted_rpc_nodes: &mut HashSet, snapshot_not_required: bool, @@ -155,20 +155,19 @@ fn get_rpc_node( .expected_shred_version .unwrap_or_else(|| cluster_info.my_shred_version()); if shred_version == 0 { - if let Some(entrypoint) = - cluster_info.lookup_contact_info_by_gossip_addr(entrypoint_gossip) - { - if entrypoint.shred_version == 0 { - eprintln!( - "Entrypoint shred version is zero. Restart with --expected-shred-version" - ); - exit(1); - } + let all_zero_shred_versions = cluster_entrypoints.iter().all(|cluster_entrypoint| { + cluster_info + .lookup_contact_info_by_gossip_addr(&cluster_entrypoint.gossip) + .map_or(false, |entrypoint| entrypoint.shred_version == 0) + }); + + if all_zero_shred_versions { + eprintln!( + "Entrypoint shred version is zero. Restart with --expected-shred-version" + ); + exit(1); } - info!( - "Waiting to adopt entrypoint shred version, contact info for {:?} not found...", - entrypoint_gossip - ); + info!("Waiting to adopt entrypoint shred version..."); continue; } @@ -481,7 +480,7 @@ fn verify_reachable_ports( node: &Node, cluster_entrypoint: &ContactInfo, validator_config: &ValidatorConfig, -) { +) -> bool { let mut udp_sockets = vec![&node.sockets.gossip, &node.sockets.repair]; if ContactInfo::is_valid_address(&node.info.serve_repair) { @@ -528,13 +527,11 @@ fn verify_reachable_ports( tcp_listeners.push((ip_echo.local_addr().unwrap().port(), ip_echo)); } - if !solana_net_utils::verify_reachable_ports( + solana_net_utils::verify_reachable_ports( &cluster_entrypoint.gossip, tcp_listeners, &udp_sockets, - ) { - exit(1); - } + ) } struct RpcBootstrapConfig { @@ -564,7 +561,7 @@ fn rpc_bootstrap( ledger_path: &Path, vote_account: &Pubkey, authorized_voter_keypairs: &[Arc], - cluster_entrypoint: &ContactInfo, + cluster_entrypoints: &[ContactInfo], validator_config: &mut ValidatorConfig, bootstrap_config: RpcBootstrapConfig, no_port_check: bool, @@ -572,7 +569,14 @@ fn rpc_bootstrap( maximum_local_snapshot_age: Slot, ) { if !no_port_check { - verify_reachable_ports(&node, cluster_entrypoint, &validator_config); + let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect(); + order.shuffle(&mut thread_rng()); + if order + .into_iter() + .all(|i| !verify_reachable_ports(&node, &cluster_entrypoints[i], &validator_config)) + { + exit(1); + } } if bootstrap_config.no_genesis_fetch && bootstrap_config.no_snapshot_fetch { @@ -585,7 +589,7 @@ fn rpc_bootstrap( if gossip.is_none() { gossip = Some(start_gossip_node( &identity_keypair, - &cluster_entrypoint.gossip, + &cluster_entrypoints, &node.info.gossip, node.sockets.gossip.try_clone().unwrap(), validator_config.expected_shred_version, @@ -595,7 +599,7 @@ fn rpc_bootstrap( let rpc_node_details = get_rpc_node( &gossip.as_ref().unwrap().0, - &cluster_entrypoint.gossip, + &cluster_entrypoints, &validator_config, &mut blacklisted_rpc_nodes, bootstrap_config.no_snapshot_fetch, @@ -745,7 +749,7 @@ fn create_validator( ledger_path: &Path, vote_account: &Pubkey, authorized_voter_keypairs: Vec>, - cluster_entrypoint: Option, + cluster_entrypoints: Vec, mut validator_config: ValidatorConfig, rpc_bootstrap_config: RpcBootstrapConfig, no_port_check: bool, @@ -759,14 +763,14 @@ fn create_validator( solana_ledger::entry::init_poh(); solana_runtime::snapshot_utils::remove_tmp_snapshot_archives(ledger_path); - if let Some(ref cluster_entrypoint) = cluster_entrypoint { + if !cluster_entrypoints.is_empty() { rpc_bootstrap( &node, &identity_keypair, &ledger_path, &vote_account, &authorized_voter_keypairs, - cluster_entrypoint, + &cluster_entrypoints, &mut validator_config, rpc_bootstrap_config, no_port_check, @@ -781,7 +785,7 @@ fn create_validator( &ledger_path, &vote_account, authorized_voter_keypairs, - cluster_entrypoint.as_ref(), + cluster_entrypoints, &validator_config, ) } @@ -868,6 +872,7 @@ pub fn main() { .long("entrypoint") .value_name("HOST:PORT") .takes_value(true) + .multiple(true) .validator(solana_net_utils::is_host_port) .help("Rendezvous with the cluster at this gossip entrypoint"), ) @@ -1669,12 +1674,18 @@ pub fn main() { validator_config.halt_on_trusted_validators_accounts_hash_mismatch = true; } - let entrypoint_addr = matches.value_of("entrypoint").map(|entrypoint| { - solana_net_utils::parse_host_port(entrypoint).unwrap_or_else(|e| { - eprintln!("failed to parse entrypoint address: {}", e); - exit(1); + let entrypoint_addrs = values_t!(matches, "entrypoint", String) + .unwrap_or_default() + .into_iter() + .map(|entrypoint| { + solana_net_utils::parse_host_port(&entrypoint).unwrap_or_else(|e| { + eprintln!("failed to parse entrypoint address: {}", e); + exit(1); + }) }) - }); + .collect::>() + .into_iter() + .collect::>(); let public_rpc_addr = matches.value_of("public_rpc_addr").map(|addr| { solana_net_utils::parse_host_port(addr).unwrap_or_else(|e| { @@ -1699,7 +1710,11 @@ pub fn main() { let use_progress_bar = logfile.is_none(); let _logger_thread = start_logger(logfile); - let gossip_host = matches + info!("{} {}", crate_name!(), solana_version::version!()); + info!("Starting validator with: {:#?}", std::env::args_os()); + + use std::net::IpAddr; + let gossip_host: IpAddr = matches .value_of("gossip_host") .map(|gossip_host| { solana_net_utils::parse_host(gossip_host).unwrap_or_else(|err| { @@ -1708,12 +1723,30 @@ pub fn main() { }) }) .unwrap_or_else(|| { - if let Some(entrypoint_addr) = entrypoint_addr { - solana_net_utils::get_public_ip_addr(&entrypoint_addr).unwrap_or_else(|err| { - eprintln!( - "Failed to contact cluster entrypoint {}: {}", - entrypoint_addr, err + if !entrypoint_addrs.is_empty() { + let mut order: Vec<_> = (0..entrypoint_addrs.len()).collect(); + order.shuffle(&mut thread_rng()); + + let gossip_host = order.into_iter().find_map(|i| { + let entrypoint_addr = &entrypoint_addrs[i]; + info!( + "Contacting {} to determine the validator's public IP address", + entrypoint_addr ); + solana_net_utils::get_public_ip_addr(entrypoint_addr).map_or_else( + |err| { + eprintln!( + "Failed to contact cluster entrypoint {}: {}", + entrypoint_addr, err + ); + None + }, + Some, + ) + }); + + gossip_host.unwrap_or_else(|| { + eprintln!("Unable to determine the validator's public IP address"); exit(1); }) } else { @@ -1733,9 +1766,10 @@ pub fn main() { }), ); - let cluster_entrypoint = entrypoint_addr - .as_ref() - .map(ContactInfo::new_gossip_entry_point); + let cluster_entrypoints = entrypoint_addrs + .iter() + .map(ContactInfo::new_gossip_entry_point) + .collect::>(); let mut node = Node::new_with_external_ip( &identity_keypair.pubkey(), @@ -1769,9 +1803,6 @@ pub fn main() { } } - info!("{} {}", crate_name!(), solana_version::version!()); - info!("Starting validator with: {:#?}", std::env::args_os()); - solana_metrics::set_host_id(identity_keypair.pubkey().to_string()); solana_metrics::set_panic_hook("validator"); @@ -1781,7 +1812,7 @@ pub fn main() { &ledger_path, &vote_account, authorized_voter_keypairs, - cluster_entrypoint, + cluster_entrypoints, validator_config, rpc_bootstrap_config, no_port_check,