From a41254e18c9f0715c7e4a5c50834b4b5bfc90efb Mon Sep 17 00:00:00 2001 From: anatoly yakovenko Date: Thu, 15 Nov 2018 13:23:26 -0800 Subject: [PATCH] Add scalable gossip library (#1546) * Cluster Replicated Data Store Separate the data storage and merge strategy from the network IO boundary. Implement an eager push overlay for transporting recent messages. Simulation shows fast convergence with 20k nodes. --- Cargo.toml | 2 + src/bin/bench-tps.rs | 20 +- src/bin/fullnode.rs | 2 +- src/bin/replicator.rs | 4 +- src/bloom.rs | 103 ++ src/broadcast_stage.rs | 7 +- src/choose_gossip_peer_strategy.rs | 334 ------ src/client.rs | 3 +- src/cluster_info.rs | 1517 +++++++++------------------- src/contact_info.rs | 237 +++++ src/crds.rs | 351 +++++++ src/crds_gossip.rs | 486 +++++++++ src/crds_gossip_error.rs | 7 + src/crds_gossip_pull.rs | 378 +++++++ src/crds_gossip_push.rs | 453 +++++++++ src/crds_traits_impls.rs | 26 + src/crds_value.rs | 147 +++ src/drone.rs | 26 +- src/fullnode.rs | 29 +- src/lib.rs | 14 +- src/ncp.rs | 4 +- src/replicator.rs | 4 +- src/rpc.rs | 24 +- src/thin_client.rs | 35 +- src/tvu.rs | 6 +- src/vote_stage.rs | 8 +- src/wallet.rs | 32 +- src/window.rs | 3 +- src/window_service.rs | 10 +- tests/data_replicator.rs | 193 +--- tests/multinode.rs | 54 +- 31 files changed, 2821 insertions(+), 1698 deletions(-) create mode 100644 src/bloom.rs delete mode 100644 src/choose_gossip_peer_strategy.rs create mode 100644 src/contact_info.rs create mode 100644 src/crds.rs create mode 100644 src/crds_gossip.rs create mode 100644 src/crds_gossip_error.rs create mode 100644 src/crds_gossip_pull.rs create mode 100644 src/crds_gossip_push.rs create mode 100644 src/crds_traits_impls.rs create mode 100644 src/crds_value.rs diff --git a/Cargo.toml b/Cargo.toml index b8367bf29d..8589f30eab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,6 +70,7 @@ unstable = [] atty = "0.2" bincode = "1.0.0" bs58 = "0.2.0" +bv = { version = "0.10.0", features = ["serde"] } byteorder = "1.2.1" bytes = "0.4" chrono = { version = "0.4.0", features = ["serde"] } @@ -88,6 +89,7 @@ solana-jsonrpc-pubsub = "0.3.0" solana-jsonrpc-ws-server = "0.3.0" ipnetwork = "0.12.7" itertools = "0.7.8" +indexmap = "1.0" libc = "0.2.43" libloading = "0.5.0" log = "0.4.2" diff --git a/src/bin/bench-tps.rs b/src/bin/bench-tps.rs index a5b32fe343..192f2ff5c3 100644 --- a/src/bin/bench-tps.rs +++ b/src/bin/bench-tps.rs @@ -69,7 +69,7 @@ fn sample_tx_count( let mut max_tps = 0.0; let mut total; - let log_prefix = format!("{:21}:", v.contact_info.tpu.to_string()); + let log_prefix = format!("{:21}:", v.tpu.to_string()); loop { let tx_count = client.transaction_count(); @@ -106,7 +106,7 @@ fn sample_tx_count( tps: max_tps, tx: total, }; - maxes.write().unwrap().push((v.contact_info.tpu, stats)); + maxes.write().unwrap().push((v.tpu, stats)); break; } } @@ -257,7 +257,7 @@ fn do_tx_transfers( println!( "Transferring 1 unit {} times... 
to {}", txs0.len(), - leader.contact_info.tpu + leader.tpu ); let tx_len = txs0.len(); let transfer_start = Instant::now(); @@ -377,7 +377,7 @@ fn fund_keys(client: &mut ThinClient, source: &Keypair, dests: &[Keypair], token } fn airdrop_tokens(client: &mut ThinClient, leader: &NodeInfo, id: &Keypair, tx_count: u64) { - let mut drone_addr = leader.contact_info.tpu; + let mut drone_addr = leader.tpu; drone_addr.set_port(DRONE_PORT); let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap_or(0); @@ -638,7 +638,7 @@ fn main() { let leader = leader.unwrap(); - println!("leader RPC is at {} {}", leader.contact_info.rpc, leader.id); + println!("leader RPC is at {} {}", leader.rpc, leader.id); let mut client = mk_client(&leader); let mut barrier_client = mk_client(&leader); @@ -804,7 +804,7 @@ fn converge( //lets spy on the network let (node, gossip_socket) = ClusterInfo::spy_node(); let mut spy_cluster_info = ClusterInfo::new(node).expect("ClusterInfo::new"); - spy_cluster_info.insert(&leader); + spy_cluster_info.insert_info(leader.clone()); spy_cluster_info.set_leader(leader.id); let spy_ref = Arc::new(RwLock::new(spy_cluster_info)); let window = Arc::new(RwLock::new(default_window())); @@ -818,13 +818,7 @@ fn converge( println!("{}", spy_ref.node_info_trace()); if spy_ref.leader_data().is_some() { - v = spy_ref - .table - .values() - .filter(|x| ClusterInfo::is_valid_address(&x.contact_info.rpc)) - .cloned() - .collect(); - + v = spy_ref.rpc_peers(); if v.len() >= num_nodes { println!("CONVERGED!"); break; diff --git a/src/bin/fullnode.rs b/src/bin/fullnode.rs index a8017c95f3..eab92fa674 100644 --- a/src/bin/fullnode.rs +++ b/src/bin/fullnode.rs @@ -67,7 +67,7 @@ fn main() { if let Ok(file) = File::open(path.clone()) { let parse: serde_json::Result = serde_json::from_reader(file); if let Ok(data) = parse { - (data.keypair(), data.node_info.contact_info.ncp) + (data.keypair(), data.node_info.ncp) } else { eprintln!("failed to parse {}", path); exit(1); diff --git a/src/bin/replicator.rs b/src/bin/replicator.rs index 2a931c52d3..23aa3fe896 100644 --- a/src/bin/replicator.rs +++ b/src/bin/replicator.rs @@ -63,7 +63,7 @@ fn main() { if let Ok(file) = File::open(path.clone()) { let parse: serde_json::Result = serde_json::from_reader(file); if let Ok(data) = parse { - (data.keypair(), data.node_info.contact_info.ncp) + (data.keypair(), data.node_info.ncp) } else { eprintln!("failed to parse {}", path); exit(1); @@ -129,7 +129,7 @@ fn main() { let mut client = mk_client(&leader_info); - let mut drone_addr = leader_info.contact_info.tpu; + let mut drone_addr = leader_info.tpu; drone_addr.set_port(DRONE_PORT); let airdrop_amount = 5; if let Err(e) = request_airdrop(&drone_addr, &keypair.pubkey(), airdrop_amount) { diff --git a/src/bloom.rs b/src/bloom.rs new file mode 100644 index 0000000000..b2d7f07bfc --- /dev/null +++ b/src/bloom.rs @@ -0,0 +1,103 @@ +//! Simple Bloom Filter +use bv::BitVec; +use rand::{self, Rng}; +use std::cmp; +use std::marker::PhantomData; + +/// Generate a stable hash of `self` for each `hash_index` +/// Best effort can be made for uniqueness of each hash. 
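The Bloom filter added below sizes itself from a target false-positive rate. As a reading aid only (not part of the patch), this std-only sketch mirrors the arithmetic in `Bloom::random` so the constants asserted in its unit tests (34 bits / 3 keys at num=10, false_rate=0.1, and saturation at max_bits) can be checked in isolation:

```rust
// Mirrors the sizing arithmetic in Bloom::random below; standalone, std only.
fn bloom_params(num: usize, false_rate: f64, max_bits: usize) -> (usize, usize) {
    // The divisor is log2(1/2) == -1, so this reduces to num * log2(1/false_rate) bits.
    let min_num_bits = ((num as f64 * false_rate.log(2f64))
        / (1f64 / 2f64.powf(2f64.log(2f64))).log(2f64))
        .ceil() as usize;
    let num_bits = min_num_bits.min(max_bits).max(1);
    // The key count works out to the bits-per-item ratio (the log2(2) factor is 1).
    let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize;
    (num_bits, num_keys)
}

fn main() {
    assert_eq!(bloom_params(10, 0.1, 100), (34, 3));   // the "normal" case in test_bloom_filter
    assert_eq!(bloom_params(100, 0.1, 100), (100, 1)); // capped by max_bits, the "saturated" case
}
```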
+pub trait BloomHashIndex { + fn hash(&self, hash_index: u64) -> u64; +} + +#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)] +pub struct Bloom { + pub keys: Vec, + pub bits: BitVec, + _phantom: PhantomData, +} + +impl Bloom { + /// create filter optimal for num size given the `false_rate` + /// the keys are randomized for picking data out of a collision resistant hash of size + /// `keysize` bytes + /// https://hur.st/bloomfilter/ + pub fn random(num: usize, false_rate: f64, max_bits: usize) -> Self { + let min_num_bits = ((num as f64 * false_rate.log(2f64)) + / (1f64 / 2f64.powf(2f64.log(2f64))).log(2f64)).ceil() + as usize; + let num_bits = cmp::max(1, cmp::min(min_num_bits, max_bits)); + let num_keys = ((num_bits as f64 / num as f64) * 2f64.log(2f64)).round() as usize; + let keys: Vec = (0..num_keys).map(|_| rand::thread_rng().gen()).collect(); + let bits = BitVec::new_fill(false, num_bits as u64); + Bloom { + keys, + bits, + _phantom: Default::default(), + } + } + fn pos(&self, key: &T, k: u64) -> u64 { + key.hash(k) % self.bits.len() + } + pub fn add(&mut self, key: &T) { + for k in &self.keys { + let pos = self.pos(key, *k); + self.bits.set(pos, true); + } + } + pub fn contains(&mut self, key: &T) -> bool { + for k in &self.keys { + let pos = self.pos(key, *k); + if !self.bits.get(pos) { + return false; + } + } + true + } +} + +#[cfg(test)] +mod test { + use super::*; + use hash::{hash, Hash}; + + #[test] + fn test_bloom_filter() { + //empty + let bloom: Bloom = Bloom::random(0, 0.1, 100); + assert_eq!(bloom.keys.len(), 0); + assert_eq!(bloom.bits.len(), 1); + + //normal + let bloom: Bloom = Bloom::random(10, 0.1, 100); + assert_eq!(bloom.keys.len(), 3); + assert_eq!(bloom.bits.len(), 34); + + //saturated + let bloom: Bloom = Bloom::random(100, 0.1, 100); + assert_eq!(bloom.keys.len(), 1); + assert_eq!(bloom.bits.len(), 100); + } + #[test] + fn test_add_contains() { + let mut bloom: Bloom = Bloom::random(100, 0.1, 100); + + let key = hash(b"hello"); + assert!(!bloom.contains(&key)); + bloom.add(&key); + assert!(bloom.contains(&key)); + + let key = hash(b"world"); + assert!(!bloom.contains(&key)); + bloom.add(&key); + assert!(bloom.contains(&key)); + } + #[test] + fn test_random() { + let mut b1: Bloom = Bloom::random(10, 0.1, 100); + let mut b2: Bloom = Bloom::random(10, 0.1, 100); + b1.keys.sort(); + b2.keys.sort(); + assert_ne!(b1.keys, b2.keys); + } +} diff --git a/src/broadcast_stage.rs b/src/broadcast_stage.rs index a5e4b01c08..f414ce0706 100644 --- a/src/broadcast_stage.rs +++ b/src/broadcast_stage.rs @@ -13,6 +13,7 @@ use packet::{index_blobs, SharedBlobs}; use rayon::prelude::*; use result::{Error, Result}; use service::Service; +use solana_sdk::pubkey::Pubkey; use std::net::UdpSocket; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::mpsc::{Receiver, RecvTimeoutError}; @@ -32,6 +33,7 @@ pub enum BroadcastStageReturnType { fn broadcast( max_tick_height: Option, tick_height: &mut u64, + leader_id: Pubkey, node_info: &NodeInfo, broadcast_table: &[NodeInfo], window: &SharedWindow, @@ -140,6 +142,7 @@ fn broadcast( // Send blobs out from the window ClusterInfo::broadcast( Some(*tick_height) == max_tick_height, + leader_id, &node_info, &broadcast_table, &window, @@ -211,10 +214,12 @@ impl BroadcastStage { let me = cluster_info.read().unwrap().my_data().clone(); let mut tick_height_ = tick_height; loop { - let broadcast_table = cluster_info.read().unwrap().compute_broadcast_table(); + let broadcast_table = 
cluster_info.read().unwrap().tpu_peers(); + let leader_id = cluster_info.read().unwrap().leader_id(); if let Err(e) = broadcast( max_tick_height, &mut tick_height_, + leader_id, &me, &broadcast_table, &window, diff --git a/src/choose_gossip_peer_strategy.rs b/src/choose_gossip_peer_strategy.rs deleted file mode 100644 index cd7bf1b242..0000000000 --- a/src/choose_gossip_peer_strategy.rs +++ /dev/null @@ -1,334 +0,0 @@ -use cluster_info::{ClusterInfoError, NodeInfo}; -use rand::distributions::{Distribution, Weighted, WeightedChoice}; -use rand::thread_rng; -use result::Result; -use solana_sdk::pubkey::Pubkey; -use std; -use std::collections::HashMap; - -pub const DEFAULT_WEIGHT: u32 = 1; - -pub trait ChooseGossipPeerStrategy { - fn choose_peer<'a>(&self, options: Vec<&'a NodeInfo>) -> Result<&'a NodeInfo>; -} - -pub struct ChooseRandomPeerStrategy<'a> { - random: &'a Fn() -> u64, -} - -// Given a source of randomness "random", this strategy will randomly pick a validator -// from the input options. This strategy works in isolation, but doesn't leverage any -// rumors from the rest of the gossip network to make more informed decisions about -// which validators have more/less updates -impl<'a, 'b> ChooseRandomPeerStrategy<'a> { - pub fn new(random: &'a Fn() -> u64) -> Self { - ChooseRandomPeerStrategy { random } - } -} - -impl<'a> ChooseGossipPeerStrategy for ChooseRandomPeerStrategy<'a> { - fn choose_peer<'b>(&self, options: Vec<&'b NodeInfo>) -> Result<&'b NodeInfo> { - if options.is_empty() { - Err(ClusterInfoError::NoPeers)?; - } - - let n = ((self.random)() as usize) % options.len(); - Ok(options[n]) - } -} - -// This strategy uses rumors accumulated from the rest of the network to weight -// the importance of communicating with a particular validator based on cumulative network -// perceiption of the number of updates the validator has to offer. A validator is randomly -// picked based on a weighted sample from the pool of viable choices. The "weight", w, of a -// particular validator "v" is calculated as follows: -// -// w = [Sum for all i in I_v: (rumor_v(i) - observed(v)) * stake(i)] / -// [Sum for all i in I_v: Sum(stake(i))] -// -// where I_v is the set of all validators that returned a rumor about the update_index of -// validator "v", stake(i) is the size of the stake of validator "i", observed(v) is the -// observed update_index from the last direct communication validator "v", and -// rumor_v(i) is the rumored update_index of validator "v" propagated by fellow validator "i". - -// This could be a problem if there are validators with large stakes lying about their -// observed updates. There could also be a problem in network partitions, or even just -// when certain validators are disproportionately active, where we hear more rumors about -// certain clusters of nodes that then propagate more rumros about each other. Hopefully -// this can be resolved with a good baseline DEFAULT_WEIGHT, or by implementing lockout -// periods for very active validators in the future. - -pub struct ChooseWeightedPeerStrategy<'a> { - // The map of last directly observed update_index for each active validator. - // This is how we get observed(v) from the formula above. - remote: &'a HashMap, - // The map of rumored update_index for each active validator. Using the formula above, - // to find rumor_v(i), we would first look up "v" in the outer map, then look up - // "i" in the inner map, i.e. 
look up external_liveness[v][i] - external_liveness: &'a HashMap>, - // A function returning the size of the stake for a particular validator, corresponds - // to stake(i) in the formula above. - get_stake: &'a Fn(Pubkey) -> f64, -} - -impl<'a> ChooseWeightedPeerStrategy<'a> { - pub fn new( - remote: &'a HashMap, - external_liveness: &'a HashMap>, - get_stake: &'a Fn(Pubkey) -> f64, - ) -> Self { - ChooseWeightedPeerStrategy { - remote, - external_liveness, - get_stake, - } - } - - fn calculate_weighted_remote_index(&self, peer_id: Pubkey) -> u32 { - let mut last_seen_index = 0; - // If the peer is not in our remote table, then we leave last_seen_index as zero. - // Only happens when a peer appears in our cluster_info.table but not in our cluster_info.remote, - // which means a validator was directly injected into our cluster_info.table - if let Some(index) = self.remote.get(&peer_id) { - last_seen_index = *index; - } - - let liveness_entry = self.external_liveness.get(&peer_id); - if liveness_entry.is_none() { - return DEFAULT_WEIGHT; - } - - let votes = liveness_entry.unwrap(); - - if votes.is_empty() { - return DEFAULT_WEIGHT; - } - - // Calculate the weighted average of the rumors - let mut relevant_votes = vec![]; - - let total_stake = votes.iter().fold(0.0, |total_stake, (&id, &vote)| { - let stake = (self.get_stake)(id); - // If the total stake is going to overflow u64, pick - // the larger of either the current total_stake, or the - // new stake, this way we are guaranteed to get at least u64/2 - // sample of stake in our weighted calculation - if std::f64::MAX - total_stake < stake { - if stake > total_stake { - relevant_votes = vec![(stake, vote)]; - stake - } else { - total_stake - } - } else { - relevant_votes.push((stake, vote)); - total_stake + stake - } - }); - - let weighted_vote = relevant_votes.iter().fold(0.0, |sum, &(stake, vote)| { - if vote < last_seen_index { - // This should never happen because we maintain the invariant that the indexes - // in the external_liveness table are always greater than the corresponding - // indexes in the remote table, if the index exists in the remote table at all. - - // Case 1: Attempt to insert bigger index into the "external_liveness" table - // happens after an insertion into the "remote" table. In this case, - // (see apply_updates()) function, we prevent the insertion if the entry - // in the remote table >= the atempted insertion into the "external" liveness - // table. - - // Case 2: Bigger index in the "external_liveness" table inserted before - // a smaller insertion into the "remote" table. We clear the corresponding - // "external_liveness" table entry on all insertions into the "remote" table - // See apply_updates() function. - - warn!("weighted peer index was smaller than local entry in remote table"); - return sum; - } - - let vote_difference = (vote - last_seen_index) as f64; - let new_weight = vote_difference * (stake / total_stake); - - if std::f64::MAX - sum < new_weight { - return f64::max(new_weight, sum); - } - - sum + new_weight - }); - - // Return u32 b/c the weighted sampling API from rand::distributions - // only takes u32 for weights - if weighted_vote >= f64::from(std::u32::MAX) { - return std::u32::MAX; - } - - // If the weighted rumors we've heard about aren't any greater than - // what we've directly learned from the last time we communicated with the - // peer (i.e. weighted_vote == 0), then return a weight of 1. - // Otherwise, return the calculated weight. 
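The comments above document the weight formula of the strategy this patch removes. A std-only arithmetic check (not part of the patch, and without the overflow guards of the real code) makes it concrete: with equal stakes, observed(v) = 0, and rumored indexes 0..9, the stake-weighted average of the deltas is 4.5, which truncates to 4 and gains DEFAULT_WEIGHT = 1, matching the num_peers / 2 expected by test_many_validators further down.

```rust
// Simplified check of the retired weight formula; equal stakes, no overflow handling.
fn weighted_remote_index(observed: u64, rumors: &[(f64, u64)], default_weight: u32) -> u32 {
    let total_stake: f64 = rumors.iter().map(|&(stake, _)| stake).sum();
    let weighted: f64 = rumors
        .iter()
        .map(|&(stake, vote)| vote.saturating_sub(observed) as f64 * (stake / total_stake))
        .sum();
    weighted as u32 + default_weight
}

fn main() {
    // Ten peers, stake 1.0 each, rumored indexes 0..9, nothing observed directly yet.
    let rumors: Vec<(f64, u64)> = (0..10).map(|v| (1.0, v)).collect();
    assert_eq!(weighted_remote_index(0, &rumors, 1), 5);
}
```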
- weighted_vote as u32 + DEFAULT_WEIGHT - } -} - -impl<'a> ChooseGossipPeerStrategy for ChooseWeightedPeerStrategy<'a> { - fn choose_peer<'b>(&self, options: Vec<&'b NodeInfo>) -> Result<&'b NodeInfo> { - if options.is_empty() { - Err(ClusterInfoError::NoPeers)?; - } - - let mut weighted_peers = vec![]; - for peer in options { - let weight = self.calculate_weighted_remote_index(peer.id); - weighted_peers.push(Weighted { weight, item: peer }); - } - - let mut rng = thread_rng(); - Ok(WeightedChoice::new(&mut weighted_peers).sample(&mut rng)) - } -} - -#[cfg(test)] -mod tests { - use choose_gossip_peer_strategy::{ChooseWeightedPeerStrategy, DEFAULT_WEIGHT}; - use logger; - use signature::{Keypair, KeypairUtil}; - use solana_sdk::pubkey::Pubkey; - use std; - use std::collections::HashMap; - - fn get_stake(_id: Pubkey) -> f64 { - 1.0 - } - - #[test] - fn test_default() { - logger::setup(); - - // Initialize the filler keys - let key1 = Keypair::new().pubkey(); - - let remote: HashMap = HashMap::new(); - let external_liveness: HashMap> = HashMap::new(); - - let weighted_strategy = - ChooseWeightedPeerStrategy::new(&remote, &external_liveness, &get_stake); - - // If external_liveness table doesn't contain this entry, - // return the default weight - let result = weighted_strategy.calculate_weighted_remote_index(key1); - assert_eq!(result, DEFAULT_WEIGHT); - } - - #[test] - fn test_only_external_liveness() { - logger::setup(); - - // Initialize the filler keys - let key1 = Keypair::new().pubkey(); - let key2 = Keypair::new().pubkey(); - - let remote: HashMap = HashMap::new(); - let mut external_liveness: HashMap> = HashMap::new(); - - // If only the liveness table contains the entry, should return the - // weighted liveness entries - let test_value: u32 = 5; - let mut rumors: HashMap = HashMap::new(); - rumors.insert(key2, test_value as u64); - external_liveness.insert(key1, rumors); - - let weighted_strategy = - ChooseWeightedPeerStrategy::new(&remote, &external_liveness, &get_stake); - - let result = weighted_strategy.calculate_weighted_remote_index(key1); - assert_eq!(result, test_value + DEFAULT_WEIGHT); - } - - #[test] - fn test_overflow_votes() { - logger::setup(); - - // Initialize the filler keys - let key1 = Keypair::new().pubkey(); - let key2 = Keypair::new().pubkey(); - - let remote: HashMap = HashMap::new(); - let mut external_liveness: HashMap> = HashMap::new(); - - // If the vote index is greater than u32::MAX, default to u32::MAX - let test_value = (std::u32::MAX as u64) + 10; - let mut rumors: HashMap = HashMap::new(); - rumors.insert(key2, test_value); - external_liveness.insert(key1, rumors); - - let weighted_strategy = - ChooseWeightedPeerStrategy::new(&remote, &external_liveness, &get_stake); - - let result = weighted_strategy.calculate_weighted_remote_index(key1); - assert_eq!(result, std::u32::MAX); - } - - #[test] - fn test_many_validators() { - logger::setup(); - - // Initialize the filler keys - let key1 = Keypair::new().pubkey(); - - let mut remote: HashMap = HashMap::new(); - let mut external_liveness: HashMap> = HashMap::new(); - - // Test many validators' rumors in external_liveness - let num_peers = 10; - let mut rumors: HashMap = HashMap::new(); - - remote.insert(key1, 0); - - for i in 0..num_peers { - let pubkey = Keypair::new().pubkey(); - rumors.insert(pubkey, i); - } - - external_liveness.insert(key1, rumors); - - let weighted_strategy = - ChooseWeightedPeerStrategy::new(&remote, &external_liveness, &get_stake); - - let result = 
weighted_strategy.calculate_weighted_remote_index(key1); - assert_eq!(result, (num_peers / 2) as u32); - } - - #[test] - fn test_many_validators2() { - logger::setup(); - - // Initialize the filler keys - let key1 = Keypair::new().pubkey(); - - let mut remote: HashMap = HashMap::new(); - let mut external_liveness: HashMap> = HashMap::new(); - - // Test many validators' rumors in external_liveness - let num_peers = 10; - let old_index = 20; - let mut rumors: HashMap = HashMap::new(); - - remote.insert(key1, old_index); - - for _i in 0..num_peers { - let pubkey = Keypair::new().pubkey(); - rumors.insert(pubkey, old_index); - } - - external_liveness.insert(key1, rumors); - - let weighted_strategy = - ChooseWeightedPeerStrategy::new(&remote, &external_liveness, &get_stake); - - let result = weighted_strategy.calculate_weighted_remote_index(key1); - - // If nobody has seen a newer update then revert to default - assert_eq!(result, DEFAULT_WEIGHT); - } -} diff --git a/src/client.rs b/src/client.rs index 2bec75471d..7d278eb3f5 100644 --- a/src/client.rs +++ b/src/client.rs @@ -4,6 +4,5 @@ use thin_client::ThinClient; pub fn mk_client(r: &NodeInfo) -> ThinClient { let (_, transactions_socket) = bind_in_range(FULLNODE_PORT_RANGE).unwrap(); - - ThinClient::new(r.contact_info.rpc, r.contact_info.tpu, transactions_socket) + ThinClient::new(r.rpc, r.tpu, transactions_socket) } diff --git a/src/cluster_info.rs b/src/cluster_info.rs index 706732005b..6d8f1189f2 100644 --- a/src/cluster_info.rs +++ b/src/cluster_info.rs @@ -12,9 +12,13 @@ //! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes. //! //! Bank needs to provide an interface for us to query the stake weight -use bincode::{deserialize, serialize, serialized_size}; -use choose_gossip_peer_strategy::{ChooseGossipPeerStrategy, ChooseWeightedPeerStrategy}; +use bincode::{deserialize, serialize}; +use bloom::Bloom; +use contact_info::ContactInfo; use counter::Counter; +use crds_gossip::CrdsGossip; +use crds_gossip_pull::CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS; +use crds_value::{CrdsValue, CrdsValueLabel, LeaderId}; use hash::Hash; use ledger::LedgerWindow; use log::Level; @@ -22,11 +26,10 @@ use netutil::{bind_in_range, bind_to, find_available_port_in_range, multi_bind_i use packet::{to_blob, Blob, SharedBlob, BLOB_SIZE}; use rand::{thread_rng, Rng}; use rayon::prelude::*; -use result::{Error, Result}; +use result::Result; use rpc::RPC_PORT; use signature::{Keypair, KeypairUtil}; use solana_sdk::pubkey::Pubkey; -use std; use std::collections::HashMap; use std::io; use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket}; @@ -38,31 +41,12 @@ use streamer::{BlobReceiver, BlobSender}; use timing::{duration_as_ms, timestamp}; use window::{SharedWindow, WindowIndex}; +pub type NodeInfo = ContactInfo; + pub const FULLNODE_PORT_RANGE: (u16, u16) = (8000, 10_000); /// milliseconds we sleep for between gossip requests const GOSSIP_SLEEP_MILLIS: u64 = 100; -const GOSSIP_PURGE_MILLIS: u64 = 15000; - -/// minimum membership table size before we start purging dead nodes -const MIN_TABLE_SIZE: usize = 2; - -#[macro_export] -macro_rules! socketaddr { - ($ip:expr, $port:expr) => { - SocketAddr::from((Ipv4Addr::from($ip), $port)) - }; - ($str:expr) => {{ - let a: SocketAddr = $str.parse().unwrap(); - a - }}; -} -#[macro_export] -macro_rules! 
socketaddr_any { - () => { - socketaddr!(0, 0) - }; -} #[derive(Debug, PartialEq, Eq)] pub enum ClusterInfoError { @@ -73,246 +57,97 @@ pub enum ClusterInfoError { BadGossipAddress, } -/// Structure to be replicated by the network -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] -pub struct ContactInfo { - /// gossip address - pub ncp: SocketAddr, - /// address to connect to for replication - pub tvu: SocketAddr, - /// transactions address - pub tpu: SocketAddr, - /// storage data address - pub storage_addr: SocketAddr, - /// address to which to send JSON-RPC requests - pub rpc: SocketAddr, - /// websocket for JSON-RPC push notifications - pub rpc_pubsub: SocketAddr, - /// if this struture changes update this value as well - /// Always update `NodeInfo` version too - /// This separate version for addresses allows us to use the `Vote` - /// as means of updating the `NodeInfo` table without touching the - /// addresses if they haven't changed. - pub version: u64, -} - -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] -pub struct LedgerState { - /// last verified hash that was submitted to the leader - pub last_id: Hash, -} - -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] -pub struct NodeInfo { - pub id: Pubkey, - /// If any of the bits change, update increment this value - pub version: u64, - /// network addresses - pub contact_info: ContactInfo, - /// current leader identity - pub leader_id: Pubkey, -} - -impl NodeInfo { - pub fn new( - id: Pubkey, - ncp: SocketAddr, - tvu: SocketAddr, - tpu: SocketAddr, - storage_addr: SocketAddr, - rpc: SocketAddr, - rpc_pubsub: SocketAddr, - ) -> Self { - NodeInfo { - id, - version: 0, - contact_info: ContactInfo { - ncp, - tvu, - tpu, - storage_addr, - rpc, - rpc_pubsub, - version: 0, - }, - leader_id: Pubkey::default(), - } - } - - pub fn new_localhost(id: Pubkey) -> Self { - Self::new( - id, - socketaddr!("127.0.0.1:1234"), - socketaddr!("127.0.0.1:1235"), - socketaddr!("127.0.0.1:1236"), - socketaddr!("127.0.0.1:1237"), - socketaddr!("127.0.0.1:1238"), - socketaddr!("127.0.0.1:1239"), - ) - } - - #[cfg(test)] - /// NodeInfo with unspecified addresses for adversarial testing. - pub fn new_unspecified() -> Self { - let addr = socketaddr!(0, 0); - assert!(addr.ip().is_unspecified()); - Self::new(Keypair::new().pubkey(), addr, addr, addr, addr, addr, addr) - } - #[cfg(test)] - /// NodeInfo with multicast addresses for adversarial testing. 
- pub fn new_multicast() -> Self { - let addr = socketaddr!("224.0.1.255:1000"); - assert!(addr.ip().is_multicast()); - Self::new(Keypair::new().pubkey(), addr, addr, addr, addr, addr, addr) - } - fn next_port(addr: &SocketAddr, nxt: u16) -> SocketAddr { - let mut nxt_addr = *addr; - nxt_addr.set_port(addr.port() + nxt); - nxt_addr - } - pub fn new_with_pubkey_socketaddr(pubkey: Pubkey, bind_addr: &SocketAddr) -> Self { - let transactions_addr = *bind_addr; - let gossip_addr = Self::next_port(&bind_addr, 1); - let replicate_addr = Self::next_port(&bind_addr, 2); - let rpc_addr = SocketAddr::new(bind_addr.ip(), RPC_PORT); - let rpc_pubsub_addr = SocketAddr::new(bind_addr.ip(), RPC_PORT + 1); - NodeInfo::new( - pubkey, - gossip_addr, - replicate_addr, - transactions_addr, - "0.0.0.0:0".parse().unwrap(), - rpc_addr, - rpc_pubsub_addr, - ) - } - pub fn new_with_socketaddr(bind_addr: &SocketAddr) -> Self { - let keypair = Keypair::new(); - Self::new_with_pubkey_socketaddr(keypair.pubkey(), bind_addr) - } - // - pub fn new_entry_point(gossip_addr: &SocketAddr) -> Self { - let daddr: SocketAddr = socketaddr!("0.0.0.0:0"); - NodeInfo::new( - Pubkey::default(), - *gossip_addr, - daddr, - daddr, - daddr, - daddr, - daddr, - ) - } -} - -/// `ClusterInfo` structure keeps a table of `NodeInfo` structs -/// # Properties -/// * `table` - map of public id's to versioned and signed NodeInfo structs -/// * `local` - map of public id's to what `self.update_index` `self.table` was updated -/// * `remote` - map of public id's to the `remote.update_index` was sent -/// * `update_index` - my update index -/// # Remarks -/// This implements two services, `gossip` and `listen`. -/// * `gossip` - asynchronously ask nodes to send updates -/// * `listen` - listen for requests and responses -/// No attempt to keep track of timeouts or dropped requests is made, or should be. pub struct ClusterInfo { - /// table of everyone in the network - pub table: HashMap, - /// Value of my update index when entry in table was updated. - /// Nodes will ask for updates since `update_index`, and this node - /// should respond with all the identities that are greater then the - /// request's `update_index` in this list - local: HashMap, - /// The value of the remote update index that I have last seen - /// This Node will ask external nodes for updates since the value in this list - pub remote: HashMap, - /// last time the public key had sent us a message - pub alive: HashMap, - pub update_index: u64, - pub id: Pubkey, - /// last time we heard from anyone getting a message fro this public key - /// these are rumers and shouldn't be trusted directly - external_liveness: HashMap>, + /// The network + pub gossip: CrdsGossip, } // TODO These messages should be signed, and go through the gpu pipeline for spam filtering #[derive(Serialize, Deserialize, Debug)] #[cfg_attr(feature = "cargo-clippy", allow(large_enum_variant))] enum Protocol { - /// forward your own latest data structure when requesting an update - /// this doesn't update the `remote` update index, but it allows the - /// recepient of this request to add knowledge of this node to the network - /// (last update index i saw from you, my replicated data) - RequestUpdates(u64, NodeInfo), - //TODO might need a since? 
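The rewritten ClusterInfo below resolves everything through typed values and labels from the new src/crds_value.rs, which is also not shown in this hunk. A hedged sketch of the shape its call sites imply (the `label()` helper and the exact variant set are assumptions; the real enum likely carries more variants plus serde derives):

```rust
type Pubkey = [u8; 32]; // stand-in for solana_sdk::pubkey::Pubkey

pub struct ContactInfo { pub id: Pubkey /* ncp, tvu, tpu, ... as sketched earlier */ }

pub struct LeaderId {
    pub id: Pubkey,        // node making the claim
    pub leader_id: Pubkey, // who that node says the leader is
    pub wallclock: u64,    // newest claim wins when the CRDS merges
}

pub enum CrdsValue {
    ContactInfo(ContactInfo),
    LeaderId(LeaderId),
}

/// Each value maps to exactly one label; the CRDS table is keyed by label, so
/// a node's newer ContactInfo or LeaderId replaces its older one.
#[derive(PartialEq, Eq, Hash)]
pub enum CrdsValueLabel {
    ContactInfo(Pubkey),
    LeaderId(Pubkey),
}

impl CrdsValue {
    pub fn label(&self) -> CrdsValueLabel {
        match self {
            CrdsValue::ContactInfo(c) => CrdsValueLabel::ContactInfo(c.id),
            CrdsValue::LeaderId(l) => CrdsValueLabel::LeaderId(l.id),
        }
    }
}
```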
- /// * from - from id, - /// * max_update_index - from's max update index in the response - /// * nodes - (NodeInfo, remote_last_update) vector - ReceiveUpdates { - from: Pubkey, - max_update_index: u64, - nodes: Vec<(NodeInfo, u64)>, - }, - /// ask for a missing index - /// (my replicated data to keep alive, missing window index) + /// Gosisp protocol messages + PullRequest(Bloom, CrdsValue), + PullResponse(Pubkey, Vec), + PushMessage(Pubkey, Vec), + PruneMessage(Pubkey, Vec), + + /// Window protocol messages + /// TODO: move this message to a different module RequestWindowIndex(NodeInfo, u64), } impl ClusterInfo { pub fn new(node_info: NodeInfo) -> Result { - if node_info.version != 0 { - return Err(Error::ClusterInfoError(ClusterInfoError::BadNodeInfo)); - } let mut me = ClusterInfo { - table: HashMap::new(), - local: HashMap::new(), - remote: HashMap::new(), - alive: HashMap::new(), - external_liveness: HashMap::new(), - id: node_info.id, - update_index: 1, + gossip: CrdsGossip::default(), }; - me.local.insert(node_info.id, me.update_index); - me.table.insert(node_info.id, node_info); + let id = node_info.id; + me.gossip.set_self(id); + me.insert_info(node_info); + me.push_self(); Ok(me) } - pub fn my_data(&self) -> &NodeInfo { - &self.table[&self.id] + pub fn push_self(&mut self) { + let mut my_data = self.my_data(); + let now = timestamp(); + my_data.wallclock = now; + let entry = CrdsValue::ContactInfo(my_data); + self.gossip.refresh_push_active_set(); + self.gossip.process_push_message(&[entry], now); + } + pub fn insert_info(&mut self, node_info: NodeInfo) { + let value = CrdsValue::ContactInfo(node_info); + let _ = self.gossip.crds.insert(value, timestamp()); + } + pub fn id(&self) -> Pubkey { + self.gossip.id + } + pub fn lookup(&self, id: Pubkey) -> Option<&NodeInfo> { + let entry = CrdsValueLabel::ContactInfo(id); + self.gossip + .crds + .lookup(&entry) + .and_then(|x| x.contact_info()) + } + pub fn my_data(&self) -> NodeInfo { + self.lookup(self.id()).cloned().unwrap() + } + pub fn leader_id(&self) -> Pubkey { + let entry = CrdsValueLabel::LeaderId(self.id()); + self.gossip + .crds + .lookup(&entry) + .and_then(|v| v.leader_id()) + .map(|x| x.leader_id) + .unwrap_or_default() } pub fn leader_data(&self) -> Option<&NodeInfo> { - let leader_id = self.table[&self.id].leader_id; - - // leader_id can be 0s from network entry point + let leader_id = self.leader_id(); if leader_id == Pubkey::default() { return None; } - - self.table.get(&leader_id) + self.lookup(leader_id) } - pub fn node_info_trace(&self) -> String { - let leader_id = self.table[&self.id].leader_id; - + let leader_id = self.leader_id(); let nodes: Vec<_> = self - .table - .values() - .filter(|n| Self::is_valid_address(&n.contact_info.rpc)) - .cloned() + .rpc_peers() + .into_iter() .map(|node| { format!( " ncp: {:20} | {}{}\n \ tpu: {:20} |\n \ rpc: {:20} |\n", - node.contact_info.ncp.to_string(), + node.ncp.to_string(), node.id, if node.id == leader_id { " <==== leader" } else { "" }, - node.contact_info.tpu.to_string(), - node.contact_info.rpc.to_string() + node.tpu.to_string(), + node.rpc.to_string() ) }).collect(); @@ -327,135 +162,77 @@ impl ClusterInfo { } pub fn set_leader(&mut self, key: Pubkey) -> () { - let mut me = self.my_data().clone(); - warn!("{}: LEADER_UPDATE TO {} from {}", me.id, key, me.leader_id); - me.leader_id = key; - me.version += 1; - self.insert(&me); + let prev = self.leader_id(); + let self_id = self.gossip.id; + let now = timestamp(); + let leader = LeaderId { + id: self_id, + 
leader_id: key, + wallclock: now, + }; + let entry = CrdsValue::LeaderId(leader); + warn!("{}: LEADER_UPDATE TO {} from {}", self_id, key, prev); + self.gossip.process_push_message(&[entry], now); } - pub fn get_valid_peers(&self) -> Vec { + pub fn purge(&mut self, now: u64) { + self.gossip.purge(now); + } + pub fn convergence(&self) -> usize { + self.ncp_peers().len() + 1 + } + pub fn rpc_peers(&self) -> Vec { let me = self.my_data().id; - self.table + self.gossip + .crds + .table .values() + .filter_map(|x| x.value.contact_info()) .filter(|x| x.id != me) - .filter(|x| ClusterInfo::is_valid_address(&x.contact_info.rpc)) + .filter(|x| ClusterInfo::is_valid_address(&x.rpc)) .cloned() .collect() } - pub fn get_external_liveness_entry(&self, key: &Pubkey) -> Option<&HashMap> { - self.external_liveness.get(key) - } - - pub fn insert(&mut self, v: &NodeInfo) -> usize { - // TODO check that last_verified types are always increasing - // update the peer table - if self.table.get(&v.id).is_none() || (v.version > self.table[&v.id].version) { - //somehow we signed a message for our own identity with a higher version than - // we have stored ourselves - trace!("{}: insert v.id: {} version: {}", self.id, v.id, v.version); - if self.table.get(&v.id).is_none() { - inc_new_counter_info!("cluster_info-insert-new_entry", 1, 1); - } - - self.update_index += 1; - let _ = self.table.insert(v.id, v.clone()); - let _ = self.local.insert(v.id, self.update_index); - self.update_liveness(v.id); - 1 - } else { - trace!( - "{}: INSERT FAILED data: {} new.version: {} me.version: {}", - self.id, - v.id, - v.version, - self.table[&v.id].version - ); - 0 - } - } - - fn update_liveness(&mut self, id: Pubkey) { - //update the liveness table - let now = timestamp(); - trace!("{} updating liveness {} to {}", self.id, id, now); - *self.alive.entry(id).or_insert(now) = now; - } - /// purge old validators - /// TODO: we need a robust membership protocol - /// http://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf - /// challenging part is that we are on a permissionless network - pub fn purge(&mut self, now: u64) { - if self.table.len() <= MIN_TABLE_SIZE { - trace!("purge: skipped: table too small: {}", self.table.len()); - return; - } - if self.leader_data().is_none() { - trace!("purge: skipped: no leader_data"); - return; - } - let leader_id = self.leader_data().unwrap().id; - let limit = GOSSIP_PURGE_MILLIS; - let dead_ids: Vec = self - .alive - .iter() - .filter_map(|(&k, v)| { - if k != self.id && (now - v) > limit { - Some(k) - } else { - trace!("{} purge skipped {} {} {}", self.id, k, now - v, limit); - None - } - }).collect(); - - inc_new_counter_info!("cluster_info-purge-count", dead_ids.len()); - - for id in &dead_ids { - self.alive.remove(id); - self.table.remove(id); - self.remote.remove(id); - self.local.remove(id); - self.external_liveness.remove(id); - info!("{}: PURGE {}", self.id, id); - for map in self.external_liveness.values_mut() { - map.remove(id); - } - if *id == leader_id { - info!("{}: PURGE LEADER {}", self.id, id,); - inc_new_counter_info!("cluster_info-purge-purged_leader", 1, 1); - } - } + pub fn ncp_peers(&self) -> Vec { + let me = self.my_data().id; + self.gossip + .crds + .table + .values() + .filter_map(|x| x.value.contact_info()) + .filter(|x| x.id != me) + .filter(|x| ClusterInfo::is_valid_address(&x.ncp)) + .cloned() + .collect() } /// compute broadcast table - /// # Remarks - pub fn compute_broadcast_table(&self) -> Vec { - let live: Vec<_> = self.alive.iter().collect(); - 
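rpc_peers and ncp_peers above, together with the tvu/tpu variants that follow, all walk gossip.crds.table the same way and differ only in which socket they validate. A std-only sketch of that shared filter pattern (stand-in types, not the crate's API):

```rust
use std::net::SocketAddr;

// Stand-in node record; the real helpers pull ContactInfo values out of the
// CRDS table and check one socket field each.
#[derive(Clone)]
struct Node {
    id: u64, // stand-in for Pubkey
    ncp: SocketAddr,
    tpu: SocketAddr,
}

/// Generic form of the repeated helpers: every node but me whose selected
/// address looks usable (the real check, ClusterInfo::is_valid_address, also
/// rejects multicast addresses).
fn peers<F: Fn(&Node) -> SocketAddr>(me: u64, table: &[Node], addr_of: F) -> Vec<Node> {
    table
        .iter()
        .filter(|n| n.id != me && !addr_of(n).ip().is_unspecified())
        .cloned()
        .collect()
}

fn main() {
    let ok: SocketAddr = "10.0.0.1:8000".parse().unwrap();
    let unspec: SocketAddr = "0.0.0.0:0".parse().unwrap();
    let table = vec![
        Node { id: 1, ncp: ok, tpu: ok },
        Node { id: 2, ncp: ok, tpu: unspec }, // gossips, but not accepting transactions
    ];
    assert_eq!(peers(0, &table, |n| n.ncp).len(), 2); // "ncp_peers"
    assert_eq!(peers(0, &table, |n| n.tpu).len(), 1); // "tpu_peers" skips node 2
}
```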
//thread_rng().shuffle(&mut live); - let me = &self.table[&self.id]; - let cloned_table: Vec = live - .iter() - .map(|x| &self.table[x.0]) - .filter(|v| { - if me.id == v.id { - //filter myself - false - } else if !(Self::is_valid_address(&v.contact_info.tvu)) { - trace!( - "{}:broadcast skip not listening {} {}", - me.id, - v.id, - v.contact_info.tvu, - ); - false - } else { - trace!("{}:broadcast node {} {}", me.id, v.id, v.contact_info.tvu); - true - } - }).cloned() - .collect(); - cloned_table + pub fn tvu_peers(&self) -> Vec { + let me = self.my_data().id; + self.gossip + .crds + .table + .values() + .filter_map(|x| x.value.contact_info()) + .filter(|x| x.id != me) + .filter(|x| ClusterInfo::is_valid_address(&x.tvu)) + .cloned() + .collect() + } + + /// compute broadcast table + pub fn tpu_peers(&self) -> Vec { + let me = self.my_data().id; + self.gossip + .crds + .table + .values() + .filter_map(|x| x.value.contact_info()) + .filter(|x| x.id != me) + .filter(|x| ClusterInfo::is_valid_address(&x.tpu)) + .cloned() + .collect() } /// broadcast messages from the leader to layer 1 nodes @@ -463,6 +240,7 @@ impl ClusterInfo { /// We need to avoid having obj locked while doing any io, such as the `send_to` pub fn broadcast( contains_last_tick: bool, + leader_id: Pubkey, me: &NodeInfo, broadcast_table: &[NodeInfo], window: &SharedWindow, @@ -495,7 +273,7 @@ impl ClusterInfo { ); trace!("broadcast orders table {}", orders.len()); - let errs = Self::send_orders(s, orders, me); + let errs = Self::send_orders(s, orders, me, leader_id); for e in errs { if let Err(e) = &e { @@ -519,36 +297,16 @@ impl ClusterInfo { /// # Remarks /// We need to avoid having obj locked while doing any io, such as the `send_to` pub fn retransmit(obj: &Arc>, blob: &SharedBlob, s: &UdpSocket) -> Result<()> { - let (me, table): (NodeInfo, Vec) = { + let (me, orders): (NodeInfo, Vec) = { // copy to avoid locking during IO let s = obj.read().expect("'obj' read lock in pub fn retransmit"); - (s.my_data().clone(), s.table.values().cloned().collect()) + (s.my_data().clone(), s.tvu_peers()) }; blob.write() .unwrap() .set_id(&me.id) .expect("set_id in pub fn retransmit"); let rblob = blob.read().unwrap(); - let orders: Vec<_> = table - .iter() - .filter(|v| { - if me.id == v.id { - trace!("skip retransmit to self {:?}", v.id); - false - } else if me.leader_id == v.id { - trace!("skip retransmit to leader {:?}", v.id); - false - } else if !(Self::is_valid_address(&v.contact_info.tvu)) { - trace!( - "skip nodes that are not listening {:?} {}", - v.id, - v.contact_info.tvu - ); - false - } else { - true - } - }).collect(); trace!("retransmit orders {}", orders.len()); let errs: Vec<_> = orders .par_iter() @@ -558,11 +316,11 @@ impl ClusterInfo { me.id, rblob.index().unwrap(), v.id, - v.contact_info.tvu, + v.tvu, ); //TODO profile this, may need multiple sockets for par_iter assert!(rblob.meta.size <= BLOB_SIZE); - s.send_to(&rblob.data[..rblob.meta.size], &v.contact_info.tvu) + s.send_to(&rblob.data[..rblob.meta.size], &v.tvu) }).collect(); for e in errs { if let Err(e) = &e { @@ -574,28 +332,23 @@ impl ClusterInfo { Ok(()) } - // max number of nodes that we could be converged to - pub fn convergence(&self) -> u64 { - let max = self.remote.values().len() as u64 + 1; - self.remote.values().fold(max, |a, b| std::cmp::min(a, *b)) - } - fn send_orders( s: &UdpSocket, orders: Vec<(Option, Vec<&NodeInfo>)>, me: &NodeInfo, + leader_id: Pubkey, ) -> Vec> { orders .into_iter() .flat_map(|(b, vs)| { // only leader should be broadcasting - 
assert!(vs.iter().find(|info| info.id == me.leader_id).is_none()); + assert!(vs.iter().find(|info| info.id == leader_id).is_none()); let bl = b.unwrap(); let blob = bl.read().unwrap(); //TODO profile this, may need multiple sockets for par_iter let ids_and_tvus = if log_enabled!(Level::Trace) { let v_ids = vs.iter().map(|v| v.id); - let tvus = vs.iter().map(|v| v.contact_info.tvu); + let tvus = vs.iter().map(|v| v.tvu); let ids_and_tvus = v_ids.zip(tvus).collect(); trace!( @@ -616,7 +369,7 @@ impl ClusterInfo { let send_errs_for_blob: Vec<_> = vs .iter() .map(move |v| { - let e = s.send_to(&blob.data[..blob.meta.size], &v.contact_info.tvu); + let e = s.send_to(&blob.data[..blob.meta.size], &v.tvu); trace!( "{}: done broadcast {} to {:?}", me.id, @@ -700,136 +453,82 @@ impl ClusterInfo { orders } - // TODO: fill in with real implmentation once staking is implemented - fn get_stake(_id: Pubkey) -> f64 { - 1.0 - } - - fn max_updates(max_bytes: usize) -> usize { - let unit = (NodeInfo::new_localhost(Default::default()), 0); - let unit_size = serialized_size(&unit).unwrap(); - let msg = Protocol::ReceiveUpdates { - from: Default::default(), - max_update_index: 0, - nodes: vec![unit], - }; - let msg_size = serialized_size(&msg).unwrap(); - ((max_bytes - (msg_size as usize)) / (unit_size as usize)) + 1 - } - // Get updated node since v up to a maximum of `max_bytes` updates - fn get_updates_since(&self, v: u64, max: usize) -> (Pubkey, u64, Vec<(NodeInfo, u64)>) { - let nodes: Vec<_> = self - .table - .values() - .filter(|x| x.id != Pubkey::default() && self.local[&x.id] > v) - .cloned() - .collect(); - let liveness: Vec = nodes - .iter() - .map(|d| *self.remote.get(&d.id).unwrap_or(&0)) - .collect(); - let updates: Vec = nodes.iter().map(|d| self.local[&d.id]).collect(); - trace!("{:?}", updates); - let id = self.id; - let mut out: Vec<(u64, (NodeInfo, u64))> = updates - .into_iter() - .zip(nodes.into_iter().zip(liveness)) - .collect(); - out.sort_by_key(|k| k.0); - let last_node = std::cmp::max(1, std::cmp::min(out.len(), max)) - 1; - let max_updated_node = out.get(last_node).map(|x| x.0).unwrap_or(0); - let updated_data: Vec<(NodeInfo, u64)> = out.into_iter().take(max).map(|x| x.1).collect(); - - trace!("get updates since response {} {}", v, updated_data.len()); - (id, max_updated_node, updated_data) - } - pub fn window_index_request(&self, ix: u64) -> Result<(SocketAddr, Vec)> { // find a peer that appears to be accepting replication, as indicated // by a valid tvu port location - let valid: Vec<_> = self - .table - .values() - .filter(|r| r.id != self.id && Self::is_valid_address(&r.contact_info.tvu)) - .collect(); + let valid: Vec<_> = self.tvu_peers(); if valid.is_empty() { Err(ClusterInfoError::NoPeers)?; } let n = thread_rng().gen::() % valid.len(); - let addr = valid[n].contact_info.ncp; // send the request to the peer's gossip port + let addr = valid[n].ncp; // send the request to the peer's gossip port let req = Protocol::RequestWindowIndex(self.my_data().clone(), ix); let out = serialize(&req)?; Ok((addr, out)) } + fn new_pull_requests(&mut self) -> Vec<(SocketAddr, Protocol)> { + let now = timestamp(); + let pulls: Vec<_> = self.gossip.new_pull_request(now).ok().into_iter().collect(); - /// Create a random gossip request - /// # Returns - /// (A,B) - /// * A - Address to send to - /// * B - RequestUpdates protocol message - fn gossip_request(&self) -> Result<(SocketAddr, Protocol)> { - let options: Vec<_> = self - .table - .values() - .filter(|v| { - v.id != self.id - && 
!v.contact_info.ncp.ip().is_unspecified() - && !v.contact_info.ncp.ip().is_multicast() + let pr: Vec<_> = pulls + .into_iter() + .filter_map(|(peer, filter, self_info)| { + let peer_label = CrdsValueLabel::ContactInfo(peer); + self.gossip + .crds + .lookup(&peer_label) + .and_then(|v| v.contact_info()) + .map(|peer_info| (peer, filter, peer_info.ncp, self_info)) }).collect(); + pr.into_iter() + .map(|(peer, filter, ncp, self_info)| { + self.gossip.mark_pull_request_creation_time(peer, now); + (ncp, Protocol::PullRequest(filter, self_info)) + }).collect() + } + fn new_push_requests(&mut self) -> Vec<(SocketAddr, Protocol)> { + let self_id = self.gossip.id; + let (_, peers, msgs) = self.gossip.new_push_messages(timestamp()); + peers + .into_iter() + .filter_map(|p| { + let peer_label = CrdsValueLabel::ContactInfo(p); + self.gossip + .crds + .lookup(&peer_label) + .and_then(|v| v.contact_info()) + .map(|p| p.ncp) + }).map(|peer| (peer, Protocol::PushMessage(self_id, msgs.clone()))) + .collect() + } - let choose_peer_strategy = ChooseWeightedPeerStrategy::new( - &self.remote, - &self.external_liveness, - &Self::get_stake, - ); - - let choose_peer_result = choose_peer_strategy.choose_peer(options); - - if let Err(Error::ClusterInfoError(ClusterInfoError::NoPeers)) = &choose_peer_result { - trace!( - "cluster_info too small for gossip {} {}", - self.id, - self.table.len() - ); - }; - let v = choose_peer_result?; - - let remote_update_index = *self.remote.get(&v.id).unwrap_or(&0); - let req = Protocol::RequestUpdates(remote_update_index, self.my_data().clone()); - trace!( - "created gossip request from {} {:?} to {} {}", - self.id, - self.my_data(), - v.id, - v.contact_info.ncp - ); - - Ok((v.contact_info.ncp, req)) + fn gossip_request(&mut self) -> Vec<(SocketAddr, Protocol)> { + let pulls: Vec<_> = self.new_pull_requests(); + let pushes: Vec<_> = self.new_push_requests(); + vec![pulls, pushes].into_iter().flat_map(|x| x).collect() } /// At random pick a node and try to get updated changes from them fn run_gossip(obj: &Arc>, blob_sender: &BlobSender) -> Result<()> { - //TODO we need to keep track of stakes and weight the selection by stake size - //TODO cache sockets - - // Lock the object only to do this operation and not for any longer - // especially not when doing the `sock.send_to` - let (remote_gossip_addr, req) = obj - .read() - .expect("'obj' read lock in fn run_gossip") - .gossip_request()?; - - // TODO this will get chatty, so we need to first ask for number of updates since - // then only ask for specific data that we dont have - let blob = to_blob(req, remote_gossip_addr)?; - blob_sender.send(vec![blob])?; + let reqs = obj.write().unwrap().gossip_request(); + let blobs = reqs + .into_iter() + .filter_map(|(remote_gossip_addr, req)| to_blob(req, remote_gossip_addr).ok()) + .collect(); + blob_sender.send(blobs)?; Ok(()) } pub fn get_gossip_top_leader(&self) -> Option<&NodeInfo> { let mut table = HashMap::new(); let def = Pubkey::default(); - let cur = self.table.values().filter(|x| x.leader_id != def); + let cur = self + .gossip + .crds + .table + .values() + .filter_map(|x| x.value.leader_id()) + .filter(|x| x.leader_id != def); for v in cur { let cnt = table.entry(&v.leader_id).or_insert(0); *cnt += 1; @@ -837,60 +536,16 @@ impl ClusterInfo { } let mut sorted: Vec<(&Pubkey, usize)> = table.into_iter().collect(); for x in &sorted { - trace!("{}: sorted leaders {} votes: {}", self.id, x.0, x.1); + trace!("{}: sorted leaders {} votes: {}", self.gossip.id, x.0, x.1); } 
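get_gossip_top_leader, whose tail (the sort and the final ContactInfo lookup) continues just below, picks whichever leader the most table entries claim. A std-only sketch of that tally, not part of the patch:

```rust
use std::collections::HashMap;

/// Count how many gossip entries claim each candidate and return the most
/// claimed one; ties are broken arbitrarily by map iteration order, as in the
/// original.
fn top_leader(claims: &[&str]) -> Option<String> {
    let mut table: HashMap<&str, usize> = HashMap::new();
    for &c in claims {
        *table.entry(c).or_insert(0) += 1;
    }
    let mut sorted: Vec<(&str, usize)> = table.into_iter().collect();
    sorted.sort_by_key(|&(_, votes)| votes);
    sorted.last().map(|&(id, _)| id.to_string())
}

fn main() {
    assert_eq!(top_leader(&["A", "B", "A", "A", "B"]).as_deref(), Some("A"));
    assert_eq!(top_leader(&[]), None);
}
```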
sorted.sort_by_key(|a| a.1); let top_leader = sorted.last().map(|a| *a.0); - if let Some(l) = top_leader { - self.table.get(&l) - } else { - None - } - } - - /// Apply updates that we received from the identity `from` - /// # Arguments - /// * `from` - identity of the sender of the updates - /// * `update_index` - the number of updates that `from` has completed and this set of `data` represents - /// * `data` - the update data - fn apply_updates(&mut self, from: Pubkey, update_index: u64, data: &[(NodeInfo, u64)]) { - trace!("got updates {}", data.len()); - // TODO we need to punish/spam resist here - // sigverify the whole update and slash anyone who sends a bad update - let mut insert_total = 0; - for v in data { - insert_total += self.insert(&v.0); - } - inc_new_counter_info!("cluster_info-update-count", insert_total); - - for (node, external_remote_index) in data { - let pubkey = node.id; - let remote_entry = if let Some(v) = self.remote.get(&pubkey) { - *v - } else { - 0 - }; - - if remote_entry >= *external_remote_index { - continue; - } - - let liveness_entry = self - .external_liveness - .entry(pubkey) - .or_insert_with(HashMap::new); - let peer_index = *liveness_entry.entry(from).or_insert(*external_remote_index); - if *external_remote_index > peer_index { - liveness_entry.insert(from, *external_remote_index); - } - } - - *self.remote.entry(from).or_insert(update_index) = update_index; - - // Clear the remote liveness table for this node, b/c we've heard directly from them - // so we don't need to rely on rumors - self.external_liveness.remove(&from); + top_leader + .and_then(|x| { + let leader_label = CrdsValueLabel::ContactInfo(x); + self.gossip.crds.lookup(&leader_label) + }).and_then(|x| x.contact_info()) } /// randomly pick a node and ask them for updates asynchronously @@ -901,19 +556,26 @@ impl ClusterInfo { ) -> JoinHandle<()> { Builder::new() .name("solana-gossip".to_string()) - .spawn(move || loop { - let start = timestamp(); - let _ = Self::run_gossip(&obj, &blob_sender); - if exit.load(Ordering::Relaxed) { - return; - } - obj.write().unwrap().purge(timestamp()); - //TODO: possibly tune this parameter - //we saw a deadlock passing an obj.read().unwrap().timeout into sleep - let elapsed = timestamp() - start; - if GOSSIP_SLEEP_MILLIS > elapsed { - let time_left = GOSSIP_SLEEP_MILLIS - elapsed; - sleep(Duration::from_millis(time_left)); + .spawn(move || { + let mut last_push = timestamp(); + loop { + let start = timestamp(); + let _ = Self::run_gossip(&obj, &blob_sender); + if exit.load(Ordering::Relaxed) { + return; + } + obj.write().unwrap().purge(timestamp()); + //TODO: possibly tune this parameter + //we saw a deadlock passing an obj.read().unwrap().timeout into sleep + if start - last_push > CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS / 2 { + obj.write().unwrap().push_self(); + last_push = timestamp(); + } + let elapsed = timestamp() - start; + if GOSSIP_SLEEP_MILLIS > elapsed { + let time_left = GOSSIP_SLEEP_MILLIS - elapsed; + sleep(Duration::from_millis(time_left)); + } } }).unwrap() } @@ -923,8 +585,9 @@ impl ClusterInfo { window: &SharedWindow, ledger_window: &mut Option<&mut LedgerWindow>, me: &NodeInfo, + leader_id: Pubkey, ix: u64, - ) -> Option { + ) -> Vec { let pos = (ix as usize) % window.read().unwrap().len(); if let Some(ref mut blob) = &mut window.write().unwrap()[pos].data { let mut wblob = blob.write().unwrap(); @@ -940,8 +603,7 @@ impl ClusterInfo { // Allow retransmission of this response if the node // is the leader and the number of repair requests equals 
// a power of two - if me.leader_id == me.id - && (num_retransmits == 0 || num_retransmits.is_power_of_two()) + if leader_id == me.id && (num_retransmits == 0 || num_retransmits.is_power_of_two()) { sender_id = me.id } @@ -959,7 +621,7 @@ impl ClusterInfo { } inc_new_counter_info!("cluster_info-window-request-pass", 1); - return Some(out); + return vec![out]; } else { inc_new_counter_info!("cluster_info-window-request-outside", 1); trace!( @@ -981,7 +643,7 @@ impl ClusterInfo { Some(from_addr), ); - return Some(out); + return vec![out]; } } @@ -994,7 +656,7 @@ impl ClusterInfo { pos, ); - None + vec![] } //TODO we should first coalesce all the requests @@ -1003,153 +665,179 @@ impl ClusterInfo { window: &SharedWindow, ledger_window: &mut Option<&mut LedgerWindow>, blob: &Blob, - ) -> Option { - match deserialize(&blob.data[..blob.meta.size]) { - Ok(request) => { + ) -> Vec { + deserialize(&blob.data[..blob.meta.size]) + .into_iter() + .flat_map(|request| { ClusterInfo::handle_protocol(obj, &blob.meta.addr(), request, window, ledger_window) + }).collect() + } + fn handle_pull_request( + me: &Arc>, + filter: Bloom, + caller: CrdsValue, + from_addr: &SocketAddr, + ) -> Vec { + let self_id = me.read().unwrap().gossip.id; + inc_new_counter_info!("cluster_info-pull_request", 1); + if caller.contact_info().is_none() { + return vec![]; + } + let mut from = caller.contact_info().cloned().unwrap(); + if from.id == self_id { + warn!( + "PullRequest ignored, I'm talking to myself: me={} remoteme={}", + self_id, from.id + ); + inc_new_counter_info!("cluster_info-window-request-loopback", 1); + return vec![]; + } + let now = timestamp(); + let data = me + .write() + .unwrap() + .gossip + .process_pull_request(caller, filter, now); + let len = data.len(); + trace!("get updates since response {}", len); + if data.is_empty() { + trace!("no updates me {}", self_id); + vec![] + } else { + let rsp = Protocol::PullResponse(self_id, data); + // the remote side may not know his public IP:PORT, record what he looks like to us + // this may or may not be correct for everybody but it's better than leaving him with + // an unspecified address in our table + if from.ncp.ip().is_unspecified() { + inc_new_counter_info!("cluster_info-window-request-updates-unspec-ncp", 1); + from.ncp = *from_addr; } - Err(_) => { - warn!("deserialize cluster_info packet failed"); - None - } + inc_new_counter_info!("cluster_info-pull_request-rsp", len); + to_blob(rsp, from.ncp).ok().into_iter().collect() } } + fn handle_pull_response(me: &Arc>, from: Pubkey, data: Vec) { + let len = data.len(); + let now = Instant::now(); + let self_id = me.read().unwrap().gossip.id; + trace!("PullResponse me: {} len={}", self_id, len); + me.write() + .unwrap() + .gossip + .process_pull_response(from, data, timestamp()); + inc_new_counter_info!("cluster_info-pull_request_response", 1); + inc_new_counter_info!("cluster_info-pull_request_response-size", len); + report_time_spent("ReceiveUpdates", &now.elapsed(), &format!(" len: {}", len)); + } + fn handle_push_message( + me: &Arc>, + from: Pubkey, + data: &[CrdsValue], + ) -> Vec { + let self_id = me.read().unwrap().gossip.id; + inc_new_counter_info!("cluster_info-push_message", 1); + let prunes: Vec<_> = me + .write() + .unwrap() + .gossip + .process_push_message(&data, timestamp()); + if !prunes.is_empty() { + let mut wme = me.write().unwrap(); + inc_new_counter_info!("cluster_info-push_message-prunes", prunes.len()); + let rsp = Protocol::PruneMessage(self_id, prunes); + let ci = 
wme.lookup(from).cloned(); + let pushes: Vec<_> = wme.new_push_requests(); + inc_new_counter_info!("cluster_info-push_message-pushes", pushes.len()); + let mut rsp: Vec<_> = ci + .and_then(|ci| to_blob(rsp, ci.ncp).ok()) + .into_iter() + .collect(); + let mut blobs: Vec<_> = pushes + .into_iter() + .filter_map(|(remote_gossip_addr, req)| to_blob(req, remote_gossip_addr).ok()) + .collect(); + rsp.append(&mut blobs); + rsp + } else { + vec![] + } + } + fn handle_request_window_index( + me: &Arc>, + from: &ContactInfo, + ix: u64, + from_addr: &SocketAddr, + window: &SharedWindow, + ledger_window: &mut Option<&mut LedgerWindow>, + ) -> Vec { + let now = Instant::now(); + + //TODO this doesn't depend on cluster_info module, could be moved + //but we are using the listen thread to service these request + //TODO verify from is signed + + let self_id = me.read().unwrap().gossip.id; + if from.id == me.read().unwrap().gossip.id { + warn!( + "{}: Ignored received RequestWindowIndex from ME {} {} ", + self_id, from.id, ix, + ); + inc_new_counter_info!("cluster_info-window-request-address-eq", 1); + return vec![]; + } + + me.write().unwrap().insert_info(from.clone()); + let leader_id = me.read().unwrap().leader_id(); + let my_info = me.read().unwrap().my_data().clone(); + inc_new_counter_info!("cluster_info-window-request-recv", 1); + trace!( + "{}: received RequestWindowIndex {} {} ", + self_id, + from.id, + ix, + ); + let res = Self::run_window_request( + &from, + &from_addr, + &window, + ledger_window, + &my_info, + leader_id, + ix, + ); + report_time_spent( + "RequestWindowIndex", + &now.elapsed(), + &format!(" ix: {}", ix), + ); + res + } fn handle_protocol( me: &Arc>, from_addr: &SocketAddr, request: Protocol, window: &SharedWindow, ledger_window: &mut Option<&mut LedgerWindow>, - ) -> Option { + ) -> Vec { match request { // TODO sigverify these - Protocol::RequestUpdates(version, mut from) => { - let id = me.read().unwrap().id; - - trace!( - "{} RequestUpdates {} from {}, professing to be {}", - id, - version, - from_addr, - from.contact_info.ncp - ); - - if from.id == me.read().unwrap().id { - warn!( - "RequestUpdates ignored, I'm talking to myself: me={} remoteme={}", - me.read().unwrap().id, - from.id - ); - inc_new_counter_info!("cluster_info-window-request-loopback", 1); - return None; - } - - // the remote side may not know his public IP:PORT, record what he looks like to us - // this may or may not be correct for everybody but it's better than leaving him with - // an unspecified address in our table - if from.contact_info.ncp.ip().is_unspecified() { - inc_new_counter_info!("cluster_info-window-request-updates-unspec-ncp", 1); - from.contact_info.ncp = *from_addr; - } - let max = Self::max_updates(1024 * 64 - 512); - let (from_id, ups, data) = me.read().unwrap().get_updates_since(version, max); - - // update entry only after collecting liveness - { - let mut me = me.write().unwrap(); - me.insert(&from); - me.update_liveness(from.id); - } - - let len = data.len(); - trace!("get updates since response {} {}", version, len); - - if data.is_empty() { - let me = me.read().unwrap(); - trace!( - "no updates me {} ix {} since {}", - id, - me.update_index, - version - ); - None - } else { - let rsp = Protocol::ReceiveUpdates { - from: from_id, - max_update_index: ups, - nodes: data, - }; - - if let Ok(r) = to_blob(rsp, from.contact_info.ncp) { - trace!( - "sending updates me {} len {} to {} {}", - id, - len, - from.id, - from.contact_info.ncp, - ); - Some(r) - } else { - warn!("to_blob failed"); 
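handle_push_message above hands incoming values to gossip.process_push_message and, when some of them were redundant, answers with a PruneMessage plus a fresh round of pushes. The exact rules live in the new src/crds_gossip_push.rs, which is not shown in this hunk; the std-only stand-in below only captures the gist under that assumption: new values are stored and forwarded, values already known are reported back so the pusher can trim the redundant path.

```rust
use std::collections::HashMap;

/// Loose stand-in for the push side of the eager-push overlay: store what is
/// new, and return the origins of values we already had, i.e. the candidates
/// for a PruneMessage back to the pusher.
fn process_push_message(
    table: &mut HashMap<String, u64>,  // label -> wallclock
    values: &[(String, String, u64)],  // (origin, label, wallclock)
) -> Vec<String> {
    let mut prunes = Vec::new();
    for (origin, label, wallclock) in values {
        match table.get(label) {
            // Already have an equal-or-newer copy: this push was redundant.
            Some(cur) if *cur >= *wallclock => prunes.push(origin.clone()),
            // New or newer: keep it (and it would be re-pushed onward).
            _ => {
                table.insert(label.clone(), *wallclock);
            }
        }
    }
    prunes.sort();
    prunes.dedup();
    prunes
}

fn main() {
    let mut table = HashMap::new();
    let v = ("A".to_string(), "ContactInfo(A)".to_string(), 5);
    // First delivery is useful; a replay of the same value is prunable.
    assert!(process_push_message(&mut table, &[v.clone()]).is_empty());
    assert_eq!(process_push_message(&mut table, &[v]), vec!["A".to_string()]);
}
```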
- None - } - } + Protocol::PullRequest(filter, caller) => { + Self::handle_pull_request(me, filter, caller, from_addr) } - Protocol::ReceiveUpdates { - from, - max_update_index, - nodes, - } => { - let now = Instant::now(); - trace!( - "ReceivedUpdates from={} update_index={} len={}", - from, - max_update_index, - nodes.len() - ); - me.write() - .expect("'me' write lock in ReceiveUpdates") - .apply_updates(from, max_update_index, &nodes); - - report_time_spent( - "ReceiveUpdates", - &now.elapsed(), - &format!(" len: {}", nodes.len()), - ); - None + Protocol::PullResponse(from, data) => { + Self::handle_pull_response(me, from, data); + vec![] + } + Protocol::PushMessage(from, data) => Self::handle_push_message(me, from, &data), + Protocol::PruneMessage(from, data) => { + inc_new_counter_info!("cluster_info-prune_message", 1); + inc_new_counter_info!("cluster_info-prune_message-size", data.len()); + me.write().unwrap().gossip.process_prune_msg(from, &data); + vec![] } - Protocol::RequestWindowIndex(from, ix) => { - let now = Instant::now(); - - //TODO this doesn't depend on cluster_info module, could be moved - //but we are using the listen thread to service these request - //TODO verify from is signed - - if from.id == me.read().unwrap().id { - warn!( - "{}: Ignored received RequestWindowIndex from ME {} {} ", - me.read().unwrap().id, - from.id, - ix, - ); - inc_new_counter_info!("cluster_info-window-request-address-eq", 1); - return None; - } - - me.write().unwrap().insert(&from); - let me = me.read().unwrap().my_data().clone(); - inc_new_counter_info!("cluster_info-window-request-recv", 1); - trace!("{}: received RequestWindowIndex {} {} ", me.id, from.id, ix,); - let res = - Self::run_window_request(&from, &from_addr, &window, ledger_window, &me, ix); - report_time_spent( - "RequestWindowIndex", - &now.elapsed(), - &format!(" ix: {}", ix), - ); - res + Self::handle_request_window_index(me, &from, ix, from_addr, window, ledger_window) } } } @@ -1170,10 +858,8 @@ impl ClusterInfo { } let mut resps = Vec::new(); for req in reqs { - if let Some(resp) = Self::handle_blob(obj, window, ledger_window, &req.read().unwrap()) - { - resps.push(resp); - } + let mut resp = Self::handle_blob(obj, window, ledger_window, &req.read().unwrap()); + resps.append(&mut resp); } response_sender.send(resps)?; Ok(()) @@ -1205,8 +891,8 @@ impl ClusterInfo { let me = me.read().unwrap(); debug!( "{}: run_listen timeout, table size: {}", - me.id, - me.table.len() + me.gossip.id, + me.gossip.crds.table.len() ); } }).unwrap() @@ -1229,7 +915,16 @@ impl ClusterInfo { let pubkey = Keypair::new().pubkey(); let daddr = socketaddr_any!(); - let node = NodeInfo::new(pubkey, daddr, daddr, daddr, daddr, daddr, daddr); + let node = NodeInfo::new( + pubkey, + daddr, + daddr, + daddr, + daddr, + daddr, + daddr, + timestamp(), + ); (node, gossip_socket) } } @@ -1277,6 +972,7 @@ impl Node { storage.local_addr().unwrap(), rpc_addr, rpc_pubsub_addr, + timestamp(), ); Node { info, @@ -1320,6 +1016,7 @@ impl Node { SocketAddr::new(ncp.ip(), storage_port), SocketAddr::new(ncp.ip(), RPC_PORT), SocketAddr::new(ncp.ip(), RPC_PORT + 1), + 0, ); trace!("new NodeInfo: {:?}", info); @@ -1346,11 +1043,8 @@ fn report_time_spent(label: &str, time: &Duration, extra: &str) { #[cfg(test)] mod tests { - use bincode::serialize; - use cluster_info::{ - ClusterInfo, ClusterInfoError, Node, NodeInfo, Protocol, FULLNODE_PORT_RANGE, - GOSSIP_PURGE_MILLIS, GOSSIP_SLEEP_MILLIS, MIN_TABLE_SIZE, - }; + use super::*; + use crds_value::CrdsValueLabel; use 
entry::Entry; use hash::{hash, Hash}; use ledger::{get_tmp_ledger_path, LedgerWindow, LedgerWriter}; @@ -1360,149 +1054,28 @@ mod tests { use signature::{Keypair, KeypairUtil}; use std::fs::remove_dir_all; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; - use std::sync::atomic::{AtomicBool, Ordering}; - use std::sync::mpsc::channel; use std::sync::{Arc, RwLock}; - use std::thread::sleep; - use std::time::Duration; use window::default_window; #[test] - fn insert_test() { - let mut d = NodeInfo::new_localhost(Keypair::new().pubkey()); - assert_eq!(d.version, 0); - let mut cluster_info = ClusterInfo::new(d.clone()).unwrap(); - assert_eq!(cluster_info.table[&d.id].version, 0); - assert!(!cluster_info.alive.contains_key(&d.id)); - - d.version = 2; - cluster_info.insert(&d); - let liveness = cluster_info.alive[&d.id]; - assert_eq!(cluster_info.table[&d.id].version, 2); - - d.version = 1; - cluster_info.insert(&d); - assert_eq!(cluster_info.table[&d.id].version, 2); - assert_eq!(liveness, cluster_info.alive[&d.id]); - - // Ensure liveness will be updated for version 3 - sleep(Duration::from_millis(1)); - - d.version = 3; - cluster_info.insert(&d); - assert_eq!(cluster_info.table[&d.id].version, 3); - assert!(liveness < cluster_info.alive[&d.id]); - } - fn sorted(ls: &Vec<(NodeInfo, u64)>) -> Vec<(NodeInfo, u64)> { - let mut copy: Vec<_> = ls.iter().cloned().collect(); - copy.sort_by(|x, y| x.0.id.cmp(&y.0.id)); - copy + fn test_cluster_info_new() { + let d = NodeInfo::new_localhost(Keypair::new().pubkey(), timestamp()); + let cluster_info = ClusterInfo::new(d.clone()).expect("ClusterInfo::new"); + assert_eq!(d.id, cluster_info.my_data().id); } + #[test] - fn replicated_data_new_with_socketaddr_with_pubkey() { - let keypair = Keypair::new(); - let d1 = NodeInfo::new_with_pubkey_socketaddr( - keypair.pubkey().clone(), - &socketaddr!("127.0.0.1:1234"), - ); - assert_eq!(d1.id, keypair.pubkey()); - assert_eq!(d1.contact_info.ncp, socketaddr!("127.0.0.1:1235")); - assert_eq!(d1.contact_info.tvu, socketaddr!("127.0.0.1:1236")); - assert_eq!(d1.contact_info.tpu, socketaddr!("127.0.0.1:1234")); - assert_eq!(d1.contact_info.rpc, socketaddr!("127.0.0.1:8899")); - assert_eq!(d1.contact_info.rpc_pubsub, socketaddr!("127.0.0.1:8900")); - } - #[test] - fn max_updates() { - let size = 1024 * 64 - 512; - let num = ClusterInfo::max_updates(size); - let msg = Protocol::ReceiveUpdates { - from: Default::default(), - max_update_index: 0, - nodes: vec![(NodeInfo::new_unspecified(), 0); num], - }; - trace!("{} {} {}", serialize(&msg).unwrap().len(), size, num); - assert!(serialize(&msg).unwrap().len() <= size); - } - #[test] - fn update_test() { - let d1 = NodeInfo::new_localhost(Keypair::new().pubkey()); - let d2 = NodeInfo::new_localhost(Keypair::new().pubkey()); - let d3 = NodeInfo::new_localhost(Keypair::new().pubkey()); - let mut cluster_info = ClusterInfo::new(d1.clone()).expect("ClusterInfo::new"); - let (key, ix, ups) = cluster_info.get_updates_since(0, 1); - assert_eq!(key, d1.id); - assert_eq!(ix, 1); - assert_eq!(ups.len(), 1); - assert_eq!(sorted(&ups), sorted(&vec![(d1.clone(), 0)])); - cluster_info.insert(&d2); - let (key, ix, ups) = cluster_info.get_updates_since(0, 2); - assert_eq!(key, d1.id); - assert_eq!(ix, 2); - assert_eq!(ups.len(), 2); - assert_eq!( - sorted(&ups), - sorted(&vec![(d1.clone(), 0), (d2.clone(), 0)]) - ); - cluster_info.insert(&d3); - let (key, ix, ups) = cluster_info.get_updates_since(0, 3); - assert_eq!(key, d1.id); - assert_eq!(ix, 3); - assert_eq!(ups.len(), 3); - assert_eq!( - 
sorted(&ups), - sorted(&vec![(d1.clone(), 0), (d2.clone(), 0), (d3.clone(), 0)]) - ); - let mut cluster_info2 = ClusterInfo::new(d2.clone()).expect("ClusterInfo::new"); - cluster_info2.apply_updates(key, ix, &ups); - assert_eq!(cluster_info2.table.values().len(), 3); - assert_eq!( - sorted( - &cluster_info2 - .table - .values() - .map(|x| (x.clone(), 0)) - .collect() - ), - sorted( - &cluster_info - .table - .values() - .map(|x| (x.clone(), 0)) - .collect() - ) - ); - let d4 = NodeInfo::new_entry_point(&socketaddr!("127.0.0.4:1234")); - cluster_info.insert(&d4); - let (_key, ix, ups) = cluster_info.get_updates_since(0, 3); - assert_eq!( - sorted(&ups), - sorted(&vec![(d2.clone(), 0), (d1.clone(), 0), (d3.clone(), 0)]) - ); - assert_eq!(ix, 3); - - let (_key, ix, ups) = cluster_info.get_updates_since(0, 2); - assert_eq!( - sorted(&ups), - sorted(&vec![(d2.clone(), 0), (d1.clone(), 0)]) - ); - assert_eq!(ix, 2); - - let (_key, ix, ups) = cluster_info.get_updates_since(0, 1); - assert_eq!(sorted(&ups), sorted(&vec![(d1.clone(), 0)])); - assert_eq!(ix, 1); - - let (_key, ix, ups) = cluster_info.get_updates_since(1, 3); - assert_eq!(ups.len(), 2); - assert_eq!(ix, 3); - assert_eq!(sorted(&ups), sorted(&vec![(d2, 0), (d3, 0)])); - let (_key, ix, ups) = cluster_info.get_updates_since(3, 3); - assert_eq!(ups.len(), 0); - assert_eq!(ix, 0); + fn insert_info_test() { + let d = NodeInfo::new_localhost(Keypair::new().pubkey(), timestamp()); + let mut cluster_info = ClusterInfo::new(d).expect("ClusterInfo::new"); + let d = NodeInfo::new_localhost(Keypair::new().pubkey(), timestamp()); + let label = CrdsValueLabel::ContactInfo(d.id); + cluster_info.insert_info(d); + assert!(cluster_info.gossip.crds.lookup(&label).is_some()); } #[test] fn window_index_request() { - let me = NodeInfo::new_localhost(Keypair::new().pubkey()); + let me = NodeInfo::new_localhost(Keypair::new().pubkey(), timestamp()); let mut cluster_info = ClusterInfo::new(me).expect("ClusterInfo::new"); let rv = cluster_info.window_index_request(0); assert_matches!(rv, Err(Error::ClusterInfoError(ClusterInfoError::NoPeers))); @@ -1516,11 +1089,12 @@ mod tests { socketaddr!([127, 0, 0, 1], 1237), socketaddr!([127, 0, 0, 1], 1238), socketaddr!([127, 0, 0, 1], 1239), + 0, ); - cluster_info.insert(&nxt); + cluster_info.insert_info(nxt.clone()); let rv = cluster_info.window_index_request(0).unwrap(); - assert_eq!(nxt.contact_info.ncp, ncp); - assert_eq!(rv.0, nxt.contact_info.ncp); + assert_eq!(nxt.ncp, ncp); + assert_eq!(rv.0, nxt.ncp); let ncp2 = socketaddr!([127, 0, 0, 2], 1234); let nxt = NodeInfo::new( @@ -1531,8 +1105,9 @@ mod tests { socketaddr!([127, 0, 0, 1], 1237), socketaddr!([127, 0, 0, 1], 1238), socketaddr!([127, 0, 0, 1], 1239), + 0, ); - cluster_info.insert(&nxt); + cluster_info.insert_info(nxt); let mut one = false; let mut two = false; while !one || !two { @@ -1548,149 +1123,6 @@ mod tests { assert!(one && two); } - #[test] - fn gossip_request_bad_addr() { - let me = NodeInfo::new( - Keypair::new().pubkey(), - socketaddr!("127.0.0.1:127"), - socketaddr!("127.0.0.1:127"), - socketaddr!("127.0.0.1:127"), - socketaddr!("127.0.0.1:127"), - socketaddr!("127.0.0.1:127"), - socketaddr!("127.0.0.1:127"), - ); - - let mut cluster_info = ClusterInfo::new(me).expect("ClusterInfo::new"); - let nxt1 = NodeInfo::new_unspecified(); - // Filter out unspecified addresses - cluster_info.insert(&nxt1); //<--- attack! 
- let rv = cluster_info.gossip_request(); - assert_matches!(rv, Err(Error::ClusterInfoError(ClusterInfoError::NoPeers))); - let nxt2 = NodeInfo::new_multicast(); - // Filter out multicast addresses - cluster_info.insert(&nxt2); //<--- attack! - let rv = cluster_info.gossip_request(); - assert_matches!(rv, Err(Error::ClusterInfoError(ClusterInfoError::NoPeers))); - } - - /// test that gossip requests are eventually generated for all nodes - #[test] - fn gossip_request() { - let me = NodeInfo::new_localhost(Keypair::new().pubkey()); - let mut cluster_info = ClusterInfo::new(me.clone()).expect("ClusterInfo::new"); - let rv = cluster_info.gossip_request(); - assert_matches!(rv, Err(Error::ClusterInfoError(ClusterInfoError::NoPeers))); - let nxt1 = NodeInfo::new_localhost(Keypair::new().pubkey()); - - cluster_info.insert(&nxt1); - - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt1.contact_info.ncp); - - let nxt2 = NodeInfo::new_entry_point(&socketaddr!("127.0.0.3:1234")); - cluster_info.insert(&nxt2); - // check that the service works - // and that it eventually produces a request for both nodes - let (sender, reader) = channel(); - let exit = Arc::new(AtomicBool::new(false)); - let obj = Arc::new(RwLock::new(cluster_info)); - let thread = ClusterInfo::gossip(obj, sender, exit.clone()); - let mut one = false; - let mut two = false; - for _ in 0..30 { - //50% chance each try that we get a repeat - let mut rv = reader.recv_timeout(Duration::new(1, 0)).unwrap(); - while let Ok(mut more) = reader.try_recv() { - rv.append(&mut more); - } - assert!(rv.len() > 0); - for i in rv.iter() { - if i.read().unwrap().meta.addr() == nxt1.contact_info.ncp { - one = true; - } else if i.read().unwrap().meta.addr() == nxt2.contact_info.ncp { - two = true; - } else { - //unexpected request - assert!(false); - } - } - if one && two { - break; - } - } - exit.store(true, Ordering::Relaxed); - thread.join().unwrap(); - //created requests to both - assert!(one && two); - } - - #[test] - fn purge_test() { - logger::setup(); - let me = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - let mut cluster_info = ClusterInfo::new(me.clone()).expect("ClusterInfo::new"); - let nxt = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.2:1234")); - assert_ne!(me.id, nxt.id); - cluster_info.set_leader(me.id); - cluster_info.insert(&nxt); - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt.contact_info.ncp); - let now = cluster_info.alive[&nxt.id]; - cluster_info.purge(now); - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt.contact_info.ncp); - - cluster_info.purge(now + GOSSIP_PURGE_MILLIS); - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt.contact_info.ncp); - - cluster_info.purge(now + GOSSIP_PURGE_MILLIS + 1); - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt.contact_info.ncp); - - let mut nxt2 = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.2:1234")); - assert_ne!(me.id, nxt2.id); - assert_ne!(nxt.id, nxt2.id); - cluster_info.insert(&nxt2); - while now == cluster_info.alive[&nxt2.id] { - sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS)); - nxt2.version += 1; - cluster_info.insert(&nxt2); - } - let len = cluster_info.table.len() as u64; - assert!((MIN_TABLE_SIZE as u64) < len); - cluster_info.purge(now + GOSSIP_PURGE_MILLIS); - assert_eq!(len as usize, cluster_info.table.len()); - trace!("purging"); - cluster_info.purge(now + GOSSIP_PURGE_MILLIS + 1); - assert_eq!(len as usize - 1, 
cluster_info.table.len()); - let rv = cluster_info.gossip_request().unwrap(); - assert_eq!(rv.0, nxt.contact_info.ncp); - } - #[test] - fn purge_leader_test() { - logger::setup(); - let me = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - let mut cluster_info = ClusterInfo::new(me.clone()).expect("ClusterInfo::new"); - let nxt = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.2:1234")); - assert_ne!(me.id, nxt.id); - cluster_info.insert(&nxt); - cluster_info.set_leader(nxt.id); - let now = cluster_info.alive[&nxt.id]; - let mut nxt2 = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.2:1234")); - cluster_info.insert(&nxt2); - while now == cluster_info.alive[&nxt2.id] { - sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS)); - nxt2.version = nxt2.version + 1; - cluster_info.insert(&nxt2); - } - let len = cluster_info.table.len() as u64; - cluster_info.purge(now + GOSSIP_PURGE_MILLIS + 1); - assert_eq!(len as usize - 1, cluster_info.table.len()); - assert_eq!(cluster_info.my_data().leader_id, nxt.id); - assert!(cluster_info.leader_data().is_none()); - } - /// test window requests respond with the right blob, and do not overrun #[test] fn run_window_request() { @@ -1704,23 +1136,46 @@ mod tests { socketaddr!("127.0.0.1:1237"), socketaddr!("127.0.0.1:1238"), socketaddr!("127.0.0.1:1239"), + 0, ); - let rv = - ClusterInfo::run_window_request(&me, &socketaddr_any!(), &window, &mut None, &me, 0); - assert!(rv.is_none()); + let leader_id = me.id; + let rv = ClusterInfo::run_window_request( + &me, + &socketaddr_any!(), + &window, + &mut None, + &me, + leader_id, + 0, + ); + assert!(rv.is_empty()); let out = SharedBlob::default(); out.write().unwrap().meta.size = 200; window.write().unwrap()[0].data = Some(out); - let rv = - ClusterInfo::run_window_request(&me, &socketaddr_any!(), &window, &mut None, &me, 0); - assert!(rv.is_some()); - let v = rv.unwrap(); + let rv = ClusterInfo::run_window_request( + &me, + &socketaddr_any!(), + &window, + &mut None, + &me, + leader_id, + 0, + ); + assert!(!rv.is_empty()); + let v = rv[0].clone(); //test we copied the blob assert_eq!(v.read().unwrap().meta.size, 200); let len = window.read().unwrap().len() as u64; - let rv = - ClusterInfo::run_window_request(&me, &socketaddr_any!(), &window, &mut None, &me, len); - assert!(rv.is_none()); + let rv = ClusterInfo::run_window_request( + &me, + &socketaddr_any!(), + &window, + &mut None, + &me, + leader_id, + len, + ); + assert!(rv.is_empty()); fn tmp_ledger(name: &str) -> String { let path = get_tmp_ledger_path(name); @@ -1747,9 +1202,10 @@ mod tests { &window, &mut Some(&mut ledger_window), &me, + leader_id, 1, ); - assert!(rv.is_some()); + assert!(!rv.is_empty()); remove_dir_all(ledger_path).unwrap(); } @@ -1759,8 +1215,8 @@ mod tests { fn run_window_request_with_backoff() { let window = Arc::new(RwLock::new(default_window())); - let mut me = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - me.leader_id = me.id; + let me = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); + let leader_id = me.id; let mock_peer = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); @@ -1771,9 +1227,10 @@ mod tests { &window, &mut None, &me, + leader_id, 0, ); - assert!(rv.is_none()); + assert!(rv.is_empty()); let blob = SharedBlob::default(); let blob_size = 200; blob.write().unwrap().meta.size = blob_size; @@ -1787,8 +1244,9 @@ mod tests { &window, &mut None, &me, + leader_id, 0, - ).unwrap(); + )[0].clone(); let blob = shared_blob.read().unwrap(); // Test we copied the blob 
assert_eq!(blob.meta.size, blob_size); @@ -1802,78 +1260,13 @@ mod tests { } } - /// Validates the node that sent Protocol::ReceiveUpdates gets its - /// liveness updated, but not if the node sends Protocol::ReceiveUpdates - /// to itself. - #[test] - fn protocol_requestupdate_alive() { - logger::setup(); - let window = Arc::new(RwLock::new(default_window())); - - let node = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - let node_with_same_addr = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - assert_ne!(node.id, node_with_same_addr.id); - let node_with_diff_addr = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:4321")); - - let cluster_info = ClusterInfo::new(node.clone()).expect("ClusterInfo::new"); - assert_eq!(cluster_info.alive.len(), 0); - - let obj = Arc::new(RwLock::new(cluster_info)); - - let request = Protocol::RequestUpdates(1, node.clone()); - assert!(ClusterInfo::handle_protocol( - &obj, - &node.contact_info.ncp, - request, - &window, - &mut None, - ) - .is_none()); - - let request = Protocol::RequestUpdates(1, node_with_same_addr.clone()); - assert!(ClusterInfo::handle_protocol( - &obj, - &node.contact_info.ncp, - request, - &window, - &mut None, - ) - .is_none()); - - let request = Protocol::RequestUpdates(1, node_with_diff_addr.clone()); - ClusterInfo::handle_protocol(&obj, &node.contact_info.ncp, request, &window, &mut None); - - let me = obj.write().unwrap(); - - // |node| and |node_with_same_addr| are ok to me in me.alive, should not be in me.alive, but - assert!(!me.alive.contains_key(&node.id)); - // same addr might very well happen because of NAT - assert!(me.alive.contains_key(&node_with_same_addr.id)); - // |node_with_diff_addr| should now be. - assert!(me.alive[&node_with_diff_addr.id] > 0); - } - - #[test] - fn test_is_valid_address() { - assert!(cfg!(test)); - let bad_address_port = socketaddr!("127.0.0.1:0"); - assert!(!ClusterInfo::is_valid_address(&bad_address_port)); - let bad_address_unspecified = socketaddr!(0, 1234); - assert!(!ClusterInfo::is_valid_address(&bad_address_unspecified)); - let bad_address_multicast = socketaddr!([224, 254, 0, 0], 1234); - assert!(!ClusterInfo::is_valid_address(&bad_address_multicast)); - let loopback = socketaddr!("127.0.0.1:1234"); - assert!(ClusterInfo::is_valid_address(&loopback)); - // assert!(!ClusterInfo::is_valid_ip_internal(loopback.ip(), false)); - } - #[test] fn test_default_leader() { logger::setup(); - let node_info = NodeInfo::new_localhost(Keypair::new().pubkey()); + let node_info = NodeInfo::new_localhost(Keypair::new().pubkey(), 0); let mut cluster_info = ClusterInfo::new(node_info).unwrap(); let network_entry_point = NodeInfo::new_entry_point(&socketaddr!("127.0.0.1:1239")); - cluster_info.insert(&network_entry_point); + cluster_info.insert_info(network_entry_point); assert!(cluster_info.leader_data().is_none()); } diff --git a/src/contact_info.rs b/src/contact_info.rs new file mode 100644 index 0000000000..93857d0e7c --- /dev/null +++ b/src/contact_info.rs @@ -0,0 +1,237 @@ +use rpc::RPC_PORT; +use signature::{Keypair, KeypairUtil}; +use solana_sdk::pubkey::Pubkey; +use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use timing::timestamp; + +/// Structure representing a node on the network +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] +pub struct ContactInfo { + pub id: Pubkey, + /// gossip address + pub ncp: SocketAddr, + /// address to connect to for replication + pub tvu: SocketAddr, + /// transactions address + pub tpu: SocketAddr, + /// storage data 
address + pub storage_addr: SocketAddr, + /// address to which to send JSON-RPC requests + pub rpc: SocketAddr, + /// websocket for JSON-RPC push notifications + pub rpc_pubsub: SocketAddr, + /// latest wallclock picked + pub wallclock: u64, +} + +#[macro_export] +macro_rules! socketaddr { + ($ip:expr, $port:expr) => { + SocketAddr::from((Ipv4Addr::from($ip), $port)) + }; + ($str:expr) => {{ + let a: SocketAddr = $str.parse().unwrap(); + a + }}; +} +#[macro_export] +macro_rules! socketaddr_any { + () => { + socketaddr!(0, 0) + }; +} + +impl Default for ContactInfo { + fn default() -> Self { + ContactInfo { + id: Pubkey::default(), + ncp: socketaddr_any!(), + tvu: socketaddr_any!(), + tpu: socketaddr_any!(), + storage_addr: socketaddr_any!(), + rpc: socketaddr_any!(), + rpc_pubsub: socketaddr_any!(), + wallclock: 0, + } + } +} + +impl ContactInfo { + pub fn new( + id: Pubkey, + ncp: SocketAddr, + tvu: SocketAddr, + tpu: SocketAddr, + storage_addr: SocketAddr, + rpc: SocketAddr, + rpc_pubsub: SocketAddr, + now: u64, + ) -> Self { + ContactInfo { + id, + ncp, + tvu, + tpu, + storage_addr, + rpc, + rpc_pubsub, + wallclock: now, + } + } + + pub fn new_localhost(id: Pubkey, now: u64) -> Self { + Self::new( + id, + socketaddr!("127.0.0.1:1234"), + socketaddr!("127.0.0.1:1235"), + socketaddr!("127.0.0.1:1236"), + socketaddr!("127.0.0.1:1237"), + socketaddr!("127.0.0.1:1238"), + socketaddr!("127.0.0.1:1239"), + now, + ) + } + + #[cfg(test)] + /// ContactInfo with multicast addresses for adversarial testing. + pub fn new_multicast() -> Self { + let addr = socketaddr!("224.0.1.255:1000"); + assert!(addr.ip().is_multicast()); + Self::new( + Keypair::new().pubkey(), + addr, + addr, + addr, + addr, + addr, + addr, + 0, + ) + } + fn next_port(addr: &SocketAddr, nxt: u16) -> SocketAddr { + let mut nxt_addr = *addr; + nxt_addr.set_port(addr.port() + nxt); + nxt_addr + } + pub fn new_with_pubkey_socketaddr(pubkey: Pubkey, bind_addr: &SocketAddr) -> Self { + let transactions_addr = *bind_addr; + let gossip_addr = Self::next_port(&bind_addr, 1); + let replicate_addr = Self::next_port(&bind_addr, 2); + let rpc_addr = SocketAddr::new(bind_addr.ip(), RPC_PORT); + let rpc_pubsub_addr = SocketAddr::new(bind_addr.ip(), RPC_PORT + 1); + ContactInfo::new( + pubkey, + gossip_addr, + replicate_addr, + transactions_addr, + "0.0.0.0:0".parse().unwrap(), + rpc_addr, + rpc_pubsub_addr, + timestamp(), + ) + } + pub fn new_with_socketaddr(bind_addr: &SocketAddr) -> Self { + let keypair = Keypair::new(); + Self::new_with_pubkey_socketaddr(keypair.pubkey(), bind_addr) + } + // + pub fn new_entry_point(gossip_addr: &SocketAddr) -> Self { + let daddr: SocketAddr = socketaddr!("0.0.0.0:0"); + ContactInfo::new( + Pubkey::default(), + *gossip_addr, + daddr, + daddr, + daddr, + daddr, + daddr, + timestamp(), + ) + } + fn is_valid_ip(addr: IpAddr) -> bool { + !(addr.is_unspecified() || addr.is_multicast()) + // || (addr.is_loopback() && !cfg_test)) + // TODO: boot loopback in production networks + } + /// port must not be 0 + /// ip must be specified and not mulitcast + /// loopback ip is only allowed in tests + pub fn is_valid_address(addr: &SocketAddr) -> bool { + (addr.port() != 0) && Self::is_valid_ip(addr.ip()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_valid_address() { + assert!(cfg!(test)); + let bad_address_port = socketaddr!("127.0.0.1:0"); + assert!(!ContactInfo::is_valid_address(&bad_address_port)); + let bad_address_unspecified = socketaddr!(0, 1234); + 
assert!(!ContactInfo::is_valid_address(&bad_address_unspecified)); + let bad_address_multicast = socketaddr!([224, 254, 0, 0], 1234); + assert!(!ContactInfo::is_valid_address(&bad_address_multicast)); + let loopback = socketaddr!("127.0.0.1:1234"); + assert!(ContactInfo::is_valid_address(&loopback)); + // assert!(!ContactInfo::is_valid_ip_internal(loopback.ip(), false)); + } + #[test] + fn test_default() { + let ci = ContactInfo::default(); + assert!(ci.ncp.ip().is_unspecified()); + assert!(ci.tvu.ip().is_unspecified()); + assert!(ci.rpc.ip().is_unspecified()); + assert!(ci.rpc_pubsub.ip().is_unspecified()); + assert!(ci.tpu.ip().is_unspecified()); + assert!(ci.storage_addr.ip().is_unspecified()); + } + #[test] + fn test_multicast() { + let ci = ContactInfo::new_multicast(); + assert!(ci.ncp.ip().is_multicast()); + assert!(ci.tvu.ip().is_multicast()); + assert!(ci.rpc.ip().is_multicast()); + assert!(ci.rpc_pubsub.ip().is_multicast()); + assert!(ci.tpu.ip().is_multicast()); + assert!(ci.storage_addr.ip().is_multicast()); + } + #[test] + fn test_entry_point() { + let addr = socketaddr!("127.0.0.1:10"); + let ci = ContactInfo::new_entry_point(&addr); + assert_eq!(ci.ncp, addr); + assert!(ci.tvu.ip().is_unspecified()); + assert!(ci.rpc.ip().is_unspecified()); + assert!(ci.rpc_pubsub.ip().is_unspecified()); + assert!(ci.tpu.ip().is_unspecified()); + assert!(ci.storage_addr.ip().is_unspecified()); + } + #[test] + fn test_socketaddr() { + let addr = socketaddr!("127.0.0.1:10"); + let ci = ContactInfo::new_with_socketaddr(&addr); + assert_eq!(ci.tpu, addr); + assert_eq!(ci.ncp.port(), 11); + assert_eq!(ci.tvu.port(), 12); + assert_eq!(ci.rpc.port(), 8899); + assert_eq!(ci.rpc_pubsub.port(), 8900); + assert!(ci.storage_addr.ip().is_unspecified()); + } + #[test] + fn replicated_data_new_with_socketaddr_with_pubkey() { + let keypair = Keypair::new(); + let d1 = ContactInfo::new_with_pubkey_socketaddr( + keypair.pubkey().clone(), + &socketaddr!("127.0.0.1:1234"), + ); + assert_eq!(d1.id, keypair.pubkey()); + assert_eq!(d1.ncp, socketaddr!("127.0.0.1:1235")); + assert_eq!(d1.tvu, socketaddr!("127.0.0.1:1236")); + assert_eq!(d1.tpu, socketaddr!("127.0.0.1:1234")); + assert_eq!(d1.rpc, socketaddr!("127.0.0.1:8899")); + assert_eq!(d1.rpc_pubsub, socketaddr!("127.0.0.1:8900")); + } +} diff --git a/src/crds.rs b/src/crds.rs new file mode 100644 index 0000000000..cf054017f8 --- /dev/null +++ b/src/crds.rs @@ -0,0 +1,351 @@ +//! This module implements Cluster Replicated Data Store for +//! asynchronous updates in a distributed network. +//! +//! Data is stored in the CrdsValue type, each type has a specific +//! CrdsValueLabel. Labels are semantically grouped into a single record +//! that is identified by a Pubkey. +//! * 1 Pubkey maps many CrdsValueLabels +//! * 1 CrdsValueLabel maps to 1 CrdsValue +//! The Label, the record Pubkey, and all the record labels can be derived +//! from a single CrdsValue. +//! +//! The actual data is stored in a single map of +//! `CrdsValueLabel(Pubkey) -> CrdsValue` This allows for partial record +//! updates to be propagated through the network. +//! +//! This means that full `Record` updates are not atomic. +//! +//! Additional labels can be added by appending them to the CrdsValueLabel, +//! CrdsValue enums. +//! +//! Merge strategy is implemented in: +//! impl PartialOrd for VersionedCrdsValue +//! +//! A value is updated to a new version if the labels match, and the value +//! wallclock is later, or the value hash is greater. 
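+//!
+//! A minimal sketch of the merge rule (illustrative only; assumes the
+//! crate-local paths below and is not compiled as a doctest):
+//!
+//! ```ignore
+//! use crds::Crds;
+//! use crds_value::{CrdsValue, LeaderId};
+//! use solana_sdk::pubkey::Pubkey;
+//!
+//! let mut crds = Crds::default();
+//! let key = Pubkey::default();
+//! // Both values carry the same CrdsValueLabel; the later wallclock wins.
+//! let v1 = CrdsValue::LeaderId(LeaderId { id: key, leader_id: key, wallclock: 1 });
+//! let v2 = CrdsValue::LeaderId(LeaderId { id: key, leader_id: key, wallclock: 2 });
+//! let label = v2.label();
+//! assert!(crds.insert(v1, 0).is_ok());
+//! assert!(crds.insert(v2, 0).is_ok()); // replaces v1: same label, later wallclock
+//! assert_eq!(crds.lookup(&label).unwrap().wallclock(), 2);
+//! ```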
+
+use bincode::serialize;
+use crds_value::{CrdsValue, CrdsValueLabel};
+use hash::{hash, Hash};
+use indexmap::map::IndexMap;
+use solana_sdk::pubkey::Pubkey;
+use std::cmp;
+
+pub struct Crds {
+    /// Stores the map of labels and values
+    pub table: IndexMap<CrdsValueLabel, VersionedCrdsValue>,
+}
+
+#[derive(PartialEq, Debug)]
+pub enum CrdsError {
+    InsertFailed,
+}
+
+/// This structure stores some local metadata associated with the CrdsValue.
+/// The implementation of PartialOrd ensures that the "highest" version is always picked to be
+/// stored in the Crds
+#[derive(PartialEq, Debug)]
+pub struct VersionedCrdsValue {
+    pub value: CrdsValue,
+    /// local time when inserted
+    pub insert_timestamp: u64,
+    /// local time when updated
+    pub local_timestamp: u64,
+    /// value hash
+    pub value_hash: Hash,
+}
+
+impl PartialOrd for VersionedCrdsValue {
+    fn partial_cmp(&self, other: &VersionedCrdsValue) -> Option<cmp::Ordering> {
+        if self.value.label() != other.value.label() {
+            None
+        } else if self.value.wallclock() == other.value.wallclock() {
+            Some(self.value_hash.cmp(&other.value_hash))
+        } else {
+            Some(self.value.wallclock().cmp(&other.value.wallclock()))
+        }
+    }
+}
+impl VersionedCrdsValue {
+    pub fn new(local_timestamp: u64, value: CrdsValue) -> Self {
+        let value_hash = hash(&serialize(&value).unwrap());
+        VersionedCrdsValue {
+            value,
+            insert_timestamp: local_timestamp,
+            local_timestamp,
+            value_hash,
+        }
+    }
+}
+
+impl Default for Crds {
+    fn default() -> Self {
+        Crds {
+            table: IndexMap::new(),
+        }
+    }
+}
+
+impl Crds {
+    /// must be called atomically with `insert_versioned`
+    pub fn new_versioned(&self, local_timestamp: u64, value: CrdsValue) -> VersionedCrdsValue {
+        VersionedCrdsValue::new(local_timestamp, value)
+    }
+    /// insert the new value, returns the old value if insert succeeds
+    pub fn insert_versioned(
+        &mut self,
+        new_value: VersionedCrdsValue,
+    ) -> Result<Option<VersionedCrdsValue>, CrdsError> {
+        let label = new_value.value.label();
+        let wallclock = new_value.value.wallclock();
+        let do_insert = self
+            .table
+            .get(&label)
+            .map(|current| new_value > *current)
+            .unwrap_or(true);
+        if do_insert {
+            let old = self.table.insert(label, new_value);
+            Ok(old)
+        } else {
+            trace!("INSERT FAILED data: {} new.wallclock: {}", label, wallclock,);
+            Err(CrdsError::InsertFailed)
+        }
+    }
+    pub fn insert(
+        &mut self,
+        value: CrdsValue,
+        local_timestamp: u64,
+    ) -> Result<Option<VersionedCrdsValue>, CrdsError> {
+        let new_value = self.new_versioned(local_timestamp, value);
+        self.insert_versioned(new_value)
+    }
+    pub fn lookup(&self, label: &CrdsValueLabel) -> Option<&CrdsValue> {
+        self.table.get(label).map(|x| &x.value)
+    }
+
+    pub fn lookup_versioned(&self, label: &CrdsValueLabel) -> Option<&VersionedCrdsValue> {
+        self.table.get(label)
+    }
+
+    fn update_label_timestamp(&mut self, id: &CrdsValueLabel, now: u64) {
+        if let Some(e) = self.table.get_mut(id) {
+            e.local_timestamp = cmp::max(e.local_timestamp, now);
+        }
+    }
+
+    /// Update the timestamps of all the labels that are associated with Pubkey
+    pub fn update_record_timestamp(&mut self, pubkey: Pubkey, now: u64) {
+        for label in &CrdsValue::record_labels(pubkey) {
+            self.update_label_timestamp(label, now);
+        }
+    }
+
+    /// find all the keys that are older or equal to min_ts
+    pub fn find_old_labels(&self, min_ts: u64) -> Vec<CrdsValueLabel> {
+        self.table
+            .iter()
+            .filter_map(|(k, v)| {
+                if v.local_timestamp <= min_ts {
+                    Some(k)
+                } else {
+                    None
+                }
+            }).cloned()
+            .collect()
+    }
+
+    pub fn remove(&mut self, key: &CrdsValueLabel) {
+        self.table.remove(key);
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use
contact_info::ContactInfo; + use crds_value::LeaderId; + use signature::{Keypair, KeypairUtil}; + + #[test] + fn test_insert() { + let mut crds = Crds::default(); + let val = CrdsValue::LeaderId(LeaderId::default()); + assert_eq!(crds.insert(val.clone(), 0).ok(), Some(None)); + assert_eq!(crds.table.len(), 1); + assert!(crds.table.contains_key(&val.label())); + assert_eq!(crds.table[&val.label()].local_timestamp, 0); + } + #[test] + fn test_update_old() { + let mut crds = Crds::default(); + let val = CrdsValue::LeaderId(LeaderId::default()); + assert_eq!(crds.insert(val.clone(), 0), Ok(None)); + assert_eq!(crds.insert(val.clone(), 1), Err(CrdsError::InsertFailed)); + assert_eq!(crds.table[&val.label()].local_timestamp, 0); + } + #[test] + fn test_update_new() { + let mut crds = Crds::default(); + let original = CrdsValue::LeaderId(LeaderId::default()); + assert_matches!(crds.insert(original.clone(), 0), Ok(_)); + let val = CrdsValue::LeaderId(LeaderId { + id: Pubkey::default(), + leader_id: Pubkey::default(), + wallclock: 1, + }); + assert_eq!( + crds.insert(val.clone(), 1).unwrap().unwrap().value, + original + ); + assert_eq!(crds.table[&val.label()].local_timestamp, 1); + } + #[test] + fn test_update_timestsamp() { + let mut crds = Crds::default(); + let val = CrdsValue::LeaderId(LeaderId::default()); + assert_eq!(crds.insert(val.clone(), 0), Ok(None)); + + crds.update_label_timestamp(&val.label(), 1); + assert_eq!(crds.table[&val.label()].local_timestamp, 1); + assert_eq!(crds.table[&val.label()].insert_timestamp, 0); + + let val2 = CrdsValue::ContactInfo(ContactInfo::default()); + assert_eq!(val2.label().pubkey(), val.label().pubkey()); + assert_matches!(crds.insert(val2.clone(), 0), Ok(None)); + + crds.update_record_timestamp(val.label().pubkey(), 2); + assert_eq!(crds.table[&val.label()].local_timestamp, 2); + assert_eq!(crds.table[&val.label()].insert_timestamp, 0); + assert_eq!(crds.table[&val2.label()].local_timestamp, 2); + assert_eq!(crds.table[&val2.label()].insert_timestamp, 0); + + crds.update_record_timestamp(val.label().pubkey(), 1); + assert_eq!(crds.table[&val.label()].local_timestamp, 2); + assert_eq!(crds.table[&val.label()].insert_timestamp, 0); + + let mut ci = ContactInfo::default(); + ci.wallclock += 1; + let val3 = CrdsValue::ContactInfo(ci); + assert_matches!(crds.insert(val3.clone(), 3), Ok(Some(_))); + assert_eq!(crds.table[&val2.label()].local_timestamp, 3); + assert_eq!(crds.table[&val2.label()].insert_timestamp, 3); + } + #[test] + fn test_find_old_records() { + let mut crds = Crds::default(); + let val = CrdsValue::LeaderId(LeaderId::default()); + assert_eq!(crds.insert(val.clone(), 1), Ok(None)); + + assert!(crds.find_old_labels(0).is_empty()); + assert_eq!(crds.find_old_labels(1), vec![val.label()]); + assert_eq!(crds.find_old_labels(2), vec![val.label()]); + } + #[test] + fn test_remove() { + let mut crds = Crds::default(); + let val = CrdsValue::LeaderId(LeaderId::default()); + assert_matches!(crds.insert(val.clone(), 1), Ok(_)); + + assert_eq!(crds.find_old_labels(1), vec![val.label()]); + crds.remove(&val.label()); + assert!(crds.find_old_labels(1).is_empty()); + } + #[test] + fn test_equal() { + let key = Keypair::new(); + let v1 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + let v2 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + assert!(!(v1 != v2)); + 
assert!(v1 == v2); + } + #[test] + fn test_hash_order() { + let key = Keypair::new(); + let v1 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + let v2 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: key.pubkey(), + wallclock: 0, + }), + ); + assert!(v1 != v2); + assert!(!(v1 == v2)); + if v1 > v2 { + assert!(v2 < v1) + } else { + assert!(v2 > v1) + } + } + #[test] + fn test_wallclock_order() { + let key = Keypair::new(); + let v1 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: Pubkey::default(), + wallclock: 1, + }), + ); + let v2 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: key.pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + assert!(v1 > v2); + assert!(!(v1 < v2)); + assert!(v1 != v2); + assert!(!(v1 == v2)); + } + #[test] + fn test_label_order() { + let v1 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: Keypair::new().pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + let v2 = VersionedCrdsValue::new( + 1, + CrdsValue::LeaderId(LeaderId { + id: Keypair::new().pubkey(), + leader_id: Pubkey::default(), + wallclock: 0, + }), + ); + assert!(v1 != v2); + assert!(!(v1 == v2)); + assert!(!(v1 < v2)); + assert!(!(v1 > v2)); + assert!(!(v2 < v1)); + assert!(!(v2 > v1)); + } +} diff --git a/src/crds_gossip.rs b/src/crds_gossip.rs new file mode 100644 index 0000000000..07937a69de --- /dev/null +++ b/src/crds_gossip.rs @@ -0,0 +1,486 @@ +//! Crds Gossip +//! This module ties together Crds and the push and pull gossip overlays. The interface is +//! designed to run with a simulator or over a UDP network connection with messages up to a +//! packet::BLOB_DATA_SIZE size. 
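+//!
+//! A minimal sketch of one pull round between two `CrdsGossip` instances
+//! (illustrative only; `a` and `b` are assumed to already hold each other's
+//! `ContactInfo`, and the network transport is elided):
+//!
+//! ```ignore
+//! // `a` picks a weighted-random peer and builds a bloom filter of what it already has.
+//! let (peer, filter, caller_info) = a.new_pull_request(now)?;
+//! a.mark_pull_request_creation_time(peer, now);
+//! // `b` (the node whose pubkey was returned as `peer`) answers with values
+//! // that are missing from the filter.
+//! let response = b.process_pull_request(caller_info, filter, now);
+//! // `a` merges the response into its local crds table.
+//! let _failed_inserts = a.process_pull_response(peer, response, now);
+//! ```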
+
+use bloom::Bloom;
+use crds::Crds;
+use crds_gossip_error::CrdsGossipError;
+use crds_gossip_pull::CrdsGossipPull;
+use crds_gossip_push::{CrdsGossipPush, CRDS_GOSSIP_NUM_ACTIVE};
+use crds_value::CrdsValue;
+use hash::Hash;
+use solana_sdk::pubkey::Pubkey;
+
+pub struct CrdsGossip {
+    pub crds: Crds,
+    pub id: Pubkey,
+    push: CrdsGossipPush,
+    pull: CrdsGossipPull,
+}
+
+impl Default for CrdsGossip {
+    fn default() -> Self {
+        CrdsGossip {
+            crds: Crds::default(),
+            id: Pubkey::default(),
+            push: CrdsGossipPush::default(),
+            pull: CrdsGossipPull::default(),
+        }
+    }
+}
+
+impl CrdsGossip {
+    pub fn set_self(&mut self, id: Pubkey) {
+        self.id = id;
+    }
+    /// process a push message to the network
+    pub fn process_push_message(&mut self, values: &[CrdsValue], now: u64) -> Vec<Pubkey> {
+        let results: Vec<_> = values
+            .iter()
+            .map(|val| {
+                self.push
+                    .process_push_message(&mut self.crds, val.clone(), now)
+            }).collect();
+        results
+            .into_iter()
+            .zip(values)
+            .filter_map(|(r, d)| {
+                if r == Err(CrdsGossipError::PushMessagePrune) {
+                    Some(d.label().pubkey())
+                } else if let Ok(Some(val)) = r {
+                    self.pull
+                        .record_old_hash(val.value_hash, val.local_timestamp);
+                    None
+                } else {
+                    None
+                }
+            }).collect()
+    }
+
+    pub fn new_push_messages(&mut self, now: u64) -> (Pubkey, Vec<Pubkey>, Vec<CrdsValue>) {
+        let (peers, values) = self.push.new_push_messages(&self.crds, now);
+        (self.id, peers, values)
+    }
+
+    /// add the `from` to the peer's filter of nodes
+    pub fn process_prune_msg(&mut self, peer: Pubkey, origin: &[Pubkey]) {
+        self.push.process_prune_msg(peer, origin)
+    }
+
+    /// refresh the push active set
+    /// * ratio - number of actives to rotate
+    pub fn refresh_push_active_set(&mut self) {
+        self.push.refresh_push_active_set(
+            &self.crds,
+            self.id,
+            self.pull.pull_request_time.len(),
+            CRDS_GOSSIP_NUM_ACTIVE,
+        )
+    }
+
+    /// generate a random request
+    pub fn new_pull_request(
+        &self,
+        now: u64,
+    ) -> Result<(Pubkey, Bloom<Hash>, CrdsValue), CrdsGossipError> {
+        self.pull.new_pull_request(&self.crds, self.id, now)
+    }
+
+    /// time when a request to `from` was initiated
+    /// This is used for weighted random selection during `new_pull_request`
+    /// It's important to use the local node's request creation time as the weight
+    /// instead of the response received time, otherwise failed nodes will increase their weight.
+ pub fn mark_pull_request_creation_time(&mut self, from: Pubkey, now: u64) { + self.pull.mark_pull_request_creation_time(from, now) + } + /// process a pull request and create a response + pub fn process_pull_request( + &mut self, + caller: CrdsValue, + filter: Bloom, + now: u64, + ) -> Vec { + self.pull + .process_pull_request(&mut self.crds, caller, filter, now) + } + /// process a pull response + pub fn process_pull_response( + &mut self, + from: Pubkey, + response: Vec, + now: u64, + ) -> usize { + self.pull + .process_pull_response(&mut self.crds, from, response, now) + } + pub fn purge(&mut self, now: u64) { + if now > self.push.msg_timeout { + let min = now - self.push.msg_timeout; + self.push.purge_old_pending_push_messages(&self.crds, min); + } + if now > 5 * self.push.msg_timeout { + let min = now - 5 * self.push.msg_timeout; + self.push.purge_old_pushed_once_messages(min); + } + if now > self.pull.crds_timeout { + let min = now - self.pull.crds_timeout; + self.pull.purge_active(&mut self.crds, self.id, min); + } + if now > 5 * self.pull.crds_timeout { + let min = now - 5 * self.pull.crds_timeout; + self.pull.purge_purged(min); + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use bincode::serialized_size; + use contact_info::ContactInfo; + use crds_gossip_push::CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS; + use crds_value::CrdsValueLabel; + use rayon::prelude::*; + use signature::{Keypair, KeypairUtil}; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; + + type Node = Arc>; + type Network = HashMap; + fn star_network_create(num: usize) -> Network { + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let mut network: HashMap<_, _> = (1..num) + .map(|_| { + let new = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let id = new.label().pubkey(); + let mut node = CrdsGossip::default(); + node.crds.insert(new.clone(), 0).unwrap(); + node.crds.insert(entry.clone(), 0).unwrap(); + node.set_self(id); + (new.label().pubkey(), Arc::new(Mutex::new(node))) + }).collect(); + let mut node = CrdsGossip::default(); + let id = entry.label().pubkey(); + node.crds.insert(entry.clone(), 0).unwrap(); + node.set_self(id); + network.insert(id, Arc::new(Mutex::new(node))); + network + } + + fn rstar_network_create(num: usize) -> Network { + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let mut origin = CrdsGossip::default(); + let id = entry.label().pubkey(); + origin.crds.insert(entry.clone(), 0).unwrap(); + origin.set_self(id); + let mut network: HashMap<_, _> = (1..num) + .map(|_| { + let new = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let id = new.label().pubkey(); + let mut node = CrdsGossip::default(); + node.crds.insert(new.clone(), 0).unwrap(); + origin.crds.insert(new.clone(), 0).unwrap(); + node.set_self(id); + (new.label().pubkey(), Arc::new(Mutex::new(node))) + }).collect(); + network.insert(id, Arc::new(Mutex::new(origin))); + network + } + + fn ring_network_create(num: usize) -> Network { + let mut network: HashMap<_, _> = (0..num) + .map(|_| { + let new = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let id = new.label().pubkey(); + let mut node = CrdsGossip::default(); + node.crds.insert(new.clone(), 0).unwrap(); + node.set_self(id); + (new.label().pubkey(), Arc::new(Mutex::new(node))) + }).collect(); + let keys: Vec = network.keys().cloned().collect(); + for k in 
0..keys.len() { + let start_info = { + let start = &network[&keys[k]]; + let start_id = start.lock().unwrap().id.clone(); + start + .lock() + .unwrap() + .crds + .lookup(&CrdsValueLabel::ContactInfo(start_id)) + .unwrap() + .clone() + }; + let end = network.get_mut(&keys[(k + 1) % keys.len()]).unwrap(); + end.lock().unwrap().crds.insert(start_info, 0).unwrap(); + } + network + } + + fn network_simulator_pull_only(network: &mut Network) { + let num = network.len(); + let (converged, bytes_tx) = network_run_pull(network, 0, num * 2, 0.9); + trace!( + "network_simulator_pull_{}: converged: {} total_bytes: {}", + num, + converged, + bytes_tx + ); + assert!(converged >= 0.9); + } + + fn network_simulator(network: &mut Network) { + let num = network.len(); + // run for a small amount of time + let (converged, bytes_tx) = network_run_pull(network, 0, 10, 1.0); + trace!("network_simulator_push_{}: converged: {}", num, converged); + // make sure there is someone in the active set + let network_values: Vec = network.values().cloned().collect(); + network_values.par_iter().for_each(|node| { + node.lock().unwrap().refresh_push_active_set(); + }); + let mut total_bytes = bytes_tx; + for second in 1..num { + let start = second * 10; + let end = (second + 1) * 10; + let now = (start * 100) as u64; + // push a message to the network + network_values.par_iter().for_each(|locked_node| { + let node = &mut locked_node.lock().unwrap(); + let mut m = node + .crds + .lookup(&CrdsValueLabel::ContactInfo(node.id)) + .and_then(|v| v.contact_info().cloned()) + .unwrap(); + m.wallclock = now; + node.process_push_message(&[CrdsValue::ContactInfo(m.clone())], now); + }); + // push for a bit + let (queue_size, bytes_tx) = network_run_push(network, start, end); + total_bytes += bytes_tx; + trace!( + "network_simulator_push_{}: queue_size: {} bytes: {}", + num, + queue_size, + bytes_tx + ); + // pull for a bit + let (converged, bytes_tx) = network_run_pull(network, start, end, 1.0); + total_bytes += bytes_tx; + trace!( + "network_simulator_push_{}: converged: {} bytes: {} total_bytes: {}", + num, + converged, + bytes_tx, + total_bytes + ); + if converged > 0.9 { + break; + } + } + } + + fn network_run_push(network: &mut Network, start: usize, end: usize) -> (usize, usize) { + let mut bytes: usize = 0; + let mut num_msgs: usize = 0; + let mut total: usize = 0; + let num = network.len(); + let mut prunes: usize = 0; + let mut delivered: usize = 0; + let network_values: Vec = network.values().cloned().collect(); + for t in start..end { + let now = t as u64 * 100; + let requests: Vec<_> = network_values + .par_iter() + .map(|node| { + node.lock().unwrap().purge(now); + node.lock().unwrap().new_push_messages(now) + }).collect(); + let transfered: Vec<_> = requests + .par_iter() + .map(|(from, peers, msgs)| { + let mut bytes: usize = 0; + let mut delivered: usize = 0; + let mut num_msgs: usize = 0; + let mut prunes: usize = 0; + for to in peers { + bytes += serialized_size(msgs).unwrap() as usize; + num_msgs += 1; + let rsps = network + .get(&to) + .map(|node| node.lock().unwrap().process_push_message(&msgs, now)) + .unwrap(); + bytes += serialized_size(&rsps).unwrap() as usize; + prunes += rsps.len(); + network + .get(&from) + .map(|node| node.lock().unwrap().process_prune_msg(*to, &rsps)) + .unwrap(); + delivered += rsps.is_empty() as usize; + } + (bytes, delivered, num_msgs, prunes) + }).collect(); + for (b, d, m, p) in transfered { + bytes += b; + delivered += d; + num_msgs += m; + prunes += p; + } + if now % 
CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS == 0 && now > 0 { + network_values.par_iter().for_each(|node| { + node.lock().unwrap().refresh_push_active_set(); + }); + } + total = network_values + .par_iter() + .map(|v| v.lock().unwrap().push.num_pending()) + .sum(); + trace!( + "network_run_push_{}: now: {} queue: {} bytes: {} num_msgs: {} prunes: {} delivered: {}", + num, + now, + total, + bytes, + num_msgs, + prunes, + delivered, + ); + } + (total, bytes) + } + + fn network_run_pull( + network: &mut Network, + start: usize, + end: usize, + max_convergance: f64, + ) -> (f64, usize) { + let mut bytes: usize = 0; + let mut msgs: usize = 0; + let mut overhead: usize = 0; + let mut convergance = 0f64; + let num = network.len(); + let network_values: Vec = network.values().cloned().collect(); + for t in start..end { + let now = t as u64 * 100; + let mut requests: Vec<_> = { + network_values + .par_iter() + .filter_map(|from| from.lock().unwrap().new_pull_request(now).ok()) + .collect() + }; + let transfered: Vec<_> = requests + .into_par_iter() + .map(|(to, request, caller_info)| { + let mut bytes: usize = 0; + let mut msgs: usize = 0; + let mut overhead: usize = 0; + let from = caller_info.label().pubkey(); + bytes += request.keys.len(); + bytes += (request.bits.len() / 8) as usize; + bytes += serialized_size(&caller_info).unwrap() as usize; + let rsp = network + .get(&to) + .map(|node| { + node.lock() + .unwrap() + .process_pull_request(caller_info, request, now) + }).unwrap(); + bytes += serialized_size(&rsp).unwrap() as usize; + msgs += rsp.len(); + network.get(&from).map(|node| { + node.lock() + .unwrap() + .mark_pull_request_creation_time(from, now); + overhead += node.lock().unwrap().process_pull_response(from, rsp, now); + }); + (bytes, msgs, overhead) + }).collect(); + for (b, m, o) in transfered { + bytes += b; + msgs += m; + overhead += o; + } + let total: usize = network_values + .par_iter() + .map(|v| v.lock().unwrap().crds.table.len()) + .sum(); + convergance = total as f64 / ((num * num) as f64); + if convergance > max_convergance { + break; + } + trace!( + "network_run_pull_{}: now: {} connections: {} convergance: {} bytes: {} msgs: {} overhead: {}", + num, + now, + total, + convergance, + bytes, + msgs, + overhead + ); + } + (convergance, bytes) + } + + #[test] + fn test_star_network_pull_50() { + let mut network = star_network_create(50); + network_simulator_pull_only(&mut network); + } + #[test] + fn test_star_network_pull_100() { + let mut network = star_network_create(100); + network_simulator_pull_only(&mut network); + } + #[test] + fn test_star_network_push_star_200() { + let mut network = star_network_create(200); + network_simulator(&mut network); + } + #[test] + fn test_star_network_push_rstar_200() { + let mut network = rstar_network_create(200); + network_simulator(&mut network); + } + #[test] + fn test_star_network_push_ring_200() { + let mut network = ring_network_create(200); + network_simulator(&mut network); + } + #[test] + #[ignore] + fn test_star_network_large_pull() { + use logger; + logger::setup(); + let mut network = star_network_create(2000); + network_simulator_pull_only(&mut network); + } + #[test] + #[ignore] + fn test_rstar_network_large_push() { + use logger; + logger::setup(); + let mut network = rstar_network_create(4000); + network_simulator(&mut network); + } + #[test] + #[ignore] + fn test_ring_network_large_push() { + use logger; + logger::setup(); + let mut network = ring_network_create(4001); + network_simulator(&mut network); + } + #[test] + #[ignore] + 
fn test_star_network_large_push() { + use logger; + logger::setup(); + let mut network = star_network_create(4002); + network_simulator(&mut network); + } +} diff --git a/src/crds_gossip_error.rs b/src/crds_gossip_error.rs new file mode 100644 index 0000000000..d9d00ce77c --- /dev/null +++ b/src/crds_gossip_error.rs @@ -0,0 +1,7 @@ +#[derive(PartialEq, Debug)] +pub enum CrdsGossipError { + NoPeers, + PushMessageTimeout, + PushMessagePrune, + PushMessageOldVersion, +} diff --git a/src/crds_gossip_pull.rs b/src/crds_gossip_pull.rs new file mode 100644 index 0000000000..2555aa56e1 --- /dev/null +++ b/src/crds_gossip_pull.rs @@ -0,0 +1,378 @@ +//! Crds Gossip Pull overlay +//! This module implements the anti-entropy protocol for the network. +//! +//! The basic strategy is as follows: +//! 1. Construct a bloom filter of the local data set +//! 2. Randomly ask a node on the network for data that is not contained in the bloom filter. +//! +//! Bloom filters have a false positive rate. Each requests uses a different bloom filter +//! with random hash functions. So each subsequent request will have a different distribution +//! of false positives. + +use bincode::serialized_size; +use bloom::Bloom; +use crds::Crds; +use crds_gossip_error::CrdsGossipError; +use crds_value::{CrdsValue, CrdsValueLabel}; +use hash::Hash; +use packet::BLOB_DATA_SIZE; +use rand; +use rand::distributions::{Distribution, Weighted, WeightedChoice}; +use solana_sdk::pubkey::Pubkey; +use std::cmp; +use std::collections::HashMap; +use std::collections::VecDeque; + +pub const CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS: u64 = 15000; + +pub struct CrdsGossipPull { + /// timestamp of last request + pub pull_request_time: HashMap, + /// hash and insert time + purged_values: VecDeque<(Hash, u64)>, + /// max bytes per message + pub max_bytes: usize, + pub crds_timeout: u64, +} + +impl Default for CrdsGossipPull { + fn default() -> Self { + Self { + purged_values: VecDeque::new(), + pull_request_time: HashMap::new(), + max_bytes: BLOB_DATA_SIZE, + crds_timeout: CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, + } + } +} +impl CrdsGossipPull { + /// generate a random request + pub fn new_pull_request( + &self, + crds: &Crds, + self_id: Pubkey, + now: u64, + ) -> Result<(Pubkey, Bloom, CrdsValue), CrdsGossipError> { + let mut options: Vec<_> = crds + .table + .values() + .filter_map(|v| v.value.contact_info()) + .filter(|v| { + v.id != self_id && !v.ncp.ip().is_unspecified() && !v.ncp.ip().is_multicast() + }).map(|item| { + let req_time: u64 = *self.pull_request_time.get(&item.id).unwrap_or(&0); + let weight = cmp::max( + 1, + cmp::min(u64::from(u16::max_value()) - 1, (now - req_time) / 1024) as u32, + ); + Weighted { weight, item } + }).collect(); + if options.is_empty() { + return Err(CrdsGossipError::NoPeers); + } + let filter = self.build_crds_filter(crds); + let random = WeightedChoice::new(&mut options).sample(&mut rand::thread_rng()); + let self_info = crds + .lookup(&CrdsValueLabel::ContactInfo(self_id)) + .unwrap_or_else(|| panic!("self_id invalid {}", self_id)); + Ok((random.id, filter, self_info.clone())) + } + + /// time when a request to `from` was initiated + /// This is used for weighted random selection during `new_pull_request` + /// It's important to use the local nodes request creation time as the weight + /// instead of the response received time otherwise failed nodes will increase their weight. 
+ pub fn mark_pull_request_creation_time(&mut self, from: Pubkey, now: u64) { + self.pull_request_time.insert(from, now); + } + + /// Store an old hash in the purged values set + pub fn record_old_hash(&mut self, hash: Hash, timestamp: u64) { + self.purged_values.push_back((hash, timestamp)) + } + + /// process a pull request and create a response + pub fn process_pull_request( + &mut self, + crds: &mut Crds, + caller: CrdsValue, + mut filter: Bloom, + now: u64, + ) -> Vec { + let rv = self.filter_crds_values(crds, &mut filter); + let key = caller.label().pubkey(); + let old = crds.insert(caller, now); + if let Some(val) = old.ok().and_then(|opt| opt) { + self.purged_values + .push_back((val.value_hash, val.local_timestamp)) + } + crds.update_record_timestamp(key, now); + rv + } + /// process a pull response + pub fn process_pull_response( + &mut self, + crds: &mut Crds, + from: Pubkey, + response: Vec, + now: u64, + ) -> usize { + let mut failed = 0; + for r in response { + let owner = r.label().pubkey(); + let old = crds.insert(r, now); + failed += old.is_err() as usize; + old.ok().map(|opt| { + crds.update_record_timestamp(owner, now); + opt.map(|val| { + self.purged_values + .push_back((val.value_hash, val.local_timestamp)) + }) + }); + } + crds.update_record_timestamp(from, now); + failed + } + /// build a filter of the current crds table + fn build_crds_filter(&self, crds: &Crds) -> Bloom { + let num = crds.table.values().count() + self.purged_values.len(); + let mut bloom = Bloom::random(num, 0.1, 4 * 1024 * 8 - 1); + for v in crds.table.values() { + bloom.add(&v.value_hash); + } + for (value_hash, _insert_timestamp) in &self.purged_values { + bloom.add(value_hash); + } + bloom + } + /// filter values that fail the bloom filter up to max_bytes + fn filter_crds_values(&self, crds: &Crds, filter: &mut Bloom) -> Vec { + let mut max_bytes = self.max_bytes as isize; + let mut ret = vec![]; + for v in crds.table.values() { + if filter.contains(&v.value_hash) { + continue; + } + max_bytes -= serialized_size(&v.value).unwrap() as isize; + if max_bytes < 0 { + break; + } + ret.push(v.value.clone()); + } + ret + } + /// Purge values from the crds that are older then `active_timeout` + /// The value_hash of an active item is put into self.purged_values queue + pub fn purge_active(&mut self, crds: &mut Crds, self_id: Pubkey, min_ts: u64) { + let old = crds.find_old_labels(min_ts); + let mut purged: VecDeque<_> = old + .iter() + .filter(|label| label.pubkey() != self_id) + .filter_map(|label| { + let rv = crds + .lookup_versioned(label) + .map(|val| (val.value_hash, val.local_timestamp)); + crds.remove(label); + rv + }).collect(); + self.purged_values.append(&mut purged); + } + /// Purge values from the `self.purged_values` queue that are older then purge_timeout + pub fn purge_purged(&mut self, min_ts: u64) { + let cnt = self + .purged_values + .iter() + .take_while(|v| v.1 < min_ts) + .count(); + self.purged_values.drain(..cnt); + } +} +#[cfg(test)] +mod test { + use super::*; + use contact_info::ContactInfo; + use crds_value::LeaderId; + use signature::{Keypair, KeypairUtil}; + + #[test] + fn test_new_pull_request() { + let mut crds = Crds::default(); + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let id = entry.label().pubkey(); + let node = CrdsGossipPull::default(); + assert_eq!( + node.new_pull_request(&crds, id, 0), + Err(CrdsGossipError::NoPeers) + ); + + crds.insert(entry.clone(), 0).unwrap(); + assert_eq!( + node.new_pull_request(&crds, 
id, 0), + Err(CrdsGossipError::NoPeers) + ); + + let new = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + crds.insert(new.clone(), 0).unwrap(); + let req = node.new_pull_request(&crds, id, 0); + let (to, _, self_info) = req.unwrap(); + assert_eq!(to, new.label().pubkey()); + assert_eq!(self_info, entry); + } + + #[test] + fn test_new_mark_creation_time() { + let mut crds = Crds::default(); + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let node_id = entry.label().pubkey(); + let mut node = CrdsGossipPull::default(); + crds.insert(entry.clone(), 0).unwrap(); + let old = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + crds.insert(old.clone(), 0).unwrap(); + let new = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + crds.insert(new.clone(), 0).unwrap(); + + // set request creation time to max_value + node.mark_pull_request_creation_time(new.label().pubkey(), u64::max_value()); + + // odds of getting the other request should be 1 in u64::max_value() + for _ in 0..10 { + let req = node.new_pull_request(&crds, node_id, u64::max_value()); + let (to, _, self_info) = req.unwrap(); + assert_eq!(to, old.label().pubkey()); + assert_eq!(self_info, entry); + } + } + + #[test] + fn test_process_pull_request() { + let mut node_crds = Crds::default(); + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let node_id = entry.label().pubkey(); + let node = CrdsGossipPull::default(); + node_crds.insert(entry.clone(), 0).unwrap(); + let new = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + node_crds.insert(new.clone(), 0).unwrap(); + let req = node.new_pull_request(&node_crds, node_id, 0); + + let mut dest_crds = Crds::default(); + let mut dest = CrdsGossipPull::default(); + let (_, filter, caller) = req.unwrap(); + let rsp = dest.process_pull_request(&mut dest_crds, caller.clone(), filter, 1); + assert!(rsp.is_empty()); + assert!(dest_crds.lookup(&caller.label()).is_some()); + assert_eq!( + dest_crds + .lookup_versioned(&caller.label()) + .unwrap() + .insert_timestamp, + 1 + ); + assert_eq!( + dest_crds + .lookup_versioned(&caller.label()) + .unwrap() + .local_timestamp, + 1 + ); + } + #[test] + fn test_process_pull_request_response() { + let mut node_crds = Crds::default(); + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let node_id = entry.label().pubkey(); + let mut node = CrdsGossipPull::default(); + node_crds.insert(entry.clone(), 0).unwrap(); + let new = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + node_crds.insert(new.clone(), 0).unwrap(); + + let mut dest = CrdsGossipPull::default(); + let mut dest_crds = Crds::default(); + let new = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + dest_crds.insert(new.clone(), 0).unwrap(); + + // node contains a key from the dest node, but at an older local timestamp + let dest_id = new.label().pubkey(); + let same_key = CrdsValue::LeaderId(LeaderId { + id: dest_id, + leader_id: dest_id, + wallclock: 1, + }); + node_crds.insert(same_key.clone(), 0).unwrap(); + assert_eq!( + node_crds + .lookup_versioned(&same_key.label()) + .unwrap() + .local_timestamp, + 0 + ); + let mut done = false; + for _ in 0..30 { + // there is a chance of a false positive with bloom filters + let req = node.new_pull_request(&node_crds, node_id, 0); + let 
(_, filter, caller) = req.unwrap(); + let rsp = dest.process_pull_request(&mut dest_crds, caller, filter, 0); + // if there is a false positive this is empty + // prob should be around 0.1 per iteration + if rsp.is_empty() { + continue; + } + + assert_eq!(rsp.len(), 1); + let failed = node.process_pull_response(&mut node_crds, node_id, rsp, 1); + assert_eq!(failed, 0); + assert_eq!( + node_crds + .lookup_versioned(&new.label()) + .unwrap() + .local_timestamp, + 1 + ); + // verify that the whole record was updated for dest since this is a response from dest + assert_eq!( + node_crds + .lookup_versioned(&same_key.label()) + .unwrap() + .local_timestamp, + 1 + ); + done = true; + break; + } + assert!(done); + } + #[test] + fn test_gossip_purge() { + let mut node_crds = Crds::default(); + let entry = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let node_label = entry.label(); + let node_id = node_label.pubkey(); + let mut node = CrdsGossipPull::default(); + node_crds.insert(entry.clone(), 0).unwrap(); + let old = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + node_crds.insert(old.clone(), 0).unwrap(); + let value_hash = node_crds.lookup_versioned(&old.label()).unwrap().value_hash; + + //verify self is valid + assert_eq!(node_crds.lookup(&node_label).unwrap().label(), node_label); + + // purge + node.purge_active(&mut node_crds, node_id, 1); + + //verify self is still valid after purge + assert_eq!(node_crds.lookup(&node_label).unwrap().label(), node_label); + + assert_eq!(node_crds.lookup_versioned(&old.label()), None); + assert_eq!(node.purged_values.len(), 1); + for _ in 0..30 { + // there is a chance of a false positive with bloom filters + // assert that purged value is still in the set + // chance of 30 consecutive false positives is 0.1^30 + let mut filter = node.build_crds_filter(&node_crds); + assert!(filter.contains(&value_hash)); + } + + // purge the value + node.purge_purged(1); + assert_eq!(node.purged_values.len(), 0); + } +} diff --git a/src/crds_gossip_push.rs b/src/crds_gossip_push.rs new file mode 100644 index 0000000000..79d2e1efae --- /dev/null +++ b/src/crds_gossip_push.rs @@ -0,0 +1,453 @@ +//! Crds Gossip Push overlay +//! This module is used to propagate recently created CrdsValues across the network +//! Eager push strategy is based on Plumtree +//! http://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf +//! +//! Main differences are: +//! 1. There is no `max hop`. Messages are signed with a local wallclock. If they are outside of +//! the local nodes wallclock window they are drooped silently. +//! 2. The prune set is stored in a Bloom filter. 
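+//!
+//! For example, with the default `CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS` of 5000 ms, a value
+//! stamped with wallclock `w` is only accepted while the local clock `now` satisfies
+//! `w - 5000 <= now <= w + 5000`; values outside that window are rejected by
+//! `process_push_message` and never propagated.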
+ +use bincode::serialized_size; +use bloom::Bloom; +use crds::{Crds, VersionedCrdsValue}; +use crds_gossip_error::CrdsGossipError; +use crds_value::{CrdsValue, CrdsValueLabel}; +use hash::Hash; +use indexmap::map::IndexMap; +use packet::BLOB_DATA_SIZE; +use rand::{self, Rng}; +use solana_sdk::pubkey::Pubkey; +use std::cmp; +use std::collections::HashMap; + +pub const CRDS_GOSSIP_NUM_ACTIVE: usize = 30; +pub const CRDS_GOSSIP_PUSH_FANOUT: usize = 6; +pub const CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS: u64 = 5000; + +pub struct CrdsGossipPush { + /// max bytes per message + pub max_bytes: usize, + /// active set of validators for push + active_set: IndexMap>, + /// push message queue + push_messages: HashMap, + pushed_once: HashMap, + pub num_active: usize, + pub push_fanout: usize, + pub msg_timeout: u64, +} + +impl Default for CrdsGossipPush { + fn default() -> Self { + Self { + max_bytes: BLOB_DATA_SIZE, + active_set: IndexMap::new(), + push_messages: HashMap::new(), + pushed_once: HashMap::new(), + num_active: CRDS_GOSSIP_NUM_ACTIVE, + push_fanout: CRDS_GOSSIP_PUSH_FANOUT, + msg_timeout: CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS, + } + } +} +impl CrdsGossipPush { + pub fn num_pending(&self) -> usize { + self.push_messages.len() + } + /// process a push message to the network + pub fn process_push_message( + &mut self, + crds: &mut Crds, + value: CrdsValue, + now: u64, + ) -> Result, CrdsGossipError> { + if now > value.wallclock() + self.msg_timeout { + return Err(CrdsGossipError::PushMessageTimeout); + } + if now + self.msg_timeout < value.wallclock() { + return Err(CrdsGossipError::PushMessageTimeout); + } + let label = value.label(); + + let new_value = crds.new_versioned(now, value); + let value_hash = new_value.value_hash; + if self.pushed_once.get(&value_hash).is_some() { + return Err(CrdsGossipError::PushMessagePrune); + } + let old = crds.insert_versioned(new_value); + if old.is_err() { + return Err(CrdsGossipError::PushMessageOldVersion); + } + self.push_messages.insert(label, value_hash); + self.pushed_once.insert(value_hash, now); + Ok(old.ok().and_then(|opt| opt)) + } + + /// New push message to broadcast to peers. + /// Returns a list of Pubkeys for the selected peers and a list of values to send to all the + /// peers. + /// The list of push messages is created such that all the randomly selected peers have not + /// pruned the source addresses. 
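+    /// For example, with the default `push_fanout` of 6, at most six peers are sampled at
+    /// random from the active set, and queued values are drained into the batch until it
+    /// reaches `max_bytes` of serialized data.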
+ pub fn new_push_messages(&mut self, crds: &Crds, now: u64) -> (Vec, Vec) { + let max = self.active_set.len(); + let mut nodes: Vec<_> = (0..max).collect(); + rand::thread_rng().shuffle(&mut nodes); + let peers: Vec = nodes + .into_iter() + .filter_map(|n| self.active_set.get_index(n)) + .take(self.push_fanout) + .map(|n| *n.0) + .collect(); + let mut total_bytes: usize = 0; + let mut values = vec![]; + for (label, hash) in &self.push_messages { + let mut failed = false; + for p in &peers { + let filter = self.active_set.get_mut(p); + failed |= filter.is_none() || filter.unwrap().contains(&label.pubkey()); + } + if failed { + continue; + } + let res = crds.lookup_versioned(label); + if res.is_none() { + continue; + } + let version = res.unwrap(); + if version.value_hash != *hash { + continue; + } + let value = &version.value; + if value.wallclock() > now || value.wallclock() + self.msg_timeout < now { + continue; + } + total_bytes += serialized_size(value).unwrap() as usize; + if total_bytes > self.max_bytes { + break; + } + values.push(value.clone()); + } + for v in &values { + self.push_messages.remove(&v.label()); + } + (peers, values) + } + + /// add the `from` to the peer's filter of nodes + pub fn process_prune_msg(&mut self, peer: Pubkey, origins: &[Pubkey]) { + for origin in origins { + if let Some(p) = self.active_set.get_mut(&peer) { + p.add(origin) + } + } + } + + fn compute_need(num_active: usize, active_set_len: usize, ratio: usize) -> usize { + let num = active_set_len / ratio; + cmp::min(num_active, (num_active - active_set_len) + num) + } + + /// refresh the push active set + /// * ratio - active_set.len()/ratio is the number of actives to rotate + pub fn refresh_push_active_set( + &mut self, + crds: &Crds, + self_id: Pubkey, + network_size: usize, + ratio: usize, + ) { + let need = Self::compute_need(self.num_active, self.active_set.len(), ratio); + let mut new_items = HashMap::new(); + let mut ixs: Vec<_> = (0..crds.table.len()).collect(); + rand::thread_rng().shuffle(&mut ixs); + + for ix in ixs { + let item = crds.table.get_index(ix); + if item.is_none() { + continue; + } + let val = item.unwrap(); + if val.0.pubkey() == self_id { + continue; + } + if self.active_set.get(&val.0.pubkey()).is_some() { + continue; + } + if new_items.get(&val.0.pubkey()).is_some() { + continue; + } + let bloom = Bloom::random(network_size, 0.1, 1024 * 8 * 4); + new_items.insert(val.0.pubkey(), bloom); + if new_items.len() == need { + break; + } + } + let mut keys: Vec = self.active_set.keys().cloned().collect(); + rand::thread_rng().shuffle(&mut keys); + let num = keys.len() / ratio; + for k in &keys[..num] { + self.active_set.remove(k); + } + for (k, v) in new_items { + self.active_set.insert(k, v); + } + } + + /// purge old pending push messages + pub fn purge_old_pending_push_messages(&mut self, crds: &Crds, min_time: u64) { + let old_msgs: Vec = self + .push_messages + .iter() + .filter_map(|(k, hash)| { + if let Some(versioned) = crds.lookup_versioned(k) { + if versioned.value.wallclock() < min_time || versioned.value_hash != *hash { + Some(k) + } else { + None + } + } else { + Some(k) + } + }).cloned() + .collect(); + for k in old_msgs { + self.push_messages.remove(&k); + } + } + /// purge old pushed_once messages + pub fn purge_old_pushed_once_messages(&mut self, min_time: u64) { + let old_msgs: Vec = self + .pushed_once + .iter() + .filter_map(|(k, v)| if *v < min_time { Some(k) } else { None }) + .cloned() + .collect(); + for k in old_msgs { + self.pushed_once.remove(&k); + } + } 
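+
+    // With the defaults above, compute_need(30, 0, 10) == 30 for an empty active set and
+    // compute_need(30, 30, 10) == 3 once the set is full, so each call to
+    // refresh_push_active_set rotates roughly active_set.len() / ratio peers.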
+} + +#[cfg(test)] +mod test { + use super::*; + use contact_info::ContactInfo; + use signature::{Keypair, KeypairUtil}; + #[test] + fn test_process_push() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let value = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + let label = value.label(); + // push a new message + assert_eq!( + push.process_push_message(&mut crds, value.clone(), 0), + Ok(None) + ); + assert_eq!(crds.lookup(&label), Some(&value)); + + // push it again + assert_eq!( + push.process_push_message(&mut crds, value.clone(), 0), + Err(CrdsGossipError::PushMessagePrune) + ); + } + #[test] + fn test_process_push_old_version() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let mut ci = ContactInfo::new_localhost(Keypair::new().pubkey(), 0); + ci.wallclock = 1; + let value = CrdsValue::ContactInfo(ci.clone()); + + // push a new message + assert_eq!(push.process_push_message(&mut crds, value, 0), Ok(None)); + + // push an old version + ci.wallclock = 0; + let value = CrdsValue::ContactInfo(ci.clone()); + assert_eq!( + push.process_push_message(&mut crds, value, 0), + Err(CrdsGossipError::PushMessageOldVersion) + ); + } + #[test] + fn test_process_push_timeout() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let timeout = push.msg_timeout; + let mut ci = ContactInfo::new_localhost(Keypair::new().pubkey(), 0); + + // push a version to far in the future + ci.wallclock = timeout + 1; + let value = CrdsValue::ContactInfo(ci.clone()); + assert_eq!( + push.process_push_message(&mut crds, value, 0), + Err(CrdsGossipError::PushMessageTimeout) + ); + + // push a version to far in the past + ci.wallclock = 0; + let value = CrdsValue::ContactInfo(ci.clone()); + assert_eq!( + push.process_push_message(&mut crds, value, timeout + 1), + Err(CrdsGossipError::PushMessageTimeout) + ); + } + #[test] + fn test_process_push_update() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let mut ci = ContactInfo::new_localhost(Keypair::new().pubkey(), 0); + ci.wallclock = 0; + let value_old = CrdsValue::ContactInfo(ci.clone()); + + // push a new message + assert_eq!( + push.process_push_message(&mut crds, value_old.clone(), 0), + Ok(None) + ); + + // push an old version + ci.wallclock = 1; + let value = CrdsValue::ContactInfo(ci.clone()); + assert_eq!( + push.process_push_message(&mut crds, value, 0) + .unwrap() + .unwrap() + .value, + value_old + ); + } + #[test] + fn test_compute_need() { + assert_eq!(CrdsGossipPush::compute_need(30, 0, 10), 30); + assert_eq!(CrdsGossipPush::compute_need(30, 1, 10), 29); + assert_eq!(CrdsGossipPush::compute_need(30, 30, 10), 3); + assert_eq!(CrdsGossipPush::compute_need(30, 29, 10), 3); + } + #[test] + fn test_refresh_active_set() { + use logger; + logger::setup(); + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let value1 = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + + assert_eq!(crds.insert(value1.clone(), 0), Ok(None)); + push.refresh_push_active_set(&crds, Pubkey::default(), 1, 1); + + assert!(push.active_set.get(&value1.label().pubkey()).is_some()); + let value2 = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert!(push.active_set.get(&value2.label().pubkey()).is_none()); + assert_eq!(crds.insert(value2.clone(), 0), Ok(None)); + for _ in 0..30 { + push.refresh_push_active_set(&crds, 
Pubkey::default(), 1, 1); + if push.active_set.get(&value2.label().pubkey()).is_some() { + break; + } + } + assert!(push.active_set.get(&value2.label().pubkey()).is_some()); + + for _ in 0..push.num_active { + let value2 = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!(crds.insert(value2.clone(), 0), Ok(None)); + } + push.refresh_push_active_set(&crds, Pubkey::default(), 1, 1); + assert_eq!(push.active_set.len(), push.num_active); + } + #[test] + fn test_new_push_messages() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let peer = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!(crds.insert(peer.clone(), 0), Ok(None)); + push.refresh_push_active_set(&crds, Pubkey::default(), 1, 1); + + let new_msg = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!( + push.process_push_message(&mut crds, new_msg.clone(), 0), + Ok(None) + ); + assert_eq!(push.active_set.len(), 1); + assert_eq!( + push.new_push_messages(&crds, 0), + (vec![peer.label().pubkey()], vec![new_msg]) + ); + } + #[test] + fn test_process_prune() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let peer = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!(crds.insert(peer.clone(), 0), Ok(None)); + push.refresh_push_active_set(&crds, Pubkey::default(), 1, 1); + + let new_msg = + CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!( + push.process_push_message(&mut crds, new_msg.clone(), 0), + Ok(None) + ); + push.process_prune_msg(peer.label().pubkey(), &[new_msg.label().pubkey()]); + assert_eq!( + push.new_push_messages(&crds, 0), + (vec![peer.label().pubkey()], vec![]) + ); + } + #[test] + fn test_purge_old_pending_push_messages() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let peer = CrdsValue::ContactInfo(ContactInfo::new_localhost(Keypair::new().pubkey(), 0)); + assert_eq!(crds.insert(peer.clone(), 0), Ok(None)); + push.refresh_push_active_set(&crds, Pubkey::default(), 1, 1); + + let mut ci = ContactInfo::new_localhost(Keypair::new().pubkey(), 0); + ci.wallclock = 1; + let new_msg = CrdsValue::ContactInfo(ci.clone()); + assert_eq!( + push.process_push_message(&mut crds, new_msg.clone(), 1), + Ok(None) + ); + push.purge_old_pending_push_messages(&crds, 0); + assert_eq!( + push.new_push_messages(&crds, 0), + (vec![peer.label().pubkey()], vec![]) + ); + } + + #[test] + fn test_purge_old_pushed_once_messages() { + let mut crds = Crds::default(); + let mut push = CrdsGossipPush::default(); + let mut ci = ContactInfo::new_localhost(Keypair::new().pubkey(), 0); + ci.wallclock = 0; + let value = CrdsValue::ContactInfo(ci.clone()); + let label = value.label(); + // push a new message + assert_eq!( + push.process_push_message(&mut crds, value.clone(), 0), + Ok(None) + ); + assert_eq!(crds.lookup(&label), Some(&value)); + + // push it again + assert_eq!( + push.process_push_message(&mut crds, value.clone(), 0), + Err(CrdsGossipError::PushMessagePrune) + ); + + // purge the old pushed + push.purge_old_pushed_once_messages(1); + + // push it again + assert_eq!( + push.process_push_message(&mut crds, value.clone(), 0), + Err(CrdsGossipError::PushMessageOldVersion) + ); + } +} diff --git a/src/crds_traits_impls.rs b/src/crds_traits_impls.rs new file mode 100644 index 0000000000..b2c9db2142 --- /dev/null +++ 
b/src/crds_traits_impls.rs
@@ -0,0 +1,26 @@
+use bloom::BloomHashIndex;
+use hash::Hash;
+use solana_sdk::pubkey::Pubkey;
+
+fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
+    let len = slice.len();
+    assert!(len < 256);
+    let mut rv = 0u64;
+    for i in 0..8 {
+        let pos = (hash_index >> i) & 0xff;
+        rv |= u64::from(slice[pos as usize % len]) << i;
+    }
+    rv
+}
+
+impl BloomHashIndex for Pubkey {
+    fn hash(&self, hash_index: u64) -> u64 {
+        slice_hash(self.as_ref(), hash_index)
+    }
+}
+
+impl BloomHashIndex for Hash {
+    fn hash(&self, hash_index: u64) -> u64 {
+        slice_hash(self.as_ref(), hash_index)
+    }
+}
diff --git a/src/crds_value.rs b/src/crds_value.rs
new file mode 100644
index 0000000000..512d490a41
--- /dev/null
+++ b/src/crds_value.rs
@@ -0,0 +1,147 @@
+use contact_info::ContactInfo;
+use solana_sdk::pubkey::Pubkey;
+use std::fmt;
+use transaction::Transaction;
+
+/// CrdsValue that is replicated across the cluster
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
+pub enum CrdsValue {
+    /// * Merge Strategy - Latest wallclock is picked
+    ContactInfo(ContactInfo),
+    /// TODO, Votes need a height potentially in the userdata
+    /// * Merge Strategy - Latest height is picked
+    Vote(Vote),
+    /// * Merge Strategy - Latest wallclock is picked
+    LeaderId(LeaderId),
+}
+
+#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
+pub struct LeaderId {
+    pub id: Pubkey,
+    pub leader_id: Pubkey,
+    pub wallclock: u64,
+}
+
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
+pub struct Vote {
+    pub transaction: Transaction,
+    pub height: u64,
+    pub wallclock: u64,
+}
+
+/// Type of the replicated value
+/// These are labels for values in a record that is associated with a `Pubkey`
+#[derive(PartialEq, Hash, Eq, Clone, Debug)]
+pub enum CrdsValueLabel {
+    ContactInfo(Pubkey),
+    Vote(Pubkey),
+    LeaderId(Pubkey),
+}
+
+impl fmt::Display for CrdsValueLabel {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            CrdsValueLabel::ContactInfo(_) => write!(f, "ContactInfo({})", self.pubkey()),
+            CrdsValueLabel::Vote(_) => write!(f, "Vote({})", self.pubkey()),
+            CrdsValueLabel::LeaderId(_) => write!(f, "LeaderId({})", self.pubkey()),
+        }
+    }
+}
+
+impl CrdsValueLabel {
+    pub fn pubkey(&self) -> Pubkey {
+        match self {
+            CrdsValueLabel::ContactInfo(p) => *p,
+            CrdsValueLabel::Vote(p) => *p,
+            CrdsValueLabel::LeaderId(p) => *p,
+        }
+    }
+}
+
+impl CrdsValue {
+    /// Totally insecure, unverifiable wallclock of the node that generated this message.
+    /// The latest wallclock is always picked.
+    /// This is used to time out push messages.
+ pub fn wallclock(&self) -> u64 { + match self { + CrdsValue::ContactInfo(contact_info) => contact_info.wallclock, + CrdsValue::Vote(vote) => vote.wallclock, + CrdsValue::LeaderId(leader_id) => leader_id.wallclock, + } + } + pub fn label(&self) -> CrdsValueLabel { + match self { + CrdsValue::ContactInfo(contact_info) => CrdsValueLabel::ContactInfo(contact_info.id), + CrdsValue::Vote(vote) => CrdsValueLabel::Vote(vote.transaction.account_keys[0]), + CrdsValue::LeaderId(leader_id) => CrdsValueLabel::LeaderId(leader_id.id), + } + } + pub fn contact_info(&self) -> Option<&ContactInfo> { + match self { + CrdsValue::ContactInfo(contact_info) => Some(contact_info), + _ => None, + } + } + pub fn leader_id(&self) -> Option<&LeaderId> { + match self { + CrdsValue::LeaderId(leader_id) => Some(leader_id), + _ => None, + } + } + pub fn vote(&self) -> Option<&Vote> { + match self { + CrdsValue::Vote(vote) => Some(vote), + _ => None, + } + } + /// Return all the possible labels for a record identified by Pubkey. + pub fn record_labels(key: Pubkey) -> [CrdsValueLabel; 3] { + [ + CrdsValueLabel::ContactInfo(key), + CrdsValueLabel::Vote(key), + CrdsValueLabel::LeaderId(key), + ] + } +} +#[cfg(test)] +mod test { + use super::*; + use contact_info::ContactInfo; + use system_transaction::test_tx; + + #[test] + fn test_labels() { + let mut hits = [false; 3]; + // this method should cover all the possible labels + for v in &CrdsValue::record_labels(Pubkey::default()) { + match v { + CrdsValueLabel::ContactInfo(_) => hits[0] = true, + CrdsValueLabel::Vote(_) => hits[1] = true, + CrdsValueLabel::LeaderId(_) => hits[2] = true, + } + } + assert!(hits.iter().all(|x| *x)); + } + #[test] + fn test_keys_and_values() { + let v = CrdsValue::LeaderId(LeaderId::default()); + let key = v.clone().leader_id().unwrap().id; + assert_eq!(v.wallclock(), 0); + assert_eq!(v.label(), CrdsValueLabel::LeaderId(key)); + + let v = CrdsValue::ContactInfo(ContactInfo::default()); + assert_eq!(v.wallclock(), 0); + let key = v.clone().contact_info().unwrap().id; + assert_eq!(v.label(), CrdsValueLabel::ContactInfo(key)); + + let v = CrdsValue::Vote(Vote { + transaction: test_tx(), + height: 1, + wallclock: 0, + }); + assert_eq!(v.wallclock(), 0); + let key = v.clone().vote().unwrap().transaction.account_keys[0]; + assert_eq!(v.label(), CrdsValueLabel::Vote(key)); + } + +} diff --git a/src/drone.rs b/src/drone.rs index 2af670adc0..5b895845bc 100644 --- a/src/drone.rs +++ b/src/drone.rs @@ -109,11 +109,7 @@ impl Drone { let leader = poll_gossip_for_leader(self.network_addr, Some(10)) .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?; - let mut client = ThinClient::new( - leader.contact_info.rpc, - leader.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader.rpc, leader.tpu, transactions_socket); let last_id = client.get_last_id(); let mut tx = match req { @@ -343,22 +339,12 @@ mod tests { let mut addr: SocketAddr = "0.0.0.0:9900".parse().expect("bind to drone socket"); addr.set_ip(get_ip_addr().expect("drone get_ip_addr")); - let mut drone = Drone::new( - alice.keypair(), - addr, - leader_data.contact_info.ncp, - None, - Some(150_000), - ); + let mut drone = Drone::new(alice.keypair(), addr, leader_data.ncp, None, Some(150_000)); let transactions_socket = UdpSocket::bind("0.0.0.0:0").expect("drone bind to transactions socket"); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = 
ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let bob_req = DroneRequest::GetAirdrop { airdrop_request_amount: 50, @@ -387,11 +373,7 @@ mod tests { let transactions_socket = UdpSocket::bind("0.0.0.0:0").expect("drone bind to transactions socket"); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let carlos_req = DroneRequest::GetAirdrop { airdrop_request_amount: 5_000_000, diff --git a/src/fullnode.rs b/src/fullnode.rs index 5d733a9056..20a17e9632 100644 --- a/src/fullnode.rs +++ b/src/fullnode.rs @@ -16,6 +16,7 @@ use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, RwLock}; use std::thread::Result; +use timing::timestamp; use tpu::{Tpu, TpuReturnType}; use tvu::{Tvu, TvuReturnType}; use untrusted::Input; @@ -148,9 +149,9 @@ impl Fullnode { info!( "starting... local gossip address: {} (advertising {})", - local_gossip_addr, node.info.contact_info.ncp + local_gossip_addr, node.info.ncp ); - let mut rpc_addr = node.info.contact_info.rpc; + let mut rpc_addr = node.info.rpc; if let Some(port) = rpc_port { rpc_addr.set_port(port); } @@ -198,16 +199,16 @@ impl Fullnode { sigverify_disabled: bool, rpc_port: Option, ) -> Self { - let mut rpc_addr = node.info.contact_info.rpc; - let mut rpc_pubsub_addr = node.info.contact_info.rpc_pubsub; + let mut rpc_addr = node.info.rpc; + let mut rpc_pubsub_addr = node.info.rpc_pubsub; // Use custom RPC port, if provided (`Some(port)`) // RPC port may be any valid open port on the node // If rpc_port == `None`, node will listen on the ports set in NodeInfo if let Some(port) = rpc_port { rpc_addr.set_port(port); - node.info.contact_info.rpc = rpc_addr; + node.info.rpc = rpc_addr; rpc_pubsub_addr.set_port(port + 1); - node.info.contact_info.rpc_pubsub = rpc_pubsub_addr; + node.info.rpc_pubsub = rpc_pubsub_addr; } let exit = Arc::new(AtomicBool::new(false)); @@ -215,6 +216,7 @@ impl Fullnode { let window = new_window(32 * 1024); let shared_window = Arc::new(RwLock::new(window)); + node.info.wallclock = timestamp(); let cluster_info = Arc::new(RwLock::new( ClusterInfo::new(node.info).expect("ClusterInfo::new"), )); @@ -233,7 +235,10 @@ impl Fullnode { // Insert the bootstrap leader info, should only be None if this node // is the bootstrap leader if let Some(bootstrap_leader_info) = bootstrap_leader_info_option { - cluster_info.write().unwrap().insert(bootstrap_leader_info); + cluster_info + .write() + .unwrap() + .insert_info(bootstrap_leader_info.clone()); } // Get the scheduled leader @@ -738,7 +743,7 @@ mod tests { &bootstrap_leader_ledger_path, Arc::new(bootstrap_leader_keypair), Arc::new(Keypair::new()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -829,7 +834,7 @@ mod tests { &bootstrap_leader_ledger_path, bootstrap_leader_keypair, leader_vote_account_keypair, - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -848,7 +853,7 @@ mod tests { &bootstrap_leader_ledger_path, Arc::new(validator_keypair), Arc::new(validator_vote_account_keypair), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -869,7 +874,7 @@ mod tests { 
let leader_keypair = Keypair::new(); let leader_node = Node::new_localhost_with_pubkey(leader_keypair.pubkey()); let leader_id = leader_node.info.id; - let leader_ncp = leader_node.info.contact_info.ncp; + let leader_ncp = leader_node.info.ncp; // Create validator identity let num_ending_ticks = 1; @@ -954,7 +959,7 @@ mod tests { // "extra_blobs" number of blobs to make sure the window stops in the right place. let extra_blobs = cmp::max(leader_rotation_interval / 3, 1); let total_blobs_to_send = bootstrap_height + extra_blobs; - let tvu_address = &validator_info.contact_info.tvu; + let tvu_address = &validator_info.tvu; let msgs = make_consecutive_blobs( leader_id, total_blobs_to_send, diff --git a/src/lib.rs b/src/lib.rs index 2b7f4459e2..2d920b8c3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ pub mod counter; pub mod bank; pub mod banking_stage; pub mod blob_fetch_stage; +pub mod bloom; pub mod bpf_loader; pub mod broadcast_stage; pub mod budget_expr; @@ -21,11 +22,18 @@ pub mod budget_transaction; pub mod chacha; #[cfg(all(feature = "chacha", feature = "cuda"))] pub mod chacha_cuda; -pub mod choose_gossip_peer_strategy; pub mod client; +pub mod crds; +pub mod crds_gossip; +pub mod crds_gossip_error; +pub mod crds_gossip_pull; +pub mod crds_gossip_push; +pub mod crds_traits_impls; +pub mod crds_value; #[macro_use] -pub mod cluster_info; +pub mod contact_info; pub mod budget_program; +pub mod cluster_info; pub mod compute_leader_finality_service; pub mod drone; pub mod entry; @@ -82,6 +90,7 @@ pub mod window; pub mod window_service; extern crate bincode; extern crate bs58; +extern crate bv; extern crate byteorder; extern crate bytes; extern crate chrono; @@ -93,6 +102,7 @@ extern crate generic_array; #[cfg(any(feature = "chacha", feature = "cuda"))] #[macro_use] extern crate hex_literal; +extern crate indexmap; extern crate ipnetwork; extern crate itertools; extern crate libc; diff --git a/src/ncp.rs b/src/ncp.rs index 4546454a59..7054ba3c74 100644 --- a/src/ncp.rs +++ b/src/ncp.rs @@ -26,8 +26,8 @@ impl Ncp { let (request_sender, request_receiver) = channel(); let gossip_socket = Arc::new(gossip_socket); trace!( - "Ncp: id: {:?}, listening on: {:?}", - &cluster_info.read().unwrap().id.as_ref()[..4], + "Ncp: id: {}, listening on: {:?}", + &cluster_info.read().unwrap().my_data().id, gossip_socket.local_addr().unwrap() ); let t_receiver = diff --git a/src/replicator.rs b/src/replicator.rs index 93bd28c9a0..d007a9ae2a 100644 --- a/src/replicator.rs +++ b/src/replicator.rs @@ -89,9 +89,9 @@ impl Replicator { let leader_info = network_addr.map(|i| NodeInfo::new_entry_point(&i)); let leader_pubkey; - if let Some(leader_info) = leader_info.as_ref() { + if let Some(leader_info) = leader_info { leader_pubkey = leader_info.id; - cluster_info.write().unwrap().insert(leader_info); + cluster_info.write().unwrap().insert_info(leader_info); } else { panic!("No leader info!"); } diff --git a/src/rpc.rs b/src/rpc.rs index 4ddc599f4b..42c5fee155 100644 --- a/src/rpc.rs +++ b/src/rpc.rs @@ -297,7 +297,7 @@ impl JsonRpcRequestProcessor { fn get_leader_addr(cluster_info: &Arc>) -> Result { if let Some(leader_data) = cluster_info.read().unwrap().leader_data() { - Ok(leader_data.contact_info.tpu) + Ok(leader_data.tpu) } else { Err(Error { code: ErrorCode::InternalError, @@ -368,11 +368,9 @@ mod tests { bank.process_transaction(&tx).expect("process transaction"); let request_processor = JsonRpcRequestProcessor::new(Arc::new(bank)); - let cluster_info = Arc::new(RwLock::new( - 
ClusterInfo::new(NodeInfo::new_unspecified()).unwrap(), - )); + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new(NodeInfo::default()).unwrap())); let leader = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - cluster_info.write().unwrap().insert(&leader); + cluster_info.write().unwrap().insert_info(leader.clone()); cluster_info.write().unwrap().set_leader(leader.id); let rpc_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0); let exit = Arc::new(AtomicBool::new(false)); @@ -394,9 +392,7 @@ mod tests { fn test_rpc_new() { let alice = Mint::new(10_000); let bank = Bank::new(&alice); - let cluster_info = Arc::new(RwLock::new( - ClusterInfo::new(NodeInfo::new_unspecified()).unwrap(), - )); + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new(NodeInfo::default()).unwrap())); let rpc_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 24680); let rpc_service = JsonRpcService::new(&Arc::new(bank), &cluster_info, rpc_addr); let thread = rpc_service.thread_hdl.thread(); @@ -645,7 +641,7 @@ mod tests { "method": "sendTransaction", "params": json!(vec![serial_tx]) }); - let rpc_addr = leader_data.contact_info.rpc; + let rpc_addr = leader_data.rpc; let rpc_string = format!("http://{}", rpc_addr.to_string()); let mut response = client .post(&rpc_string) @@ -689,9 +685,7 @@ mod tests { io.extend_with(rpc.to_delegate()); let meta = Meta { request_processor: JsonRpcRequestProcessor::new(Arc::new(bank)), - cluster_info: Arc::new(RwLock::new( - ClusterInfo::new(NodeInfo::new_unspecified()).unwrap(), - )), + cluster_info: Arc::new(RwLock::new(ClusterInfo::new(NodeInfo::default()).unwrap())), rpc_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0), exit: Arc::new(AtomicBool::new(false)), }; @@ -710,9 +704,7 @@ mod tests { #[test] fn test_rpc_get_leader_addr() { - let cluster_info = Arc::new(RwLock::new( - ClusterInfo::new(NodeInfo::new_unspecified()).unwrap(), - )); + let cluster_info = Arc::new(RwLock::new(ClusterInfo::new(NodeInfo::default()).unwrap())); assert_eq!( get_leader_addr(&cluster_info), Err(Error { @@ -722,7 +714,7 @@ mod tests { }) ); let leader = NodeInfo::new_with_socketaddr(&socketaddr!("127.0.0.1:1234")); - cluster_info.write().unwrap().insert(&leader); + cluster_info.write().unwrap().insert_info(leader.clone()); cluster_info.write().unwrap().set_leader(leader.id); assert_eq!( get_leader_addr(&cluster_info), diff --git a/src/thin_client.rs b/src/thin_client.rs index ee903b80ee..8128181333 100644 --- a/src/thin_client.rs +++ b/src/thin_client.rs @@ -359,7 +359,10 @@ pub fn poll_gossip_for_leader(leader_ncp: SocketAddr, timeout: Option) -> R ); let leader_entry_point = NodeInfo::new_entry_point(&leader_ncp); - cluster_info.write().unwrap().insert(&leader_entry_point); + cluster_info + .write() + .unwrap() + .insert_info(leader_entry_point); sleep(Duration::from_millis(100)); @@ -475,11 +478,7 @@ mod tests { let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let transaction_count = client.transaction_count(); assert_eq!(transaction_count, 0); let finality = client.get_finality(); @@ -532,11 +531,7 @@ mod tests { sleep(Duration::from_millis(300)); let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - 
leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let last_id = client.get_last_id(); let tx = Transaction::system_new(&alice.keypair(), bob_pubkey, 500, last_id); @@ -593,11 +588,7 @@ mod tests { sleep(Duration::from_millis(300)); let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let last_id = client.get_last_id(); let signature = client .transfer(500, &alice.keypair(), bob_pubkey, &last_id) @@ -642,11 +633,7 @@ mod tests { sleep(Duration::from_millis(300)); let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); // Create the validator account, transfer some tokens to that account let validator_keypair = Keypair::new(); @@ -744,11 +731,7 @@ mod tests { sleep(Duration::from_millis(900)); let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - let mut client = ThinClient::new( - leader_data.contact_info.rpc, - leader_data.contact_info.tpu, - transactions_socket, - ); + let mut client = ThinClient::new(leader_data.rpc, leader_data.tpu, transactions_socket); let last_id = client.get_last_id(); // give bob 500 tokens diff --git a/src/tvu.rs b/src/tvu.rs index 06444574ea..bf111db791 100644 --- a/src/tvu.rs +++ b/src/tvu.rs @@ -216,7 +216,7 @@ pub mod tests { //start cluster_info2 let mut cluster_info2 = ClusterInfo::new(target2.info.clone()).expect("ClusterInfo::new"); - cluster_info2.insert(&leader.info); + cluster_info2.insert_info(leader.info.clone()); cluster_info2.set_leader(leader.info.id); let leader_id = leader.info.id; let cref2 = Arc::new(RwLock::new(cluster_info2)); @@ -245,7 +245,7 @@ pub mod tests { let starting_balance = 10_000; let mint = Mint::new(starting_balance); - let replicate_addr = target1.info.contact_info.tvu; + let replicate_addr = target1.info.tvu; let leader_scheduler = Arc::new(RwLock::new(LeaderScheduler::from_bootstrap_leader( leader_id, ))); @@ -255,7 +255,7 @@ pub mod tests { //start cluster_info1 let mut cluster_info1 = ClusterInfo::new(target1.info.clone()).expect("ClusterInfo::new"); - cluster_info1.insert(&leader.info); + cluster_info1.insert_info(leader.info.clone()); cluster_info1.set_leader(leader.info.id); let cref1 = Arc::new(RwLock::new(cluster_info1)); let dr_1 = new_ncp(cref1.clone(), target1.sockets.gossip, exit.clone()); diff --git a/src/vote_stage.rs b/src/vote_stage.rs index 3b73b3211d..bd65723b1b 100644 --- a/src/vote_stage.rs +++ b/src/vote_stage.rs @@ -34,7 +34,7 @@ pub fn create_new_signed_vote_blob( let shared_blob = SharedBlob::default(); let tick_height = bank.tick_height(); - let leader_tpu = get_leader_tpu(bank, cluster_info)?; + let leader_tpu = get_leader_tpu(&bank, cluster_info)?; //TODO: doesn't seem like there is a synchronous call to get height and id debug!("voting on {:?}", &last_id.as_ref()[..8]); let vote = Vote { tick_height }; @@ -58,11 +58,7 @@ fn get_leader_tpu(bank: &Bank, cluster_info: &Arc>) -> Resul }; let rcluster_info = cluster_info.read().unwrap(); - let leader_tpu = rcluster_info - .table - .get(&leader_id) - .map(|leader| leader.contact_info.tpu); - + 
let leader_tpu = rcluster_info.lookup(leader_id).map(|leader| leader.tpu); if let Some(leader_tpu) = leader_tpu { Ok(leader_tpu) } else { diff --git a/src/wallet.rs b/src/wallet.rs index e17faa4c57..2d9cd93b61 100644 --- a/src/wallet.rs +++ b/src/wallet.rs @@ -317,9 +317,9 @@ pub fn process_command(config: &WalletConfig) -> Result) -> (Arc>, Ncp, UdpSocket let c = Arc::new(RwLock::new(cluster_info)); let w = Arc::new(RwLock::new(vec![])); let d = Ncp::new(&c.clone(), w, None, tn.sockets.gossip, exit); + let _ = c.read().unwrap().my_data(); (c, d, tn.sockets.replicate.pop().unwrap()) } @@ -29,38 +31,31 @@ fn test_node(exit: Arc) -> (Arc>, Ncp, UdpSocket /// Run until every node in the network has a full NodeInfo set. /// Check that nodes stop sending updates after all the NodeInfo has been shared. /// tests that actually use this function are below -fn run_gossip_topo(topo: F) +fn run_gossip_topo(num: usize, topo: F) where F: Fn(&Vec<(Arc>, Ncp, UdpSocket)>) -> (), { - let num: usize = 5; let exit = Arc::new(AtomicBool::new(false)); let listen: Vec<_> = (0..num).map(|_| test_node(exit.clone())).collect(); topo(&listen); let mut done = true; for i in 0..(num * 32) { - done = false; - trace!("round {}", i); - for (c, _, _) in &listen { - if num == c.read().unwrap().convergence() as usize { - done = true; - break; - } - } - //at least 1 node converged - if done == true { + done = true; + let total: usize = listen + .iter() + .map(|v| v.0.read().unwrap().ncp_peers().len()) + .sum(); + if (total + num) * 10 > num * num * 9 { + done = true; break; + } else { + trace!("not converged {} {} {}", i, total + num, num * num); } sleep(Duration::new(1, 0)); } exit.store(true, Ordering::Relaxed); - for (c, dr, _) in listen { + for (_, dr, _) in listen { dr.join().unwrap(); - // make it clear what failed - // protocol is to chatty, updates should stop after everyone receives `num` - assert!(c.read().unwrap().update_index <= num as u64); - // protocol is not chatty enough, everyone should get `num` entries - assert_eq!(c.read().unwrap().table.len(), num); } assert!(done); } @@ -68,37 +63,57 @@ where #[test] fn gossip_ring() -> result::Result<()> { logger::setup(); - run_gossip_topo(|listen| { + run_gossip_topo(50, |listen| { let num = listen.len(); for n in 0..num { let y = n % listen.len(); let x = (n + 1) % listen.len(); let mut xv = listen[x].0.write().unwrap(); let yv = listen[y].0.read().unwrap(); - let mut d = yv.table[&yv.id].clone(); - d.version = 0; - xv.insert(&d); + let mut d = yv.lookup(yv.id()).unwrap().clone(); + d.wallclock = timestamp(); + xv.insert_info(d); } }); Ok(()) } +/// ring a -> b -> c -> d -> e -> a +#[test] +#[ignore] +fn gossip_ring_large() -> result::Result<()> { + logger::setup(); + run_gossip_topo(600, |listen| { + let num = listen.len(); + for n in 0..num { + let y = n % listen.len(); + let x = (n + 1) % listen.len(); + let mut xv = listen[x].0.write().unwrap(); + let yv = listen[y].0.read().unwrap(); + let mut d = yv.lookup(yv.id()).unwrap().clone(); + d.wallclock = timestamp(); + xv.insert_info(d); + } + }); + + Ok(()) +} /// star a -> (b,c,d,e) #[test] fn gossip_star() { logger::setup(); - run_gossip_topo(|listen| { + run_gossip_topo(50, |listen| { let num = listen.len(); for n in 0..(num - 1) { let x = 0; let y = (n + 1) % listen.len(); let mut xv = listen[x].0.write().unwrap(); let yv = listen[y].0.read().unwrap(); - let mut yd = yv.table[&yv.id].clone(); - yd.version = 0; - xv.insert(&yd); - trace!("star leader {:?}", &xv.id.as_ref()[..4]); + let mut yd = 
yv.lookup(yv.id()).unwrap().clone(); + yd.wallclock = timestamp(); + xv.insert_info(yd); + trace!("star leader {}", &xv.id()); } }); } @@ -107,22 +122,18 @@ fn gossip_star() { #[test] fn gossip_rstar() { logger::setup(); - run_gossip_topo(|listen| { + run_gossip_topo(50, |listen| { let num = listen.len(); let xd = { let xv = listen[0].0.read().unwrap(); - xv.table[&xv.id].clone() + xv.lookup(xv.id()).unwrap().clone() }; - trace!("rstar leader {:?}", &xd.id.as_ref()[..4]); + trace!("rstar leader {}", xd.id); for n in 0..(num - 1) { let y = (n + 1) % listen.len(); let mut yv = listen[y].0.write().unwrap(); - yv.insert(&xd); - trace!( - "rstar insert {:?} into {:?}", - &xd.id.as_ref()[..4], - &yv.id.as_ref()[..4] - ); + yv.insert_info(xd.clone()); + trace!("rstar insert {} into {}", xd.id, yv.id()); } }); } @@ -140,19 +151,20 @@ pub fn cluster_info_retransmit() -> result::Result<()> { let c1_data = c1.read().unwrap().my_data().clone(); c1.write().unwrap().set_leader(c1_data.id); - c2.write().unwrap().insert(&c1_data); - c3.write().unwrap().insert(&c1_data); + c2.write().unwrap().insert_info(c1_data.clone()); + c3.write().unwrap().insert_info(c1_data.clone()); c2.write().unwrap().set_leader(c1_data.id); c3.write().unwrap().set_leader(c1_data.id); + let num = 3; //wait to converge trace!("waiting to converge:"); let mut done = false; for _ in 0..30 { - done = c1.read().unwrap().table.len() == 3 - && c2.read().unwrap().table.len() == 3 - && c3.read().unwrap().table.len() == 3; + done = c1.read().unwrap().ncp_peers().len() == num - 1 + && c2.read().unwrap().ncp_peers().len() == num - 1 + && c3.read().unwrap().ncp_peers().len() == num - 1; if done { break; } @@ -180,102 +192,3 @@ pub fn cluster_info_retransmit() -> result::Result<()> { Ok(()) } - -#[test] -#[ignore] -fn test_external_liveness_table() { - logger::setup(); - assert!(cfg!(feature = "test")); - let c1_c4_exit = Arc::new(AtomicBool::new(false)); - let c2_c3_exit = Arc::new(AtomicBool::new(false)); - - trace!("c1:"); - let (c1, dr1, _) = test_node(c1_c4_exit.clone()); - trace!("c2:"); - let (c2, dr2, _) = test_node(c2_c3_exit.clone()); - trace!("c3:"); - let (c3, dr3, _) = test_node(c2_c3_exit.clone()); - trace!("c4:"); - let (c4, dr4, _) = test_node(c1_c4_exit.clone()); - - let c1_data = c1.read().unwrap().my_data().clone(); - c1.write().unwrap().set_leader(c1_data.id); - - let c2_id = c2.read().unwrap().id; - let c3_id = c3.read().unwrap().id; - let c4_id = c4.read().unwrap().id; - - // Insert the remote data about c4 - let c2_index_for_c4 = 10; - c2.write().unwrap().remote.insert(c4_id, c2_index_for_c4); - let c3_index_for_c4 = 20; - c3.write().unwrap().remote.insert(c4_id, c3_index_for_c4); - - // Set up the initial network topology - c2.write().unwrap().insert(&c1_data); - c3.write().unwrap().insert(&c1_data); - - c2.write().unwrap().set_leader(c1_data.id); - c3.write().unwrap().set_leader(c1_data.id); - - // Wait to converge - trace!("waiting to converge:"); - let mut done = false; - for _ in 0..30 { - done = c1.read().unwrap().table.len() == 3 - && c2.read().unwrap().table.len() == 3 - && c3.read().unwrap().table.len() == 3; - if done { - break; - } - sleep(Duration::new(1, 0)); - } - assert!(done); - - // Validate c1's external liveness table, then release lock rc1 - { - let rc1 = c1.read().unwrap(); - let el = rc1.get_external_liveness_entry(&c4.read().unwrap().id); - - // Make sure liveness table entry for c4 exists on node c1 - assert!(el.is_some()); - let liveness_map = el.unwrap(); - - // Make sure liveness table entry 
contains correct result for c2 - let c2_index_result_for_c4 = liveness_map.get(&c2_id); - assert!(c2_index_result_for_c4.is_some()); - assert_eq!(*(c2_index_result_for_c4.unwrap()), c2_index_for_c4); - - // Make sure liveness table entry contains correct result for c3 - let c3_index_result_for_c4 = liveness_map.get(&c3_id); - assert!(c3_index_result_for_c4.is_some()); - assert_eq!(*(c3_index_result_for_c4.unwrap()), c3_index_for_c4); - } - - // Shutdown validators c2 and c3 - c2_c3_exit.store(true, Ordering::Relaxed); - dr2.join().unwrap(); - dr3.join().unwrap(); - - // Allow communication between c1 and c4, make sure that c1's external_liveness table - // entry for c4 gets cleared - c4.write().unwrap().insert(&c1_data); - c4.write().unwrap().set_leader(c1_data.id); - for _ in 0..30 { - done = c1 - .read() - .unwrap() - .get_external_liveness_entry(&c4_id) - .is_none(); - if done { - break; - } - sleep(Duration::new(1, 0)); - } - assert!(done); - - // Shutdown validators c1 and c4 - c1_c4_exit.store(true, Ordering::Relaxed); - dr1.join().unwrap(); - dr4.join().unwrap(); -} diff --git a/tests/multinode.rs b/tests/multinode.rs index 11143525a9..7dbdb3861c 100644 --- a/tests/multinode.rs +++ b/tests/multinode.rs @@ -45,9 +45,9 @@ fn make_spy_node(leader: &NodeInfo) -> (Ncp, Arc>, Pubkey) { let mut spy = Node::new_localhost(); let me = spy.info.id.clone(); let daddr = "0.0.0.0:0".parse().unwrap(); - spy.info.contact_info.tvu = daddr; + spy.info.tvu = daddr; let mut spy_cluster_info = ClusterInfo::new(spy.info).expect("ClusterInfo::new"); - spy_cluster_info.insert(&leader); + spy_cluster_info.insert_info(leader.clone()); spy_cluster_info.set_leader(leader.id); let spy_cluster_info_ref = Arc::new(RwLock::new(spy_cluster_info)); let spy_window = Arc::new(RwLock::new(default_window())); @@ -68,7 +68,7 @@ fn make_listening_node(leader: &NodeInfo) -> (Ncp, Arc>, Nod let new_node_info = new_node.info.clone(); let me = new_node.info.id.clone(); let mut new_node_cluster_info = ClusterInfo::new(new_node_info).expect("ClusterInfo::new"); - new_node_cluster_info.insert(&leader); + new_node_cluster_info.insert_info(leader.clone()); new_node_cluster_info.set_leader(leader.id); let new_node_cluster_info_ref = Arc::new(RwLock::new(new_node_cluster_info)); let new_node_window = Arc::new(RwLock::new(default_window())); @@ -96,8 +96,8 @@ fn converge(leader: &NodeInfo, num_nodes: usize) -> Vec { let mut rv = vec![]; for _ in 0..30 { let num = spy_ref.read().unwrap().convergence(); - let mut v = spy_ref.read().unwrap().get_valid_peers(); - if num >= num_nodes as u64 && v.len() >= num_nodes { + let mut v = spy_ref.read().unwrap().rpc_peers(); + if num >= num_nodes && v.len() >= num_nodes { rv.append(&mut v); converged = true; break; @@ -183,7 +183,7 @@ fn test_multi_node_ledger_window() -> result::Result<()> { &zero_ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -288,7 +288,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> { &ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -326,7 +326,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> { &zero_ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, 
LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -420,7 +420,7 @@ fn test_multi_node_basic() { &ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -496,7 +496,7 @@ fn test_boot_validator_from_file() -> result::Result<()> { &ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -584,7 +584,7 @@ fn test_leader_restart_validator_start_from_old_ledger() -> result::Result<()> { &stale_ledger_path, keypair, Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), false, LeaderScheduler::from_bootstrap_leader(leader_data.id), None, @@ -715,7 +715,7 @@ fn test_multi_node_dynamic_network() { &ledger_path, Arc::new(keypair), Arc::new(Keypair::new()), - Some(leader_data.contact_info.ncp), + Some(leader_data.ncp), true, LeaderScheduler::from_bootstrap_leader(leader_pubkey), None, @@ -861,7 +861,7 @@ fn test_leader_to_validator_transition() { &leader_ledger_path, leader_keypair, Arc::new(Keypair::new()), - Some(leader_info.contact_info.ncp), + Some(leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -875,10 +875,10 @@ fn test_leader_to_validator_transition() { let mut converged = false; for _ in 0..30 { let num = spy_node.read().unwrap().convergence(); - let mut v: Vec = spy_node.read().unwrap().get_valid_peers(); + let mut v: Vec = spy_node.read().unwrap().rpc_peers(); // There's only one person excluding the spy node (the leader) who should see // two nodes on the network - if num >= 2 as u64 && v.len() >= 1 { + if num >= 2 && v.len() >= 1 { converged = true; break; } @@ -1001,7 +1001,7 @@ fn test_leader_validator_basic() { &validator_ledger_path, validator_keypair, Arc::new(vote_account_keypair), - Some(leader_info.contact_info.ncp), + Some(leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1013,7 +1013,7 @@ fn test_leader_validator_basic() { &leader_ledger_path, leader_keypair, Arc::new(Keypair::new()), - Some(leader_info.contact_info.ncp), + Some(leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1189,7 +1189,7 @@ fn test_dropped_handoff_recovery() { &bootstrap_leader_ledger_path, bootstrap_leader_keypair, Arc::new(Keypair::new()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1212,7 +1212,7 @@ fn test_dropped_handoff_recovery() { &validator_ledger_path, kp, Arc::new(Keypair::new()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1238,7 +1238,7 @@ fn test_dropped_handoff_recovery() { &next_leader_ledger_path, next_leader_keypair, Arc::new(vote_account_keypair), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1355,7 +1355,7 @@ fn test_full_leader_validator_network() { &bootstrap_leader_ledger_path, Arc::new(node_keypairs.pop_front().unwrap()), Arc::new(vote_account_keypairs.pop_front().unwrap()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1382,7 +1382,7 @@ fn test_full_leader_validator_network() { 
&validator_ledger_path, Arc::new(kp), Arc::new(vote_account_keypairs.pop_front().unwrap()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1559,7 +1559,7 @@ fn test_broadcast_last_tick() { &bootstrap_leader_ledger_path, Arc::new(bootstrap_leader_keypair), Arc::new(Keypair::new()), - Some(bootstrap_leader_info.contact_info.ncp), + Some(bootstrap_leader_info.ncp), false, LeaderScheduler::new(&leader_scheduler_config), None, @@ -1621,12 +1621,8 @@ fn test_broadcast_last_tick() { fn mk_client(leader: &NodeInfo) -> ThinClient { let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap(); - assert!(ClusterInfo::is_valid_address(&leader.contact_info.tpu)); - ThinClient::new( - leader.contact_info.rpc, - leader.contact_info.tpu, - transactions_socket, - ) + assert!(ClusterInfo::is_valid_address(&leader.tpu)); + ThinClient::new(leader.rpc, leader.tpu, transactions_socket) } fn send_tx_and_retry_get_balance(