Add scalable gossip library (#1546)

* Cluster Replicated Data Store

Separate the data storage and merge strategy from the network IO boundary.
Implement an eager push overlay for transporting recent messages.

Simulation shows fast convergence with 20k nodes.
This commit is contained in:
anatoly yakovenko
2018-11-15 13:23:26 -08:00
committed by GitHub
parent 4a3230904e
commit a41254e18c
31 changed files with 2821 additions and 1698 deletions

View File

@@ -45,9 +45,9 @@ fn make_spy_node(leader: &NodeInfo) -> (Ncp, Arc<RwLock<ClusterInfo>>, Pubkey) {
let mut spy = Node::new_localhost();
let me = spy.info.id.clone();
let daddr = "0.0.0.0:0".parse().unwrap();
spy.info.contact_info.tvu = daddr;
spy.info.tvu = daddr;
let mut spy_cluster_info = ClusterInfo::new(spy.info).expect("ClusterInfo::new");
spy_cluster_info.insert(&leader);
spy_cluster_info.insert_info(leader.clone());
spy_cluster_info.set_leader(leader.id);
let spy_cluster_info_ref = Arc::new(RwLock::new(spy_cluster_info));
let spy_window = Arc::new(RwLock::new(default_window()));
@@ -68,7 +68,7 @@ fn make_listening_node(leader: &NodeInfo) -> (Ncp, Arc<RwLock<ClusterInfo>>, Nod
let new_node_info = new_node.info.clone();
let me = new_node.info.id.clone();
let mut new_node_cluster_info = ClusterInfo::new(new_node_info).expect("ClusterInfo::new");
new_node_cluster_info.insert(&leader);
new_node_cluster_info.insert_info(leader.clone());
new_node_cluster_info.set_leader(leader.id);
let new_node_cluster_info_ref = Arc::new(RwLock::new(new_node_cluster_info));
let new_node_window = Arc::new(RwLock::new(default_window()));
@@ -96,8 +96,8 @@ fn converge(leader: &NodeInfo, num_nodes: usize) -> Vec<NodeInfo> {
let mut rv = vec![];
for _ in 0..30 {
let num = spy_ref.read().unwrap().convergence();
let mut v = spy_ref.read().unwrap().get_valid_peers();
if num >= num_nodes as u64 && v.len() >= num_nodes {
let mut v = spy_ref.read().unwrap().rpc_peers();
if num >= num_nodes && v.len() >= num_nodes {
rv.append(&mut v);
converged = true;
break;
@@ -183,7 +183,7 @@ fn test_multi_node_ledger_window() -> result::Result<()> {
&zero_ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -288,7 +288,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
&ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -326,7 +326,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
&zero_ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -420,7 +420,7 @@ fn test_multi_node_basic() {
&ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -496,7 +496,7 @@ fn test_boot_validator_from_file() -> result::Result<()> {
&ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -584,7 +584,7 @@ fn test_leader_restart_validator_start_from_old_ledger() -> result::Result<()> {
&stale_ledger_path,
keypair,
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
false,
LeaderScheduler::from_bootstrap_leader(leader_data.id),
None,
@@ -715,7 +715,7 @@ fn test_multi_node_dynamic_network() {
&ledger_path,
Arc::new(keypair),
Arc::new(Keypair::new()),
Some(leader_data.contact_info.ncp),
Some(leader_data.ncp),
true,
LeaderScheduler::from_bootstrap_leader(leader_pubkey),
None,
@@ -861,7 +861,7 @@ fn test_leader_to_validator_transition() {
&leader_ledger_path,
leader_keypair,
Arc::new(Keypair::new()),
Some(leader_info.contact_info.ncp),
Some(leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -875,10 +875,10 @@ fn test_leader_to_validator_transition() {
let mut converged = false;
for _ in 0..30 {
let num = spy_node.read().unwrap().convergence();
let mut v: Vec<NodeInfo> = spy_node.read().unwrap().get_valid_peers();
let mut v: Vec<NodeInfo> = spy_node.read().unwrap().rpc_peers();
// There's only one person excluding the spy node (the leader) who should see
// two nodes on the network
if num >= 2 as u64 && v.len() >= 1 {
if num >= 2 && v.len() >= 1 {
converged = true;
break;
}
@@ -1001,7 +1001,7 @@ fn test_leader_validator_basic() {
&validator_ledger_path,
validator_keypair,
Arc::new(vote_account_keypair),
Some(leader_info.contact_info.ncp),
Some(leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1013,7 +1013,7 @@ fn test_leader_validator_basic() {
&leader_ledger_path,
leader_keypair,
Arc::new(Keypair::new()),
Some(leader_info.contact_info.ncp),
Some(leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1189,7 +1189,7 @@ fn test_dropped_handoff_recovery() {
&bootstrap_leader_ledger_path,
bootstrap_leader_keypair,
Arc::new(Keypair::new()),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1212,7 +1212,7 @@ fn test_dropped_handoff_recovery() {
&validator_ledger_path,
kp,
Arc::new(Keypair::new()),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1238,7 +1238,7 @@ fn test_dropped_handoff_recovery() {
&next_leader_ledger_path,
next_leader_keypair,
Arc::new(vote_account_keypair),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1355,7 +1355,7 @@ fn test_full_leader_validator_network() {
&bootstrap_leader_ledger_path,
Arc::new(node_keypairs.pop_front().unwrap()),
Arc::new(vote_account_keypairs.pop_front().unwrap()),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1382,7 +1382,7 @@ fn test_full_leader_validator_network() {
&validator_ledger_path,
Arc::new(kp),
Arc::new(vote_account_keypairs.pop_front().unwrap()),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1559,7 +1559,7 @@ fn test_broadcast_last_tick() {
&bootstrap_leader_ledger_path,
Arc::new(bootstrap_leader_keypair),
Arc::new(Keypair::new()),
Some(bootstrap_leader_info.contact_info.ncp),
Some(bootstrap_leader_info.ncp),
false,
LeaderScheduler::new(&leader_scheduler_config),
None,
@@ -1621,12 +1621,8 @@ fn test_broadcast_last_tick() {
fn mk_client(leader: &NodeInfo) -> ThinClient {
let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
assert!(ClusterInfo::is_valid_address(&leader.contact_info.tpu));
ThinClient::new(
leader.contact_info.rpc,
leader.contact_info.tpu,
transactions_socket,
)
assert!(ClusterInfo::is_valid_address(&leader.tpu));
ThinClient::new(leader.rpc, leader.tpu, transactions_socket)
}
fn send_tx_and_retry_get_balance(