Add scalable gossip library (#1546)
* Cluster Replicated Data Store

Separate the data storage and merge strategy from the network IO boundary. Implement an eager push overlay for transporting recent messages. Simulation shows fast convergence with 20k nodes.
commit a41254e18c, parent 4a3230904e, committed via GitHub
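The commit message describes two ideas: a replicated data store whose merge rule is independent of the socket code that delivers values, and an eager push overlay that forwards freshly accepted values to peers. A minimal sketch of that separation, assuming a newest-wallclock-wins merge rule (the type and method names below are illustrative, not the crate's actual API):

use std::collections::HashMap;

/// A gossiped value: the payload plus the sender's local timestamp.
#[derive(Clone, Debug, PartialEq)]
struct VersionedValue {
    wallclock: u64,
    data: String,
}

/// The store knows nothing about sockets; it only merges.
#[derive(Default)]
struct ReplicatedStore {
    table: HashMap<String, VersionedValue>,
}

impl ReplicatedStore {
    /// Merge rule: keep whichever copy carries the newer wallclock.
    /// Returns true if the incoming value was accepted (and so is
    /// worth eagerly pushing on to our peers).
    fn insert(&mut self, id: String, value: VersionedValue) -> bool {
        let newer = self
            .table
            .get(&id)
            .map_or(true, |current| value.wallclock > current.wallclock);
        if newer {
            self.table.insert(id, value);
        }
        newer
    }
}

fn main() {
    let mut store = ReplicatedStore::default();
    let old = VersionedValue { wallclock: 1, data: "addr-a".into() };
    let new = VersionedValue { wallclock: 2, data: "addr-b".into() };
    assert!(store.insert("node1".into(), old));
    assert!(store.insert("node1".into(), new.clone())); // newer wins
    // A stale copy arriving later is rejected, so gossip quiesces.
    assert!(!store.insert("node1".into(), VersionedValue { wallclock: 1, data: "stale".into() }));
    assert_eq!(store.table["node1"], new);
}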
@@ -10,6 +10,7 @@ use solana::ncp::Ncp;
 use solana::packet::{Blob, SharedBlob};
 use solana::result;
 use solana::service::Service;
+use solana::timing::timestamp;
 use std::net::UdpSocket;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, RwLock};
@@ -22,6 +23,7 @@ fn test_node(exit: Arc<AtomicBool>) -> (Arc<RwLock<ClusterInfo>>, Ncp, UdpSocket
     let c = Arc::new(RwLock::new(cluster_info));
     let w = Arc::new(RwLock::new(vec![]));
     let d = Ncp::new(&c.clone(), w, None, tn.sockets.gossip, exit);
+    let _ = c.read().unwrap().my_data();
     (c, d, tn.sockets.replicate.pop().unwrap())
 }
 
@@ -29,38 +31,31 @@ fn test_node(exit: Arc<AtomicBool>) -> (Arc<RwLock<ClusterInfo>>, Ncp, UdpSocket
 /// Run until every node in the network has a full NodeInfo set.
 /// Check that nodes stop sending updates after all the NodeInfo has been shared.
 /// Tests that actually use this function are below.
-fn run_gossip_topo<F>(topo: F)
+fn run_gossip_topo<F>(num: usize, topo: F)
 where
     F: Fn(&Vec<(Arc<RwLock<ClusterInfo>>, Ncp, UdpSocket)>) -> (),
 {
-    let num: usize = 5;
     let exit = Arc::new(AtomicBool::new(false));
     let listen: Vec<_> = (0..num).map(|_| test_node(exit.clone())).collect();
     topo(&listen);
     let mut done = true;
     for i in 0..(num * 32) {
-        done = false;
-        trace!("round {}", i);
-        for (c, _, _) in &listen {
-            if num == c.read().unwrap().convergence() as usize {
-                done = true;
-                break;
-            }
-        }
-        // at least 1 node converged
-        if done == true {
+        done = true;
+        let total: usize = listen
+            .iter()
+            .map(|v| v.0.read().unwrap().ncp_peers().len())
+            .sum();
+        if (total + num) * 10 > num * num * 9 {
+            done = true;
             break;
+        } else {
+            trace!("not converged {} {} {}", i, total + num, num * num);
         }
         sleep(Duration::new(1, 0));
     }
     exit.store(true, Ordering::Relaxed);
-    for (c, dr, _) in listen {
+    for (_, dr, _) in listen {
         dr.join().unwrap();
-        // make it clear what failed
-        // protocol is too chatty, updates should stop after everyone receives `num`
-        assert!(c.read().unwrap().update_index <= num as u64);
-        // protocol is not chatty enough, everyone should get `num` entries
-        assert_eq!(c.read().unwrap().table.len(), num);
     }
     assert!(done);
 }
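A note on the new convergence predicate: each node contributes its ncp_peers() count, the sum is credited with num (every node also knows itself), and the loop exits once that covers at least 90% of the num * num entries a fully connected mesh would hold. Restated standalone (the function name is ours, for illustration):

/// True once the network is "converged enough": the nodes collectively
/// know at least 90% of the `num * num` (node, peer-or-self) pairs a
/// fully meshed network of `num` nodes would produce.
fn is_converged(peer_counts: &[usize]) -> bool {
    let num = peer_counts.len();
    let total: usize = peer_counts.iter().sum();
    // Each node contributes its peers plus itself, hence `total + num`.
    (total + num) * 10 > num * num * 9
}

fn main() {
    // 5 nodes that each see all 4 others: (20 + 5) * 10 = 250 > 225.
    assert!(is_converged(&[4, 4, 4, 4, 4]));
    // 5 nodes that each see only 1 peer: (5 + 5) * 10 = 100 <= 225.
    assert!(!is_converged(&[1, 1, 1, 1, 1]));
}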
@@ -68,37 +63,57 @@ where
 #[test]
 fn gossip_ring() -> result::Result<()> {
     logger::setup();
-    run_gossip_topo(|listen| {
+    run_gossip_topo(50, |listen| {
         let num = listen.len();
         for n in 0..num {
             let y = n % listen.len();
             let x = (n + 1) % listen.len();
             let mut xv = listen[x].0.write().unwrap();
             let yv = listen[y].0.read().unwrap();
-            let mut d = yv.table[&yv.id].clone();
-            d.version = 0;
-            xv.insert(&d);
+            let mut d = yv.lookup(yv.id()).unwrap().clone();
+            d.wallclock = timestamp();
+            xv.insert_info(d);
         }
     });
 
     Ok(())
 }
 
+/// ring a -> b -> c -> d -> e -> a
+#[test]
+#[ignore]
+fn gossip_ring_large() -> result::Result<()> {
+    logger::setup();
+    run_gossip_topo(600, |listen| {
+        let num = listen.len();
+        for n in 0..num {
+            let y = n % listen.len();
+            let x = (n + 1) % listen.len();
+            let mut xv = listen[x].0.write().unwrap();
+            let yv = listen[y].0.read().unwrap();
+            let mut d = yv.lookup(yv.id()).unwrap().clone();
+            d.wallclock = timestamp();
+            xv.insert_info(d);
+        }
+    });
+
+    Ok(())
+}
 /// star a -> (b,c,d,e)
 #[test]
 fn gossip_star() {
     logger::setup();
-    run_gossip_topo(|listen| {
+    run_gossip_topo(50, |listen| {
         let num = listen.len();
         for n in 0..(num - 1) {
             let x = 0;
             let y = (n + 1) % listen.len();
             let mut xv = listen[x].0.write().unwrap();
             let yv = listen[y].0.read().unwrap();
-            let mut yd = yv.table[&yv.id].clone();
-            yd.version = 0;
-            xv.insert(&yd);
-            trace!("star leader {:?}", &xv.id.as_ref()[..4]);
+            let mut yd = yv.lookup(yv.id()).unwrap().clone();
+            yd.wallclock = timestamp();
+            xv.insert_info(yd);
+            trace!("star leader {}", &xv.id());
         }
     });
 }
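Two details of these topology closures are worth spelling out. First, the wiring: each iteration hands node x = (n + 1) % num the contact info of node y = n, so only one directed edge per node is seeded and gossip must discover the rest. Second, the seeded entry is re-stamped with d.wallclock = timestamp() before insert_info; under a newest-wins merge, a stale stamp (the old code forced version = 0) could lose to a copy the receiver already holds, while a fresh stamp is always accepted. A standalone restatement of the ring wiring (the helper name is ours):

/// Reproduces the ring wiring from `gossip_ring`: node x = (n + 1) % num
/// is told about node y = n, closing the loop on the last iteration.
fn ring_edges(num: usize) -> Vec<(usize, usize)> {
    (0..num).map(|n| ((n + 1) % num, n)).collect()
}

fn main() {
    // a -> b -> c -> d -> e -> a, as the doc comment in the diff says:
    // each pair (x, y) means "x learns y's contact info".
    assert_eq!(ring_edges(5), vec![(1, 0), (2, 1), (3, 2), (4, 3), (0, 4)]);
}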
@@ -107,22 +122,18 @@ fn gossip_star() {
 #[test]
 fn gossip_rstar() {
     logger::setup();
-    run_gossip_topo(|listen| {
+    run_gossip_topo(50, |listen| {
         let num = listen.len();
         let xd = {
             let xv = listen[0].0.read().unwrap();
-            xv.table[&xv.id].clone()
+            xv.lookup(xv.id()).unwrap().clone()
         };
-        trace!("rstar leader {:?}", &xd.id.as_ref()[..4]);
+        trace!("rstar leader {}", xd.id);
         for n in 0..(num - 1) {
             let y = (n + 1) % listen.len();
             let mut yv = listen[y].0.write().unwrap();
-            yv.insert(&xd);
-            trace!(
-                "rstar insert {:?} into {:?}",
-                &xd.id.as_ref()[..4],
-                &yv.id.as_ref()[..4]
-            );
+            yv.insert_info(xd.clone());
+            trace!("rstar insert {} into {}", xd.id, yv.id());
         }
     });
 }
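The trace! changes in this hunk swap debug-printing the first four bytes of a key ({:?} on &id.as_ref()[..4]) for the Display impl on the pubkey itself ({}). The pattern, shown on a stand-in newtype rather than the real Pubkey:

use std::fmt;

/// Stand-in for a 32-byte node id (the real Pubkey has its own Display).
struct NodeId([u8; 32]);

impl fmt::Display for NodeId {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Print a short hex prefix instead of the whole key, much like
        // the old `&id.as_ref()[..4]` debug trace did.
        for byte in &self.0[..4] {
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}

fn main() {
    let id = NodeId([0xab; 32]);
    println!("star leader {}", id); // -> "star leader abababab"
}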
@@ -140,19 +151,20 @@ pub fn cluster_info_retransmit() -> result::Result<()> {
     let c1_data = c1.read().unwrap().my_data().clone();
     c1.write().unwrap().set_leader(c1_data.id);
 
-    c2.write().unwrap().insert(&c1_data);
-    c3.write().unwrap().insert(&c1_data);
+    c2.write().unwrap().insert_info(c1_data.clone());
+    c3.write().unwrap().insert_info(c1_data.clone());
 
     c2.write().unwrap().set_leader(c1_data.id);
     c3.write().unwrap().set_leader(c1_data.id);
+    let num = 3;
 
     //wait to converge
     trace!("waiting to converge:");
     let mut done = false;
     for _ in 0..30 {
-        done = c1.read().unwrap().table.len() == 3
-            && c2.read().unwrap().table.len() == 3
-            && c3.read().unwrap().table.len() == 3;
+        done = c1.read().unwrap().ncp_peers().len() == num - 1
+            && c2.read().unwrap().ncp_peers().len() == num - 1
+            && c3.read().unwrap().ncp_peers().len() == num - 1;
         if done {
             break;
         }
@@ -180,102 +192,3 @@ pub fn cluster_info_retransmit() -> result::Result<()> {
 
     Ok(())
 }
-
-#[test]
-#[ignore]
-fn test_external_liveness_table() {
-    logger::setup();
-    assert!(cfg!(feature = "test"));
-    let c1_c4_exit = Arc::new(AtomicBool::new(false));
-    let c2_c3_exit = Arc::new(AtomicBool::new(false));
-
-    trace!("c1:");
-    let (c1, dr1, _) = test_node(c1_c4_exit.clone());
-    trace!("c2:");
-    let (c2, dr2, _) = test_node(c2_c3_exit.clone());
-    trace!("c3:");
-    let (c3, dr3, _) = test_node(c2_c3_exit.clone());
-    trace!("c4:");
-    let (c4, dr4, _) = test_node(c1_c4_exit.clone());
-
-    let c1_data = c1.read().unwrap().my_data().clone();
-    c1.write().unwrap().set_leader(c1_data.id);
-
-    let c2_id = c2.read().unwrap().id;
-    let c3_id = c3.read().unwrap().id;
-    let c4_id = c4.read().unwrap().id;
-
-    // Insert the remote data about c4
-    let c2_index_for_c4 = 10;
-    c2.write().unwrap().remote.insert(c4_id, c2_index_for_c4);
-    let c3_index_for_c4 = 20;
-    c3.write().unwrap().remote.insert(c4_id, c3_index_for_c4);
-
-    // Set up the initial network topology
-    c2.write().unwrap().insert(&c1_data);
-    c3.write().unwrap().insert(&c1_data);
-
-    c2.write().unwrap().set_leader(c1_data.id);
-    c3.write().unwrap().set_leader(c1_data.id);
-
-    // Wait to converge
-    trace!("waiting to converge:");
-    let mut done = false;
-    for _ in 0..30 {
-        done = c1.read().unwrap().table.len() == 3
-            && c2.read().unwrap().table.len() == 3
-            && c3.read().unwrap().table.len() == 3;
-        if done {
-            break;
-        }
-        sleep(Duration::new(1, 0));
-    }
-    assert!(done);
-
-    // Validate c1's external liveness table, then release lock rc1
-    {
-        let rc1 = c1.read().unwrap();
-        let el = rc1.get_external_liveness_entry(&c4.read().unwrap().id);
-
-        // Make sure liveness table entry for c4 exists on node c1
-        assert!(el.is_some());
-        let liveness_map = el.unwrap();
-
-        // Make sure liveness table entry contains correct result for c2
-        let c2_index_result_for_c4 = liveness_map.get(&c2_id);
-        assert!(c2_index_result_for_c4.is_some());
-        assert_eq!(*(c2_index_result_for_c4.unwrap()), c2_index_for_c4);
-
-        // Make sure liveness table entry contains correct result for c3
-        let c3_index_result_for_c4 = liveness_map.get(&c3_id);
-        assert!(c3_index_result_for_c4.is_some());
-        assert_eq!(*(c3_index_result_for_c4.unwrap()), c3_index_for_c4);
-    }
-
-    // Shutdown validators c2 and c3
-    c2_c3_exit.store(true, Ordering::Relaxed);
-    dr2.join().unwrap();
-    dr3.join().unwrap();
-
-    // Allow communication between c1 and c4, make sure that c1's external_liveness table
-    // entry for c4 gets cleared
-    c4.write().unwrap().insert(&c1_data);
-    c4.write().unwrap().set_leader(c1_data.id);
-    for _ in 0..30 {
-        done = c1
-            .read()
-            .unwrap()
-            .get_external_liveness_entry(&c4_id)
-            .is_none();
-        if done {
-            break;
-        }
-        sleep(Duration::new(1, 0));
-    }
-    assert!(done);
-
-    // Shutdown validators c1 and c4
-    c1_c4_exit.store(true, Ordering::Relaxed);
-    dr1.join().unwrap();
-    dr4.join().unwrap();
-}
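The deleted test modeled each node's external liveness table as a map from an observed node (c4) to the update indexes that other nodes (c2, c3) last reported for it; the new CRDS design drops that structure. The lookups the test asserted, in miniature (plain string keys stand in for Pubkeys):

use std::collections::HashMap;

fn main() {
    // Outer key: the node being observed (c4). Inner map: who reported
    // it (c2, c3) and the update index they reported.
    let mut external_liveness: HashMap<&str, HashMap<&str, u64>> = HashMap::new();
    external_liveness.entry("c4").or_default().insert("c2", 10);
    external_liveness.entry("c4").or_default().insert("c3", 20);

    // The assertions the removed test made, in miniature.
    let entry = external_liveness.get("c4").expect("entry for c4");
    assert_eq!(entry.get("c2"), Some(&10));
    assert_eq!(entry.get("c3"), Some(&20));
}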
@@ -45,9 +45,9 @@ fn make_spy_node(leader: &NodeInfo) -> (Ncp, Arc<RwLock<ClusterInfo>>, Pubkey) {
     let mut spy = Node::new_localhost();
     let me = spy.info.id.clone();
     let daddr = "0.0.0.0:0".parse().unwrap();
-    spy.info.contact_info.tvu = daddr;
+    spy.info.tvu = daddr;
     let mut spy_cluster_info = ClusterInfo::new(spy.info).expect("ClusterInfo::new");
-    spy_cluster_info.insert(&leader);
+    spy_cluster_info.insert_info(leader.clone());
     spy_cluster_info.set_leader(leader.id);
     let spy_cluster_info_ref = Arc::new(RwLock::new(spy_cluster_info));
     let spy_window = Arc::new(RwLock::new(default_window()));
@@ -68,7 +68,7 @@ fn make_listening_node(leader: &NodeInfo) -> (Ncp, Arc<RwLock<ClusterInfo>>, Nod
     let new_node_info = new_node.info.clone();
     let me = new_node.info.id.clone();
     let mut new_node_cluster_info = ClusterInfo::new(new_node_info).expect("ClusterInfo::new");
-    new_node_cluster_info.insert(&leader);
+    new_node_cluster_info.insert_info(leader.clone());
     new_node_cluster_info.set_leader(leader.id);
     let new_node_cluster_info_ref = Arc::new(RwLock::new(new_node_cluster_info));
     let new_node_window = Arc::new(RwLock::new(default_window()));
@@ -96,8 +96,8 @@ fn converge(leader: &NodeInfo, num_nodes: usize) -> Vec<NodeInfo> {
     let mut rv = vec![];
     for _ in 0..30 {
         let num = spy_ref.read().unwrap().convergence();
-        let mut v = spy_ref.read().unwrap().get_valid_peers();
-        if num >= num_nodes as u64 && v.len() >= num_nodes {
+        let mut v = spy_ref.read().unwrap().rpc_peers();
+        if num >= num_nodes && v.len() >= num_nodes {
             rv.append(&mut v);
             converged = true;
             break;
@@ -183,7 +183,7 @@ fn test_multi_node_ledger_window() -> result::Result<()> {
         &zero_ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -288,7 +288,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
         &ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -326,7 +326,7 @@ fn test_multi_node_validator_catchup_from_zero() -> result::Result<()> {
         &zero_ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -420,7 +420,7 @@ fn test_multi_node_basic() {
         &ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -496,7 +496,7 @@ fn test_boot_validator_from_file() -> result::Result<()> {
         &ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -584,7 +584,7 @@ fn test_leader_restart_validator_start_from_old_ledger() -> result::Result<()> {
         &stale_ledger_path,
         keypair,
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         false,
         LeaderScheduler::from_bootstrap_leader(leader_data.id),
         None,
@@ -715,7 +715,7 @@ fn test_multi_node_dynamic_network() {
         &ledger_path,
         Arc::new(keypair),
         Arc::new(Keypair::new()),
-        Some(leader_data.contact_info.ncp),
+        Some(leader_data.ncp),
         true,
         LeaderScheduler::from_bootstrap_leader(leader_pubkey),
         None,
@@ -861,7 +861,7 @@ fn test_leader_to_validator_transition() {
         &leader_ledger_path,
         leader_keypair,
         Arc::new(Keypair::new()),
-        Some(leader_info.contact_info.ncp),
+        Some(leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -875,10 +875,10 @@
     let mut converged = false;
     for _ in 0..30 {
         let num = spy_node.read().unwrap().convergence();
-        let mut v: Vec<NodeInfo> = spy_node.read().unwrap().get_valid_peers();
+        let mut v: Vec<NodeInfo> = spy_node.read().unwrap().rpc_peers();
         // There's only one person excluding the spy node (the leader) who should see
         // two nodes on the network
-        if num >= 2 as u64 && v.len() >= 1 {
+        if num >= 2 && v.len() >= 1 {
             converged = true;
             break;
         }
@@ -1001,7 +1001,7 @@ fn test_leader_validator_basic() {
         &validator_ledger_path,
         validator_keypair,
         Arc::new(vote_account_keypair),
-        Some(leader_info.contact_info.ncp),
+        Some(leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1013,7 +1013,7 @@ fn test_leader_validator_basic() {
         &leader_ledger_path,
         leader_keypair,
         Arc::new(Keypair::new()),
-        Some(leader_info.contact_info.ncp),
+        Some(leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1189,7 +1189,7 @@ fn test_dropped_handoff_recovery() {
         &bootstrap_leader_ledger_path,
         bootstrap_leader_keypair,
         Arc::new(Keypair::new()),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1212,7 +1212,7 @@ fn test_dropped_handoff_recovery() {
         &validator_ledger_path,
         kp,
         Arc::new(Keypair::new()),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1238,7 +1238,7 @@ fn test_dropped_handoff_recovery() {
         &next_leader_ledger_path,
         next_leader_keypair,
         Arc::new(vote_account_keypair),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1355,7 +1355,7 @@ fn test_full_leader_validator_network() {
         &bootstrap_leader_ledger_path,
         Arc::new(node_keypairs.pop_front().unwrap()),
         Arc::new(vote_account_keypairs.pop_front().unwrap()),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1382,7 +1382,7 @@ fn test_full_leader_validator_network() {
         &validator_ledger_path,
         Arc::new(kp),
         Arc::new(vote_account_keypairs.pop_front().unwrap()),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1559,7 +1559,7 @@ fn test_broadcast_last_tick() {
         &bootstrap_leader_ledger_path,
         Arc::new(bootstrap_leader_keypair),
         Arc::new(Keypair::new()),
-        Some(bootstrap_leader_info.contact_info.ncp),
+        Some(bootstrap_leader_info.ncp),
         false,
         LeaderScheduler::new(&leader_scheduler_config),
         None,
@@ -1621,12 +1621,8 @@
 
 fn mk_client(leader: &NodeInfo) -> ThinClient {
     let transactions_socket = UdpSocket::bind("0.0.0.0:0").unwrap();
-    assert!(ClusterInfo::is_valid_address(&leader.contact_info.tpu));
-    ThinClient::new(
-        leader.contact_info.rpc,
-        leader.contact_info.tpu,
-        transactions_socket,
-    )
+    assert!(ClusterInfo::is_valid_address(&leader.tpu));
+    ThinClient::new(leader.rpc, leader.tpu, transactions_socket)
 }
 
 fn send_tx_and_retry_get_balance(
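The mechanical change repeated across the hunks above is that NodeInfo's addresses were hoisted out of the nested contact_info struct onto the record itself, so leader.contact_info.ncp becomes leader.ncp (likewise rpc, tpu, and tvu). In outline (field names are taken from the diff; the struct layout is our reconstruction, not the crate's literal definition):

use std::net::SocketAddr;

// Before: addresses nested one level down.
#[allow(dead_code)]
struct ContactInfo { ncp: SocketAddr, tvu: SocketAddr, rpc: SocketAddr, tpu: SocketAddr }
#[allow(dead_code)]
struct NodeInfoOld { id: [u8; 32], contact_info: ContactInfo }

// After: the same addresses live directly on the node record,
// which is what lets every call site drop the `.contact_info` hop.
#[allow(dead_code)]
struct NodeInfoNew { id: [u8; 32], ncp: SocketAddr, tvu: SocketAddr, rpc: SocketAddr, tpu: SocketAddr }

fn main() {
    let addr: SocketAddr = "127.0.0.1:8000".parse().unwrap();
    let old = NodeInfoOld {
        id: [0; 32],
        contact_info: ContactInfo { ncp: addr, tvu: addr, rpc: addr, tpu: addr },
    };
    let new = NodeInfoNew { id: [0; 32], ncp: addr, tvu: addr, rpc: addr, tpu: addr };
    assert_eq!(old.contact_info.ncp, new.ncp); // same data, one less hop
}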