efficiently pack gossip responses and only respond up to max size. (#1493)

anatoly yakovenko
2018-10-14 06:45:02 -07:00
committed by GitHub
parent d3b4dfe104
commit 6aaa350145
2 changed files with 141 additions and 68 deletions

View File

@@ -12,7 +12,7 @@
 //! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes.
 //!
 //! Bank needs to provide an interface for us to query the stake weight
-use bincode::{deserialize, serialize};
+use bincode::{deserialize, serialize, serialized_size};
 use budget_instruction::Vote;
 use choose_gossip_peer_strategy::{ChooseGossipPeerStrategy, ChooseWeightedPeerStrategy};
 use counter::Counter;
@@ -228,6 +228,7 @@ pub struct ClusterInfo {
 // TODO These messages should be signed, and go through the gpu pipeline for spam filtering
 #[derive(Serialize, Deserialize, Debug)]
+#[cfg_attr(feature = "cargo-clippy", allow(large_enum_variant))]
 enum Protocol {
     /// forward your own latest data structure when requesting an update
     /// this doesn't update the `remote` update index, but it allows the
@@ -235,8 +236,14 @@ enum Protocol {
     /// (last update index i saw from you, my replicated data)
     RequestUpdates(u64, NodeInfo),
     //TODO might need a since?
-    /// from id, form's last update index, NodeInfo
-    ReceiveUpdates(Pubkey, u64, Vec<NodeInfo>, Vec<(Pubkey, u64)>),
+    /// * from - from id,
+    /// * max_update_index - from's max update index in the response
+    /// * nodes - (NodeInfo, remote_last_update) vector
+    ReceiveUpdates {
+        from: Pubkey,
+        max_update_index: u64,
+        nodes: Vec<(NodeInfo, u64)>,
+    },
     /// ask for a missing index
     /// (my replicated data to keep alive, missing window index)
     RequestWindowIndex(NodeInfo, u64),
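The hunk above replaces the tuple variant `ReceiveUpdates(Pubkey, u64, Vec<NodeInfo>, Vec<(Pubkey, u64)>)`, which carried node data and liveness in two parallel collections, with a struct variant whose `nodes` field pairs each `NodeInfo` with the remote update index its sender last saw for it. A minimal, self-contained sketch of the same shape (toy `Node` and `Msg` types, not the real `NodeInfo`/`Protocol`), assuming bincode 1.x and serde with the derive feature:

```rust
use bincode::{serialize, serialized_size};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Node {
    id: [u8; 32],
    version: u64,
}

#[derive(Serialize, Deserialize)]
enum Msg {
    // Mirrors the new ReceiveUpdates shape: each node travels with the
    // remote update index (`u64`) the sender last saw for it.
    ReceiveUpdates {
        from: [u8; 32],
        max_update_index: u64,
        nodes: Vec<(Node, u64)>,
    },
}

fn main() {
    let node = Node { id: [0; 32], version: 1 };
    let msg = Msg::ReceiveUpdates {
        from: [0; 32],
        max_update_index: 0,
        nodes: vec![(node, 0)],
    };
    // bincode can size a value without building the full buffer, which is
    // what makes the byte-budgeting in this commit cheap.
    assert_eq!(
        serialized_size(&msg).unwrap() as usize,
        serialize(&msg).unwrap().len()
    );
}
```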
@@ -702,17 +709,43 @@ impl ClusterInfo {
         1.0
     }
-    fn get_updates_since(&self, v: u64) -> (Pubkey, u64, Vec<NodeInfo>) {
-        //trace!("get updates since {}", v);
-        let data = self
+    fn max_updates(max_bytes: usize) -> usize {
+        let unit = (NodeInfo::new_localhost(Default::default()), 0);
+        let unit_size = serialized_size(&unit).unwrap();
+        let msg = Protocol::ReceiveUpdates {
+            from: Default::default(),
+            max_update_index: 0,
+            nodes: vec![unit],
+        };
+        let msg_size = serialized_size(&msg).unwrap();
+        ((max_bytes - (msg_size as usize)) / (unit_size as usize)) + 1
+    }
+    // Get nodes updated since `v`, returning at most `max` updates
+    fn get_updates_since(&self, v: u64, max: usize) -> (Pubkey, u64, Vec<(NodeInfo, u64)>) {
+        let nodes: Vec<_> = self
             .table
             .values()
             .filter(|x| x.id != Pubkey::default() && self.local[&x.id] > v)
             .cloned()
             .collect();
+        let liveness: Vec<u64> = nodes
+            .iter()
+            .map(|d| *self.remote.get(&d.id).unwrap_or(&0))
+            .collect();
+        let updates: Vec<u64> = nodes.iter().map(|d| self.local[&d.id]).collect();
+        trace!("{:?}", updates);
         let id = self.id;
-        let ups = self.update_index;
-        (id, ups, data)
+        let mut out: Vec<(u64, (NodeInfo, u64))> = updates
+            .into_iter()
+            .zip(nodes.into_iter().zip(liveness))
+            .collect();
+        out.sort_by_key(|k| k.0);
+        let last_node = std::cmp::max(1, std::cmp::min(out.len(), max)) - 1;
+        let max_updated_node = out.get(last_node).map(|x| x.0).unwrap_or(0);
+        let updated_data: Vec<(NodeInfo, u64)> = out.into_iter().take(max).map(|x| x.1).collect();
+        trace!("get updates since response {} {}", v, updated_data.len());
+        (id, max_updated_node, updated_data)
     }
     pub fn valid_last_ids(&self) -> Vec<Hash> {
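The budgeting arithmetic in `max_updates` is easy to misread: `msg_size` is measured with one `(NodeInfo, u64)` entry already inside it, so the division counts how many *additional* entries fit in the leftover bytes, and the `+ 1` re-adds the measured one. A worked example with hypothetical sizes (the numbers are illustrative, not the real serialized sizes):

```rust
fn main() {
    let max_bytes: usize = 1024 * 64 - 512; // 65_024-byte budget
    let msg_size: usize = 360; // header + one entry (illustrative)
    let unit_size: usize = 300; // one (NodeInfo, u64) entry (illustrative)

    // Same formula as max_updates(): leftover bytes buy whole extra entries,
    // then +1 re-counts the entry already included in msg_size.
    let max = (max_bytes - msg_size) / unit_size + 1;
    assert_eq!(max, 216); // (65_024 - 360) / 300 + 1 = 215 + 1
    println!("{} updates fit in {} bytes", max, max_bytes);
}
```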
@@ -852,24 +885,19 @@ impl ClusterInfo {
     /// * `from` - identity of the sender of the updates
     /// * `update_index` - the number of updates that `from` has completed and this set of `data` represents
     /// * `data` - the update data
-    fn apply_updates(
-        &mut self,
-        from: Pubkey,
-        update_index: u64,
-        data: &[NodeInfo],
-        external_liveness: &[(Pubkey, u64)],
-    ) {
+    fn apply_updates(&mut self, from: Pubkey, update_index: u64, data: &[(NodeInfo, u64)]) {
         trace!("got updates {}", data.len());
         // TODO we need to punish/spam resist here
         // sigverify the whole update and slash anyone who sends a bad update
         let mut insert_total = 0;
         for v in data {
-            insert_total += self.insert(&v);
+            insert_total += self.insert(&v.0);
         }
         inc_new_counter_info!("cluster_info-update-count", insert_total);
-        for (pubkey, external_remote_index) in external_liveness {
-            let remote_entry = if let Some(v) = self.remote.get(pubkey) {
+        for (node, external_remote_index) in data {
+            let pubkey = node.id;
+            let remote_entry = if let Some(v) = self.remote.get(&pubkey) {
                 *v
             } else {
                 0
@@ -881,7 +909,7 @@ impl ClusterInfo {
             let liveness_entry = self
                 .external_liveness
-                .entry(*pubkey)
+                .entry(pubkey)
                 .or_insert_with(HashMap::new);
             let peer_index = *liveness_entry.entry(from).or_insert(*external_remote_index);
             if *external_remote_index > peer_index {
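Since liveness now rides along with each node instead of in a separate vector, the loop above merges it per node: the sender's claimed `remote` index is recorded under `external_liveness[node.id][from]`, keeping the largest value seen. A minimal stand-alone sketch of just that max-merge rule (plain `HashMap`s in place of the `ClusterInfo` fields, and none of the surrounding bookkeeping):

```rust
use std::collections::HashMap;

type Pubkey = [u8; 32];

// external_liveness[node][reporter] = highest update index `reporter`
// has ever claimed to have seen from `node`.
fn record_claim(
    external_liveness: &mut HashMap<Pubkey, HashMap<Pubkey, u64>>,
    node: Pubkey,
    reporter: Pubkey,
    claimed_index: u64,
) {
    let entry = external_liveness
        .entry(node)
        .or_insert_with(HashMap::new)
        .entry(reporter)
        .or_insert(claimed_index);
    // Keep only the largest index; stale claims are ignored.
    if claimed_index > *entry {
        *entry = claimed_index;
    }
}

fn main() {
    let mut liveness = HashMap::new();
    let (node, reporter) = ([1u8; 32], [2u8; 32]);
    record_claim(&mut liveness, node, reporter, 5);
    record_claim(&mut liveness, node, reporter, 3); // stale, ignored
    record_claim(&mut liveness, node, reporter, 9);
    assert_eq!(liveness[&node][&reporter], 9);
}
```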
@@ -1055,20 +1083,8 @@ impl ClusterInfo {
                     inc_new_counter_info!("cluster_info-window-request-updates-unspec-ncp", 1);
                     from.contact_info.ncp = *from_addr;
                 }
-                let (from_id, ups, data, liveness) = {
-                    let me = me.read().unwrap();
-                    // only lock for these two calls, dont lock during IO `sock.send_to` or `sock.recv_from`
-                    let (from_id, ups, data) = me.get_updates_since(version);
-                    (
-                        from_id,
-                        ups,
-                        data,
-                        me.remote.iter().map(|(k, v)| (*k, *v)).collect(),
-                    )
-                };
+                let max = Self::max_updates(1024 * 64 - 512);
+                let (from_id, ups, data) = me.read().unwrap().get_updates_since(version, max);
                 // update entry only after collecting liveness
                 {
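The rewritten call preserves the deleted comment's intent: the read lock on `me` is scoped to the `get_updates_since` expression itself and released before any socket IO. A generic sketch of that lock-scoping pattern (the `println!` stands in for the real code's socket sends):

```rust
use std::sync::RwLock;

fn main() {
    let shared = RwLock::new(vec![1u32, 2, 3]);
    // Hold the lock only long enough to copy out what the IO path needs...
    let snapshot: Vec<u32> = shared.read().unwrap().clone();
    // ...so the slow work runs lock-free and can't block writers.
    for v in snapshot {
        println!("send {}", v);
    }
}
```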
@@ -1077,10 +1093,10 @@ impl ClusterInfo {
                     me.update_liveness(from.id);
                 }
-                trace!("get updates since response {} {}", version, data.len());
                 let len = data.len();
-                if len < 1 {
+                trace!("get updates since response {} {}", version, len);
+                if data.is_empty() {
                     let me = me.read().unwrap();
                     trace!(
                         "no updates me {} ix {} since {}",
@@ -1090,7 +1106,11 @@ impl ClusterInfo {
                     );
                     None
                 } else {
-                    let rsp = Protocol::ReceiveUpdates(from_id, ups, data, liveness);
+                    let rsp = Protocol::ReceiveUpdates {
+                        from: from_id,
+                        max_update_index: ups,
+                        nodes: data,
+                    };
                     if let Ok(r) = to_blob(rsp, from.contact_info.ncp) {
                         trace!(
@@ -1107,22 +1127,26 @@ impl ClusterInfo {
                     }
                 }
             }
-            Protocol::ReceiveUpdates(from, update_index, data, external_liveness) => {
+            Protocol::ReceiveUpdates {
+                from,
+                max_update_index,
+                nodes,
+            } => {
                 let now = Instant::now();
                 trace!(
                     "ReceivedUpdates from={} update_index={} len={}",
                     from,
-                    update_index,
-                    data.len()
+                    max_update_index,
+                    nodes.len()
                 );
                 me.write()
                     .expect("'me' write lock in ReceiveUpdates")
-                    .apply_updates(from, update_index, &data, &external_liveness);
+                    .apply_updates(from, max_update_index, &nodes);
                 report_time_spent(
                     "ReceiveUpdates",
                     &now.elapsed(),
-                    &format!(" len: {}", data.len()),
+                    &format!(" len: {}", nodes.len()),
                 );
                 None
             }
@@ -1361,6 +1385,7 @@ fn report_time_spent(label: &str, time: &Duration, extra: &str) {
 #[cfg(test)]
 mod tests {
+    use bincode::serialize;
     use budget_instruction::Vote;
     use cluster_info::{
         ClusterInfo, ClusterInfoError, Node, NodeInfo, Protocol, FULLNODE_PORT_RANGE,
@@ -1465,9 +1490,9 @@ mod tests {
         //should be accepted, since the update is for the same address field as the one we know
         assert_eq!(cluster_info.table[&d.id].version, 1);
     }
-    fn sorted(ls: &Vec<NodeInfo>) -> Vec<NodeInfo> {
+    fn sorted(ls: &Vec<(NodeInfo, u64)>) -> Vec<(NodeInfo, u64)> {
         let mut copy: Vec<_> = ls.iter().cloned().collect();
-        copy.sort_by(|x, y| x.id.cmp(&y.id));
+        copy.sort_by(|x, y| x.0.id.cmp(&y.0.id));
         copy
     }
     #[test]
@@ -1484,42 +1509,92 @@ mod tests {
         assert_eq!(d1.contact_info.tpu, socketaddr!("127.0.0.1:1234"));
     }
     #[test]
+    fn max_updates() {
+        let size = 1024 * 64 - 512;
+        let num = ClusterInfo::max_updates(size);
+        let msg = Protocol::ReceiveUpdates {
+            from: Default::default(),
+            max_update_index: 0,
+            nodes: vec![(NodeInfo::new_unspecified(), 0); num],
+        };
+        trace!("{} {} {}", serialize(&msg).unwrap().len(), size, num);
+        assert!(serialize(&msg).unwrap().len() <= size);
+    }
+    #[test]
     fn update_test() {
         let d1 = NodeInfo::new_localhost(Keypair::new().pubkey());
         let d2 = NodeInfo::new_localhost(Keypair::new().pubkey());
         let d3 = NodeInfo::new_localhost(Keypair::new().pubkey());
         let mut cluster_info = ClusterInfo::new(d1.clone()).expect("ClusterInfo::new");
-        let (key, ix, ups) = cluster_info.get_updates_since(0);
+        let (key, ix, ups) = cluster_info.get_updates_since(0, 1);
         assert_eq!(key, d1.id);
         assert_eq!(ix, 1);
         assert_eq!(ups.len(), 1);
-        assert_eq!(sorted(&ups), sorted(&vec![d1.clone()]));
+        assert_eq!(sorted(&ups), sorted(&vec![(d1.clone(), 0)]));
         cluster_info.insert(&d2);
-        let (key, ix, ups) = cluster_info.get_updates_since(0);
+        let (key, ix, ups) = cluster_info.get_updates_since(0, 2);
         assert_eq!(key, d1.id);
         assert_eq!(ix, 2);
         assert_eq!(ups.len(), 2);
-        assert_eq!(sorted(&ups), sorted(&vec![d1.clone(), d2.clone()]));
+        assert_eq!(
+            sorted(&ups),
+            sorted(&vec![(d1.clone(), 0), (d2.clone(), 0)])
+        );
         cluster_info.insert(&d3);
-        let (key, ix, ups) = cluster_info.get_updates_since(0);
+        let (key, ix, ups) = cluster_info.get_updates_since(0, 3);
         assert_eq!(key, d1.id);
         assert_eq!(ix, 3);
         assert_eq!(ups.len(), 3);
         assert_eq!(
             sorted(&ups),
-            sorted(&vec![d1.clone(), d2.clone(), d3.clone()])
+            sorted(&vec![(d1.clone(), 0), (d2.clone(), 0), (d3.clone(), 0)])
         );
         let mut cluster_info2 = ClusterInfo::new(d2.clone()).expect("ClusterInfo::new");
-        cluster_info2.apply_updates(key, ix, &ups, &vec![]);
+        cluster_info2.apply_updates(key, ix, &ups);
         assert_eq!(cluster_info2.table.values().len(), 3);
         assert_eq!(
-            sorted(&cluster_info2.table.values().map(|x| x.clone()).collect()),
-            sorted(&cluster_info.table.values().map(|x| x.clone()).collect())
+            sorted(
+                &cluster_info2
+                    .table
+                    .values()
+                    .map(|x| (x.clone(), 0))
+                    .collect()
+            ),
+            sorted(
+                &cluster_info
+                    .table
+                    .values()
+                    .map(|x| (x.clone(), 0))
+                    .collect()
+            )
         );
         let d4 = NodeInfo::new_entry_point(&socketaddr!("127.0.0.4:1234"));
         cluster_info.insert(&d4);
-        let (_key, _ix, ups) = cluster_info.get_updates_since(0);
-        assert_eq!(sorted(&ups), sorted(&vec![d2.clone(), d1, d3]));
+        let (_key, ix, ups) = cluster_info.get_updates_since(0, 3);
+        assert_eq!(
+            sorted(&ups),
+            sorted(&vec![(d2.clone(), 0), (d1.clone(), 0), (d3.clone(), 0)])
+        );
+        assert_eq!(ix, 3);
+        let (_key, ix, ups) = cluster_info.get_updates_since(0, 2);
+        assert_eq!(
+            sorted(&ups),
+            sorted(&vec![(d2.clone(), 0), (d1.clone(), 0)])
+        );
+        assert_eq!(ix, 2);
+        let (_key, ix, ups) = cluster_info.get_updates_since(0, 1);
+        assert_eq!(sorted(&ups), sorted(&vec![(d1.clone(), 0)]));
+        assert_eq!(ix, 1);
+        let (_key, ix, ups) = cluster_info.get_updates_since(1, 3);
+        assert_eq!(ups.len(), 2);
+        assert_eq!(ix, 3);
+        assert_eq!(sorted(&ups), sorted(&vec![(d2, 0), (d3, 0)]));
+        let (_key, ix, ups) = cluster_info.get_updates_since(3, 3);
+        assert_eq!(ups.len(), 0);
+        assert_eq!(ix, 0);
     }
     #[test]
     fn window_index_request() {
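The new `ix` return value is what makes the size cap composable: it is the highest update index included in the truncated response, so a requester can feed it back as the next `since` value and page through the table, exactly as the new assertions above exercise. A toy model of that pagination contract (plain integers standing in for the real node table):

```rust
// Toy model: `updates` are (update_index, payload) pairs; `since` and `max`
// mimic get_updates_since's contract of "newer than since, oldest first,
// at most max, plus the highest index actually returned".
fn page<'a>(updates: &[(u64, &'a str)], since: u64, max: usize) -> (u64, Vec<(u64, &'a str)>) {
    let mut newer: Vec<_> = updates.iter().filter(|u| u.0 > since).cloned().collect();
    newer.sort_by_key(|u| u.0);
    newer.truncate(max);
    let ix = newer.last().map(|u| u.0).unwrap_or(0);
    (ix, newer)
}

fn main() {
    let updates = [(1, "d1"), (2, "d2"), (3, "d3")];
    let (ix, first) = page(&updates, 0, 2);
    assert_eq!((ix, first.len()), (2, 2));
    // Feed `ix` back as `since` to fetch the remainder.
    let (ix, rest) = page(&updates, ix, 2);
    assert_eq!((ix, rest.len()), (3, 1));
}
```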

View File

@@ -651,30 +651,28 @@ fn test_multi_node_dynamic_network() {
     let t1: Vec<_> = (0..num_nodes)
         .into_iter()
         .map(|n| {
-            let leader_data = leader_data.clone();
-            let alice_clone = alice_arc.clone();
             Builder::new()
                 .name("keypair-thread".to_string())
                 .spawn(move || {
                     info!("Spawned thread {}", n);
-                    let keypair = Keypair::new();
-                    //send some tokens to the new validators
-                    let bal = retry_send_tx_and_retry_get_balance(
-                        &leader_data,
-                        &alice_clone.read().unwrap(),
-                        &keypair.pubkey(),
-                        Some(500),
-                    );
-                    assert_eq!(bal, Some(500));
-                    info!("sent balance to[{}/{}] {}", n, num_nodes, keypair.pubkey());
-                    keypair
+                    Keypair::new()
                 }).unwrap()
         }).collect();
     info!("Waiting for keypairs to be created");
     let keypairs: Vec<_> = t1.into_iter().map(|t| t.join().unwrap()).collect();
     info!("keypairs created");
+    keypairs.iter().enumerate().for_each(|(n, keypair)| {
+        //send some tokens to the new validators
+        let bal = retry_send_tx_and_retry_get_balance(
+            &leader_data,
+            &alice_arc.read().unwrap(),
+            &keypair.pubkey(),
+            Some(500),
+        );
+        assert_eq!(bal, Some(500));
+        info!("sent balance to [{}/{}] {}", n, num_nodes, keypair.pubkey());
+    });
    let t2: Vec<_> = keypairs
         .into_iter()
         .map(|keypair| {
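This test refactor separates the two concerns that previously shared each thread: keypair generation stays parallel, while funding now runs serially after all joins, so the read lock on `alice_arc` is no longer held across network retries in many threads at once. A schematic of the same spawn/join/then-iterate pattern, with placeholder work standing in for the test's real keypair generation and funding calls:

```rust
use std::thread::Builder;

fn main() {
    let num_nodes = 4;
    // Parallel phase: spawn cheap, independent work (keypair creation
    // in the real test; a trivial computation here).
    let handles: Vec<_> = (0..num_nodes)
        .map(|n| {
            Builder::new()
                .name("keypair-thread".to_string())
                .spawn(move || {
                    println!("Spawned thread {}", n);
                    n * 2 // stand-in for Keypair::new()
                })
                .unwrap()
        })
        .collect();
    // Join before touching shared state.
    let keys: Vec<_> = handles.into_iter().map(|t| t.join().unwrap()).collect();
    // Serial phase: work that needs shared resources (the funding RPCs in
    // the real test) runs one item at a time, off the worker threads.
    keys.iter().enumerate().for_each(|(n, key)| {
        println!("funded [{}/{}] key={}", n, num_nodes, key);
    });
}
```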