samples repair peers using WeightedIndex (#13919)

To output one random sample, weighted_best generates n random numbers:
https://github.com/solana-labs/solana/blob/f751a5d4e/core/src/weighted_shuffle.rs#L38-L63
WeightedIndex does so with only one random number:
https://github.com/rust-random/rand/blob/eb02f0e46/src/distributions/weighted_index.rs#L223-L240
Additionally, if the index is already constructed, each sample takes
only O(log(n)) work in total; this can be achieved if RepairCache
caches the weighted index:
https://github.com/solana-labs/solana/blob/f751a5d4e/core/src/serve_repair.rs#L83
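
Below is a minimal standalone sketch of this pattern, using the rand
crate directly rather than the actual RepairCache plumbing; the peer
weights are made up for illustration:

    use rand::distributions::{Distribution, WeightedIndex};

    fn main() {
        // Hypothetical stake weights for four repair peers.
        let weights = [100u64, 10, 5, 1];
        // Building the index is O(n); caching it amortizes this cost
        // across many samples.
        let index = WeightedIndex::new(&weights).unwrap();
        let mut rng = rand::thread_rng();
        // Each sample consumes a single random number and does
        // O(log(n)) work: a binary search over cumulative weights.
        for _ in 0..3 {
            println!("sampled peer {}", index.sample(&mut rng));
        }
    }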

Also, the repair-peers code can be reorganized to have less redundant
unlock-then-lock code.
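
A minimal sketch of that reorganization, with types and names
simplified well beyond the actual ClusterInfo code: the gossip read
lock is taken once before iterating, instead of being re-acquired for
every candidate peer inside the filter:

    use std::sync::RwLock;

    struct Gossip;

    impl Gossip {
        // Stand-in for the per-peer crds lookup (hypothetical).
        fn lowest_slot(&self, _peer: usize) -> Option<u64> {
            None
        }
    }

    fn repair_peers(gossip: &RwLock<Gossip>, peers: Vec<usize>, slot: u64) -> Vec<usize> {
        // One lock acquisition covers the whole filter pass; the old
        // code effectively unlocked and re-locked once per peer.
        let gossip = gossip.read().unwrap();
        peers
            .into_iter()
            .filter(|&peer| match gossip.lowest_slot(peer) {
                None => true, // fallback to legacy behavior
                Some(lowest) => lowest <= slot,
            })
            .collect()
    }
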
Author: behzad nouri
Date: 2020-12-03 14:26:07 +00:00 (committed by GitHub)
Parent: b4c24bfa42
Commit: c3048b451d
6 changed files with 82 additions and 59 deletions

@@ -1170,13 +1170,15 @@ impl ClusterInfo {
     /// all validators that have a valid tvu port and are on the same `shred_version`.
     pub fn tvu_peers(&self) -> Vec<ContactInfo> {
+        let self_pubkey = self.id();
+        let self_shred_version = self.my_shred_version();
         self.time_gossip_read_lock("tvu_peers", &self.stats.tvu_peers)
             .crds
             .get_nodes_contact_info()
-            .filter(|x| {
-                ContactInfo::is_valid_address(&x.tvu)
-                    && x.id != self.id()
-                    && x.shred_version == self.my_shred_version()
+            .filter(|node| {
+                node.id != self_pubkey
+                    && node.shred_version == self_shred_version
+                    && ContactInfo::is_valid_address(&node.tvu)
             })
             .cloned()
             .collect()
@@ -1200,22 +1202,24 @@ impl ClusterInfo {
     /// all tvu peers with valid gossip addrs that likely have the slot being requested
     pub fn repair_peers(&self, slot: Slot) -> Vec<ContactInfo> {
         let mut time = Measure::start("repair_peers");
-        let ret = ClusterInfo::tvu_peers(self)
-            .into_iter()
-            .filter(|x| {
-                x.id != self.id()
-                    && x.shred_version == self.my_shred_version()
-                    && ContactInfo::is_valid_address(&x.serve_repair)
-                    && {
-                        self.get_lowest_slot_for_node(&x.id, None, |lowest_slot, _| {
-                            lowest_slot.lowest <= slot
-                        })
-                        .unwrap_or_else(|| /* fallback to legacy behavior */ true)
-                    }
-            })
-            .collect();
+        // self.tvu_peers() already filters on:
+        //   node.id != self.id() &&
+        //   node.shred_version == self.my_shred_version()
+        let nodes = {
+            let gossip = self.gossip.read().unwrap();
+            self.tvu_peers()
+                .into_iter()
+                .filter(|node| {
+                    ContactInfo::is_valid_address(&node.serve_repair)
+                        && match gossip.crds.get_lowest_slot(node.id) {
+                            None => true, // fallback to legacy behavior
+                            Some(lowest_slot) => lowest_slot.lowest <= slot,
+                        }
+                })
+                .collect()
+        };
         self.stats.repair_peers.add_measure(&mut time);
-        ret
+        nodes
     }

     fn is_spy_node(contact_info: &ContactInfo) -> bool {
@@ -1654,7 +1658,7 @@ impl ClusterInfo {
         push_messages
             .into_iter()
             .filter_map(|(pubkey, messages)| {
-                let peer = gossip.crds.get_contact_info(&pubkey)?;
+                let peer = gossip.crds.get_contact_info(pubkey)?;
                 Some((peer.gossip, messages))
             })
             .collect()
@@ -2351,7 +2355,7 @@ impl ClusterInfo {
         let gossip = self.gossip.read().unwrap();
         messages
             .iter()
-            .map(|(from, _)| match gossip.crds.get_contact_info(from) {
+            .map(|(from, _)| match gossip.crds.get_contact_info(*from) {
                 None => 0,
                 Some(info) => info.shred_version,
             })
@@ -2424,7 +2428,7 @@ impl ClusterInfo {
            .into_par_iter()
            .with_min_len(256)
            .filter_map(|(from, prunes)| {
-                let peer = gossip.crds.get_contact_info(&from)?;
+                let peer = gossip.crds.get_contact_info(from)?;
                 let mut prune_data = PruneData {
                     pubkey: self_pubkey,
                     prunes,