samples repair peers using WeightedIndex (#13919)
To output one random sample, weighted_best generates n random numbers:
https://github.com/solana-labs/solana/blob/f751a5d4e/core/src/weighted_shuffle.rs#L38-L63
WeightedIndex does so with only one random number:
https://github.com/rust-random/rand/blob/eb02f0e46/src/distributions/weighted_index.rs#L223-L240
Additionally, once the index is constructed, each sample takes only O(log(n)) work, which can be achieved if RepairCache caches the weighted index:
https://github.com/solana-labs/solana/blob/f751a5d4e/core/src/serve_repair.rs#L83
Also, the repair-peers code can be reorganized to remove redundant unlock-then-lock code.
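Below is a minimal, hypothetical sketch (not the actual serve_repair code; the weights are made-up stand-ins for per-peer stakes) of why caching the WeightedIndex pays off: construction is O(n), but each subsequent sample draws a single random number and does an O(log(n)) binary search over the cumulative weights.

    // Hypothetical example, not taken from the solana codebase.
    use rand::distributions::{Distribution, WeightedIndex};

    fn main() {
        // Illustrative per-peer weights (e.g. stakes).
        let weights: Vec<u64> = vec![100, 10, 5, 1];
        // O(n) construction; cache this for as long as the weights are unchanged.
        let index = WeightedIndex::new(&weights).unwrap();
        let mut rng = rand::thread_rng();
        for _ in 0..3 {
            // Each sample uses one random number plus an O(log(n)) lookup.
            println!("sampled peer {}", index.sample(&mut rng));
        }
    }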
@@ -1170,13 +1170,15 @@ impl ClusterInfo {
     /// all validators that have a valid tvu port and are on the same `shred_version`.
     pub fn tvu_peers(&self) -> Vec<ContactInfo> {
+        let self_pubkey = self.id();
+        let self_shred_version = self.my_shred_version();
         self.time_gossip_read_lock("tvu_peers", &self.stats.tvu_peers)
             .crds
             .get_nodes_contact_info()
-            .filter(|x| {
-                ContactInfo::is_valid_address(&x.tvu)
-                    && x.id != self.id()
-                    && x.shred_version == self.my_shred_version()
+            .filter(|node| {
+                node.id != self_pubkey
+                    && node.shred_version == self_shred_version
+                    && ContactInfo::is_valid_address(&node.tvu)
             })
             .cloned()
             .collect()
     }
@@ -1200,22 +1202,24 @@ impl ClusterInfo {
     /// all tvu peers with valid gossip addrs that likely have the slot being requested
     pub fn repair_peers(&self, slot: Slot) -> Vec<ContactInfo> {
         let mut time = Measure::start("repair_peers");
-        let ret = ClusterInfo::tvu_peers(self)
-            .into_iter()
-            .filter(|x| {
-                x.id != self.id()
-                    && x.shred_version == self.my_shred_version()
-                    && ContactInfo::is_valid_address(&x.serve_repair)
-                    && {
-                        self.get_lowest_slot_for_node(&x.id, None, |lowest_slot, _| {
-                            lowest_slot.lowest <= slot
-                        })
-                        .unwrap_or_else(|| /* fallback to legacy behavior */ true)
-                    }
-            })
-            .collect();
+        // self.tvu_peers() already filters on:
+        //   node.id != self.id() &&
+        //   node.shred_version == self.my_shred_version()
+        let nodes = {
+            let gossip = self.gossip.read().unwrap();
+            self.tvu_peers()
+                .into_iter()
+                .filter(|node| {
+                    ContactInfo::is_valid_address(&node.serve_repair)
+                        && match gossip.crds.get_lowest_slot(node.id) {
+                            None => true, // fallback to legacy behavior
+                            Some(lowest_slot) => lowest_slot.lowest <= slot,
+                        }
+                })
+                .collect()
+        };
         self.stats.repair_peers.add_measure(&mut time);
-        ret
+        nodes
     }

     fn is_spy_node(contact_info: &ContactInfo) -> bool {
@@ -1654,7 +1658,7 @@ impl ClusterInfo {
         push_messages
             .into_iter()
             .filter_map(|(pubkey, messages)| {
-                let peer = gossip.crds.get_contact_info(&pubkey)?;
+                let peer = gossip.crds.get_contact_info(pubkey)?;
                 Some((peer.gossip, messages))
             })
             .collect()
@@ -2351,7 +2355,7 @@ impl ClusterInfo {
         let gossip = self.gossip.read().unwrap();
         messages
             .iter()
-            .map(|(from, _)| match gossip.crds.get_contact_info(from) {
+            .map(|(from, _)| match gossip.crds.get_contact_info(*from) {
                 None => 0,
                 Some(info) => info.shred_version,
             })
@@ -2424,7 +2428,7 @@ impl ClusterInfo {
             .into_par_iter()
             .with_min_len(256)
             .filter_map(|(from, prunes)| {
-                let peer = gossip.crds.get_contact_info(&from)?;
+                let peer = gossip.crds.get_contact_info(from)?;
                 let mut prune_data = PruneData {
                     pubkey: self_pubkey,
                     prunes,