retains hash value of outdated responses received from pull requests (#12513) (#12603)

pull_response_fail_inserts has been increasing: https://cdn.discordapp.com/attachments/478692221441409024/759096187587657778/pull_response_fail_insert.png but outdated values which fail to insert: https://github.com/solana-labs/solana/blob/a5c3fc14b3/core/src/crds_gossip_pull.rs#L332-L344 https://github.com/solana-labs/solana/blob/a5c3fc14b3/core/src/crds.rs#L104-L108 are not recorded anywhere, and so the next pull request may obtain the same redundant payload again, unnecessarily taking bandwidth. This commit holds on to the hashes of failed inserts for a while, similar to purged_values: https://github.com/solana-labs/solana/blob/a5c3fc14b3/core/src/crds_gossip_pull.rs#L380 and filters them out for the next pull request: https://github.com/solana-labs/solana/blob/a5c3fc14b3/core/src/crds_gossip_pull.rs#L204 (cherry picked from commit 1866521df6) Co-authored-by: behzad nouri <behzadnouri@gmail.com>
2020-10-01 01:47:20 +00:00
parent afbdcf3068
commit fbe5a89e74
5 changed files with 112 additions and 64 deletions
--- a/core/src/cluster_info.rs
+++ b/core/src/cluster_info.rs
@@ -1933,16 +1933,20 @@ impl ClusterInfo {
        let filtered_len = crds_values.len();

        let mut pull_stats = ProcessPullStats::default();
-        let (filtered_pulls, filtered_pulls_expired_timeout) = self
+        let (filtered_pulls, filtered_pulls_expired_timeout, failed_inserts) = self
            .time_gossip_read_lock("filter_pull_resp", &self.stats.filter_pull_response)
            .filter_pull_responses(timeouts, crds_values, timestamp(), &mut pull_stats);

-        if !filtered_pulls.is_empty() || !filtered_pulls_expired_timeout.is_empty() {
+        if !filtered_pulls.is_empty()
+            || !filtered_pulls_expired_timeout.is_empty()
+            || !failed_inserts.is_empty()
+        {
            self.time_gossip_write_lock("process_pull_resp", &self.stats.process_pull_response)
                .process_pull_responses(
                    from,
                    filtered_pulls,
                    filtered_pulls_expired_timeout,
+                    failed_inserts,
                    timestamp(),
                    &mut pull_stats,
                );
@@ -2156,9 +2160,13 @@ impl ClusterInfo {

    fn print_reset_stats(&self, last_print: &mut Instant) {
        if last_print.elapsed().as_millis() > 2000 {
-            let (table_size, purged_values_size) = {
+            let (table_size, purged_values_size, failed_inserts_size) = {
                let r_gossip = self.gossip.read().unwrap();
-                (r_gossip.crds.table.len(), r_gossip.pull.purged_values.len())
+                (
+                    r_gossip.crds.table.len(),
+                    r_gossip.pull.purged_values.len(),
+                    r_gossip.pull.failed_inserts.len(),
+                )
            };
            datapoint_info!(
                "cluster_info_stats",
@@ -2185,6 +2193,7 @@ impl ClusterInfo {
                ),
                ("table_size", table_size as i64, i64),
                ("purged_values_size", purged_values_size as i64, i64),
+                ("failed_inserts_size", failed_inserts_size as i64, i64),
            );
            datapoint_info!(
                "cluster_info_stats2",