diff --git a/core/src/cluster_info.rs b/core/src/cluster_info.rs index 57a314d0aa..2741fff685 100644 --- a/core/src/cluster_info.rs +++ b/core/src/cluster_info.rs @@ -745,7 +745,11 @@ impl ClusterInfo { let (peers, peers_and_stakes) = self.sorted_tvu_peers_and_stakes(stakes); let broadcast_len = peers_and_stakes.len(); if broadcast_len == 0 { - datapoint_debug!("cluster_info-num_nodes", ("count", 1, i64)); + datapoint_debug!( + "cluster_info-num_nodes", + ("live_count", 1, i64), + ("broadcast_count", 1, i64) + ); return Ok(()); } let mut packets: Vec<_> = shreds @@ -768,7 +772,18 @@ impl ClusterInfo { } } - datapoint_debug!("cluster_info-num_nodes", ("count", broadcast_len + 1, i64)); + let mut num_live_peers = 1i64; + peers.iter().for_each(|p| { + // A peer is live if its contact info was generated recently; saturating_sub keeps a peer whose wallclock is ahead of our clock from underflowing the age + if timestamp().saturating_sub(p.wallclock) <= CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS { + num_live_peers += 1; + } + }); + datapoint_debug!( + "cluster_info-num_nodes", + ("live_count", num_live_peers, i64), + ("broadcast_count", broadcast_len + 1, i64) + ); Ok(()) } diff --git a/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json b/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json index b85987bfe0..f3b30d2f74 100644 --- a/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json +++ b/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json @@ -236,7 +236,7 @@ "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT LAST(median) FROM ( SELECT median(count) FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND count > 0 GROUP BY time(5s) )\n", + "query": "SELECT LAST(median) FROM ( SELECT median(live_count) FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND live_count > 0 GROUP BY time(5s) )\n", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -1713,9 +1713,47 @@ "hide": false, "orderByTime": "ASC", "policy": "default", 
- "query": "SELECT median(\"count\") AS \"total\" FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND count > 0 GROUP BY time(5s)", + "query": "SELECT median(\"broadcast_count\") AS \"broadcast_total\" FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND broadcast_count > 0 GROUP BY time(5s)", "rawQuery": true, - "refId": "C", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT median(\"live_count\") AS \"live_total\" FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND live_count > 0 GROUP BY time(5s)", + "rawQuery": true, + "refId": "B", "resultFormat": "time_series", "select": [ [