Metrics for window repair (#2106)

* Metrics for window repair

- Also increase max repair length

* fix vote counters, and add repair window graph

* update per node graphs

* revert max repair length change
This commit is contained in:
Pankaj Garg
2018-12-11 15:43:41 -08:00
committed by GitHub
parent 2238725d1c
commit 9243bc58db
6 changed files with 254 additions and 56 deletions

View File

@ -15,8 +15,8 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 74,
"iteration": 1544477352265,
"id": 79,
"iteration": 1544546712840,
"links": [
{
"asDropdown": true,
@ -458,7 +458,7 @@
"hide": false,
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"counter-cluster_info-vote-count\" WHERE $timeFilter \n",
"query": "SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter \n",
"rawQuery": true,
"refId": "A",
"resultFormat": "table",
@ -530,44 +530,6 @@
"stack": false,
"steppedLine": false,
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-cluster_info-vote-count\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"count"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": []
},
{
"groupBy": [
{
@ -585,7 +547,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-validator-vote_sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT sum(\"count\") AS \"total\" FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
@ -622,9 +584,9 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-vote_stage-leader_sent_vote\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n\n\n",
"query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-validator-vote_sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "C",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
@ -4995,7 +4957,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "UDP Net Stats (validators)",
"title": "UDP Net Stats ($hostid)",
"tooltip": {
"shared": true,
"sort": 0,
@ -5089,7 +5051,7 @@
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT sum(\"count\") AS \"retransmit\" FROM \"$testnet\".\"autogen\".\"retransmit-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT sum(\"count\") AS \"retransmit\" FROM \"$testnet\".\"autogen\".\"retransmit-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@ -5127,7 +5089,7 @@
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT sum(\"count\") AS \"replicate\" FROM \"$testnet\".\"autogen\".\"replicate-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT sum(\"count\") AS \"replicate\" FROM \"$testnet\".\"autogen\".\"replicate-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
@ -5165,7 +5127,7 @@
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT sum(\"count\") AS \"retransmit_q\" FROM \"$testnet\".\"autogen\".\"retransmit-queue\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT sum(\"count\") AS \"retransmit_q\" FROM \"$testnet\".\"autogen\".\"retransmit-queue\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "C",
"resultFormat": "time_series",
@ -5203,7 +5165,7 @@
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT sum(\"count\") AS \"recv_window\" FROM \"$testnet\".\"autogen\".\"recv-window\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT sum(\"count\") AS \"recv_window\" FROM \"$testnet\".\"autogen\".\"recv-window\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "D",
"resultFormat": "time_series",
@ -5227,7 +5189,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Channel Pressure (validator)",
"title": "Channel Pressure ($hostid)",
"tooltip": {
"shared": true,
"sort": 0,
@ -5320,7 +5282,7 @@
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT last(\"consumed\") AS \"validator\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
"query": "SELECT last(\"consumed\") AS \"validator\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
@ -5382,7 +5344,201 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Leader broadcast vs Validator consume",
"title": "Leader broadcast vs Validator consume ($hostid)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
"cluster-info.repair": "#ba43a9",
"window-service.receive": "#b7dbab",
"window-stage.consumed": "#5195ce"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Solana Metrics (read-only)",
"fill": 1,
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 71
},
"id": 42,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 1,
"points": true,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "counter-cluster_info-vote-count",
"orderByTime": "ASC",
"policy": "autogen",
"query": "SELECT last(\"last-recv\") AS \"receive\" FROM \"$testnet\".\"autogen\".\"window-service\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"count"
],
"type": "field"
},
{
"params": [],
"type": "sum"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT last(\"consumed\") AS \"consumed\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT last(\"repair-ix\") AS \"repair\" FROM \"$testnet\".\"autogen\".\"cluster-info\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
"rawQuery": true,
"refId": "C",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Receive/Consume/Repair ($hostid)",
"tooltip": {
"shared": true,
"sort": 0,
@ -5425,7 +5581,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 71
"y": 76
},
"id": 40,
"panels": [],
@ -5443,7 +5599,7 @@
"h": 5,
"w": 12,
"x": 0,
"y": 72
"y": 77
},
"id": 41,
"legend": {
@ -5575,6 +5731,25 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"datasource": "Solana Metrics (read-only)",
"hide": 0,
"includeAll": false,
"label": "HostID",
"multi": false,
"name": "hostid",
"options": [],
"query": "SELECT DISTINCT(\"host_id\") FROM \"$testnet\".\"autogen\".\"counter-bank-process_transactions-txs\" ",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
@ -5610,5 +5785,5 @@
"timezone": "",
"title": "Testnet Monitor (edge)",
"uid": "testnet-edge",
"version": 112
}
"version": 113
}