Fix metrics when leader does not report metrics (#5291)

Justin Starry
2019-07-30 16:18:33 -04:00
committed by GitHub
parent 4a57cd3300
commit 84368697af
8 changed files with 143 additions and 164 deletions

View File

@ -32,9 +32,7 @@ use rand::SeedableRng;
 use rand::{thread_rng, Rng};
 use rand_chacha::ChaChaRng;
 use rayon::prelude::*;
-use solana_metrics::{
-    datapoint_debug, inc_new_counter_debug, inc_new_counter_error, inc_new_counter_warn,
-};
+use solana_metrics::{datapoint_debug, inc_new_counter_debug, inc_new_counter_error};
 use solana_netutil::{
     bind_in_range, bind_to, find_available_port_in_range, multi_bind_in_range, PortRange,
 };
@ -719,9 +717,10 @@ impl ClusterInfo {
         last_err?;
         inc_new_counter_debug!("cluster_info-broadcast-max_idx", blobs_len);
-        if broadcast_table_len != 0 {
-            inc_new_counter_warn!("broadcast_service-num_peers", broadcast_table_len + 1);
-        }
+        datapoint_info!(
+            "cluster_info-num_nodes",
+            ("count", broadcast_table_len + 1, i64)
+        );
         Ok(())
     }
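
Note: the broadcast path now emits an unconditional cluster_info-num_nodes datapoint instead of the leader-only broadcast_service-num_peers counter, so the dashboards can query a single measurement name. For reference, a minimal standalone sketch of that reporting call follows; the wrapper function is hypothetical, and only the measurement name and field mirror the hunk above.

    use solana_metrics::datapoint_info;

    // Hypothetical wrapper for illustration: publish the observed peer count
    // plus one (mirroring the hunk above) as an info-level datapoint.
    fn report_num_nodes(broadcast_table_len: usize) {
        datapoint_info!(
            "cluster_info-num_nodes",
            ("count", broadcast_table_len as i64 + 1, i64)
        );
    }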

View File

@ -144,14 +144,25 @@ impl ReplayStage {
                 if let Some((_, bank, lockouts)) = votable.into_iter().last() {
                     subscriptions.notify_subscribers(bank.slot(), &bank_forks);
-                    if let Some(new_leader) =
+                    if let Some(votable_leader) =
                         leader_schedule_cache.slot_leader_at(bank.slot(), Some(&bank))
                     {
                         Self::log_leader_change(
                             &my_pubkey,
                             bank.slot(),
                             &mut current_leader,
-                            &new_leader,
+                            &votable_leader,
+                        );
+                    }
+                    let next_slot = bank.slot() + 1;
+                    if let Some(new_leader) =
+                        leader_schedule_cache.slot_leader_at(next_slot, Some(&bank))
+                    {
+                        datapoint_info!(
+                            "replay_stage-new_leader",
+                            ("slot", next_slot, i64),
+                            ("leader", new_leader.to_string(), String),
                         );
                     }
@ -261,7 +272,7 @@ impl ReplayStage {
         assert!(!poh_recorder.lock().unwrap().has_bank());
-        let (reached_leader_tick, grace_ticks, poh_slot, parent_slot) =
+        let (reached_leader_tick, _grace_ticks, poh_slot, parent_slot) =
             poh_recorder.lock().unwrap().reached_leader_tick();
         if !reached_leader_tick {
@ -303,10 +314,10 @@ impl ReplayStage {
                 return;
             }
-            datapoint_warn!(
+            datapoint_info!(
                 "replay_stage-new_leader",
-                ("count", poh_slot, i64),
-                ("grace", grace_ticks, i64)
+                ("slot", poh_slot, i64),
+                ("leader", next_leader.to_string(), String),
             );
             let tpu_bank = bank_forks
@ -335,13 +346,16 @@ impl ReplayStage {
         }
     }

+    // Returns the replay result and the number of replayed transactions
     fn replay_blocktree_into_bank(
         bank: &Bank,
         blocktree: &Blocktree,
         progress: &mut HashMap<u64, ForkProgress>,
-    ) -> Result<()> {
+    ) -> (Result<()>, usize) {
+        let mut tx_count = 0;
         let result =
             Self::load_blocktree_entries(bank, blocktree, progress).and_then(|(entries, num)| {
+                tx_count += entries.iter().map(|e| e.transactions.len()).sum::<usize>();
                 Self::replay_entries_into_bank(bank, entries, progress, num)
             });
@ -354,7 +368,7 @@ impl ReplayStage {
             Self::mark_dead_slot(bank.slot(), blocktree, progress);
         }

-        result
+        (result, tx_count)
     }

     fn mark_dead_slot(slot: u64, blocktree: &Blocktree, progress: &mut HashMap<u64, ForkProgress>) {
@ -499,6 +513,7 @@ impl ReplayStage {
         slot_full_senders: &[Sender<(u64, Pubkey)>],
     ) -> bool {
         let mut did_complete_bank = false;
+        let mut tx_count = 0;
         let active_banks = bank_forks.read().unwrap().active_banks();
         trace!("active banks {:?}", active_banks);
@ -509,15 +524,16 @@ impl ReplayStage {
             }
             let bank = bank_forks.read().unwrap().get(*bank_slot).unwrap().clone();
-            if bank.collector_id() != my_pubkey
-                && Self::is_replay_result_fatal(&Self::replay_blocktree_into_bank(
-                    &bank, &blocktree, progress,
-                ))
-            {
+            if bank.collector_id() != my_pubkey {
+                let (replay_result, replay_tx_count) =
+                    Self::replay_blocktree_into_bank(&bank, &blocktree, progress);
+                tx_count += replay_tx_count;
+                if Self::is_replay_result_fatal(&replay_result) {
                     trace!("replay_result_fatal slot {}", bank_slot);
                     // If the bank was corrupted, don't try to run the below logic to check if the
                     // bank is completed
                     continue;
+                }
             }
             assert_eq!(*bank_slot, bank.slot());
             if bank.tick_height() == bank.max_tick_height() {
@ -532,6 +548,7 @@ impl ReplayStage {
                 );
             }
         }
+        inc_new_counter_info!("replay_stage-replay_transactions", tx_count);
         did_complete_bank
     }
@ -989,7 +1006,8 @@ mod test {
         progress.insert(bank0.slot(), ForkProgress::new(last_blockhash));
         let blob = blob_to_insert(&last_blockhash);
         blocktree.insert_data_blobs(&[blob]).unwrap();
-        let res = ReplayStage::replay_blocktree_into_bank(&bank0, &blocktree, &mut progress);
+        let (res, _tx_count) =
+            ReplayStage::replay_blocktree_into_bank(&bank0, &blocktree, &mut progress);
         // Check that the erroring bank was marked as dead in the progress map
         assert!(progress
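
Note: the core of the replay change is that replay_blocktree_into_bank now returns the number of replayed transactions alongside the result, and the caller accumulates the total and publishes it as replay_stage-replay_transactions. Below is a small self-contained sketch of that pattern; Entry, Transaction, and replay are illustrative stand-ins, not the real ReplayStage types.

    // Illustrative stand-ins for the ledger types used above.
    struct Transaction;
    struct Entry {
        transactions: Vec<Transaction>,
    }

    // Return the processing result together with the number of transactions seen,
    // so the caller can sum the counts across banks and report one counter value.
    fn replay(entries: &[Entry]) -> (Result<(), ()>, usize) {
        let tx_count = entries.iter().map(|e| e.transactions.len()).sum::<usize>();
        // ... replaying the entries would happen here ...
        (Ok(()), tx_count)
    }

    fn main() {
        let entries = vec![
            Entry { transactions: vec![] },
            Entry { transactions: vec![Transaction, Transaction] },
        ];
        let (result, tx_count) = replay(&entries);
        assert!(result.is_ok());
        assert_eq!(tx_count, 2);
    }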

View File

@ -14,6 +14,7 @@ use rand::SeedableRng;
 use rand_chacha::ChaChaRng;
 use solana_metrics::{datapoint_info, inc_new_counter_error};
 use solana_runtime::epoch_schedule::EpochSchedule;
+use std::cmp;
 use std::net::UdpSocket;
 use std::sync::atomic::AtomicBool;
 use std::sync::mpsc::channel;
@ -39,12 +40,13 @@ fn retransmit(
     let r_bank = bank_forks.read().unwrap().working_bank();
     let bank_epoch = r_bank.get_stakers_epoch(r_bank.slot());
+    let mut peers_len = 0;
     for blob in &blobs {
         let (my_index, mut peers) = cluster_info.read().unwrap().shuffle_peers_and_index(
             staking_utils::staked_nodes_at_epoch(&r_bank, bank_epoch).as_ref(),
             ChaChaRng::from_seed(blob.read().unwrap().seed()),
         );
+        peers_len = cmp::max(peers_len, peers.len());
         peers.remove(my_index);
         let (neighbors, children) = compute_retransmit_peers(DATA_PLANE_FANOUT, my_index, peers);
@ -58,6 +60,7 @@ fn retransmit(
             ClusterInfo::retransmit_to(&cluster_info, &children, blob, leader, sock, true)?;
         }
     }
+    datapoint_info!("cluster_info-num_nodes", ("count", peers_len, i64));
     Ok(())
 }
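
Note: retransmit runs on validators generally, not just the leader, so reporting the node count from here keeps cluster_info-num_nodes populated even when the leader reports nothing; the code above keeps the largest peer set seen across the blob batch. A standalone sketch of that bookkeeping, with an illustrative helper name:

    use std::cmp;

    // Illustrative helper: reduce a batch of per-blob peer counts to the single
    // largest value, which is what gets reported as one datapoint after the loop.
    fn largest_peer_set(peer_counts_per_blob: &[usize]) -> usize {
        let mut peers_len = 0;
        for &count in peer_counts_per_blob {
            peers_len = cmp::max(peers_len, count);
        }
        peers_len
    }

    fn main() {
        assert_eq!(largest_peer_set(&[3, 7, 5]), 7);
        assert_eq!(largest_peer_set(&[]), 0);
    }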

View File

@ -276,7 +276,7 @@ impl Validator {
             &exit,
         );

-        datapoint_info!("validator-new");
+        datapoint_info!("validator-new", ("id", id.to_string(), String));
         Self {
             id,
             gossip_service,

View File

@ -55,7 +55,7 @@ if channel == 'local':
         'multi': False,
         'name': 'hostid',
         'options': [],
-        'query': 'SELECT DISTINCT(\"host_id\") FROM \"$testnet\".\"autogen\".\"validator-new\" ',
+        'query': 'SELECT DISTINCT(\"id\") FROM \"$testnet\".\"autogen\".\"validator-new\" ',
         'refresh': 2,
         'regex': '',
         'sort': 1,
@ -138,7 +138,7 @@ else:
         'multi': False,
         'name': 'hostid',
         'options': [],
-        'query': 'SELECT DISTINCT(\"host_id\") FROM \"$testnet\".\"autogen\".\"validator-new\" ',
+        'query': 'SELECT DISTINCT(\"id\") FROM \"$testnet\".\"autogen\".\"validator-new\" ',
         'refresh': 2,
         'regex': '',
         'sort': 1,

View File

@ -103,7 +103,7 @@
         "lineColor": "rgb(31, 120, 193)",
         "show": false
       },
-      "tableColumn": "last",
+      "tableColumn": "mode",
       "targets": [
         {
           "groupBy": [
@ -123,7 +123,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT last(\"host_id\") FROM \"$testnet\".\"autogen\".\"replay_stage-new_leader\" WHERE $timeFilter \n",
+          "query": "SELECT MODE(last) FROM ( SELECT last(\"leader\") FROM \"$testnet\".\"autogen\".\"replay_stage-new_leader\" WHERE $timeFilter GROUP BY host_id )\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "table",
@ -236,7 +236,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT last(\"count\") FROM \"$testnet\".\"autogen\".\"broadcast_service-num_peers\" WHERE $timeFilter GROUP BY time(1s) \n\n",
+          "query": "SELECT LAST(median) FROM ( SELECT median(count) FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND count > 0 GROUP BY time(5s) )\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "time_series",
@ -325,7 +325,7 @@
           ],
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT count(\"count\") AS \"total\" FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
+          "query": "SELECT MEDIAN(\"host_count\") AS \"total\" FROM ( SELECT COUNT(\"count\") as host_count FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter GROUP BY time($__interval), host_id ) GROUP BY time($__interval) fill(0)",
           "rawQuery": true,
           "refId": "B",
           "resultFormat": "time_series",
@ -344,43 +344,6 @@
             ]
           ],
           "tags": []
-        },
-        {
-          "groupBy": [
-            {
-              "params": [
-                "$__interval"
-              ],
-              "type": "time"
-            },
-            {
-              "params": [
-                "null"
-              ],
-              "type": "fill"
-            }
-          ],
-          "orderByTime": "ASC",
-          "policy": "default",
-          "query": "SELECT count(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"validator-vote_sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
-          "rawQuery": true,
-          "refId": "A",
-          "resultFormat": "time_series",
-          "select": [
-            [
-              {
-                "params": [
-                  "value"
-                ],
-                "type": "field"
-              },
-              {
-                "params": [],
-                "type": "mean"
-              }
-            ]
-          ],
-          "tags": []
         }
       ],
       "thresholds": [],
@ -502,7 +465,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT ROUND(MEAN(\"sum\")) FROM ( SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"banking_stage-record_transactions\" WHERE $timeFilter GROUP BY time(1s) )\n\n",
+          "query": "SELECT ROUND(MEAN(\"sum\")) FROM ( SELECT MEDIAN(tx_count) AS sum FROM (SELECT SUM(\"count\") AS tx_count FROM \"replay_stage-replay_transactions\" WHERE $timeFilter AND count > 0 GROUP BY time(1s), host_id) GROUP BY time(1s) )\n\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "time_series",
@ -614,7 +577,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT MAX(\"sum\") FROM ( SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"banking_stage-record_transactions\" WHERE $timeFilter GROUP BY time(1s) )\n\n",
+          "query": "SELECT MAX(\"median_sum\") FROM ( SELECT MEDIAN(tx_count) AS median_sum FROM (SELECT SUM(\"count\") AS tx_count FROM \"bank-process_transactions\" WHERE $timeFilter AND count > 0 GROUP BY time(1s), host_id) GROUP BY time(1s) )\n\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "time_series",
@ -726,7 +689,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT sum(\"count\") AS \"transactions\" FROM \"$testnet\".\"autogen\".\"banking_stage-record_transactions\" WHERE $timeFilter \n\n",
+          "query": "SELECT MEDIAN(tx_count) AS transactions FROM (SELECT SUM(\"count\") AS tx_count FROM \"bank-process_transactions\" WHERE $timeFilter GROUP BY host_id) WHERE tx_count > 0\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "time_series",
@ -818,7 +781,7 @@
         "lineColor": "rgb(31, 120, 193)",
         "show": false
       },
-      "tableColumn": "sum",
+      "tableColumn": "median",
       "targets": [
         {
           "groupBy": [
@ -838,7 +801,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter \n",
+          "query": "SELECT MEDIAN(\"vote_count\") FROM ( SELECT sum(\"count\") as vote_count FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter GROUP BY host_id )\n",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "table",
@ -1556,6 +1519,44 @@
           ],
           "orderByTime": "ASC",
           "policy": "default",
+          "query": "select median(\"tx_count\") as \"transactions\" from (select sum(\"count\") / 2 as \"tx_count\" from \"bank-process_transactions\" where $timeFilter AND count > 0 GROUP BY time(2s), host_id) group by time(2s) fill(0)",
+          "rawQuery": true,
+          "refId": "E",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "mean"
+              }
+            ]
+          ],
+          "tags": []
+        },
+        {
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "hide": true,
+          "orderByTime": "ASC",
+          "policy": "default",
           "query": "SELECT sum(\"count\") / 2 AS \"transactions\" FROM \"$testnet\".\"autogen\".\"banking_stage-record_transactions\" WHERE $timeFilter GROUP BY time(2s) FILL(0)\n",
           "rawQuery": true,
           "refId": "A",
@ -1575,6 +1576,44 @@
             ]
           ],
           "tags": []
+        },
+        {
+          "groupBy": [
+            {
+              "params": [
+                "$__interval"
+              ],
+              "type": "time"
+            },
+            {
+              "params": [
+                "null"
+              ],
+              "type": "fill"
+            }
+          ],
+          "hide": true,
+          "orderByTime": "ASC",
+          "policy": "default",
+          "query": "select median(\"tx_count\") as \"transactions\" from (select sum(\"count\") / 2 as \"tx_count\" from \"replay_stage-replay_transactions\" where $timeFilter AND count > 0 GROUP BY time(2s), host_id) group by time(2s) fill(0)",
+          "rawQuery": true,
+          "refId": "D",
+          "resultFormat": "time_series",
+          "select": [
+            [
+              {
+                "params": [
+                  "value"
+                ],
+                "type": "field"
+              },
+              {
+                "params": [],
+                "type": "mean"
+              }
+            ]
+          ],
+          "tags": []
         }
       ],
       "thresholds": [],
@ -1655,44 +1694,6 @@
       "stack": false,
       "steppedLine": false,
       "targets": [
-        {
-          "groupBy": [
-            {
-              "params": [
-                "$__interval"
-              ],
-              "type": "time"
-            },
-            {
-              "params": [
-                "null"
-              ],
-              "type": "fill"
-            }
-          ],
-          "measurement": "cluster_info-vote-count",
-          "orderByTime": "ASC",
-          "policy": "autogen",
-          "query": "SELECT mean(\"total_peers\") as \"total peers\" FROM \"$testnet\".\"autogen\".\"vote_stage-peer_count\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
-          "rawQuery": true,
-          "refId": "A",
-          "resultFormat": "time_series",
-          "select": [
-            [
-              {
-                "params": [
-                  "count"
-                ],
-                "type": "field"
-              },
-              {
-                "params": [],
-                "type": "sum"
-              }
-            ]
-          ],
-          "tags": []
-        },
         {
           "groupBy": [
             {
@ -1711,45 +1712,7 @@
           "hide": false,
           "orderByTime": "ASC",
           "policy": "default",
-          "query": "SELECT mean(\"valid_peers\") as \"valid peers\" FROM \"$testnet\".\"autogen\".\"vote_stage-peer_count\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)",
-          "rawQuery": true,
-          "refId": "B",
-          "resultFormat": "time_series",
-          "select": [
-            [
-              {
-                "params": [
-                  "value"
-                ],
-                "type": "field"
-              },
-              {
-                "params": [],
-                "type": "mean"
-              }
-            ]
-          ],
-          "tags": []
-        },
-        {
-          "groupBy": [
-            {
-              "params": [
-                "$__interval"
-              ],
-              "type": "time"
-            },
-            {
-              "params": [
-                "null"
-              ],
-              "type": "fill"
-            }
-          ],
-          "hide": false,
-          "orderByTime": "ASC",
-          "policy": "default",
-          "query": "SELECT mean(\"count\") AS \"peers\" FROM \"$testnet\".\"autogen\".\"broadcast_service-num_peers\" WHERE $timeFilter GROUP BY time(1s) FILL(0)",
+          "query": "SELECT median(\"count\") AS \"total\" FROM \"$testnet\".\"autogen\".\"cluster_info-num_nodes\" WHERE $timeFilter AND count > 0 GROUP BY time(5s)",
           "rawQuery": true,
           "refId": "C",
           "resultFormat": "time_series",
@ -7408,7 +7371,7 @@
           "measurement": "cluster_info-vote-count",
           "orderByTime": "ASC",
           "policy": "autogen",
-          "query": "SELECT last(\"count\") FROM \"$testnet\".\"autogen\".\"replay_stage-new_leader\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)",
+          "query": "SELECT median(\"slot\") FROM \"$testnet\".\"autogen\".\"replay_stage-new_leader\" WHERE $timeFilter GROUP BY time($__interval)",
           "rawQuery": true,
           "refId": "A",
           "resultFormat": "time_series",
@ -7432,7 +7395,7 @@
       "thresholds": [],
       "timeFrom": null,
       "timeShift": null,
-      "title": "Leader Change ($hostid)",
+      "title": "Leader Change",
       "tooltip": {
         "shared": true,
         "sort": 0,
@ -8055,7 +8018,7 @@
         "multi": false,
         "name": "hostid",
         "options": [],
-        "query": "SELECT DISTINCT(\"host_id\") FROM \"$testnet\".\"autogen\".\"counter-fullnode-new\" ",
+        "query": "SELECT DISTINCT(\"id\") FROM \"$testnet\".\"autogen\".\"validator-new\" ",
        "refresh": 2,
        "regex": "",
        "sort": 1,

View File

@ -56,11 +56,6 @@ macro_rules! datapoint {
     (@point $name:expr) => {
         $crate::influxdb::Point::new(&$name)
     };
-    ($name:expr) => {
-        if log_enabled!(log::Level::Debug) {
-            $crate::submit($crate::datapoint!(@point $name), log::Level::Debug);
-        }
-    };
     ($name:expr, $($fields:tt)+) => {
         if log_enabled!(log::Level::Debug) {
             $crate::submit($crate::datapoint!(@point $name, $($fields)+), log::Level::Debug);
@ -245,7 +240,7 @@ impl MetricsAgent {
         let extra = influxdb::Point::new("metrics")
             .add_timestamp(timing::timestamp() as i64)
-            .add_field("host_id", influxdb::Value::String(HOST_ID.to_string()))
+            .add_tag("host_id", influxdb::Value::String(HOST_ID.to_string()))
             .add_field(
                 "points_written",
                 influxdb::Value::Integer(points_written as i64),
@ -342,7 +337,7 @@ impl MetricsAgent {
     }

     pub fn submit(&self, mut point: influxdb::Point, level: log::Level) {
-        point.add_field("host_id", influxdb::Value::String(HOST_ID.to_string()));
+        point.add_tag("host_id", influxdb::Value::String(HOST_ID.to_string()));
         if point.timestamp.is_none() {
             point.timestamp = Some(timing::timestamp() as i64);
         }
@ -383,7 +378,8 @@ fn get_singleton_agent() -> Arc<Mutex<MetricsAgent>> {
 /// Submits a new point from any thread. Note that points are internally queued
 /// and transmitted periodically in batches.
-pub fn submit(point: influxdb::Point, level: log::Level) {
+pub fn submit(mut point: influxdb::Point, level: log::Level) {
+    point.add_tag("host_id", influxdb::Value::String(HOST_ID.to_string()));
     let agent_mutex = get_singleton_agent();
     let agent = agent_mutex.lock().unwrap();
     agent.submit(point, level);
@ -435,6 +431,7 @@ pub fn set_panic_hook(program: &'static str) {
                         thread::current().name().unwrap_or("?").to_string(),
                     ),
                 )
+                .add_tag("host_id", influxdb::Value::String(HOST_ID.to_string()))
                 // The 'one' field exists to give Kapacitor Alerts a numerical value
                 // to filter on
                 .add_field("one", influxdb::Value::Integer(1))
@ -452,7 +449,6 @@ pub fn set_panic_hook(program: &'static str) {
                         None => "?".to_string(),
                     }),
                 )
-                .add_field("host_id", influxdb::Value::String(HOST_ID.to_string()))
                 .to_owned(),
                 Level::Error,
             );
@ -610,7 +606,6 @@ mod test {
             }
         };
     }
-    datapoint!("name");
     datapoint!("name", ("field name", "test".to_string(), String));
     datapoint!("name", ("field name", 12.34_f64, f64));
     datapoint!("name", ("field name", true, bool));
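
Note: the metrics change the dashboard queries depend on is that host_id is now attached as an InfluxDB tag rather than a field; tags are indexed and can be used in GROUP BY clauses such as the GROUP BY host_id queries above, while fields carry the measured values. A minimal sketch of that distinction using the Point API that appears in this file; the helper function itself is illustrative (after this change, submit attaches the host_id tag automatically, so callers normally only add fields).

    use solana_metrics::influxdb;

    // Illustrative only: build a point the way `submit` now does, with host_id
    // as an indexed tag and the measured value as a field.
    fn example_point(host_id: &str, count: i64) -> influxdb::Point {
        influxdb::Point::new("cluster_info-num_nodes")
            .add_tag("host_id", influxdb::Value::String(host_id.to_string()))
            .add_field("count", influxdb::Value::Integer(count))
            .to_owned()
    }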

View File

@ -962,6 +962,7 @@ impl Bank {
         usize,
     ) {
         debug!("processing transactions: {}", txs.len());
+        inc_new_counter_info!("bank-process_transactions", txs.len());
         let mut error_counters = ErrorCounters::default();
         let mut load_time = Measure::start("accounts_load");
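
Note: because bank-process_transactions is incremented by every node that processes transactions, the TPS panels above can take a median across host_id instead of reading the leader-only banking_stage-record_transactions series. A minimal sketch of the counter call, with a hypothetical wrapper function; the counter name and value mirror the hunk above.

    use solana_metrics::inc_new_counter_info;

    // Hypothetical wrapper for illustration: bump the per-host transaction
    // counter by the size of the batch being processed.
    fn count_processed_transactions(batch_len: usize) {
        inc_new_counter_info!("bank-process_transactions", batch_len);
    }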