Rate limit counter metrics points to one per second (#5496)

* Rate limit counter metrics points to one per second

* Remove old env var

* Test that metrics counter is incrementing

* Fix typo
Justin Starry
2019-08-12 18:15:34 -04:00
committed by GitHub
parent 771d1a78fd
commit 0fde19239b
12 changed files with 209 additions and 163 deletions
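The change keys counter submissions by (name, bucket) inside the metrics agent and merges them until the next write, so each counter yields at most one InfluxDB point per bucket. A minimal call-site sketch, assuming the bucket is the millisecond timestamp divided by 1000 to get the one-point-per-second behavior in the title (the counter name is illustrative; the diff below only shows the agent side):

    let now_ms = timing::timestamp(); // milliseconds, per solana_sdk::timing
    let point = CounterPoint {
        name: "example-counter",
        count: 1,
        timestamp: now_ms,
    };
    // Submissions with the same (name, bucket) are summed by the metrics thread,
    // so at most one point per counter per second reaches the writer.
    submit_counter(point, log::Level::Info, now_ms / 1000);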


@@ -7,6 +7,7 @@ use log::*;
use solana_sdk::hash::hash;
use solana_sdk::timing;
use std::collections::HashMap;
use std::convert::Into;
use std::sync::mpsc::{channel, Receiver, RecvTimeoutError, Sender};
use std::sync::{Arc, Barrier, Mutex, Once};
use std::thread;
@@ -128,10 +129,41 @@ lazy_static! {
};
}
type CounterMap = HashMap<(&'static str, u64), CounterPoint>;
#[derive(Clone, Debug)]
pub struct CounterPoint {
pub name: &'static str,
pub count: i64,
pub timestamp: u64,
}
impl CounterPoint {
#[cfg(test)]
fn new(name: &'static str) -> Self {
CounterPoint {
name,
count: 0,
timestamp: 0,
}
}
}
impl Into<influxdb::Point> for CounterPoint {
fn into(self) -> influxdb::Point {
let mut point = influxdb::Point::new(self.name);
point.add_tag("host_id", influxdb::Value::String(HOST_ID.to_string()));
point.add_field("count", influxdb::Value::Integer(self.count));
point.add_timestamp(self.timestamp as i64);
point
}
}
#[derive(Debug)]
enum MetricsCommand {
- Submit(influxdb::Point, log::Level),
Flush(Arc<Barrier>),
+ Submit(influxdb::Point, log::Level),
+ SubmitCounter(CounterPoint, log::Level, u64),
}
struct MetricsAgent {
@@ -270,7 +302,7 @@ impl MetricsAgent {
) {
trace!("run: enter");
let mut last_write_time = Instant::now();
- let mut points_map = HashMap::<log::Level, (Instant, Vec<Point>)>::new();
+ let mut points_map = HashMap::<log::Level, (Instant, CounterMap, Vec<Point>)>::new();
let max_points = write_frequency_secs.as_secs() as usize * max_points_per_sec;
loop {
@@ -278,20 +310,38 @@ impl MetricsAgent {
Ok(cmd) => match cmd {
MetricsCommand::Flush(barrier) => {
debug!("metrics_thread: flush");
- points_map.retain(|_, (_, points)| {
- writer.write(points.to_vec());
+ points_map.drain().for_each(|(_, (_, counters, points))| {
+ let counter_points = counters.into_iter().map(|(_, v)| v.into());
+ let points: Vec<_> = points.into_iter().chain(counter_points).collect();
+ writer.write(points);
last_write_time = Instant::now();
- false
});
barrier.wait();
}
MetricsCommand::Submit(point, level) => {
debug!("run: submit {:?}", point);
- let (_, points) = points_map
- .entry(level)
- .or_insert((last_write_time, Vec::new()));
+ let (_, _, points) = points_map.entry(level).or_insert((
+ last_write_time,
+ HashMap::new(),
+ Vec::new(),
+ ));
points.push(point);
}
MetricsCommand::SubmitCounter(counter, level, bucket) => {
debug!("run: submit counter {:?}", counter);
let (_, counters, _) = points_map.entry(level).or_insert((
last_write_time,
HashMap::new(),
Vec::new(),
));
let key = (counter.name, bucket);
if let Some(value) = counters.get_mut(&key) {
value.count += counter.count;
} else {
counters.insert(key, counter);
}
}
},
Err(RecvTimeoutError::Timeout) => {
trace!("run: receive timeout");
@@ -315,7 +365,9 @@ impl MetricsAgent {
]
.iter()
.for_each(|x| {
- if let Some((last_time, points)) = points_map.remove(x) {
+ if let Some((last_time, counters, points)) = points_map.remove(x) {
+ let counter_points = counters.into_iter().map(|(_, v)| v.into());
+ let points: Vec<_> = points.into_iter().chain(counter_points).collect();
let num_written = Self::write(
&points,
last_time,
@@ -338,7 +390,7 @@ impl MetricsAgent {
pub fn submit(&self, mut point: influxdb::Point, level: log::Level) {
if point.timestamp.is_none() {
- point.timestamp = Some(timing::timestamp() as i64);
+ point.add_timestamp(timing::timestamp() as i64);
}
debug!("Submitting point: {:?}", point);
self.sender
@@ -346,6 +398,13 @@ impl MetricsAgent {
.unwrap();
}
pub fn submit_counter(&self, point: CounterPoint, level: log::Level, bucket: u64) {
debug!("Submitting counter point: {:?}", point);
self.sender
.send(MetricsCommand::SubmitCounter(point, level, bucket))
.unwrap();
}
pub fn flush(&self) {
debug!("Flush");
let barrier = Arc::new(Barrier::new(2));
@@ -384,6 +443,14 @@ pub fn submit(mut point: influxdb::Point, level: log::Level) {
agent.submit(point, level);
}
/// Submits a new counter or updates an existing counter from any thread. Note that points are
/// internally queued and transmitted periodically in batches.
pub fn submit_counter(point: CounterPoint, level: log::Level, bucket: u64) {
let agent_mutex = get_singleton_agent();
let agent = agent_mutex.lock().unwrap();
agent.submit_counter(point, level, bucket);
}
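// Usage sketch, not part of this diff: a submitted counter is only handed to the
// writer when the write interval elapses or flush() is called, and submissions
// sharing a (name, bucket) key are merged first. In a test this looks roughly like:
//     agent.submit_counter(CounterPoint::new("example"), Level::Info, 0);
//     agent.flush(); // pushes the merged counter point out to the writer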
fn get_env_settings() -> Result<(String, String, String, String), env::VarError> {
let host =
env::var("INFLUX_HOST").unwrap_or_else(|_| "https://metrics.solana.com:8086".to_string());
@@ -461,20 +528,20 @@ pub fn set_panic_hook(program: &'static str) {
#[cfg(test)]
mod test {
use super::*;
- use std::sync::atomic::{AtomicUsize, Ordering};
+ use serde_json;
struct MockMetricsWriter {
- points_written: AtomicUsize,
+ points_written: Arc<Mutex<Vec<influxdb::Point>>>,
}
impl MockMetricsWriter {
fn new() -> Self {
MockMetricsWriter {
- points_written: AtomicUsize::new(0),
+ points_written: Arc::new(Mutex::new(Vec::new())),
}
}
fn points_written(&self) -> usize {
- return self.points_written.load(Ordering::Relaxed);
+ self.points_written.lock().unwrap().len()
}
}
@@ -482,13 +549,16 @@ mod test {
fn write(&self, points: Vec<influxdb::Point>) {
assert!(!points.is_empty());
+ let new_points = points.len();
self.points_written
- .fetch_add(points.len(), Ordering::Relaxed);
+ .lock()
+ .unwrap()
+ .extend(points.into_iter());
info!(
"Writing {} points ({} total)",
- points.len(),
- self.points_written.load(Ordering::Relaxed)
+ new_points,
+ self.points_written()
);
}
}
@@ -509,6 +579,63 @@ mod test {
assert_eq!(writer.points_written(), 42);
}
#[test]
fn test_submit_counter() {
let writer = Arc::new(MockMetricsWriter::new());
let agent = MetricsAgent::new(writer.clone(), Duration::from_secs(10), 1000);
for i in 0..10 {
agent.submit_counter(CounterPoint::new("counter - 1"), Level::Info, i);
agent.submit_counter(CounterPoint::new("counter - 2"), Level::Info, i);
}
agent.flush();
assert_eq!(writer.points_written(), 20);
}
#[test]
fn test_submit_counter_increment() {
let writer = Arc::new(MockMetricsWriter::new());
let agent = MetricsAgent::new(writer.clone(), Duration::from_secs(10), 1000);
for _ in 0..10 {
agent.submit_counter(
CounterPoint {
name: "counter",
count: 10,
timestamp: 0,
},
Level::Info,
0, // use the same bucket
);
}
agent.flush();
assert_eq!(writer.points_written(), 1);
let submitted_point = writer.points_written.lock().unwrap()[0].clone();
let submitted_count = submitted_point.fields.get("count").unwrap();
let expected_count = &influxdb::Value::Integer(100);
assert_eq!(
serde_json::to_string(submitted_count).unwrap(),
serde_json::to_string(expected_count).unwrap()
);
}
#[test]
fn test_submit_bucketed_counter() {
let writer = Arc::new(MockMetricsWriter::new());
let agent = MetricsAgent::new(writer.clone(), Duration::from_secs(10), 1000);
for i in 0..50 {
agent.submit_counter(CounterPoint::new("counter - 1"), Level::Info, i / 10);
agent.submit_counter(CounterPoint::new("counter - 2"), Level::Info, i / 10);
}
agent.flush();
assert_eq!(writer.points_written(), 10);
}
#[test]
fn test_submit_with_delay() {
let writer = Arc::new(MockMetricsWriter::new());