From 17d698d20a781b8f99fb7d16b0bdb30c762b578d Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 23 Dec 2021 18:16:56 +0000 Subject: [PATCH] report mem stats (#21258) (#22066) (cherry picked from commit f8dcb2f38b13d9e812112c2d641f097d2cd3618c) # Conflicts: # Cargo.lock # core/Cargo.toml # core/src/system_monitor_service.rs # runtime/src/bucket_map_holder_stats.rs Co-authored-by: Jeff Washington (jwash) <75863576+jeffwashington@users.noreply.github.com> --- Cargo.lock | 11 +++++ core/Cargo.toml | 1 + core/src/system_monitor_service.rs | 68 ++++++++++++++++++++++++++---- sdk/src/timing.rs | 13 ++++-- 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab1aa5b690..1eb45302bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4676,6 +4676,7 @@ dependencies = [ "solana-vote-program", "spl-token", "symlink", + "sys-info", "systemstat", "tempfile", "thiserror", @@ -6098,6 +6099,16 @@ dependencies = [ "unicode-xid 0.2.0", ] +[[package]] +name = "sys-info" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "sysctl" version = "0.4.0" diff --git a/core/Cargo.toml b/core/Cargo.toml index 324bfb5af9..4702279182 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -72,6 +72,7 @@ spl-token-v2-0 = { package = "spl-token", version = "=3.2.0", features = ["no-en tempfile = "3.1.0" thiserror = "1.0" solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.8.12" } +sys-info = "0.9.1" trees = "0.2.1" [dev-dependencies] diff --git a/core/src/system_monitor_service.rs b/core/src/system_monitor_service.rs index 93f5461fba..8ce5028139 100644 --- a/core/src/system_monitor_service.rs +++ b/core/src/system_monitor_service.rs @@ -1,3 +1,4 @@ +use solana_sdk::timing::AtomicInterval; use std::{ collections::HashMap, 
io::BufRead, @@ -6,12 +7,14 @@ use std::{ Arc, }, thread::{self, sleep, Builder, JoinHandle}, - time::{Duration, Instant}, + time::Duration, }; #[cfg(target_os = "linux")] use std::{fs::File, io::BufReader, path::Path}; -const SAMPLE_INTERVAL: Duration = Duration::from_secs(2); +const MS_PER_S: u64 = 1_000; +const SAMPLE_INTERVAL_UDP_MS: u64 = 2 * MS_PER_S; +const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S; const SLEEP_INTERVAL: Duration = Duration::from_millis(500); #[cfg(target_os = "linux")] @@ -172,19 +175,68 @@ impl SystemMonitorService { ); } + fn calc_percent(numerator: u64, denom: u64) -> f32 { + if denom == 0 { + 0.0 + } else { + (numerator as f32 / denom as f32) * 100.0 + } + } + + fn report_mem_stats() { + if let Ok(info) = sys_info::mem_info() { + datapoint_info!( + "memory-stats", + ("total", info.total, i64), + ("swap_total", info.swap_total, i64), + ( + "free_percent", + Self::calc_percent(info.free, info.total), + f64 + ), + ("used_bytes", info.total.saturating_sub(info.avail), i64), + ( + "avail_percent", + Self::calc_percent(info.avail, info.total), + f64 + ), + ( + "buffers_percent", + Self::calc_percent(info.buffers, info.total), + f64 + ), + ( + "cached_percent", + Self::calc_percent(info.cached, info.total), + f64 + ), + ( + "swap_free_percent", + Self::calc_percent(info.swap_free, info.swap_total), + f64 + ), + ) + } + } + pub fn run(exit: Arc, report_os_network_stats: bool) { let mut udp_stats = None; - let mut now = Instant::now(); + + let udp_timer = AtomicInterval::default(); + let mem_timer = AtomicInterval::default(); loop { if exit.load(Ordering::Relaxed) { break; } - if now.elapsed() >= SAMPLE_INTERVAL { - now = Instant::now(); - if report_os_network_stats { - SystemMonitorService::process_udp_stats(&mut udp_stats); - } + + if udp_timer.should_update(SAMPLE_INTERVAL_UDP_MS) && report_os_network_stats { + SystemMonitorService::process_udp_stats(&mut udp_stats); } + + if mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) { + 
SystemMonitorService::report_mem_stats(); + } + sleep(SLEEP_INTERVAL); } } } diff --git a/sdk/src/timing.rs b/sdk/src/timing.rs index 42e3ba48ef..4c5edd1b80 100644 --- a/sdk/src/timing.rs +++ b/sdk/src/timing.rs @@ -32,6 +32,7 @@ pub fn duration_as_s(d: &Duration) -> f32 { d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0) } +/// return timestamp as ms pub fn timestamp() -> u64 { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -70,14 +71,18 @@ pub struct AtomicInterval { } impl AtomicInterval { - pub fn should_update(&self, interval_time: u64) -> bool { - self.should_update_ext(interval_time, true) + /// true if 'interval_time_ms' has elapsed since last time we returned true as long as it has been 'interval_time_ms' since this struct was created + pub fn should_update(&self, interval_time_ms: u64) -> bool { + self.should_update_ext(interval_time_ms, true) } - pub fn should_update_ext(&self, interval_time: u64, skip_first: bool) -> bool { + /// a primary use case is periodic metric reporting, potentially from different threads + /// true if 'interval_time_ms' has elapsed since last time we returned true + /// except, if skip_first=true, false until 'interval_time_ms' has elapsed since this struct was created + pub fn should_update_ext(&self, interval_time_ms: u64, skip_first: bool) -> bool { let now = timestamp(); let last = self.last_update.load(Ordering::Relaxed); - now.saturating_sub(last) > interval_time + now.saturating_sub(last) > interval_time_ms && self .last_update .compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)