report mem stats (#21258) (#22066)

(cherry picked from commit f8dcb2f38b)

# Conflicts:
#	Cargo.lock
#	core/Cargo.toml
#	core/src/system_monitor_service.rs
#	runtime/src/bucket_map_holder_stats.rs

Co-authored-by: Jeff Washington (jwash) <75863576+jeffwashington@users.noreply.github.com>
This commit is contained in:
mergify[bot]
2021-12-23 18:16:56 +00:00
committed by GitHub
parent cf34ae7d6f
commit 17d698d20a
4 changed files with 81 additions and 12 deletions

11
Cargo.lock generated
View File

@ -4676,6 +4676,7 @@ dependencies = [
"solana-vote-program",
"spl-token",
"symlink",
"sys-info",
"systemstat",
"tempfile",
"thiserror",
@ -6098,6 +6099,16 @@ dependencies = [
"unicode-xid 0.2.0",
]
[[package]]
name = "sys-info"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "sysctl"
version = "0.4.0"

View File

@ -72,6 +72,7 @@ spl-token-v2-0 = { package = "spl-token", version = "=3.2.0", features = ["no-en
tempfile = "3.1.0"
thiserror = "1.0"
solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.8.12" }
sys-info = "0.9.1"
trees = "0.2.1"
[dev-dependencies]

View File

@ -1,3 +1,4 @@
use solana_sdk::timing::AtomicInterval;
use std::{
collections::HashMap,
io::BufRead,
@ -6,12 +7,14 @@ use std::{
Arc,
},
thread::{self, sleep, Builder, JoinHandle},
time::{Duration, Instant},
time::Duration,
};
#[cfg(target_os = "linux")]
use std::{fs::File, io::BufReader, path::Path};
// Interval compared against `now.elapsed()` in `run`.
const SAMPLE_INTERVAL: Duration = Duration::from_secs(2);
// Number of milliseconds in one second; base unit for the `*_MS` intervals below.
const MS_PER_S: u64 = 1_000;
// Interval, in milliseconds, passed to `udp_timer.should_update` in `run` (2 seconds).
const SAMPLE_INTERVAL_UDP_MS: u64 = 2 * MS_PER_S;
// Interval, in milliseconds, passed to `mem_timer.should_update` in `run` (1 second).
const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S;
// How long `run` sleeps on each pass of its loop.
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
#[cfg(target_os = "linux")]
@ -172,19 +175,68 @@ impl SystemMonitorService {
);
}
/// Returns `numerator / denom` expressed as a percentage.
///
/// Yields `0.0` when `denom` is zero instead of dividing by zero.
fn calc_percent(numerator: u64, denom: u64) -> f32 {
    if denom == 0 {
        return 0.0;
    }
    // Divide in f64: an f32 mantissa holds only 24 bits, so casting large
    // counters to f32 before dividing rounds each operand and skews the
    // ratio. Narrow to f32 once, at the very end.
    let ratio = numerator as f64 / denom as f64;
    (ratio * 100.0) as f32
}
/// Samples system memory with `sys_info::mem_info()` and submits it as a
/// `memory-stats` datapoint. Does nothing if the memory query fails.
///
/// Absolute sizes (`total`, `swap_total`, `used_bytes`) are reported in
/// bytes; the remaining fields are percentages of total memory or total swap.
fn report_mem_stats() {
    // `sys_info::mem_info()` reports every field in kilobytes, so absolute
    // sizes are scaled to bytes to match the `used_bytes` field name.
    const KB: u64 = 1_024;
    if let Ok(info) = sys_info::mem_info() {
        datapoint_info!(
            "memory-stats",
            ("total", info.total.saturating_mul(KB), i64),
            ("swap_total", info.swap_total.saturating_mul(KB), i64),
            (
                "free_percent",
                Self::calc_percent(info.free, info.total),
                f64
            ),
            (
                "used_bytes",
                // "used" is everything that is not available.
                info.total.saturating_sub(info.avail).saturating_mul(KB),
                i64
            ),
            (
                "avail_percent",
                Self::calc_percent(info.avail, info.total),
                f64
            ),
            (
                "buffers_percent",
                Self::calc_percent(info.buffers, info.total),
                f64
            ),
            (
                "cached_percent",
                Self::calc_percent(info.cached, info.total),
                f64
            ),
            (
                "swap_free_percent",
                Self::calc_percent(info.swap_free, info.swap_total),
                f64
            ),
        )
    }
}
/// Monitoring loop: runs until `exit` is set, reporting UDP stats (only when
/// `report_os_network_stats` is true) and memory stats on their own timers,
/// and sleeping `SLEEP_INTERVAL` on each pass.
// NOTE(review): this listing appears to interleave pre- and post-change lines
// (both the `Instant`-based and the `AtomicInterval`-based timing are present
// and the braces do not balance) — confirm against the actual file.
pub fn run(exit: Arc<AtomicBool>, report_os_network_stats: bool) {
let mut udp_stats = None;
let mut now = Instant::now();
// One independent timer per metric family.
let udp_timer = AtomicInterval::default();
let mem_timer = AtomicInterval::default();
loop {
// Stop as soon as the exit flag is observed.
if exit.load(Ordering::Relaxed) {
break;
}
if now.elapsed() >= SAMPLE_INTERVAL {
now = Instant::now();
if report_os_network_stats {
// UDP stats are gated on both the timer and the caller's opt-in flag.
if udp_timer.should_update(SAMPLE_INTERVAL_UDP_MS) && report_os_network_stats {
SystemMonitorService::process_udp_stats(&mut udp_stats);
}
// Memory stats are gated only on their timer.
if mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) {
SystemMonitorService::report_mem_stats();
}
sleep(SLEEP_INTERVAL);
}
}

View File

@ -32,6 +32,7 @@ pub fn duration_as_s(d: &Duration) -> f32 {
d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0)
}
/// return timestamp as ms
pub fn timestamp() -> u64 {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
@ -70,14 +71,18 @@ pub struct AtomicInterval {
}
impl AtomicInterval {
pub fn should_update(&self, interval_time: u64) -> bool {
self.should_update_ext(interval_time, true)
/// Returns true if 'interval_time_ms' has elapsed since the last time this returned true, provided that 'interval_time_ms' has also elapsed since this struct was created.
pub fn should_update(&self, interval_time_ms: u64) -> bool {
self.should_update_ext(interval_time_ms, true)
}
pub fn should_update_ext(&self, interval_time: u64, skip_first: bool) -> bool {
/// a primary use case is periodic metric reporting, potentially from different threads
/// true if 'interval_time_ms' has elapsed since last time we returned true
/// except, if skip_first=false, false until 'interval_time_ms' has elapsed since this struct was created
pub fn should_update_ext(&self, interval_time_ms: u64, skip_first: bool) -> bool {
let now = timestamp();
let last = self.last_update.load(Ordering::Relaxed);
now.saturating_sub(last) > interval_time
now.saturating_sub(last) > interval_time_ms
&& self
.last_update
.compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)