regularly report network limits (#22563)
This commit is contained in:
@ -60,7 +60,6 @@ jemallocator = {package = "tikv-jemallocator", version = "0.4.1", features = ["u
|
||||
[target."cfg(unix)".dependencies]
|
||||
libc = "0.2.112"
|
||||
signal-hook = "0.3.13"
|
||||
sysctl = "0.4.3"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
targets = ["x86_64-unknown-linux-gnu"]
|
||||
|
@ -23,6 +23,7 @@ use {
|
||||
},
|
||||
solana_core::{
|
||||
ledger_cleanup_service::{DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS},
|
||||
system_monitor_service::SystemMonitorService,
|
||||
tower_storage,
|
||||
tpu::DEFAULT_TPU_COALESCE_MS,
|
||||
validator::{is_snapshot_config_valid, Validator, ValidatorConfig, ValidatorStartProgress},
|
||||
@ -32,7 +33,6 @@ use {
|
||||
contact_info::ContactInfo,
|
||||
},
|
||||
solana_ledger::blockstore_db::BlockstoreRecoveryMode,
|
||||
solana_metrics::datapoint_info,
|
||||
solana_perf::recycler::enable_recycler_warming,
|
||||
solana_poh::poh_service,
|
||||
solana_replica_lib::accountsdb_repl_server::AccountsDbReplServiceConfig,
|
||||
@ -411,85 +411,6 @@ fn get_cluster_shred_version(entrypoints: &[SocketAddr]) -> Option<u16> {
|
||||
None
|
||||
}
|
||||
|
||||
fn platform_id() -> String {
|
||||
format!(
|
||||
"{}/{}/{}",
|
||||
std::env::consts::FAMILY,
|
||||
std::env::consts::OS,
|
||||
std::env::consts::ARCH
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn check_os_network_limits() {
|
||||
use {solana_metrics::datapoint_warn, std::collections::HashMap, sysctl::Sysctl};
|
||||
|
||||
fn sysctl_read(name: &str) -> Result<String, sysctl::SysctlError> {
|
||||
let ctl = sysctl::Ctl::new(name)?;
|
||||
let val = ctl.value_string()?;
|
||||
Ok(val)
|
||||
}
|
||||
let mut check_failed = false;
|
||||
|
||||
info!("Testing OS network limits:");
|
||||
|
||||
// Reference: https://medium.com/@CameronSparr/increase-os-udp-buffers-to-improve-performance-51d167bb1360
|
||||
let mut recommended_limits: HashMap<&str, i64> = HashMap::default();
|
||||
recommended_limits.insert("net.core.rmem_max", 134217728);
|
||||
recommended_limits.insert("net.core.rmem_default", 134217728);
|
||||
recommended_limits.insert("net.core.wmem_max", 134217728);
|
||||
recommended_limits.insert("net.core.wmem_default", 134217728);
|
||||
recommended_limits.insert("vm.max_map_count", 1000000);
|
||||
|
||||
// Additionally collect the following limits
|
||||
recommended_limits.insert("net.core.optmem_max", 0);
|
||||
recommended_limits.insert("net.core.netdev_max_backlog", 0);
|
||||
|
||||
let mut current_limits: HashMap<&str, i64> = HashMap::default();
|
||||
for (key, _) in recommended_limits.iter() {
|
||||
let current_val = match sysctl_read(key) {
|
||||
Ok(val) => val.parse::<i64>().unwrap(),
|
||||
Err(e) => {
|
||||
error!("Failed to query value for {}: {}", key, e);
|
||||
check_failed = true;
|
||||
-1
|
||||
}
|
||||
};
|
||||
current_limits.insert(key, current_val);
|
||||
}
|
||||
|
||||
for (key, recommended_val) in recommended_limits.iter() {
|
||||
let current_val = *current_limits.get(key).unwrap();
|
||||
if current_val < *recommended_val {
|
||||
datapoint_warn!("os-config", (key, current_val, i64));
|
||||
warn!(
|
||||
" {}: recommended={} current={}, too small",
|
||||
key, recommended_val, current_val
|
||||
);
|
||||
check_failed = true;
|
||||
} else {
|
||||
datapoint_info!("os-config", (key, current_val, i64));
|
||||
info!(
|
||||
" {}: recommended={} current={}",
|
||||
key, recommended_val, current_val
|
||||
);
|
||||
}
|
||||
}
|
||||
datapoint_info!("os-config", ("platform", platform_id(), String));
|
||||
|
||||
if check_failed {
|
||||
datapoint_warn!("os-config", ("network_limit_test_failed", 1, i64));
|
||||
warn!("OS network limit test failed. solana-sys-tuner may be used to configure OS network limits. Bypass check with --no-os-network-limits-test.");
|
||||
} else {
|
||||
info!("OS network limits test passed.");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn check_os_network_limits() {
|
||||
datapoint_info!("os-config", ("platform", platform_id(), String));
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let default_dynamic_port_range =
|
||||
&format!("{}-{}", VALIDATOR_PORT_RANGE.0, VALIDATOR_PORT_RANGE.1);
|
||||
@ -2516,7 +2437,11 @@ pub fn main() {
|
||||
});
|
||||
|
||||
if !matches.is_present("no_os_network_limits_test") {
|
||||
check_os_network_limits();
|
||||
if SystemMonitorService::check_os_network_limits() {
|
||||
info!("OS network limits test passed.");
|
||||
} else {
|
||||
eprintln!("OS network limit test failed. solana-sys-tuner may be used to configure OS network limits. Bypass check with --no-os-network-limits-test.");
|
||||
}
|
||||
}
|
||||
|
||||
let mut ledger_lock = ledger_lockfile(&ledger_path);
|
||||
|
Reference in New Issue
Block a user