#!/usr/bin/env bash
#
# Reports cpu and ram usage statistics
#
# In an endless loop, samples `top` (and `nvidia-smi` when it is installed)
# and submits one "system-stats" datapoint per iteration through
# scripts/metrics-write-datapoint.sh.  Exits immediately with status 0 on
# non-Linux hosts.
#
set -e

[[ $(uname) == Linux ]] || exit 0

# need to cd like this to avoid #SC1091
cd "$(dirname "$0")/.."
source scripts/configure-metrics.sh

#######################################
# Computes total CPU usage from batch-mode `top` output.
# Arguments: none (reads `top -b` output on stdin)
# Outputs:   100 minus the idle percentage found on the last parsable
#            "%Cpu(s):" summary line; prints nothing if none was found
#######################################
cpu_usage_from_top() {
  awk '
    /%Cpu\(s\):/ {
      count = split($0, fields, ",")
      for (i = 1; i <= count; i++) {
        # the idle column looks like " 98.4 id"
        if (fields[i] ~ /^ *[0-9.]+%* id/) {
          idle = fields[i] + 0
          found = 1
        }
      }
    }
    END { if (found) print 100 - idle }
  '
}

#######################################
# Computes RAM usage from batch-mode `top` output.
# Arguments: none (reads `top -b` output on stdin)
# Outputs:   used / total * 100 taken from the last parsable "...B Mem"
#            summary line; prints nothing if none was found or if the
#            reported total is not positive
#######################################
ram_usage_from_top() {
  awk '
    /B Mem/ {
      line = $0
      sub(/^[^:]*:/, "", line)   # drop the "KiB Mem :" style label
      have_total = 0
      have_used = 0
      count = split(line, fields, ",")
      for (i = 1; i <= count; i++) {
        if (fields[i] ~ /^ *[0-9.]+%* total/) {
          line_total = fields[i] + 0
          have_total = 1
        } else if (fields[i] ~ /^ *[0-9.]+%* used/) {
          line_used = fields[i] + 0
          have_used = 1
        }
      }
      if (have_total && have_used && line_total > 0) {
        total = line_total
        used = line_used
        found = 1
      }
    }
    END { if (found) print used / total * 100 }
  '
}

#######################################
# Averages per-GPU statistics into a report fragment.
# Arguments: none (reads "utilization, mem_used, mem_total" CSV rows, one
#            row per GPU, on stdin)
# Outputs:   ",avg_gpu_usage=<n>,avg_gpu_mem_usage=<n>", or nothing when no
#            row carries a positive numeric mem_total.  Rows that do not
#            parse are skipped, so the average can never divide by zero.
#######################################
gpu_report_from_csv() {
  awk -F', *' '
    NF >= 3 && $3 + 0 > 0 {
      usage += $1
      mem_usage += $2 / $3 * 100
      gpus++
    }
    END {
      if (gpus) {
        print ",avg_gpu_usage=" (usage / gpus) ",avg_gpu_mem_usage=" (mem_usage / gpus)
      }
    }
  '
}

while true; do
  # collect top twice because the first time is inaccurate
  # LC_ALL=C keeps "." as the decimal separator, which the parsers rely on
  top_output="$(LC_ALL=C top -bn2 -d1)"

  # collect the total cpu usage by subtracting idle usage from 100%
  cpu_usage=$(cpu_usage_from_top <<< "$top_output")
  # collect the total ram usage by dividing used memory / total memory
  ram_usage=$(ram_usage_from_top <<< "$top_output")

  # skip this sample instead of aborting or submitting a malformed datapoint
  if [[ -z $cpu_usage || -z $ram_usage ]]; then
    echo "Warning: unable to parse cpu/ram usage from top output" >&2
    sleep 1
    continue
  fi
  cpu_report="cpu_usage=$cpu_usage,ram_usage=$ram_usage"

  # if nvidia-smi exists, report gpu stats
  gpu_report=""
  if [[ -x "$(command -v nvidia-smi)" ]]; then
    # a failing nvidia-smi yields no usable rows and thus an empty gpu_report
    gpu_report=$(
      nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total --format=csv,nounits,noheader \
        | gpu_report_from_csv
    )
  fi

  report="${cpu_report}${gpu_report}"
  ./scripts/metrics-write-datapoint.sh "system-stats,hostname=$HOSTNAME $report"
  sleep 1
done