From df808dedd1d40729c05a9881bfcf05682631adec Mon Sep 17 00:00:00 2001
From: Michael Vines
Date: Tue, 7 Aug 2018 12:38:13 -0700
Subject: [PATCH] Add simple OOM Killer monitor

---
 multinode-demo/oom_monitor.sh | 32 ++++++++++++++++++++++++++++++++
 snap/snapcraft.yaml           |  5 +++++
 2 files changed, 37 insertions(+)
 create mode 100755 multinode-demo/oom_monitor.sh

diff --git a/multinode-demo/oom_monitor.sh b/multinode-demo/oom_monitor.sh
new file mode 100755
index 0000000000..5fb392e3a2
--- /dev/null
+++ b/multinode-demo/oom_monitor.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -e
+#
+# Reports Linux OOM Killer activity
+#
+
+here=$(dirname "$0")
+# shellcheck source=multinode-demo/common.sh
+source "$here"/common.sh
+
+if [[ $(uname) != Linux ]]; then
+  exit 0
+fi
+
+syslog=/var/log/syslog
+if [[ ! -r $syslog ]]; then
+  echo Unable to read $syslog
+  exit 0
+fi
+
+# Adjust OOM score to reduce the chance that this script will be killed
+# during an Out of Memory event since the purpose of this script is to
+# report such events
+oom_score_adj "self" -500
+
+while read -r victim; do
+  echo "Out of memory event detected, $victim killed"
+  "$here"/metrics_write_datapoint.sh "oom-killer,victim=$victim killed=1"
+done < <( \
+  tail --follow=name --retry -n0 $syslog \
+  | sed --unbuffered -n 's/^.* Out of memory: Kill process [1-9][0-9]* (\([^)]*\)) .*/\1/p' \
+)
+exit 1
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml
index 07470b3ee6..98196d1dfe 100644
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -77,6 +77,11 @@ apps:
     plugs:
       - network
       - network-bind
+  daemon-oom-monitor:
+    daemon: simple
+    command: oom_monitor.sh
+    plugs:
+      - network
 
 parts:
   solana: