Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# _____ _ _
# | __|___ ___ ___ _| |___ _____| |_ ___ ___ ___
# | __| _| -_| -_| . | . | | . | . | | -_|
# |__| |_| |___|___|___|___|_|_|_|___|___|_|_|___|
#
# Freedom in the Cloud
#
# Daemon which monitors the cpu and looks for anomalous
# levels of usage
#
# License
# =======
#
# Copyright (C) 2018 Bob Mottram <bob@freedombone.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Message catalogue settings used by bash when it looks up
# localised $"..." strings
PROJECT_NAME='freedombone'
export TEXTDOMAIN="${PROJECT_NAME}-cpu-monitor" TEXTDOMAINDIR='/usr/share/locale'

# How many raw samples are accumulated before the first
# running average is calculated
INITIAL_SAMPLES=10

# Running average percentages at which a warning and a
# critical notification are raised
THRESHOLD_WARNING=80
THRESHOLD_CRITICAL=90

# Sampling state: the number of samples taken so far, plus the
# current and previous values of the running average cpu usage
no_of_samples=0
running_average=0
prev_running_average=0

# Countdown which is set when a notification is sent and which
# has to reach zero before another one goes out. This stops
# usage hovering around a threshold from producing a stream
# of messages
suppress=0

# Any non-empty first argument switches on debug mode
debug_mode=$1
if [[ -n "$debug_mode" ]]; then
    printf '%s\n' 'CPU monitor Debug mode'
    UPDATE_INTERVAL=1
fi
# Cumulative tick counters seen at the previous sample, so that each
# sample measures only the time elapsed since the one before it.
# (/proc/stat counters are totals since boot; dividing them directly
# gives a since-boot average which hardly moves on a long-running box.)
prev_busy_ticks=0
prev_total_ticks=0

# Seconds to wait between samples.
# NOTE(review): the only value ever assigned to UPDATE_INTERVAL is the
# 1 second debug setting, so the fallback of 10 used otherwise is a
# placeholder - confirm the intended production interval.
sample_interval=${UPDATE_INTERVAL:-10}
if ! [[ "$sample_interval" =~ ^[1-9][0-9]*$ ]]; then
    # an unusable interval would make sleep fail and the loop spin
    sample_interval=10
fi

#######################################
# Take one cpu usage sample from the aggregate "cpu" line of /proc/stat.
# Usage is (user + system) / (user + system + idle), i.e. fields 2, 4
# and 5 of that line, measured over the ticks elapsed since the
# previous call.
# Globals:   cpu_usage (written: integer percent 0-100)
#            prev_busy_ticks, prev_total_ticks (read and written)
# Arguments: none
# Returns:   0 if cpu_usage was updated, 1 if no usable sample could
#            be taken (cpu_usage is then left unchanged)
#######################################
sample_cpu_usage() {
    local label user system idle
    local busy_ticks total_ticks delta_busy delta_total

    [[ -r /proc/stat ]] || return 1

    # find the aggregate line; per-core lines are labelled cpu0, cpu1...
    while read -r label user _ system idle _; do
        if [[ "$label" == 'cpu' ]]; then
            break
        fi
    done < /proc/stat
    [[ "$label" == 'cpu' ]] || return 1

    # only ever feed plain integers into shell arithmetic
    [[ "$user" =~ ^[0-9]+$ && "$system" =~ ^[0-9]+$ && "$idle" =~ ^[0-9]+$ ]] || return 1

    busy_ticks=$((user + system))
    total_ticks=$((busy_ticks + idle))
    delta_busy=$((busy_ticks - prev_busy_ticks))
    delta_total=$((total_ticks - prev_total_ticks))
    prev_busy_ticks=$busy_ticks
    prev_total_ticks=$total_ticks

    # no ticks elapsed or counters went backwards: nothing to report
    (( delta_total > 0 && delta_busy >= 0 )) || return 1

    cpu_usage=$((delta_busy * 100 / delta_total))
    if (( cpu_usage > 100 )); then
        # guard against inconsistent counter readings
        cpu_usage=100
    fi
    return 0
}

#######################################
# React when the running average rises through a threshold. In debug
# mode the level name is printed; otherwise a notification is sent,
# unless the suppression countdown is still running.
# NOTE(review): one countdown is shared by both levels, so a critical
# alert which follows a warning within the countdown is not sent -
# confirm that this is intended.
# Globals:   prev_running_average, running_average, debug_mode,
#            PROJECT_NAME (read); suppress (read and written)
# Arguments: $1 - threshold percentage
#            $2 - text printed in debug mode
#            $3 - notification subject
#            $4 - notification message
# Returns:   0
#######################################
notify_on_crossing() {
    local threshold=$1 debug_label=$2 subject=$3 message=$4

    # only the upward crossing is of interest, not staying above
    if (( prev_running_average >= threshold || running_average < threshold )); then
        return 0
    fi

    if [[ -n "$debug_mode" ]]; then
        echo "$debug_label"
        return 0
    fi

    if (( suppress == 0 )); then
        "/usr/local/bin/${PROJECT_NAME}-notification" -s "$subject" -m "$message"
        # stay quiet for the next 180 loop iterations
        suppress=180
    fi
    return 0
}

# Prime the tick counters so that the first sample taken inside the
# loop covers one interval rather than the whole time since boot
sample_cpu_usage

while true
do
    # pace the loop; without this the monitor itself would eat the cpu
    sleep "$sample_interval"

    if sample_cpu_usage; then
        # get a number of samples first so that we're working from
        # an initial good average
        if (( no_of_samples < INITIAL_SAMPLES )); then
            running_average=$((running_average + cpu_usage))
            no_of_samples=$((no_of_samples + 1))
            if [[ -n "$debug_mode" ]]; then
                echo -n '.'
            fi
        fi

        if (( no_of_samples == INITIAL_SAMPLES )); then
            # turn the accumulated sum into the initial running average
            running_average=$((running_average / INITIAL_SAMPLES))
            if [[ -n "$debug_mode" ]]; then
                echo "initial $running_average"
            fi
            # step past INITIAL_SAMPLES so that this happens only once
            # and the branch below takes over
            no_of_samples=$((no_of_samples + 1))
        fi

        if (( no_of_samples > INITIAL_SAMPLES )); then
            # exponential moving average: 9 parts history, 1 part new sample
            running_average=$(( (running_average * 9 + cpu_usage) / 10 ))
            if [[ -n "$debug_mode" ]]; then
                echo "$cpu_usage ${running_average}%"
            fi

            # $"..." strings are looked up in the TEXTDOMAIN catalogue
            notify_on_crossing "$THRESHOLD_WARNING" "Warning" \
                $'High CPU usage' \
                $"CPU usage is getting high. ${running_average}%"
            notify_on_crossing "$THRESHOLD_CRITICAL" "Critical" \
                $'Critical CPU usage' \
                $"CPU usage is reaching critical level. ${running_average}%"

            prev_running_average=$running_average
        fi
    fi

    # count down the suppression of messages
    if (( suppress > 0 )); then
        suppress=$((suppress - 1))
    fi
done

# not reached: the loop above only ends when the process is killed
exit 0