Skip to content

Instantly share code, notes, and snippets.

@jazzl0ver
Last active April 30, 2025 09:28
Show Gist options
  • Save jazzl0ver/369bcea867809e54326bb0a785092fb7 to your computer and use it in GitHub Desktop.
Save jazzl0ver/369bcea867809e54326bb0a785092fb7 to your computer and use it in GitHub Desktop.
Nagios plugin for monitoring all mounted filesystems and detecting abnormal spikes in disk usage
#!/bin/bash
#
# This script monitors all mounted filesystems and detects abnormal spikes
# in disk usage based on historical percentage growth. It calculates the
# median and standard deviation of usage changes over time and reports a
# warning or critical alert if the latest change deviates significantly
# (Z-score based) from typical behavior.
#
# Parameters
#   $1 - warning threshold  (Z-score, default 2)
#   $2 - critical threshold (Z-score, default 3)
# Exit codes follow the Nagios convention: 0 OK, 1 WARNING, 2 CRITICAL,
# 3 UNKNOWN (used here for invalid arguments or an unusable state directory).
WARNING_Z=${1:-2}  # Warning if |Z-score| reaches or exceeds this
CRITICAL_Z=${2:-3} # Critical if |Z-score| reaches or exceeds this
STATE_DIR="/var/tmp/disk_spike_monitor" # one "<mount>.usage" history file per filesystem
TRAINING_PERIOD_DAYS=1                  # samples older than this are discarded
NOW=$(date +%s)
DAY_SEC=86400
STATUS=0 # worst state seen so far
MSG=()   # one status message per evaluated filesystem

# The thresholds are interpolated into bc expressions later on. A value that
# is not a plain number would make every comparison fail and the check would
# quietly fall through to OK, so reject it up front.
threshold_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
for threshold in "$WARNING_Z" "$CRITICAL_Z"; do
  if [[ ! "$threshold" =~ $threshold_re ]]; then
    echo "UNKNOWN: thresholds must be non-negative numbers (got '$threshold')"
    exit 3
  fi
done
unset threshold threshold_re

# Without the state directory no history can be recorded or read.
if ! mkdir -p "$STATE_DIR"; then
  echo "UNKNOWN: cannot create state directory $STATE_DIR"
  exit 3
fi
# Helper: percent difference
# Prints the whole-number percentage change from $1 (previous value) to
# $2 (current value). Shell integer arithmetic is used, so the result is
# truncated toward zero. A previous value of 0 prints 0 instead of dividing
# by zero.
percent_diff() {
  local base=$1
  local latest=$2
  [ "$base" -eq 0 ] && { echo 0; return; }
  echo $(( 100 * (latest - base) / base ))
}
# Helper: median
# Reads one number per line on stdin and prints the median on stdout.
# The input is sorted numerically here, so callers may pass values in any
# order (already-sorted input gives the same result as before).
# Blank lines are ignored rather than counted as samples; if no values
# remain, 0 is printed.
median() {
  LC_ALL=C sort -n | awk '
    NF { vals[++count] = $1 }
    END {
      if (count == 0) { print 0; exit }
      mid = int((count + 1) / 2)
      if (count % 2) print vals[mid]                # odd count: middle value
      else print (vals[mid] + vals[mid + 1]) / 2    # even count: mean of the two middle values
    }'
}
# Helper: standard deviation
# Reads one number per line on stdin and prints the sample standard
# deviation (n - 1 denominator) of those numbers around the centre value
# given as $1. With fewer than two input lines it prints 0.
stddev() {
  awk -v center="$1" '
    { acc += ($1 - center) ^ 2 }
    END {
      if (NR <= 1) print 0
      else print sqrt(acc / (NR - 1))
    }'
}
# Main check: for every mounted filesystem (tmpfs/udev/devtmpfs devices are
# skipped) record the current usage, trim the history to the training
# window, and compare the most recent percentage change with the
# distribution of all recorded changes.
#
# Both loops below read from a redirection instead of the right-hand side of
# a pipeline. A `cmd | while ...` loop runs in a subshell, so every update
# to MSG, STATUS and diffs made inside it was thrown away when the loop
# ended; the plugin therefore always reported OK with exit code 0.
while read -r mount; do
  mount_safe="${mount//\//_}"
  usage_file="$STATE_DIR/$mount_safe.usage"

  current_used=$(df -P "$mount" | awk 'NR==2 {print $3}') # in KB
  # Do not store a sample when df produced nothing usable; a blank value
  # would corrupt the history file.
  if [[ ! "$current_used" =~ ^[0-9]+$ ]]; then
    echo "[$mount] cannot read current usage, skipping" >&2
    continue
  fi
  echo "$NOW $current_used" >> "$usage_file"

  # Keep only the samples that fall inside the training window.
  awk -v cutoff=$((NOW - TRAINING_PERIOD_DAYS * DAY_SEC)) '$1 >= cutoff' "$usage_file" > "$usage_file.tmp" && mv "$usage_file.tmp" "$usage_file"

  num_samples=$(wc -l < "$usage_file")
  if [ "$num_samples" -lt 3 ]; then
    echo "[$mount] OK: Collecting data ($num_samples samples)"
    continue
  fi

  # Calculate % diffs between consecutive samples (file lines are
  # "<timestamp> <used KB>").
  diffs=()
  last=""
  while read -r _ value; do
    # Ignore malformed history lines instead of feeding them to arithmetic.
    [[ "$value" =~ ^[0-9]+$ ]] || continue
    if [ -n "$last" ]; then
      diffs+=("$(percent_diff "$last" "$value")")
    fi
    last="$value"
  done < "$usage_file"

  if [ "${#diffs[@]}" -eq 0 ]; then
    echo "[$mount] OK: Collecting data ($num_samples samples)"
    continue
  fi

  # median() expects ascending input; a here-string replaces the former
  # temporary file, so nothing is left behind if the script is interrupted.
  sorted_diffs=$(printf '%s\n' "${diffs[@]}" | sort -n)
  med=$(median <<< "$sorted_diffs")
  std=$(stddev "$med" <<< "$sorted_diffs")

  # Current diff: the change between the two most recent samples.
  last_diff=${diffs[${#diffs[@]}-1]}

  # Z-score: (x - median) / stddev; defined as 0 when there is no spread.
  if (( $(echo "$std == 0" | bc -l) )); then
    zscore=0
  else
    zscore=$(echo "scale=2; ($last_diff - $med) / $std" | bc -l)
  fi
  # Sudden drops are flagged as well as sudden growth, so compare |Z|.
  abs_zscore=${zscore#-}

  if (( $(echo "$abs_zscore >= $CRITICAL_Z" | bc -l) )); then
    MSG+=("CRITICAL: [$mount] Δ=${last_diff}% (Z=$zscore) vs median=$med% σ=$std")
    STATUS=2
  elif (( $(echo "$abs_zscore >= $WARNING_Z" | bc -l) )); then
    MSG+=("WARNING: [$mount] Δ=${last_diff}% (Z=$zscore) vs median=$med% σ=$std")
    # Never downgrade a CRITICAL recorded for an earlier filesystem.
    [ "$STATUS" -lt 1 ] && STATUS=1
  else
    MSG+=("OK: [$mount] Δ=${last_diff}% (Z=$zscore) vs median=$med% σ=$std")
  fi
done < <(df -P | awk 'NR>1 && $1 !~ /(tmpfs|udev|devtmpfs)/ {print $6}')

if [ "${#MSG[@]}" -eq 0 ]; then
  echo "OK: No significant disk usage changes detected"
else
  echo "${MSG[@]}"
fi
exit $STATUS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment