Last active
April 30, 2025 09:28
-
-
Save jazzl0ver/369bcea867809e54326bb0a785092fb7 to your computer and use it in GitHub Desktop.
Nagios plugin for monitoring all mounted filesystems and detecting abnormal spikes in disk usage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This script monitors all mounted filesystems and detects abnormal spikes
# in disk usage based on historical percentage growth. It calculates the
# median and standard deviation of usage changes over time and reports a
# warning or critical alert if the latest change deviates significantly
# (Z-score based) from typical behavior.
#
# Usage: <script> [WARNING_Z] [CRITICAL_Z]
#   WARNING_Z   Z-score at which a WARNING is raised (default 2)
#   CRITICAL_Z  Z-score at which a CRITICAL is raised (default 3)
#
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (bad arguments
# or unusable state directory).
#
# Parameters
WARNING_Z=${1:-2}  # Warning if the absolute Z-score reaches this
CRITICAL_Z=${2:-3} # Critical if the absolute Z-score reaches this
STATE_DIR="/var/tmp/disk_spike_monitor" # one "<mount>.usage" history file per filesystem
TRAINING_PERIOD_DAYS=1                  # samples older than this are discarded

# The thresholds are compared numerically later on; refuse anything that is
# not a plain non-negative decimal instead of failing obscurely mid-run.
num_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
for threshold in "$WARNING_Z" "$CRITICAL_Z"; do
  if ! [[ $threshold =~ $num_re ]]; then
    echo "UNKNOWN: thresholds must be non-negative numbers (got '$threshold')"
    exit 3
  fi
done

# Without the state directory no history can be stored, so stop right away.
if ! mkdir -p "$STATE_DIR"; then
  echo "UNKNOWN: cannot create state directory $STATE_DIR"
  exit 3
fi

NOW=$(date +%s) # timestamp recorded with every sample taken in this run
DAY_SEC=86400
STATUS=0        # worst status seen so far; becomes the exit code
MSG=()          # one report entry per filesystem
# Helper: percent difference
# Prints the integer percentage change from $1 (old) to $2 (new). The result
# is truncated toward zero because it uses shell integer arithmetic.
# Arguments: $1 - previous value (integer), $2 - current value (integer)
# Outputs:   the percentage on stdout; 0 when the old value is 0 (no baseline
#            to compare against) or when an argument is not an integer
# Returns:   0 on success, 1 when an argument is missing or not an integer
percent_diff() {
  local old=$1
  local new=$2
  local int_re='^-?[0-9]+$'
  # Validate first: an empty or non-numeric value would otherwise slip past
  # the zero check and end in a division-by-zero error with no output.
  if ! [[ $old =~ $int_re && $new =~ $int_re ]]; then
    echo "percent_diff: expected two integers, got '$old' and '$new'" >&2
    echo 0
    return 1
  fi
  if [ "$old" -eq 0 ]; then
    echo 0
    return
  fi
  echo $(( (100 * (new - old)) / old ))
}
# Helper: median
# Reads one number per line from stdin and prints the median.
# The input must already be sorted numerically (callers pipe it through
# `sort -n`). Blank lines are ignored, so the single empty line produced by
# printing an empty list does not count as a sample.
# Outputs: the middle value for an odd count, the average of the two middle
#          values for an even count, 0 when there are no samples
median() {
  awk '
    NF { v[++n] = $1 }
    END {
      if (n == 0) { print 0; exit }
      mid = int((n + 1) / 2)
      if (n % 2) print v[mid]
      else print (v[mid] + v[mid + 1]) / 2
    }'
}
# Helper: standard deviation
# Reads one number per line from stdin and prints the sample standard
# deviation (n - 1 in the denominator) of the values around the given centre.
# Arguments: $1 - the centre to measure deviations from (the caller passes
#                 the median)
# Outputs:   the deviation on stdout; 0 when fewer than two samples exist
# Blank lines are skipped so they neither inflate the sample count nor add a
# bogus (0 - centre)^2 term.
stddev() {
  awk -v mean="$1" '
    NF { d = $1 - mean; sum += d * d; n++ }
    END { if (n > 1) print sqrt(sum / (n - 1)); else print 0 }'
}
# Main: sample every real filesystem, update its usage history, score the
# latest change against that history, then print the report and exit with
# the worst status seen.

# Private helper: succeeds when |$1| >= $2. The comparison is done in awk so
# that fractional values and awk's own number formatting are handled without
# depending on bc.
_abs_ge() {
  awk -v z="$1" -v t="$2" 'BEGIN { z += 0; if (z < 0) z = -z; exit !(z >= t + 0) }'
}

# The loop reads from a process substitution instead of a pipe: a piped
# `while` runs in a subshell, and every STATUS/MSG update made inside it
# would be thrown away before the final report.
while IFS= read -r mount; do
  [ -n "$mount" ] || continue
  mount_safe="${mount//\//_}"
  usage_file="$STATE_DIR/$mount_safe.usage"

  current_used=$(df -P "$mount" 2>/dev/null | awk 'NR==2 {print $3}') # in KB
  # Never store a sample that is not a number (df failed, odd mount name).
  if ! [[ $current_used =~ ^[0-9]+$ ]]; then
    MSG+=("SKIPPED: [$mount] could not read disk usage")
    continue
  fi

  printf '%s %s\n' "$NOW" "$current_used" >> "$usage_file"

  # Keep only well-formed samples that fall inside the training window.
  cutoff=$((NOW - TRAINING_PERIOD_DAYS * DAY_SEC))
  awk -v cutoff="$cutoff" 'NF == 2 && $1 >= cutoff' "$usage_file" > "$usage_file.tmp" \
    && mv "$usage_file.tmp" "$usage_file"

  num_samples=$(wc -l < "$usage_file")
  if [ "$num_samples" -lt 3 ]; then
    MSG+=("[$mount] OK: Collecting data ($num_samples samples)")
    continue
  fi

  # Calculate % diffs between consecutive samples. Reading straight from the
  # file (not through a pipe) keeps the array in the current shell.
  diffs=()
  last=""
  while read -r _ value; do
    if [ -n "$last" ]; then
      diffs+=("$(percent_diff "$last" "$value")")
    fi
    last="$value"
  done < "$usage_file"

  # median expects sorted input; stddev does not care about order.
  med=$(printf '%s\n' "${diffs[@]}" | sort -n | median)
  std=$(printf '%s\n' "${diffs[@]}" | stddev "$med")

  # Current diff: the change between the two most recent samples.
  last_diff=${diffs[${#diffs[@]}-1]}

  # Z-score: (x - median) / stddev, defined as 0 when there is no spread.
  zscore=$(awk -v x="$last_diff" -v m="$med" -v s="$std" \
    'BEGIN { if (s + 0 == 0) print 0; else printf "%.2f\n", (x - m) / s }')

  detail="[$mount] Δ=${last_diff}% (Z=$zscore) vs median=$med% σ=$std"
  if _abs_ge "$zscore" "$CRITICAL_Z"; then
    MSG+=("CRITICAL: $detail")
    STATUS=2
  elif _abs_ge "$zscore" "$WARNING_Z"; then
    MSG+=("WARNING: $detail")
    # Do not downgrade a CRITICAL raised by an earlier filesystem.
    [ "$STATUS" -lt 1 ] && STATUS=1
  else
    MSG+=("OK: $detail")
  fi
done < <(df -P | awk 'NR>1 && $1 !~ /(tmpfs|udev|devtmpfs)/ && !seen[$6]++ {print $6}')

if [ "${#MSG[@]}" -eq 0 ]; then
  echo "OK: No significant disk usage changes detected"
else
  echo "${MSG[@]}"
fi
exit "$STATUS"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment