Last active
July 5, 2018 20:36
-
-
Save shokoe/6890f5e0a7c5e4bc0cf25ebed13e7715 to your computer and use it in GitHub Desktop.
AWS RDS disk usage check for Naemon/Nagios
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Naemon/Nagios check for AWS RDS disk usage. Size units are in GB.
# Usage:   check_rds_disk.sh <1:db ident> <2:max delay> <3:warn> <4:crit>
# Example: check_rds_disk.sh rds.db1 300 70 85

# Thresholds, taken verbatim from the command line:
#   max_delay - maximum tolerated age of the CloudWatch datapoint, in seconds
#   warn/crit - used-storage percentages
crit=$4
warn=$3
max_delay=$2
# Drop everything up to and including the last dot (rds.db1 -> db1).
db_name="${1##*.}"
base_delay=360
# Append a fragment to the global status message $msg.
# The first fragment is stored as-is; later ones are joined with ", ".
#   $1 - text to append
msg_append() {
  msg="${msg:+$msg, }$1"
}
# Escalate the global exit code $ec to $1 when $1 is numerically larger.
# An existing, larger $ec is never lowered.
#   $1 - candidate exit code (integer)
# $ec is always left set (0 if it was unset/empty) and the function always
# returns 0, so "no escalation needed" is not reported as a failure.
ec_esca() {
  ec=${ec:-0}
  if [ "$ec" -lt "$1" ]; then
    ec=$1
  fi
  return 0
}
# Pretty-print a duration given in seconds.
#   $1 - duration in whole seconds
# Output (stdout), picked by magnitude:
#   > 129600 s (1.5 days)  -> <d>d<h>h<m>m
#   > 5400 s   (1.5 hours) -> <h>h<m>m
#   > 120 s                -> <m>m<s>s
#   otherwise              -> <s>s
# Anything that is not an integer (e.g. the "NA" sentinel) is echoed back
# unchanged instead of triggering test errors and a misleading "0s".
pptime() {
  local secs=$1
  if ! [[ $secs =~ ^-?[0-9]+$ ]]; then
    echo "$secs"
    return 0
  fi
  if [ "$secs" -gt 129600 ]; then
    echo "$((secs / 86400))d$((secs % 86400 / 3600))h$((secs % 3600 / 60))m"
  elif [ "$secs" -gt 5400 ]; then
    echo "$((secs / 3600))h$((secs % 3600 / 60))m"
  elif [ "$secs" -gt 120 ]; then
    echo "$((secs / 60))m$((secs % 60))s"
  else
    echo "$((secs))s"
  fi
}
# Print the AllocatedStorage value of the RDS instance named by the global
# $db_name, as reported by `aws rds describe-db-instances`.
# Requires the aws CLI and jq on PATH. Prints nothing useful if either fails.
get_store() {
  # Quote the identifier so it always reaches aws as exactly one argument.
  aws rds describe-db-instances --db-instance-identifier "$db_name" \
    | jq '.DBInstances[].AllocatedStorage'
}
# Fetch the most recent FreeStorageSpace datapoint for $db_name from
# CloudWatch.
# Output (stdout): "<free GB> <age of datapoint in seconds>",
#                  or "NA NA" when no datapoint was found.
# Globals: db_name (read).
get_cw() {
  local i now point=""
  local free_gb stamp
  # Retry for CloudWatch eventual consistency: walk back one minute at a
  # time, up to 10 minutes, until a one-minute window yields a datapoint.
  for ((i = 1; i <= 10; i++)); do
    # Sample the clock once so both window edges come from the same minute.
    now=$(date +%s)
    # Line 1 of the text output is skipped (as the original `sed 1d` did);
    # the first datapoint line after it is used. Bytes are converted to GB.
    point=$(aws cloudwatch get-metric-statistics --namespace AWS/RDS \
      --metric-name FreeStorageSpace \
      --dimensions "Name=DBInstanceIdentifier,Value=$db_name" \
      --start-time "$((now / 60 * 60 - 60 * i))" \
      --end-time "$((now / 60 * 60 - 60 * (i - 1)))" \
      --period 60 --statistics Average --output text \
      | awk 'NR == 2 {printf "%s %d %s %s", $1, $2/1024/1024/1024, $3, $4}')
    [ -n "$point" ] && break
  done
  if [ -n "$point" ]; then
    # Fields: row tag, average in GB, timestamp, unit.
    read -r _ free_gb stamp _ <<< "$point"
    echo "${free_gb/.*/} $(($(date +%s) - $(date -d "$stamp" +%s)))"
  else
    echo "NA NA"
  fi
}
### gather data
###############
size=$(get_store)
read -r data delay <<< "$(get_cw)"

# Nothing below can be evaluated without a numeric, non-zero allocated size
# and numeric thresholds. Report UNKNOWN (3) instead of falling through the
# comparisons and ending up with a false OK.
for required in "$size" "$max_delay" "$warn" "$crit"; do
  case "$required" in
    '' | *[!0-9]*)
      echo "unable to get allocated storage or invalid arguments (size='$size' max_delay='$max_delay' warn='$warn' crit='$crit')"
      exit 3
      ;;
  esac
done
if [ "$size" -eq 0 ]; then
  echo "allocated storage reported as 0"
  exit 3
fi

# Treat anything that is not a plain integer as "no datapoint".
case "$data" in
  '' | *[!0-9]*) data=NA ;;
esac

# Used percentage is only meaningful with a real datapoint. In arithmetic
# "NA" would silently evaluate to 0 and report 100% used.
if [ "$data" = "NA" ]; then
  perc=NA
else
  perc=$(((size - data) * 100 / size))
fi
# NOTE(review): this is not the label=value perfdata syntax from the Nagios
# plugin guidelines; left unchanged because consumers may depend on it.
perf_data="freeGB:$data; delaySec:$delay; sizeGB:$size; usedPercent:$perc"

### check delay
###############
# Skipped when the delay is not numeric (e.g. "NA"); the data check below
# already raises CRITICAL in that case.
if [[ $delay =~ ^-?[0-9]+$ ]] && [ "$delay" -gt "$max_delay" ]; then
  msg_append "delay too big ($(pptime "$delay")>$(pptime "$max_delay"))"
  ec_esca 2
fi

### check data
#######################
if [ "$data" = "NA" ]; then
  msg_append "unable to get cloudwatch data point"
  ec_esca 2
elif [ "$perc" -gt "$crit" ]; then
  msg_append "Used storage ${perc}% > ${crit}%"
  ec_esca 2
elif [ "$perc" -gt "$warn" ]; then
  msg_append "Used storage ${perc}% > ${warn}%"
  ec_esca 1
else
  # This branch also covers perc == warn, hence "<=".
  msg_append "Used storage ${perc}% <= ${warn}%"
  ec_esca 0
fi

echo "$msg | $perf_data"
#echo "<pre>$info</pre>"
# Default to 0 explicitly rather than relying on the status of the echo.
exit "${ec:-0}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment