Skip to content

Instantly share code, notes, and snippets.

@ashrithr
Last active January 14, 2019 06:01
Show Gist options
  • Save ashrithr/5614221 to your computer and use it in GitHub Desktop.
Save ashrithr/5614221 to your computer and use it in GitHub Desktop.
nagios plugin to monitor dfs
#!/usr/bin/env bash
# ---
# => Nagios plugin to monitor Hadoop DFS (HDFS)
# This script serves as a baseline for Nagios plugins for Hadoop
# Author: Ashrith
# ---
# Plugin identity, printed by the help/version output.
# "$0" is quoted (and protected with --) so a script path containing spaces
# or a leading dash cannot be word-split or mistaken for an option.
PROGNAME=$(basename -- "$0")
VERSION="Version 1.0"
# Exit statuses used by the plugin: OK, WARNING, CRITICAL, UNKNOWN.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3
# These values are never meant to change after start-up.
readonly PROGNAME VERSION ST_OK ST_WR ST_CR ST_UK
# print_help: write the plugin name, version and option summary to stdout,
# then terminate the script with the UNKNOWN status.
# Globals read: PROGNAME, VERSION, ST_UK.
print_help() {
  # One printf call emits every help line; an empty argument yields a blank line.
  printf '%s\n' \
    "$PROGNAME $VERSION" \
    "" \
    "$PROGNAME is a Nagios plugin to check the status of HDFS, Hadoop's" \
    "underlying, redundant, distributed file system." \
    "" \
    "Options:" \
    " -w|--warning)" \
    " Defines the warning level for available datanodes. Default" \
    " is: off" \
    " -c|--critical)" \
    " Defines the critical level for available datanodes. Default" \
    " is: off"
  exit "$ST_UK"
}
#validate input arguments
# parse_args: walk the command-line options and populate the globals
# `warning` and `critical`.
#   -h|--help            print the help text and exit with $ST_UK
#   -v|--version         print "$PROGNAME $VERSION" and exit with $ST_UK
#   -w|--warning  <int>  warning threshold for available datanodes
#   -c|--critical <int>  critical threshold for available datanodes
# Any other argument, a missing threshold value, or a threshold that is not
# an integer prints a message to stdout and exits with $ST_UK.
# Globals read: PROGNAME, VERSION, ST_UK. Globals written: warning, critical.
parse_args() {
  local int_re='^-?[0-9]+$'

  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty-string argument is reported instead of silently ending parsing.
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --help|-h)
        print_help
        exit "$ST_UK"
        ;;
      --version|-v)
        # No print_version helper is defined in this script, so the version
        # line is printed directly.
        echo "$PROGNAME $VERSION"
        exit "$ST_UK"
        ;;
      --warning|-w|--critical|-c)
        # The thresholds are later compared with the integer operators of
        # `[`; rejecting bad values here avoids those comparisons erroring out.
        if [ "$#" -lt 2 ] || ! [[ "$2" =~ $int_re ]]; then
          echo "Option $1 requires an integer argument"
          exit "$ST_UK"
        fi
        case "$1" in
          --warning|-w) warning=$2 ;;
          *) critical=$2 ;;
        esac
        shift # drop the option's value; the shared shift below drops the flag
        ;;
      *)
        echo "Unknown argument: $1"
        print_help
        exit "$ST_UK"
        ;;
    esac
    shift
  done
}
parse_args "$@"
# get_wcdiff: classify the warning/critical threshold combination.
# Globals read:    warning, critical
# Globals written: wcdiff, wclvls
#   wcdiff=""  thresholds are consistent, or neither is set
#   wcdiff=1   both set, but warning is lower than critical
#   wcdiff=2   only warning is set
#   wcdiff=3   only critical is set
#   wclvls=1   both thresholds were supplied
get_wcdiff() {
  # Start from a clean slate so a stale or inherited wcdiff value cannot be
  # mistaken for a verdict on the current thresholds.
  wcdiff=""

  # Separate, quoted tests replace the obsolescent `-a` operator, which is
  # ambiguous when an operand looks like a test operator.
  if [ -n "$warning" ] && [ -n "$critical" ]; then
    wclvls=1
    if [ "$warning" -lt "$critical" ]; then
      wcdiff=1
    fi
  elif [ -n "$warning" ]; then
    wcdiff=2
  elif [ -n "$critical" ]; then
    wcdiff=3
  fi
}
# val_wcdiff: act on the classification stored in $wcdiff.
# For the values 1, 2 and 3 it prints the matching usage complaint to stdout
# and exits with $ST_UK; for any other value it returns without output.
# Globals read: wcdiff, ST_UK.
val_wcdiff() {
  local complaint

  case "$wcdiff" in
    1)
      complaint="Please adjust your warning/critical thresholds. The warning must be higher than the critical level!"
      ;;
    2)
      complaint="Please also set a critical value when you want to use warning/critical thresholds!"
      ;;
    3)
      complaint="Please also set a warning value when you want to use warning/critical thresholds!"
      ;;
    *)
      # Nothing to complain about.
      return 0
      ;;
  esac

  echo "$complaint"
  exit "$ST_UK"
}
# _report_field: print the third whitespace-separated field of the first line
# in $tmp_vals that contains the literal text given as $1 (nothing if no line
# matches).
_report_field() {
  awk -v key="$1" 'index($0, key) { print $3; exit }' <<<"$tmp_vals"
}

# get_vals: run `hdfs dfsadmin -report` as the hdfs user and extract the
# figures the plugin reports.
# Globals written:
#   tmp_vals    raw report text
#   dn_avail    value following "Datanodes available:"
#   dfs_used    value following "DFS Used:", divided by 1024 twice
#               (presumably bytes -> MB; confirm against the report format)
#   dfs_used_p  value following "DFS Used%:"
#   dfs_total   value following "Configured Capacity:", divided by 1024 twice
# If the command fails, or the numeric fields cannot be parsed, a line starting
# with "UNKNOWN" is printed to stdout and the script exits with $ST_UK
# (3 if unset) instead of continuing with empty values.
get_vals() {
  local used_bytes total_bytes
  local int_re='^[0-9]+$'

  if ! tmp_vals=$(sudo -u hdfs hdfs dfsadmin -report); then
    echo "UNKNOWN - unable to run 'hdfs dfsadmin -report'"
    exit "${ST_UK:-3}"
  fi

  dn_avail=$(_report_field "Datanodes available:")
  used_bytes=$(_report_field "DFS Used:")
  dfs_used_p=$(_report_field "DFS Used%:")
  total_bytes=$(_report_field "Configured Capacity:")

  # Arithmetic on empty or non-numeric fields would fail, so refuse them here.
  if ! [[ "$dn_avail" =~ $int_re && "$used_bytes" =~ $int_re \
    && "$total_bytes" =~ $int_re ]]; then
    echo "UNKNOWN - could not parse the output of 'hdfs dfsadmin -report'"
    exit "${ST_UK:-3}"
  fi

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  dfs_used=$(( 10#$used_bytes / 1024 / 1024 ))
  dfs_total=$(( 10#$total_bytes / 1024 / 1024 ))
}
# do_output: build the human-readable status text in the global `output`.
# Globals read: dn_avail, dfs_total, dfs_used, dfs_used_p.
do_output() {
  # The values are passed as printf arguments, so a '%' inside them (as in
  # the used-percentage figure) is copied literally.
  printf -v output \
    'Datanodes up and running: %s, DFS total: %s MB, DFS used: %s MB (%s)' \
    "$dn_avail" "$dfs_total" "$dfs_used" "$dfs_used_p"
}
# do_perfdata: build the performance-data text in the global `perfdata`
# as three space-separated 'label'=value pairs.
# Globals read: dn_avail, dfs_total, dfs_used.
do_perfdata() {
  printf -v perfdata \
    "'datanodes_available'=%s 'dfs_total'=%s 'dfs_used'=%s" \
    "$dn_avail" "$dfs_total" "$dfs_used"
}
#Call the Functions
get_wcdiff   # classify the threshold combination
val_wcdiff   # exits if the thresholds are unusable
get_vals #collects the actual values
do_output
do_perfdata

# Without a numeric datanode count the comparisons below would error out and
# fall through to "OK"; report UNKNOWN instead of a misleading success.
case "$dn_avail" in
  '' | *[!0-9]*)
    echo "UNKNOWN - could not determine the number of available datanodes"
    exit "$ST_UK"
    ;;
esac

# Thresholds are applied only when both were supplied. The critical level is
# checked first, so the warning branch only needs its own upper bound.
if [ -n "$warning" ] && [ -n "$critical" ]; then
  if [ "$dn_avail" -le "$critical" ]; then
    echo "CRITICAL - ${output} | ${perfdata}"
    exit "$ST_CR"
  elif [ "$dn_avail" -le "$warning" ]; then
    echo "WARNING - ${output} | ${perfdata}"
    exit "$ST_WR"
  else
    echo "OK - ${output} | ${perfdata}"
    exit "$ST_OK"
  fi
else
  echo "OK - ${output} | ${perfdata}"
  exit "$ST_OK"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment