Skip to content

Instantly share code, notes, and snippets.

@jefft
Last active July 30, 2020 03:44
Show Gist options
  • Save jefft/5e545abe3f720632d8a37b11830f06db to your computer and use it in GitHub Desktop.
Save jefft/5e545abe3f720632d8a37b11830f06db to your computer and use it in GitHub Desktop.
Nagios check script for Postfix mail queue
#!/bin/bash
###################################################################
# check_postfix_mailqueue is developed with GPL Licence 2.0
#
# GPL License: http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
#
# First version developed by : Bjoern Bongermino
#
###################################################################
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
####################################################################
#
# original https://gist.github.com/alexlehm/8084195
#
### From https://exchange.nagios.org/directory/Plugins/Email-and-Groupware/Postfix/check_postfix_mailqueue/details
### altmas5's version from March 26, 2018
### augmented with --ignore_readdir_race flags per rstevens comment
# created by McArt <[email protected]> http://www.mcart.ru/
# Uncomment to enable debugging
# set -x
# Script name without its directory, used in the version and help output.
# Parameter expansion is used instead of an unquoted `basename $0`, which
# breaks when the script path contains whitespace.
PROGNAME=${0##*/}
VERSION="Version 2.0"
AUTHOR="McArt (http://www.mcart.ru)"

# Exit codes reported back to Nagios.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Thresholds filled in by the command-line options. "unknown" marks an
# option that has not been supplied yet; -w, -c and -b are checked for it
# later, so they are effectively mandatory.
warning=unknown  # -w: warning level for active mails
critical=unknown # -c: critical level for active mails
bounces=unknown  # -b: bounced message threshold
defers=1         # -d: deferred message threshold (optional)
# Write a one-line banner to stdout: program name, version and author,
# separated by single spaces.
print_version() {
  printf '%s %s %s\n' "$PROGNAME" "$VERSION" "$AUTHOR"
}
# Print the version banner followed by the full help text, then exit with
# the OK state. This function never returns to its caller.
#
# The usage line lists -b because the script refuses to run without it,
# and -d as optional because it has a built-in default.
print_help() {
  print_version
  cat <<EOF

$PROGNAME - Checks postfix mailqueue statistic

$PROGNAME is a Nagios plugin which generates statistics
for the postfix mailqueue and checks for corrupt messages.
The following values will be checked:
active: Mails being delivered (should be small)
deferred: Stuck mails (that will be retried later)
corrupt: Messages found to not be in correct format (should be 0)
hold: Recent addition, messages put on hold indefinitely - delete or free
bounced: Bounced mails

Usage: $PROGNAME -w WARN-Level -c CRIT-Level -b BOUNCE-Level [-d DEFER-Level]

Options:
 -w)
 Warning level for active mails
 -c)
 Critical level for active mail
 -b)
 Warning level for bounced message count since last log rotation
 -d)
 Warning level for deferred message count (default: 1)
 -h)
 This help
 -v)
 Version
EOF
  # Fall back to 0 so the function still exits cleanly if STATE_OK is unset.
  exit "${STATE_OK:-0}"
}
# Check for parameters
#
# parse_args: read the command-line options into the global thresholds.
# Arguments: the script's positional parameters ("$@").
# Globals written: warning (-w), critical (-c), bounces (-b), defers (-d).
# Exits:
#   OK      after -h (help) or -v (version)
#   UNKNOWN on an unrecognised option, or an option missing its value
parse_args() {
  local usage="Usage: ./check_postfix_mailqueue2.sh -w <warn> -c <crit> -b <bounce> [-d <deferred>]"

  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty-string argument cannot silently end option parsing early.
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -h)
        print_help
        exit "${STATE_OK:-0}"
        ;;
      -v)
        print_version
        exit "${STATE_OK:-0}"
        ;;
      -w | -c | -b | -d)
        # Each of these options consumes the following argument as its value.
        if [ "$#" -lt 2 ]; then
          echo "UNKNOWN: option $1 requires a value"
          echo "$usage"
          exit "${STATE_UNKNOWN:-3}"
        fi
        case "$1" in
          -w) warning=$2 ;;
          -c) critical=$2 ;;
          -b) bounces=$2 ;;
          -d) defers=$2 ;;
        esac
        shift
        ;;
      *)
        # Stop instead of continuing with a misread command line.
        echo "UNKNOWN: unrecognised option '$1'"
        echo "$usage"
        exit "${STATE_UNKNOWN:-3}"
        ;;
    esac
    shift
  done
}
parse_args "$@"
# -w, -c and -b are mandatory: "unknown" is the placeholder they hold until
# the matching option is given. Operands are quoted so that an empty value
# is compared as a string instead of producing a `[` syntax error.
if [ "$warning" = "unknown" ] || [ "$critical" = "unknown" ] || [ "$bounces" = "unknown" ]; then
  echo "You need to specify warning and critical for active mails and bounced message count threshold"
  echo "Usage: ./check_postfix_mailqueue2.sh -w <warn> -c <crit> -b <bounce> [-d <deferred>]"
  exit "$STATE_UNKNOWN"
fi

# Every threshold is later used in an integer comparison, so reject anything
# that is not an (optionally negative) integer here with a clear message.
for threshold in "$warning" "$critical" "$bounces" "$defers"; do
  case "${threshold#-}" in
    '' | *[!0-9]*)
      echo "UNKNOWN: thresholds must be integers (got '$threshold')"
      exit "$STATE_UNKNOWN"
      ;;
  esac
done

# make sure CRIT is larger than WARN
if [ "$warning" -ge "$critical" ]; then
  echo "UNKNOWN: WARN value may not be greater than or equal the CRIT value"
  # Exit with the UNKNOWN state; the undefined $OK used before made this
  # error path exit with status 0, i.e. report OK.
  exit "$STATE_UNKNOWN"
fi
# Print the number of regular files found under queue directory $1.
# Prints 0 when $1 is not a directory.
_count_queue_files() {
  if [ -d "$1" ]; then
    # -ignore_readdir_race: do not complain about files that disappear
    # while find is walking the directory.
    find "$1" -ignore_readdir_race -type f
  fi | wc -l
}

#######################################
# Locate the Postfix spool directory and count queued and bounced mails.
# Globals read:
#   spooldir        optional override for the spool directory
#   maillog         optional override for the mail log
#                   (default: /var/log/mail.log)
#   STATE_CRITICAL  exit code used on failure
# Globals written:
#   SPOOLDIR, deferred, active, corrupt, hold, bounced
# Side effects:
#   Changes the working directory to the spool directory.
# Exits:
#   CRITICAL if the spool directory cannot be entered or one of the
#   queue directories is not readable.
#######################################
check_postfix_mailqueue() {
  local queue mail_log

  # An explicit $spooldir wins. Otherwise ask postconf when it is installed,
  # else fall back to the Zimbra spool location.
  if command -v postconf > /dev/null 2>&1; then
    SPOOLDIR=${spooldir:-$(postconf -h queue_directory)}
  else
    SPOOLDIR=${spooldir:-/opt/zimbra/data/postfix/spool}
  fi

  # An empty SPOOLDIR (e.g. postconf printed nothing) must not turn into a
  # bare `cd`, which would move to $HOME and then count the wrong files.
  if [ -z "$SPOOLDIR" ] || ! cd -- "$SPOOLDIR" > /dev/null 2>&1; then
    printf '%s\n' "Cannot cd to $SPOOLDIR"
    exit "${STATE_CRITICAL:-2}"
  fi

  for queue in deferred active corrupt hold; do
    if [ ! -r "$queue" ]; then
      printf '%s\n' "queue dir '$queue' is not readable"
      exit "${STATE_CRITICAL:-2}"
    fi
  done

  # Get values
  deferred=$(_count_queue_files deferred)
  active=$(_count_queue_files active)
  corrupt=$(_count_queue_files corrupt)
  hold=$(_count_queue_files hold)

  # Count log lines containing "bounced". grep -c prints nothing when the
  # log cannot be read, so default to 0 in that case.
  mail_log=${maillog:-/var/log/mail.log}
  bounced=$(grep -c bounced -- "$mail_log" 2> /dev/null)
  bounced=${bounced:-0}
}
# Gather the queue counters; this sets deferred, active, corrupt, hold and
# bounced, and exits CRITICAL itself if the spool cannot be inspected.
check_postfix_mailqueue

# Human-readable summary and Nagios performance data (text after the "|").
values="Deferred mails=$deferred Active deliveries=$active Corrupt mails=$corrupt Mails on hold=$hold Bounced mails=$bounced"
perfdata="deferred=$deferred;; active=$active;; corrupt=$corrupt;; hold=$hold;; bounced=$bounced;;"

# Evaluate the only CRITICAL condition first. If the WARNING checks below ran
# first, any corrupt/held/deferred/bounced mail would end the script with
# WARNING and hide an active queue that is over the critical level.
if [ "$active" -gt "$critical" ]; then
  printf '%s\n' "Postfix Mailqueue CRITICAL - $values | $perfdata"
  exit "$STATE_CRITICAL"
fi

if [ "$corrupt" -gt 0 ]; then
  printf '%s\n' "Postfix Mailqueue WARNING - $corrupt corrupt messages found! | $perfdata"
  exit "$STATE_WARNING"
fi

if [ "$hold" -gt 0 ]; then
  printf '%s\n' "Postfix Mailqueue WARNING - $hold hold messages found! | $perfdata"
  exit "$STATE_WARNING"
fi

# Note: this threshold was originally 0, but a single deferred mail kept
# showing up on a production server, so it is not treated as abnormal.
# A more correct fix would be to alert when the same mail stays deferred
# for more than X minutes.
if [ "$deferred" -gt "$defers" ]; then
  printf '%s\n' "Postfix Mailqueue WARNING - $deferred deferred messages found! | $perfdata"
  exit "$STATE_WARNING"
fi

if [ "$bounced" -gt "$bounces" ]; then
  printf '%s\n' "Postfix Mailqueue WARNING - $bounced bounced messages found! | $perfdata"
  exit "$STATE_WARNING"
fi

if [ "$active" -gt "$warning" ]; then
  printf '%s\n' "Postfix Mailqueue WARNING - $values | $perfdata"
  exit "$STATE_WARNING"
fi

# The message is printed as one quoted argument, so its spacing is emitted
# exactly as built above.
printf '%s\n' "Postfix Mailqueue OK - $values | $perfdata"
exit "$STATE_OK"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment