Last active
January 3, 2016 07:28
-
-
Save kchristensen/913a23dcb64d35c6783e to your computer and use it in GitHub Desktop.
Nagios DNS resolution health check
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Nagios check to test DNS resolution data pulled from Graphite | |
""" | |
import optparse | |
import logging | |
import requests | |
import sys | |
# Nagios plugin exit statuses returned by this check.
OK = 0
WARNING = 1
CRITICAL = 2

# The check reports its result through this logger; records are written to
# stdout (not logging's default stderr) at INFO level and above.
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.StreamHandler(sys.stdout))
LOGGER.setLevel(logging.INFO)
# Command-line interface. Only --points carries a default; optparse leaves
# every other option as None on `opts` when it is not supplied.
parse = optparse.OptionParser()
parse.add_option('-s', '--server', type='string', dest='server',
                 help='hostname of the server being checked')
parse.add_option('-g', '--graphite', type='string', dest='graphite',
                 help='hostname of the graphite server to poll')
parse.add_option('-w', '--warning', type='int', dest='warning_threshold',
                 help='threshold at which to warn')
parse.add_option('-c', '--critical', type='int', dest='critical_threshold',
                 help='threshold at which to go critical')
parse.add_option('-u', '--url', type='string', dest='url',
                 help='url of the site being resolved')
# The default is given as an int to match type='int' instead of relying on
# optparse converting a string default.
parse.add_option('-p', '--points', type='int', dest='points', default=10,
                 help='number of historical data points to average')
(opts, args) = parse.parse_args()
# Require every option the rest of the script reads: the Graphite query is
# built from --graphite/--server/--url and the result is compared against
# --warning/--critical.
UNKNOWN = 3  # Nagios status for invalid command-line arguments
required = (
    ('--server', opts.server),
    ('--graphite', opts.graphite),
    ('--url', opts.url),
    ('--warning', opts.warning_threshold),
    ('--critical', opts.critical_threshold),
)
# Test for None/'' rather than truthiness so a threshold of 0 is accepted.
missing = [flag for flag, value in required if value is None or value == '']
if missing:
    # Status line goes first; the usage text follows as extra detail.
    LOGGER.info('UNKNOWN - missing required option(s): %s',
                ', '.join(missing))
    parse.print_help()
    # A misconfigured check must not report OK.
    sys.exit(UNKNOWN)
# Define the metric url on the Graphite server. Dots in the resolved site
# name are swapped for underscores before it is placed in the metric path.
graphite_url = (
    'http://{0}/render/?from=-1days&target=servers.{1}.dns.{2}&rawData'
    .format(opts.graphite, opts.server, opts.url.replace('.', '_'))
)
# Seconds to wait on Graphite before giving up, so a stalled server cannot
# hang the check indefinitely.
GRAPHITE_TIMEOUT = 10


def average_response_time(raw, points):
    """Average the most recent samples of a Graphite ``rawData`` reply.

    Everything after the last ``|`` in ``raw`` is read as a comma-separated
    list of samples. A ``None`` sample counts as ``0.0``, so a series made
    up solely of ``None`` averages to zero.

    Args:
        raw: Response body returned by the Graphite render endpoint.
        points: How many trailing samples to average; must be >= 1.

    Returns:
        The mean of the last ``points`` samples as a float. When the series
        holds fewer samples than requested, the mean covers the samples
        that exist instead of being diluted by the missing ones.

    Raises:
        ValueError: If ``points`` is below 1 or a sample is not numeric
            (for example an empty reply or an HTML error page).
    """
    if points < 1:
        raise ValueError('points must be at least 1, got %r' % (points,))
    series = raw.split('|')[-1].strip()
    samples = [float(item.replace('None', '0.0'))
               for item in series.split(',')]
    window = samples[-points:]
    # Divide by the real sample count, not `points`, so a short series is
    # not under-reported.
    return sum(window) / len(window)


if __name__ == '__main__':
    try:
        # Grab Graphite data and reduce it to one average response time
        response = requests.get(graphite_url, timeout=GRAPHITE_TIMEOUT)
        response.raise_for_status()
        average = average_response_time(response.text, opts.points)
    except requests.RequestException:
        # Covers refused connections, timeouts and HTTP error statuses.
        LOGGER.critical('CRITICAL - There was an error connecting to graphite')
        sys.exit(CRITICAL)
    except ValueError as err:
        # Without this an unparsable reply would end in a traceback and
        # exit status 1, which Nagios reads as WARNING.
        LOGGER.critical('CRITICAL - Unable to compute DNS resolution time: %s',
                        err)
        sys.exit(CRITICAL)

    if not average:
        # An average of exactly zero means every sample was None or 0.
        LOGGER.info('CRITICAL - DNS Resolution is failing!')
        sys.exit(CRITICAL)
    elif average > opts.critical_threshold:
        LOGGER.info('CRITICAL - DNS Resolution took %s ms', average)
        sys.exit(CRITICAL)
    elif average > opts.warning_threshold:
        LOGGER.info('WARNING - DNS Resolution took %s ms', average)
        sys.exit(WARNING)
    else:
        LOGGER.info('OK - DNS Resolution took %s ms', average)
        sys.exit(OK)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment