Skip to content

Instantly share code, notes, and snippets.

@mastbaum
Created January 9, 2015 15:05
Show Gist options
  • Select an option

  • Save mastbaum/bfdb9808bf5c85978757 to your computer and use it in GitHub Desktop.

Select an option

Save mastbaum/bfdb9808bf5c85978757 to your computer and use it in GitHub Desktop.
Nagios plugin to monitor the SMART status of hard disks
#!/usr/bin/env python
'''Report the SMART status of hard disks using a log file.'''
import argparse
import subprocess
import sys
# Nagios plugin exit codes. The numeric order matters: main() combines
# per-device severities with max(), so UNKNOWN outranks CRITICAL.
OK, WARNING, CRITICAL, UNKNOWN = range(4)

# Meaning of each bit in smartctl's exit status, indexed by bit position
# (entry 0 describes bit 0, the least significant bit), paired with the
# Nagios severity reported when that bit is set.
BITS = (
    ('smartctl error', UNKNOWN),
    ('Device open failed', CRITICAL),
    ('SMART or ATA command failed', WARNING),
    ('Disk failing', WARNING),
    ('Prefail attributes over threshold', WARNING),
    ('Attributes over threshold in the past', WARNING),
    ('Error log contains SMART errors', WARNING),
    ('Self-test log contains errors', WARNING),
)
def _describe_exit_status(code, bits):
    '''Translate one smartctl exit status into (severity, description).'''
    if code < 0:
        # The logger records subprocess.call()'s result, which is negative
        # when the child was killed by a signal. Such a value is not a bit
        # mask, so report it as UNKNOWN (3) instead of decoding it.
        return 3, 'smartctl killed by signal %d' % -code

    severity = 0
    meanings = []
    for position, (meaning, level) in enumerate(bits):
        if code & (1 << position):
            severity = max(severity, level)
            meanings.append(meaning)

    if severity > 0:
        return severity, ', '.join(meanings) + ' (%s)' % bin(code)
    return severity, 'OK'


def main(logfile, bits=None):
    '''Summarize the newest SMART log entry as a Nagios check result.

    The log is expected to hold one entry per line in the form
    ``timestamp;name::code;name::code...``, where ``code`` is the exit
    status smartctl returned for that device. Only the last non-blank line
    is evaluated. A one-line summary is printed to stdout.

    :param logfile: path of the log file to read
    :param bits: optional sequence of ``(meaning, severity)`` pairs indexed
                 by bit position; defaults to the module-level ``BITS``
    :returns: the highest severity found across all devices
              (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN with the default table)
    :raises ValueError: if the log has no entries or the entry is malformed
    :raises IOError: if the log file cannot be opened
    '''
    if bits is None:
        bits = BITS

    # Stream the file and remember only the newest non-blank line. The log
    # is append-only and grows without bound, so avoid loading all of it.
    last_line = None
    with open(logfile, 'r') as f:
        for raw in f:
            # Strip only line terminators; a final line without a newline
            # must keep its last character (it is part of the exit code).
            entry = raw.rstrip('\r\n')
            if entry.strip():
                last_line = entry
    if last_line is None:
        raise ValueError('no entries found in %s' % logfile)

    _timestamp, separator, devices = last_line.partition(';')
    if not separator or not devices:
        raise ValueError('malformed log entry: %r' % last_line)

    total_status = 0
    output = []
    for device in devices.split(';'):
        name, code = device.split('::')
        status, status_string = _describe_exit_status(int(code), bits)
        total_status = max(total_status, status)
        output.append('%s: %s' % (name, status_string))

    labels = {0: 'OK', 1: 'WARNING', 2: 'CRITICAL'}
    s = labels.get(total_status, 'UNKNOWN')

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('SMART STATUS %s - ' % s + ', '.join(output))

    return total_status
# Command-line entry point of the Nagios check: parse the log path, evaluate
# the log, and exit with the resulting Nagios status code.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--logfile', '-f',
                        default='/var/log/smart_status.log',
                        help='Path to SMART status log file')
    args = parser.parse_args()

    # Any failure while reading or parsing the log is reported as UNKNOWN
    # with a one-line message, rather than as a traceback.
    try:
        status = main(args.logfile)
    except Exception as e:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('SMART STATUS UNKNOWN - Python exception %s' % str(e))
        status = UNKNOWN

    sys.exit(status)
#!/usr/bin/env python
'''Record the status of hard disks using SMART data, writing to a log file.
Run this regularly with cron.
'''
import argparse
import subprocess
import sys
def check_smart(device, smartctl='/usr/sbin/smartctl'):
    '''Run smartctl against one device and return its exit status.

    :param device: device specification; it is split on whitespace and
                   appended to the command line, so it may carry extra
                   smartctl options (e.g. ``'-d megaraid,0 /dev/sda'``)
    :param smartctl: path or name of the smartctl executable; override it
                     on systems that install smartctl elsewhere
    :returns: the value of ``subprocess.call`` for the command, i.e. the
              exit status of smartctl
    :raises OSError: if the executable cannot be started
    '''
    # '-q silent' suppresses smartctl's output; only the exit status is used.
    command = [smartctl, '--all', '-q', 'silent']
    command.extend(device.split())
    return subprocess.call(command)
def _device_label(device):
    '''Return the name under which a device specification is logged.'''
    tokens = device.split()
    # Devices behind RAID controllers/HBAs are given as '-d TYPE ...'; log
    # the token that follows '-d'. Matching whole tokens (not substrings)
    # keeps paths that merely contain '-d' from being misinterpreted.
    if '-d' in tokens:
        following = tokens[tokens.index('-d') + 1:]
        if following:
            return following[0]
    return device


def main(logfile, devices, check=None):
    '''Check every device and append one status line to the log file.

    The appended line has the form ``timestamp;name::code;name::code...``
    followed by a newline, where ``timestamp`` is the current time in whole
    seconds since the epoch and ``code`` is the status returned for the
    device.

    :param logfile: path of the log file; opened in append mode
    :param devices: iterable of device specification strings
    :param check: optional callable taking a device specification and
                  returning an integer status; defaults to ``check_smart``
    '''
    # Local import: only this function needs the module.
    import time

    if check is None:
        check = check_smart

    # Take the timestamp in-process instead of spawning `date +%s`; this
    # yields a text string on both Python 2 and Python 3.
    fields = [str(int(time.time()))]
    for device in devices:
        code = check(device)
        fields.append('%s::%i' % (_device_label(device), code))

    with open(logfile, 'a') as f:
        f.write(';'.join(fields) + '\n')
# Command-line entry point of the logger: collect the log path and the
# devices to probe, then record one status line.
if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--logfile', '-f',
        default='/var/log/smart_status.log',
        help='Path to SMART status log file',
    )
    cli.add_argument(
        '--device', '-x',
        action='append',
        default=[],
        help='Add a device to monitor',
    )
    options = cli.parse_args()

    # At least one --device is required; otherwise show usage and fail.
    if not options.device:
        sys.stderr.write('No devices specified.\n')
        cli.print_help()
        sys.exit(1)

    main(options.logfile, options.device)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment