Skip to content

Instantly share code, notes, and snippets.

@ryancurrah
Last active September 8, 2015 22:18
Show Gist options
  • Save ryancurrah/9c1984c15a8674962343 to your computer and use it in GitHub Desktop.
Save ryancurrah/9c1984c15a8674962343 to your computer and use it in GitHub Desktop.
Check Salt Minion Keys Threshold
from os import listdir
from os.path import isfile
from argparse import ArgumentParser
from common.sensu import Sensu
from common.proc import Proc
# Module-level Sensu helper, created without a metric scheme; main() uses it
# to print the check result and exit with the matching state code.
sensu = Sensu()
def main():
    """
    Check, on the salt-master itself, how close the number of accepted
    minion keys is to the recommended maximum.

    Salt recommends keeping the number of accepted minion keys below
    1/4 of the max open files soft limit - http://git.io/vZTkt

    Command line options:
    :--pki-dir: directory holding the accepted minion keys
    :-w: warning percent (0-100, default 70)
    :-c: critical percent (0-100, default 90)
    :returns: result of sensu.output_check, which prints the message
              and exits with the check state
    """
    cli = ArgumentParser()
    cli.add_argument('--pki-dir', default='/etc/salt/pki/master/minions')
    cli.add_argument('-w', default=70, type=int,
                     choices=range(0, 101), help='Warning Percent')
    cli.add_argument('-c', default=90, type=int,
                     choices=range(0, 101), help='Critical Percent')
    args = cli.parse_args()

    # Key count first, then the salt-master open files limit, then the
    # percentage of the recommended maximum that is in use.
    key_count = _count_minions(args.pki_dir)
    open_files_limit = _get_limit()
    percent = _threshold_percent(key_count, open_files_limit)

    # Pick the most severe state whose threshold has been reached.
    if percent >= args.c:
        state = sensu.STATE_CRITICAL
        template = 'Minion key threshold CRITICAL ({0}%)'
    elif percent >= args.w:
        state = sensu.STATE_WARNING
        template = 'Minion key threshold WARNING ({0}%)'
    else:
        state = sensu.STATE_OK
        template = 'Minion key threshold OK ({0}%)'

    return sensu.output_check(state, message=template.format(percent))
def _count_minions(pdir):
    """
    Count the minion keys stored in the master pki directory

    Only regular files directly inside the directory are counted;
    sub-directories are ignored.

    :pdir: Master minion pki dir location
    :returns: int count of minion keys
    """
    total = 0
    for entry in listdir(pdir):
        if isfile('{0}/{1}'.format(pdir, entry)):
            total += 1
    return total
def _get_limit(name='salt-master',
               limit_type='soft',
               limit_name='max_open_files'):
    """
    Look up a single resource limit of a running process

    :name: name of the process to inspect, optional
           (defaults to the salt master)
    :limit_type: either 'soft' or 'hard', optional
    :limit_name: name of the limit to return, optional
    :returns: the requested limit value as reported by Proc
              (the max open files soft limit by default)
    """
    process = Proc(name=name)
    limits_for_type = process.limits[limit_type]
    return limits_for_type[limit_name]
def _threshold_percent(minions, max_open_files):
    """
    Express the minion key count as a percentage of the maximum salt
    recommends, which is a quarter of the max open files soft limit

    :minions: int count of minion keys
    :max_open_files: int soft limit of max open files
                     for salt-master process
    :returns: threshold of max minion keys as a percent
    """
    recommended_max = max_open_files * 0.25
    used_fraction = minions / recommended_max
    return used_fraction * 100
# Run the check only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
import subprocess
from collections import OrderedDict
class Proc(object):
    """
    Collects information about a process given its ID or its name:
    the process name, the pid and the soft/hard resource limits.

    The limits are read from the /proc filesystem.

    WARNING: TESTED ONLY ON RHEL 6
    """

    # Known limit names. Each one is the row label found in
    # /proc/PID/limits, lower-cased with spaces replaced by underscores.
    LIMIT_NAMES = (
        'max_cpu_time',
        'max_file_size',
        'max_data_size',
        'max_stack_size',
        'max_core_file_size',
        'max_resident_set',
        'max_processes',
        'max_open_files',
        'max_locked_memory',
        'max_address_space',
        'max_file_locks',
        'max_pending_signals',
        'max_msgqueue_size',
        'max_nice_priority',
        'max_realtime_priority',
        'max_realtime_timeout',
    )

    # Number of leading characters taken by the row label; the soft and
    # hard values follow it (same split the old `cut -c 27-` performed).
    _LABEL_WIDTH = 26

    def __init__(self, pid='', name=''):
        """
        :pid: process id, optional when name is given
        :name: process name, optional when pid is given
        :raises ProcessIDAndNameNotDefined: when neither is provided
        """
        if not pid and not name:
            raise ProcessIDAndNameNotDefined('Provide either a process '
                                             '"pid" or "name"')
        self.name = name if name else self.get_name(pid)
        self.pid = pid if pid else self.get_pid(name)
        self.limits = self.get_limits(self.pid)

    def __str__(self):
        """
        Class string name of process id and name
        """
        return '<Proc Object(pid: {0}, name: {1})>'.format(self.pid, self.name)

    @staticmethod
    def _to_text(data):
        """
        Normalise subprocess output to the native str type

        :data: bytes or str
        :returns: str; bytes are decoded on Python 3, while on Python 2
                  (where bytes is str) the value is returned untouched
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode('utf-8', 'replace')
        return data

    def _subprocess(self, cmd, shell=False):
        """
        A shortcut method for the subprocess class
        :cmd: command to execute via subprocess
        :shell: execute via shell
        :returns: tuple of stdout and error
        """
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=shell)
        return process.communicate()

    def get_name(self, pid):
        """
        Gets the process name by ID
        :pid: pid of the process
        :returns: name of the process id
        :raises ProcessIDDoesNotExist: when /proc/PID/comm cannot be
                                       read or is empty
        """
        # Read the file directly instead of spawning `cat`.
        try:
            with open('/proc/{0}/comm'.format(pid)) as handle:
                name = handle.read()
        except (IOError, OSError):
            name = ''
        if not name:
            raise ProcessIDDoesNotExist(pid)
        return name.strip()

    def get_pid(self, name):
        """
        Gets parent process ID by name
        :name: name of the process
        :returns: int pid taken from the first line printed by
                  `ps hf -opid -C NAME`
        :raises ProcessNameDoesNotExist: when ps prints no usable pid
        """
        cmd = ['ps', 'hf', '-opid', '-C', name]
        output, _err = self._subprocess(cmd)
        # Popen returns bytes on Python 3; normalise before splitting.
        first_line = self._to_text(output).split('\n')[0]
        try:
            return int(first_line)
        except (IndexError, ValueError):
            raise ProcessNameDoesNotExist(name)

    def get_limits(self, pid):
        """
        Gets the soft and hard limits for a given process id
        :pid: process id
        :returns: dict with keys 'soft' and 'hard' limit for given
                  process ID with OrderedDict of limit names and
                  values
        :raises GetProcessLimitsFailed: when /proc/PID/limits cannot
                                        be read
        """
        # Read the file once in-process; no shell pipeline is built from
        # the pid, so nothing in it is ever interpreted by a shell.
        try:
            with open('/proc/{0}/limits'.format(pid)) as handle:
                text = handle.read()
        except (IOError, OSError):
            raise GetProcessLimitsFailed()
        return {'soft': self._parse_limits(text, 'soft'),
                'hard': self._parse_limits(text, 'hard')}

    @classmethod
    def _parse_limits(cls, text, limit_type):
        """
        Parse the content of a /proc/PID/limits file
        :text: file content, header line included
        :limit_type: 'soft' selects the first value column, anything
                     else selects the second (hard) column
        :returns: OrderedDict of limit names and values; numeric values
                  are ints, other values (e.g. 'unlimited') stay
                  strings and limits absent from the text stay ''
        """
        column = 0 if limit_type == 'soft' else 1
        parsed = OrderedDict((key, '') for key in cls.LIMIT_NAMES)
        # Skip the header row, then match rows by label so a missing or
        # extra row cannot shift values onto the wrong limit name.
        for line in text.splitlines()[1:]:
            label = line[:cls._LABEL_WIDTH].strip()
            key = label.lower().replace(' ', '_')
            if key not in parsed:
                continue
            fields = line[cls._LABEL_WIDTH:].split()
            value = fields[column] if len(fields) > column else ''
            try:
                parsed[key] = int(value)
            except ValueError:
                parsed[key] = value
        return parsed
class ProcessIDAndNameNotDefined(Exception):
    """Raised when a Proc is created with neither a pid nor a name."""
class ProcessIDDoesNotExist(Exception):
    """Raised when no process name can be found for a given pid."""
class ProcessNameDoesNotExist(Exception):
    """Raised when no pid can be found for a given process name."""
class GetProcessLimitsFailed(Exception):
    """Raised when the limits of a process could not be read."""
import socket
import time
import warnings
from sys import exit
# Ignore every DeprecationWarning for the whole process.
# NOTE(review): presumably so warnings do not pollute the check output - confirm.
warnings.filterwarnings("ignore", category=DeprecationWarning)
class Sensu(object):
    """
    Sensu object for creating checks and metrics

    Check results and metrics are written to stdout; a check result
    also ends the process with the check state as exit code.
    """
    STATE_OK = 0
    STATE_WARNING = 1
    STATE_CRITICAL = 2
    STATE_UNKNOWN = 3
    VALID_STATES = range(0, 4)

    def __init__(self, scheme=None):
        """
        :scheme: base scheme of the metric as an str, optional;
                 trailing dots are removed
        """
        # Always define the attribute so output_metric can rely on it
        # even when no scheme was given.
        self.scheme = scheme.rstrip('.') if scheme else None

    def output_metric(self, name, value):
        """
        Output metric to stdout

        The metric name will be appended to the base scheme; without a
        base scheme the bare metric name is used.

        :name: name of the metric as an str
        :value: value of the metric as an int
        :returns: prints "name, value, unix timestamp" separated by
                  tabs to stdout
        """
        if self.scheme:
            metric = '{0}.{1}'.format(self.scheme, name)
        else:
            metric = name
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('{0}\t{1}\t{2}'.format(metric, value, int(time.time())))

    def output_check(self, state, message=''):
        """
        Output check result to stdout

        :state: one of the valid sensu states as an int
        :message: message to show to standard out as an str
        :returns: never returns; prints the message and exits the
                  process with the state as exit code
        :raises BadSensuCheckState: when state is not a valid state
        """
        if not self._valid_state(state):
            raise BadSensuCheckState("Please enter a valid Sensu check state.")
        print('{0}'.format(message))
        exit(state)

    def _valid_state(self, state):
        """
        Validates the sensu check state

        :state: the state as an int
        :returns: True when the state is one of VALID_STATES
        """
        return state in self.VALID_STATES
class BadSensuCheckState(Exception):
    """Raised when a check result is given an invalid Sensu state."""
@ryancurrah
Copy link
Author

This checks whether the salt-master has hit its upper limit on the number of salt-minion keys that should be stored. As per Salt's documentation: 'The number of accepted minion keys should be lower than 1/4 of the max open files soft setting.' This check should therefore be run on the salt-master, where it compares the number of minion keys to the current max open files limit of the salt-master process. If the result is above the warning or critical threshold percent, it will alert.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment