Last active
January 7, 2018 21:49
-
-
Save simonrad/6102135 to your computer and use it in GitHub Desktop.
A cron script to monitor and restart a set of processes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| This script monitors a set of processes, (re)starting them if they are not already running. | |
| This script is meant to be called periodically as a cron job. | |
| Example crontab entry: | |
| * * * * * $HOME/bin/python $HOME/misc/monit_cron.py 1>>$HOME/logs/cron/monit/monit_out.log 2>>$HOME/logs/cron/monit/monit_err.log | |
| 0 * * * * mv $HOME/logs/cron/monit/monit_out.log $HOME/logs/cron/monit/monit_out.log.1 | |
| 0 * * * * mv $HOME/logs/cron/monit/monit_err.log $HOME/logs/cron/monit/monit_err.log.1 | |
| """ | |
| import argparse | |
| import getpass | |
| import logging | |
| import os | |
| import psutil | |
| import signal | |
| import traceback | |
class ExtraProcData(object):
    """Optional per-process settings attached to a monitored proc entry.

    Attributes:
        keep_running: When true, the process is started whenever the script
            finds it is not running. When false, it is left alone.
    """

    def __init__(self, keep_running=True):
        # Default is True so entries with no explicit settings are kept alive.
        self.keep_running = keep_running
def get_extra_proc_data(proc_entry):
    """Return the ExtraProcData registered for `proc_entry`.

    `proc_entry` is a (working_dir, args_tuple) tuple in the same format as
    the entries of `monitored_procs`. Entries that have no record in
    `extra_proc_data` get a default-constructed ExtraProcData().
    """
    try:
        return extra_proc_data[proc_entry]
    except KeyError:
        return ExtraProcData()
| # -------------------------------------------------- | |
# The processes that should be kept running.
# Each entry is a tuple: (working_dir, (path_to_executable, arg1, arg2, ...)).
#
# Entries are compared for exact equality against the (cwd, cmdline) pair
# reported for each running process, so:
#   * `working_dir` must not have a trailing slash;
#   * the running program (e.g. bash or python) must be given explicitly as
#     the first element of the argument tuple.
monitored_procs = [
    # ('/home/simonrad/tmp', ('/home/simonrad/bin/python', '/home/simonrad/tmp/test.py')),
]

# Per-process settings: a map from proc entry (same format as the entries of
# monitored_procs) to an ExtraProcData object. Entries may be omitted.
extra_proc_data = {
    # monitored_procs[0]: ExtraProcData(True),
}
| # -------------------------------------------------- | |
# Set up logging: every record is prefixed with a timestamp and a
# fixed-width level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%m/%d %H:%M:%S %Z',
)
| # -------------------------------------------------- | |
# Parse command-line arguments.
cmdline_parser = argparse.ArgumentParser()
cmdline_parser.add_argument(
    '-k', '--kill', action='append', type=int, default=[], metavar='i',
    help='Kill any process(es) which match the i\'th entry in monitored_procs.')
cmdline_parser.add_argument(
    '--kill-signal', action='store', type=int, default=signal.SIGTERM, metavar='signal_number',
    help='Send this signal when killing processes.')
cmdline_parser.add_argument(
    '-l', '--list', action='store_true',
    help='List the running PIDs.')
cmdline_parser.add_argument(
    '-d', '--dont-start', action='store_true',
    help='Suppress starting of processes that aren\'t running.')
cmdline_args = cmdline_parser.parse_args()

# `type=int` above makes argparse reject a non-numeric index with a usage
# error instead of letting a bare int() raise a traceback here.
indices_to_kill = list(cmdline_args.kill)

# Resolve the requested indices to proc entries. An index with no matching
# entry is reported rather than silently ignored; negative indices are treated
# as out of range (they never matched an entry before either).
procs_to_kill = set()
for index in indices_to_kill:
    if 0 <= index < len(monitored_procs):
        procs_to_kill.add(monitored_procs[index])
    else:
        logging.warning('Ignoring --kill index %d: monitored_procs has no such entry.', index)

# If 'kill' options are specified, don't start any processes.
do_start_procs = (not cmdline_args.dont_start) and (not cmdline_args.kill)
| # -------------------------------------------------- | |
# Map from proc entry (same format as the entries of monitored_procs) to a
# non-empty list of psutil.Process objects.
running_procs = {}

# Resolve the current user once, outside the scan. If this lookup fails we
# want the script to stop here: swallowing the failure per process would leave
# running_procs empty, and every monitored process would look "not running".
current_user = getpass.getuser()

# Find all running processes. (Only those run by the current user.)
for p in psutil.process_iter():
    try:
        if p.username() != current_user:
            continue
        entry = (p.cwd(), tuple(p.cmdline()))
    except (psutil.AccessDenied, psutil.NoSuchProcess):
        # AccessDenied: a process we are not allowed to inspect.
        # NoSuchProcess: the process exited while we were looking at it.
        # Neither is an error for our purposes; just skip the process.
        continue
    except Exception as e:
        logging.error('Exception while scanning running processes: %r', e, exc_info=True)
        continue
    running_procs.setdefault(entry, []).append(p)
# With --list, log the PIDs found for every monitored entry (an empty list
# when the entry has no running instance).
if cmdline_args.list:
    for index, entry in enumerate(monitored_procs):
        pids = [proc.pid for proc in running_procs.get(entry, ())]
        logging.info('Pids for process #%d: %r', index, pids)

# Always point out monitored entries that have more than one instance running.
for entry in monitored_procs:
    if len(running_procs.get(entry, ())) > 1:
        logging.info('Note: The following process has multiple instances running: %r', entry)
# Send the requested signal to every running instance of each entry selected
# with --kill. A failure on one process is logged and does not stop the rest.
for entry in procs_to_kill:
    for victim in running_procs.get(entry, ()):
        logging.warning('Killing process with pid %d: %r', victim.pid, entry)
        try:
            victim.send_signal(cmdline_args.kill_signal)
        except Exception as e:
            logging.error('Exception while trying to kill process: %r', e, exc_info=True)
if do_start_procs:
    # Find the processes which need to be started: monitored entries that have
    # no running instance and are flagged keep_running.
    procs_to_start = set(
        entry for entry in monitored_procs
        if entry not in running_procs and get_extra_proc_data(entry).keep_running
    )

    # Print debug information.
    if procs_to_start:
        logging.warning('The following processes were not running and will be started:')
        for entry in procs_to_start:
            logging.warning(' %r', entry)
    else:
        logging.info('All processes are already running.')

    # Each process is spawned from its own working directory, so remember
    # where we started and go back there afterwards.
    original_cwd = os.getcwd()
    try:
        for entry in procs_to_start:
            working_dir, args_tuple = entry
            try:
                os.chdir(working_dir)
                pid = os.spawnv(os.P_NOWAIT, args_tuple[0], args_tuple)
            except OSError as e:
                # A bad working directory or a failed spawn for one entry must
                # not prevent the remaining entries from being started.
                logging.error('Exception while trying to start process %r: %r', entry, e, exc_info=True)
            else:
                logging.info('Started process with pid %d: %r', pid, entry)
    finally:
        os.chdir(original_cwd)

# NOTE(review): indentation was lost in this copy of the file; 'Done.' is
# assumed to be logged unconditionally at the end of the run -- confirm.
logging.info('Done.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment