Skip to content

Instantly share code, notes, and snippets.

@simonrad
Last active January 7, 2018 21:49
Show Gist options
  • Select an option

  • Save simonrad/6102135 to your computer and use it in GitHub Desktop.

Select an option

Save simonrad/6102135 to your computer and use it in GitHub Desktop.
A cron script to monitor and restart a set of processes.
#!/usr/bin/env python
"""
This script monitors a set of processes, (re)starting them if they are not already running.
This script is meant to be called periodically as a cron job.
Example crontab entry:
* * * * * $HOME/bin/python $HOME/misc/monit_cron.py 1>>$HOME/logs/cron/monit/monit_out.log 2>>$HOME/logs/cron/monit/monit_err.log
0 * * * * mv $HOME/logs/cron/monit/monit_out.log $HOME/logs/cron/monit/monit_out.log.1
0 * * * * mv $HOME/logs/cron/monit/monit_err.log $HOME/logs/cron/monit/monit_err.log.1
"""
import argparse
import getpass
import logging
import os
import psutil
import signal
import traceback
class ExtraProcData(object):
    """Optional per-process settings attached to a monitored proc entry.

    Attributes:
        keep_running: When true, the process is (re)started whenever it is
            found not to be running. When false, the entry is skipped by the
            start-up step.
    """

    def __init__(self, keep_running=True):
        self.keep_running = keep_running

    def __repr__(self):
        # Readable form for log output and interactive debugging.
        return '%s(keep_running=%r)' % (type(self).__name__, self.keep_running)
def get_extra_proc_data(proc_entry):
    """Return the ExtraProcData registered for `proc_entry`.

    `proc_entry` uses the same format as the items of `monitored_procs`.
    Entries that have no mapping in `extra_proc_data` get a freshly built
    default ExtraProcData().
    """
    try:
        return extra_proc_data[proc_entry]
    except KeyError:
        return ExtraProcData()
# --------------------------------------------------
# A list of processes that should be kept running.
# Each entry is a tuple: (working_dir, (path_to_executable, arg1, arg2, ...))
#
# Entries are matched by exact equality against (p.cwd(), tuple(p.cmdline()))
# of the processes found at runtime, so:
#   * Make sure that `working_dir` does not have a trailing slash.
#   * Make sure that you explicitly specify the running program, e.g. bash or python.
# Entries must be hashable (tuples, not lists): they are used as set members
# and dict keys. The position of an entry in this list is the index accepted
# by the -k/--kill option.
monitored_procs = [
    # ('/home/simonrad/tmp', ('/home/simonrad/bin/python', '/home/simonrad/tmp/test.py')),
]
# A map from proc entry (same format as monitored_procs) to ExtraProcData object.
# Entries that are not listed here fall back to a default ExtraProcData(),
# i.e. keep_running=True.
extra_proc_data = {
    # monitored_procs[0]: ExtraProcData(True),
}
# --------------------------------------------------
# Configure the root logger: INFO and above, one timestamped line per record.
logging.basicConfig(
    datefmt='%m/%d %H:%M:%S %Z',
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
)
# --------------------------------------------------
# Parse command-line arguments.
cmdline_parser = argparse.ArgumentParser()
# `type=int` lets argparse reject a non-numeric index with a usage message
# instead of the script dying later with a ValueError traceback.
cmdline_parser.add_argument(
    '-k', '--kill', action='append', type=int, default=[], metavar='i',
    help='Kill any process(es) which match the i\'th entry in monitored_procs.')
cmdline_parser.add_argument(
    '--kill-signal', action='store', type=int, default=signal.SIGTERM, metavar='signal_number',
    help='Send this signal when killing processes.')
cmdline_parser.add_argument(
    '-l', '--list', action='store_true',
    help='List the running PIDs.')
cmdline_parser.add_argument(
    '-d', '--dont-start', action='store_true',
    help='Suppress starting of processes that aren\'t running.')
cmdline_args = cmdline_parser.parse_args()

# Translate the requested indices into proc entries. Indices that do not refer
# to an entry of monitored_procs are still ignored, but no longer silently.
indices_to_kill = list(cmdline_args.kill)
procs_to_kill = set()
for index in indices_to_kill:
    if 0 <= index < len(monitored_procs):
        procs_to_kill.add(monitored_procs[index])
    else:
        logging.warning('Ignoring --kill index %d: monitored_procs has %d entries.',
                        index, len(monitored_procs))

# If 'kill' options are specified, don't start any processes.
do_start_procs = (not cmdline_args.dont_start) and (not cmdline_args.kill)
# --------------------------------------------------
# Map from proc entry (same format as monitored_procs) to non-empty list of
# psutil.Process objects.
running_procs = {}

# Resolve the current user once instead of once per scanned process. A failure
# here now stops the script up front rather than being logged for every
# process and leaving running_procs empty (which would make every monitored
# process look like it needs starting).
current_user = getpass.getuser()

# Find all running processes. (Only those run by the current user.)
for p in psutil.process_iter():
    try:
        if p.username() != current_user:
            continue
        entry = (p.cwd(), tuple(p.cmdline()))
    except (psutil.AccessDenied, psutil.NoSuchProcess):
        # AccessDenied: not allowed to inspect this process.
        # NoSuchProcess (and its ZombieProcess subclass): the process exited
        # while it was being inspected; an expected race, not an error.
        continue
    except Exception as e:
        logging.error('Exception while scanning running processes: ' + repr(e), exc_info=True)
        continue
    running_procs.setdefault(entry, []).append(p)
# With -l/--list, report the PIDs found for every monitored entry
# (an empty list when the entry is not running).
if cmdline_args.list:
    for position, proc_entry in enumerate(monitored_procs):
        matching = running_procs.get(proc_entry, [])
        logging.info('Pids for process #%d: %r' % (position, [proc.pid for proc in matching]))

# Always point out monitored entries that have more than one live instance.
for proc_entry in monitored_procs:
    instance_count = len(running_procs.get(proc_entry, ()))
    if instance_count > 1:
        logging.info('Note: The following process has multiple instances running: ' + repr(proc_entry))
# Send the configured signal to every running instance of each entry selected
# with -k/--kill. A failure for one instance is logged and does not stop the
# remaining ones from being signalled.
for target in procs_to_kill:
    for proc in running_procs.get(target, []):
        logging.warning('Killing process with pid %d: %r' % (proc.pid, target))
        try:
            proc.send_signal(cmdline_args.kill_signal)
        except Exception as err:
            logging.error('Exception while trying to kill process: ' + repr(err), exc_info=True)
if do_start_procs:
    # Find the processes which need to be started: monitored entries with no
    # running instance whose ExtraProcData asks for them to be kept running.
    procs_to_start = set(
        entry for entry in monitored_procs
        if entry not in running_procs and get_extra_proc_data(entry).keep_running
    )

    # Print debug information.
    if procs_to_start:
        logging.warning('The following processes were not running and will be started:')
        for entry in procs_to_start:
            logging.warning(' ' + repr(entry))
    else:
        logging.info('All processes are already running.')

    original_cwd = os.getcwd()  # The current working directory.

    # Start the processes. Each one is spawned from its own working directory.
    try:
        for (working_dir, args_tuple) in procs_to_start:
            try:
                os.chdir(working_dir)
                os.spawnv(os.P_NOWAIT, args_tuple[0], args_tuple)
            except Exception as e:
                # A bad entry (e.g. a missing working directory) must not
                # prevent the remaining processes from being started.
                logging.error('Exception while trying to start process %r: %r'
                              % ((working_dir, args_tuple), e), exc_info=True)
    finally:
        # Put the working directory back so later code is not left running in
        # whichever directory happened to be used last.
        os.chdir(original_cwd)
# Final log line for this run.
# NOTE(review): indentation was lost in this copy of the file, so it is unclear
# whether this statement originally sat inside the `if do_start_procs:` block
# above or at top level; confirm against the original before re-indenting.
logging.info('Done.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment