Last active
March 3, 2017 17:46
-
-
Save basnijholt/824ee1cbf5f92f0469e30fdc7202fbb9 to your computer and use it in GitHub Desktop.
python cluster_watcher.py --interval=3 --logging=debug --log_file_prefix=watcher.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
import socket | |
from tornado import ioloop, options | |
from tornado.log import app_log | |
from ipyparallel import Client | |
# Naive UTC timestamp taken when this module is loaded. EngineCuller logs it
# at start-up and subtracts it from ``datetime.utcnow()`` to report how long
# the watcher had been running when an engine disappeared, so it must stay a
# naive UTC value for that subtraction to be valid.
start_time = datetime.utcnow()
class EngineCuller:
    """Watch an IPython parallel cluster and log engine arrivals and losses.

    Despite the name, nothing in this class stops or removes engines: it
    only compares the client's current engine ids with the ids seen on the
    previous check and writes debug log messages about the difference.

    Attributes:
        client: The client object whose ``ids`` attribute lists the engines.
        interval: The check interval passed by the caller. It is stored but
            not used by this class; scheduling is left to the caller.
        last_active: Set of engine ids seen on the most recent check.
        hostnames: Mapping of engine id to the hostname that engine reported.
    """

    def __init__(self, client, interval):
        """Record the engines currently registered and their hostnames.

        Args:
            client: Client object exposing ``ids`` and ``client[eid]`` views
                that support ``apply_sync``.
            interval: Check interval, stored on the instance as given.
        """
        self.client = client
        self.interval = interval
        # Read the id list exactly once so that ``last_active`` and
        # ``hostnames`` describe the same set of engines even if the
        # cluster changes while the hostnames are being fetched.
        engine_ids = list(client.ids)
        self.last_active = set(engine_ids)
        self.hostnames = {eid: client[eid].apply_sync(socket.gethostname)
                          for eid in engine_ids}
        app_log.debug('Starting with %s engines at %s.',
                      len(engine_ids), start_time)

    def update_state(self):
        """Log engines that vanished or appeared since the previous check.

        Engines that are no longer listed by the client are reported as
        crashed together with the time elapsed since ``start_time``; newly
        listed engines are reported and their hostname is fetched and stored.
        """
        # Take a single snapshot of the ids. Reading ``client.ids`` several
        # times lets an engine that registers between two reads end up in
        # ``last_active`` without a ``hostnames`` entry.
        current = set(self.client.ids)

        down = self.last_active - current
        if down:
            running_time = (datetime.utcnow() - start_time).total_seconds()
            for eid in down:
                # A missing hostname must not abort a logging-only check.
                app_log.debug('Engine %s crashed after %s s on %s.',
                              eid, running_time,
                              self.hostnames.get(eid, 'unknown'))

        for eid in current - self.last_active:
            app_log.debug('New engine %s started.', eid)
            self.hostnames[eid] = self.client[eid].apply_sync(
                socket.gethostname)

        # Only reached when every new hostname was fetched, so an engine
        # whose lookup raised is treated as new again on the next check.
        self.last_active = current
def main():
    """Parse command-line options and run the engine watcher.

    Defines the ``interval`` and ``profile`` options, connects a ``Client``
    for the chosen profile, schedules ``EngineCuller.update_state`` to run
    every ``interval`` seconds and then runs the IOLoop. The client is
    closed when the loop returns or is interrupted.
    """
    # Single-line help strings: a triple-quoted literal spanning two source
    # lines would embed the line break in the --help output.
    options.define(
        'interval', default=60,
        help='Interval (in seconds) at which the check is performed.')
    options.define('profile', default='pbs', help='Profile name.')
    options.parse_command_line()

    interval = options.options.interval
    loop = ioloop.IOLoop.current()
    client = Client(profile=options.options.profile)
    try:
        culler = EngineCuller(client, interval)
        # PeriodicCallback expects its period in milliseconds.
        ioloop.PeriodicCallback(culler.update_state, interval * 1000).start()
        loop.start()
    finally:
        # Release the client's connections however the loop ends
        # (normal stop, Ctrl-C, or an error during start-up).
        client.close()
# Script entry point: announce start-up on stdout, then hand over to main().
if __name__ == '__main__':
    print('Running')
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment