Created
September 23, 2016 14:23
-
-
Save maxfischer2781/5c3bb079fb730e7242267cdb326866ce to your computer and use it in GitHub Desktop.
Hook for HTCondor job_router that removes its job from a route
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function, with_statement | |
import sys | |
import ast | |
import subprocess | |
import time | |
# file that records every decision this hook takes
LOG_PATH = '/tmp/htc_exit_hook.log'

# attribute values that must NOT count as "flag is set"
_FALSE_LITERALS = frozenset(('', 'false', '0', 'undefined'))

# job ad attributes (lower-cased) whose values are parsed as literals,
# mapped to the name under which the parsed value is stored
_LITERAL_ATTRIBUTES = {
    'routedfromjobid': 'real_job_id',  # "78249.0"
    'routename': 'route_name',  # "MyRoute"
    'clusterid': 'cluster_id',  # 78251
    'procid': 'proc_id',  # 3
}

# job ad attributes (lower-cased) that are interpreted as boolean flags
_FLAG_ATTRIBUTES = {
    'routetestperformed': 'route_test_performed',
    'wantroutetest': 'want_route_test',
}


def _parse_literal(value):
    """Return ``value`` evaluated as a Python literal, or None if it is not one."""
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        # e.g. ``undefined``: treated exactly like a missing attribute
        return None


def _parse_job_ad(lines):
    """
    Collect the attributes this hook cares about from ``key = value`` lines

    Every line is consumed, even after all attributes have been seen, so that
    the process feeding us does not stall on a full pipe. Blank lines and
    lines without ``=`` are skipped. Attribute names are matched
    case-insensitively.

    :param lines: iterable of text lines, each of the form ``key = value``
    :return: dict with ``real_job_id``, ``route_name``, ``cluster_id`` and
             ``proc_id`` (None when absent or not a literal) and the booleans
             ``route_test_performed`` and ``want_route_test`` (False when
             absent)
    """
    job_ad = dict.fromkeys(_LITERAL_ATTRIBUTES.values())
    job_ad.update(dict.fromkeys(_FLAG_ATTRIBUTES.values(), False))
    for line in lines:
        key, separator, value = line.partition('=')
        if not separator:
            continue
        key, value = key.strip().lower(), value.strip()
        if key in _LITERAL_ATTRIBUTES:
            job_ad[_LITERAL_ATTRIBUTES[key]] = _parse_literal(value)
        elif key in _FLAG_ATTRIBUTES:
            # a literal ``false`` is a non-empty string and thus truthy;
            # compare against the known false spellings instead
            job_ad[_FLAG_ATTRIBUTES[key]] = value.lower() not in _FALSE_LITERALS
    return job_ad


def _log(message):
    """Append ``message`` with a timestamp to the hook log file."""
    with open(LOG_PATH, 'a') as log_file:
        log_file.write('[%s] %s\n' % (time.asctime(), message))


def main(lines=None):
    """
    Remove the routed job described by a job ad if it requests a route test

    The ad is read as ``key = value`` lines. Nothing happens unless
    ``WantRouteTest`` is set. Otherwise ``RouteTestPerformed = True`` is
    printed to stdout and ``condor_rm`` is run on ``ClusterId.ProcId``; each
    step is recorded in the hook log file.

    :param lines: iterable of job ad lines; defaults to ``sys.stdin``
    :return: process exit code: 1 if the ad describes the original job itself
             (``ClusterId.ProcId`` equals ``RoutedFromJobId``), 0 otherwise
    """
    # read input so router does not stall
    # we only need ClusterId and ProcId, but the others are useful for
    # debugging and should be defined
    job_ad = _parse_job_ad(sys.stdin if lines is None else lines)
    if not job_ad['want_route_test']:
        return 0
    # compare against None: ProcId 0 is a perfectly valid value
    missing = sorted(
        name for name in _LITERAL_ATTRIBUTES.values() if job_ad[name] is None
    )
    if missing:
        print('missing job attributes: %s' % ', '.join(missing), file=sys.stderr)
        return 0
    job_id = '%d.%d' % (job_ad['cluster_id'], job_ad['proc_id'])
    real_job_id = job_ad['real_job_id']
    if job_id == real_job_id:
        # never remove the job the route originated from
        print(
            'Somebody set us up the inplace transformed job! Abandon hooks!',
            file=sys.stderr,
        )
        return 1
    transition = 'job %s => %s' % (job_id, real_job_id)
    if job_ad['route_test_performed']:
        _log('%s survived previous route test' % transition)
    _log('%s about to be removed' % transition)
    # mark us as routed already; the name must match the attribute that
    # _parse_job_ad looks for, otherwise the marker is never recognised
    print('RouteTestPerformed = True')
    # remove the ROUTED job
    condor_rm = subprocess.Popen(
        ('condor_rm', job_id), stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    # communicate() returns (stdout, stderr), in that order
    stdout, stderr = condor_rm.communicate()
    if condor_rm.returncode == 0:
        _log('%s marked for removal' % transition)
    else:
        _log('%s removal failed: %s' % (transition, condor_rm.returncode))
        print('rmexit: ', condor_rm.returncode, file=sys.stderr)
        print('stdout:\n', stdout, file=sys.stderr)
        print('stderr:\n', stderr, file=sys.stderr)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment