Created
May 28, 2020 14:27
-
-
Save dazzag24/ed8460bc5529ab92656fc85ab20de738 to your computer and use it in GitHub Desktop.
Maintain a pool of multiprocessing workers by restarting any that die or are killed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import signal | |
import multiprocessing as mp | |
def worker(id):
    """Announce this worker's id every ten seconds, forever.

    Used as the target of each child process. The loop never exits on its
    own, so the function does not return.

    Args:
        id: Value interpolated into the "Inside worker ..." message.
    """
    pause_seconds = 10
    announcement = "Inside worker {}".format(id)
    while True:
        print(announcement)
        time.sleep(pause_seconds)
def proc_stop(p_to_stop):
    """Terminate one process and print a confirmation line.

    Args:
        p_to_stop: Object exposing a ``terminate()`` method; the script
            passes the worker processes it started.
    """
    request_termination = p_to_stop.terminate
    request_termination()
    # The message is printed right after terminate() returns; this function
    # does not wait for the process to exit.
    print("after Termination ")
def signal_name_for_exitcode(exitcode):
    """Return the name of the signal encoded in a child's exit code.

    multiprocessing reports a child terminated by signal N as exit code -N.
    Only negative codes are therefore treated as signals; a zero or positive
    exit status (or ``None`` for a process that has not exited) is not a
    signal and yields 'Unknown'.

    Args:
        exitcode: Value of ``Process.exitcode`` (int or None).

    Returns:
        The signal's name such as 'SIGKILL', or 'Unknown' when the code does
        not correspond to a signal known on this platform.
    """
    if exitcode is None or exitcode >= 0:
        return 'Unknown'
    try:
        # signal.Signals only contains real signals, so SIG_DFL / SIG_IGN /
        # SIG_BLOCK etc. (which share small integer values) cannot shadow
        # SIGHUP or SIGINT the way a dir(signal) scan does.
        return signal.Signals(-exitcode).name
    except ValueError:
        return 'Unknown'


def report_liveness(procs):
    """Print one "alive"/"dead" line per process, numbered by list position."""
    for slot, proc in enumerate(procs):
        # is_alive is a method: it must be called, otherwise the bound method
        # object is always truthy and every child is reported alive.
        if proc.is_alive():
            print("Child {} is alive".format(slot))
        else:
            print("Child {} is dead".format(slot))


def start_worker(slot):
    """Start and return a new worker process for pool position *slot*."""
    proc = mp.Process(target=worker, name="worker", args=(slot,))
    proc.start()
    return proc


def main():
    """Run the interactive demo: start a pool, then restart killed workers.

    One worker per CPU is started. After the user has had a chance to kill
    some of them, dead workers are replaced every time the user enters a
    non-empty line; an empty line ends the loop and shuts the pool down.
    """
    numprocs = mp.cpu_count()
    print("Found {} cpus".format(numprocs))

    # Bound before the try so the KeyboardInterrupt handler can always
    # iterate it, however early the interrupt arrives.
    procs = []
    try:
        for ii in range(numprocs):
            time.sleep(1)
            procs.append(start_worker(ii))

        print("There are {} active children".format(len(mp.active_children())))
        report_liveness(procs)

        input("Use htop to kill some processes and then press Enter to continue...")

        print("There are {} active children".format(len(mp.active_children())))
        report_liveness(procs)

        print("Restart the killed workers to maintain a pool of {} workers".format(numprocs))

        exitstr = "A"
        while exitstr != "":
            if len(mp.active_children()) < numprocs:
                print("Some workers have died..... attempting to fix")
                for ii, proc in enumerate(procs):
                    if not proc.is_alive():
                        print("Attempting to restart child {}, which exited with {} code. {}".format(
                            ii, proc.exitcode, signal_name_for_exitcode(proc.exitcode)))
                        # Replace the dead process in the same slot so the
                        # worker ids stay stable across restarts.
                        procs[ii] = start_worker(ii)
            else:
                print("All workers are alive")
            time.sleep(10)
            exitstr = input("Press enter to exit")

        report_liveness(procs)

        for proc in procs:
            proc_stop(proc)
        for proc in procs:
            proc.join()

    except KeyboardInterrupt:
        for proc in procs:
            proc.terminate()
        # Reap the children so none are left behind as zombies.
        for proc in procs:
            proc.join()
        print('Keyboard Interrupted, exiting.')


# Guard the entry point: with the "spawn" start method each child imports
# this module, and unguarded top-level code would start spawning again.
if __name__ == "__main__":
    main()
""" | |
When run, this test program produces output like this: | |
Found 8 cpus | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
Inside worker 3 | |
Inside worker 4 | |
Inside worker 5 | |
Inside worker 6 | |
There are 8 active children | |
Child 0 is alive | |
Child 1 is alive | |
Child 2 is alive | |
Child 3 is alive | |
Child 4 is alive | |
Child 5 is alive | |
Child 6 is alive | |
Child 7 is alive | |
Inside worker 7 | |
Use htop to kill some processes and then press Enter to continue...Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
Inside worker 3 | |
Inside worker 4 | |
Inside worker 5 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
Inside worker 3 | |
Inside worker 4 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
There are 4 active children | |
Child 0 is alive | |
Child 1 is alive | |
Child 2 is alive | |
Child 3 is dead | |
Child 4 is dead | |
Child 5 is alive | |
Child 6 is dead | |
Child 7 is dead | |
Restart the killed workers to maintain a pool of 8 workers | |
Some workers have died..... attempting to fix | |
Attempting to restart child 3, which exited with -6 code. SIGIOT | |
Attempting to restart child 4, which exited with -9 code. SIGKILL | |
Inside worker 3 | |
Attempting to restart child 6, which exited with -15 code. SIGTERM | |
Inside worker 4 | |
Attempting to restart child 7, which exited with -15 code. SIGTERM | |
Inside worker 6 | |
Inside worker 7 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
Inside worker 3 | |
Inside worker 4 | |
Inside worker 6 | |
Press enter to exit | |
Inside worker 7 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 3 | |
Inside worker 4 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 2 | |
Inside worker 3 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 3 | |
g | |
Some workers have died..... attempting to fix | |
Attempting to restart child 2, which exited with -9 code. SIGKILL | |
Attempting to restart child 4, which exited with -15 code. SIGTERM | |
Inside worker 2 | |
Attempting to restart child 5, which exited with -15 code. SIGTERM | |
Inside worker 4 | |
Attempting to restart child 6, which exited with -16 code. Unknown | |
Inside worker 5 | |
Attempting to restart child 7, which exited with -15 code. SIGTERM | |
Inside worker 6 | |
Inside worker 7 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 3 | |
Inside worker 2 | |
Inside worker 4 | |
Inside worker 5 | |
Inside worker 6 | |
Press enter to exit | |
Inside worker 7 | |
Inside worker 0 | |
Inside worker 1 | |
Inside worker 3 | |
Inside worker 2 | |
Inside worker 4 | |
Inside worker 5 | |
Inside worker 6 | |
Inside worker 7 | |
Inside worker 0 | |
Child 0 is alive | |
Child 1 is alive | |
Child 2 is alive | |
Child 3 is alive | |
Child 4 is alive | |
Child 5 is alive | |
Child 6 is alive | |
Child 7 is alive | |
after Termination | |
after Termination | |
after Termination | |
after Termination | |
after Termination | |
after Termination | |
after Termination | |
after Termination | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment