Last active
January 30, 2018 15:38
-
-
Save stralex7/69264fead1fb387ff3873206a3a6ec3d to your computer and use it in GitHub Desktop.
monitor and restart ethminer due to cuda errors/no responses
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import os | |
import signal | |
import subprocess | |
import sys | |
import time | |
# Seconds of miner silence (no output at all) tolerated before the watchdog
# alarm fires. Overridable through the TIMEOUT_NO_ACTIVITY_SECONDS environment
# variable; a value that is not a valid integer falls back to 60.
try:
    TIMEOUT_NO_ACTIVITY_SECONDS = int(os.getenv('TIMEOUT_NO_ACTIVITY_SECONDS', 60))
except ValueError:
    # Only a malformed number is expected here; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit.
    TIMEOUT_NO_ACTIVITY_SECONDS = 60
class MinerException(Exception):
    """Raised when the miner's output shows a condition that requires a restart."""
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when the miner has produced no output in time."""
def timeout_handler(signum, frame):
    """Signal handler that turns an expired alarm into a TimeoutException.

    ``signum`` and ``frame`` are the standard signal-handler arguments and
    are not used.
    """
    message = "No activity from ethminer for {} seconds".format(
        TIMEOUT_NO_ACTIVITY_SECONDS
    )
    raise TimeoutException(message)
def execute(cmd, no_job_timeout=120):
    """Run the miner command and restart it whenever it looks unhealthy.

    The command's combined stdout/stderr is echoed line by line and watched
    for failure conditions. The miner is killed and started again when:

    * no output at all arrives for TIMEOUT_NO_ACTIVITY_SECONDS (SIGALRM),
    * more than ``no_job_timeout`` seconds have passed since the last
      'Received new job' line (checked whenever a line of output arrives),
    * a line starts with 'Could not resolve host' or 'CUDA error'.

    The loop only ends on KeyboardInterrupt (Ctrl-C).

    Args:
        cmd: Argument list of the miner command, as accepted by
            ``subprocess.Popen``.
        no_job_timeout: Maximum number of seconds allowed between new jobs
            before the miner is restarted.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    shutdown = False
    while not shutdown:
        proc = subprocess.Popen(cmd,
                                bufsize=0,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # Start the job clock afresh for every miner instance; otherwise a
        # restart caused by a CUDA/DNS/timeout error would judge the new
        # process against the previous instance's last job time.
        last_job_time = time.time()
        try:
            signal.alarm(TIMEOUT_NO_ACTIVITY_SECONDS)
            # readline() returns "" only at EOF, i.e. when the miner exits.
            for line in iter(proc.stdout.readline, ""):
                line = line.strip()
                now = time.time()
                job_time_delta = now - last_job_time
                print(line)
                if job_time_delta > no_job_timeout:
                    raise MinerException('****** Restarting due to not getting new jobs')
                if 'Received new job' in line:
                    # time.time() deltas are in seconds, not milliseconds.
                    print("Time since last job: %f seconds" % job_time_delta)
                    last_job_time = now
                if line.startswith('Could not resolve host'):
                    raise MinerException('****** Restarting due to DNS error')
                if line.startswith('CUDA error'):
                    raise MinerException('****** Restarting due to CUDA error')
                # Output was received: re-arm the no-activity watchdog.
                signal.alarm(TIMEOUT_NO_ACTIVITY_SECONDS)
        except (MinerException, TimeoutException) as e:
            print('\n\n', str(e), '\n\n')
        except KeyboardInterrupt:
            shutdown = True
        finally:
            # Cancel the pending alarm first so the watchdog cannot fire
            # during cleanup, then make sure the miner is gone even when an
            # unexpected exception is propagating.
            signal.alarm(0)
            proc.kill()
            proc.stdout.close()
            try:
                proc.wait(timeout=15)
            except subprocess.TimeoutExpired:
                print("Miner didn't shutdown within 15 seconds")
                proc.kill()
if __name__ == "__main__":
    # Everything after the script name is the miner command line to supervise.
    # Without it subprocess.Popen([]) would fail with a traceback, so exit
    # with a usage message instead.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <miner command> [args...]".format(sys.argv[0]))
    execute(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have a problem with ethminer where it keeps running without reporting an error but has no connection to ethermine.org. I work around this by restarting the process (actually killing it) when I see no new jobs from the pool for more than 120 seconds. I'm running this on a rig with a single 1070 card, so in theory you might want to reduce this even further, to 60 seconds.