Last active
December 30, 2020 15:36
-
-
Save nemesifier/deb9bd7dc91be1ea29bcbb5f7f6ebf75 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from openwisp2.celery import app | |
import os | |
import sys | |
import django | |
import time | |
from django.conf import settings | |
# Select the project settings module unless the environment already names one,
# then initialise Django before the rest of the script runs.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'openwisp2.settings')
django.setup()

# Address the alert email is sent to by kill_and_alert();
# the value below is a placeholder that must be replaced.
EMAIL_RECIPIENT = '<your_email_here>'
def get_celery_worker_status():
    """Ask the Celery workers for their ping reply and active tasks.

    Returns:
        A dict with two keys: ``'availability'`` holds whatever
        ``inspect().ping()`` returned and ``'active_tasks'`` holds whatever
        ``inspect().active()`` returned. The calling script treats an
        ``'availability'`` of ``None`` as "worker not responding".
    """
    inspector = app.control.inspect()
    # Ping first, then list the active tasks (same order as the two
    # remote calls have always been issued in).
    ping_reply = inspector.ping()
    running = inspector.active()
    return {'availability': ping_reply, 'active_tasks': running}
def kill_and_alert(reason):
    """Email an alert, kill the celery processes and ensure they come back.

    Steps, in order:
      1. print ``reason`` and mail it to ``EMAIL_RECIPIENT`` via the ``mail``
         command, using ``settings.DEFAULT_FROM_EMAIL`` as sender and the
         first entry of ``settings.ALLOWED_HOSTS`` in the subject;
      2. ``kill -9`` every process whose ``ps`` line contains ``celery``;
      3. restart the ``celery`` and ``celerybeat`` supervisor programs unless
         exactly two celery entries are already reported as RUNNING.

    Args:
        reason: short human readable explanation; it is interpolated into a
            shell command line, so callers must pass trusted text only.
    """
    host = settings.ALLOWED_HOSTS[0]
    sender = settings.DEFAULT_FROM_EMAIL
    print(reason)
    os.system(
        f'echo "Reason: {reason}." | mail -a "FROM:{sender}" '
        f'-s "[{host}] Celery watchdog triggered" {EMAIL_RECIPIENT}'
    )
    # give the mail command a moment before the kill below
    time.sleep(0.5)
    # NOTE(review): the pattern matches any command line containing "celery",
    # which includes the grep process itself and would include this script
    # if its file name contains "celery" -- confirm that is acceptable.
    os.system("ps auxww | grep 'celery' | awk '{print $2}' | xargs kill -9")
    # leave supervisor some time to notice the killed processes
    time.sleep(2)
    # ensure both supervisorctl processes celery and celerybeat are running.
    # os.system() runs the command through "sh"; POSIX test only defines "="
    # for string equality ("==" is a bash extension that fails on shells
    # such as dash, which made the restart fire unconditionally).
    os.system(
        'test "$(supervisorctl status | grep celery | grep RUNNING -c)" = "2" '
        ' || supervisorctl restart celery celerybeat'
    )
def get_total_active_tasks(result):
    """Count the active tasks across all workers.

    Args:
        result: dict as returned by ``get_celery_worker_status()``; only its
            ``'active_tasks'`` entry is read, which maps each worker to the
            collection of tasks it is currently running.

    Returns:
        The sum of the task counts of every worker, or ``0`` when
        ``'active_tasks'`` is empty or ``None``.
    """
    per_worker = result['active_tasks']
    # no reply (None) or no workers listed: nothing is running
    if not per_worker:
        return 0
    return sum(len(tasks) for tasks in per_worker.values())
# Total number of active tasks at or above which the worker is suspected
# to be stale. Used by both the first check and the confirmation check so
# the two can never drift apart.
MAX_ACTIVE_TASKS = 28
# Seconds to wait before counting the active tasks a second time.
RECHECK_DELAY_SECONDS = 20

# Exit codes: 1 = worker did not answer the ping, 2 = worker considered
# stale, 0 = high task count cleared up on the second look. When nothing is
# wrong the script simply runs to its end.
result = get_celery_worker_status()
# worker not responding to pings
if result['availability'] is None:
    kill_and_alert('Worker not responding to ping')
    sys.exit(1)
# number of active tasks may be too high
if get_total_active_tasks(result) >= MAX_ACTIVE_TASKS:
    # double check
    time.sleep(RECHECK_DELAY_SECONDS)
    # avoid false positives if worker is simply busy
    if get_total_active_tasks(get_celery_worker_status()) < MAX_ACTIVE_TASKS:
        sys.exit(0)
    # we have a problem
    kill_and_alert('Too many active tasks, worker is stale')
    sys.exit(2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment