Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save manning-ncsa/c92c7ab3cf02bae31b75331eeafc8d69 to your computer and use it in GitHub Desktop.
Save manning-ncsa/c92c7ab3cf02bae31b75331eeafc8d69 to your computer and use it in GitHub Desktop.
Blast: prune duplicate registered tasks
from host.models import Transient
from host.models import TaskRegister
import yaml
def create_report(transient, tasks):
    """Build a plain-data summary of a transient and its registered tasks.

    The result contains only attribute values read from the inputs, so it can
    be printed or dumped (e.g. with ``yaml.dump``) and compared later without
    holding on to the original objects.

    Args:
        transient: Object exposing a ``name`` attribute.
        tasks: Iterable of task-register objects. Each item must expose
            ``task.name``, ``last_modified``, ``id``, ``transient_id`` and
            ``status.message``.

    Returns:
        dict: ``{'name': <transient name>, 'tasks': [...]}`` where ``tasks``
        holds one dict per input item, in iteration order, with the keys
        ``name``, ``last_modified``, ``id``, ``transient_id`` and ``status``.
    """
    return {
        'name': transient.name,
        'tasks': [
            {
                'name': task.task.name,
                'last_modified': task.last_modified,
                'id': task.id,
                'transient_id': task.transient_id,
                'status': task.status.message,
            }
            for task in tasks
        ],
    }
# When True (the default) the script only reports which TaskRegister rows it
# would remove. Set to False to actually delete them.
DRY_RUN = True

# Only transients whose processing_status is "processing" are examined.
transients_processing = Transient.objects.filter(processing_status="processing")
for transient in transients_processing:
    print(f'''Analyzing transient "{transient.name}"...''', end='')
    tasks = TaskRegister.objects.filter(transient__name=transient.name)
    # Snapshot taken before anything is deleted; reused for the "before" dump.
    report_before = create_report(transient, tasks)

    # Group the registered tasks by task name in a single pass. Any name that
    # maps to more than one entry is a set of duplicates. Insertion order is
    # preserved, so the output order is deterministic.
    tasks_by_name = {}
    for task in report_before['tasks']:
        tasks_by_name.setdefault(task['name'], []).append(task)
    duplicate_groups = [group for group in tasks_by_name.values() if len(group) > 1]

    for duplicate_tasks in duplicate_groups:
        print(f'''Duplicate TaskRegister objects ({len(duplicate_tasks)}): {duplicate_tasks}''')
        incomplete_tasks = [task for task in duplicate_tasks if task['status'] != "processed"]
        print(f''' Incomplete TaskRegister objects in duplicate set ({len(incomplete_tasks)}): {incomplete_tasks}''')
        # Prune only if at least one duplicate has status "processed"; in that
        # case the non-processed copies are removed. If none is processed, the
        # whole set is left alone so that no task is lost entirely.
        if len(incomplete_tasks) < len(duplicate_tasks):
            for incomplete_task in incomplete_tasks:
                incomplete_task_id = incomplete_task['id']
                if DRY_RUN:
                    print(f''' [dry-run] Deleting duplicate TaskRegister object with id = {incomplete_task_id}...''')
                else:
                    print(f''' Deleting duplicate TaskRegister object with id = {incomplete_task_id}...''')
                    TaskRegister.objects.get(id=incomplete_task_id).delete()

    if duplicate_groups:
        print(' done. Duplicate registered tasks detected.')
        print('''Before deduplication''')
        print(yaml.dump(report_before))
        print('''After deduplication''')
        # Query again so the "after" report reflects the current database state.
        tasks = TaskRegister.objects.filter(transient__name=transient.name)
        report_after = create_report(transient, tasks)
        print(yaml.dump(report_after))
    else:
        print(' done. No duplicate registered tasks.')

    # Optional follow-up, left disabled as in the original script: recompute
    # the transient's processing status after pruning.
    # from host.base_tasks import get_processing_status
    # print('''Updating processing status...''')
    # transient.processing_status = get_processing_status(transient)
    # transient.save()
    # print(f'''"{transient.name}": {transient.processing_status}''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment