Created
May 16, 2025 14:52
-
-
Save manning-ncsa/c92c7ab3cf02bae31b75331eeafc8d69 to your computer and use it in GitHub Desktop.
Blast: prune duplicate registered tasks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict

import yaml

from host.models import TaskRegister
from host.models import Transient
def create_report(transient, tasks):
    """Summarize a transient and its registered tasks as plain dicts and lists.

    Args:
        transient: Object exposing a ``name`` attribute.
        tasks: Iterable of objects exposing ``task.name``, ``last_modified``,
            ``id``, ``transient_id`` and ``status.message``.

    Returns:
        A dict with the transient's ``'name'`` and a ``'tasks'`` list holding
        one dict per item of ``tasks``, in iteration order.
    """
    transient_name = transient.name

    task_summaries = []
    for registered in tasks:
        summary = {}
        summary['name'] = registered.task.name
        summary['last_modified'] = registered.last_modified
        summary['id'] = registered.id
        summary['transient_id'] = registered.transient_id
        summary['status'] = registered.status.message
        task_summaries.append(summary)

    return {'name': transient_name, 'tasks': task_summaries}
# Prune duplicate TaskRegister objects for every transient whose
# processing_status is "processing".
#
# For each such transient, registered tasks sharing the same task name are
# treated as duplicates. Within a duplicate set, the entries whose status
# message is not "processed" are removed, but only if at least one entry in
# the set is "processed".

# Safety switch: while True, deletions are only reported, never executed.
DRY_RUN = True

transients_processing = Transient.objects.filter(processing_status="processing")
for transient in transients_processing:
    print(f'''Analyzing transient "{transient.name}"...''', end='')
    tasks = TaskRegister.objects.filter(transient__name=transient.name)
    # Snapshot taken before any deletion; reused below as the "before" report.
    report_before = create_report(transient, tasks)

    # Group the reported tasks by task name in a single pass. Groups keep the
    # order in which each name first appears, so the output is deterministic.
    tasks_by_name = defaultdict(list)
    for task in report_before['tasks']:
        tasks_by_name[task['name']].append(task)
    duplicate_groups = [group for group in tasks_by_name.values() if len(group) > 1]

    for duplicate_tasks in duplicate_groups:
        print(f'''Duplicate TaskRegister objects ({len(duplicate_tasks)}): {duplicate_tasks}''')
        incomplete_tasks = [task for task in duplicate_tasks if task['status'] != "processed"]
        print(f''' Incomplete TaskRegister objects in duplicate set ({len(incomplete_tasks)}): {incomplete_tasks}''')
        # Only prune when at least one duplicate task has status "processed";
        # if every duplicate is incomplete, leave the whole set untouched.
        if len(incomplete_tasks) == len(duplicate_tasks):
            continue
        for incomplete_task in incomplete_tasks:
            incomplete_task_id = incomplete_task['id']
            if DRY_RUN:
                print(f''' [dry-run] Deleting duplicate TaskRegister object with id = {incomplete_task_id}...''')
                continue
            print(f''' Deleting duplicate TaskRegister object with id = {incomplete_task_id}...''')
            TaskRegister.objects.get(id=incomplete_task_id).delete()

    if duplicate_groups:
        print(' done. Duplicate registered tasks detected.')
        print('''Before deduplication''')
        print(yaml.dump(report_before))
        print('''After deduplication''')
        # Query again so the "after" report reflects the current database state.
        tasks = TaskRegister.objects.filter(transient__name=transient.name)
        report_after = create_report(transient, tasks)
        print(yaml.dump(report_after))
    else:
        print(' done. No duplicate registered tasks.')

    # Follow-up step, currently disabled: recompute the transient's processing
    # status once its duplicate tasks have been pruned.
    # from host.base_tasks import get_processing_status
    # print('''Updating processing status...''')
    # transient.processing_status = get_processing_status(transient)
    # transient.save()
    # print(f'''"{transient.name}": {transient.processing_status}''')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment