Created
November 20, 2021 18:00
-
-
Save aaugustin/a1d9a5c9cf67a7c34adec2221f5fdb42 to your computer and use it in GitHub Desktop.
Locate and remove unreferenced media files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python

"""
Locate unreferenced media files.

Compares the file names stored in every FileField of the installed models
with the files present in the corresponding storage and prints a report of
names referenced more than once, names missing from the storage, and stored
files that no field references. The script only reports; it does not delete
anything itself.

Expects DJANGO_SETTINGS_MODULE to be set in the environment.

"""

import collections
import itertools
import os.path

import django
from django.apps import apps
from django.db import models
from django.db.migrations.serializer import DeconstructableSerializer

# The app registry must be populated before models can be enumerated.
django.setup()  # noqa
def storage_key(field):
    """Return a key identifying the storage backend used by ``field``.

    The key is the first element of what Django's migration serializer
    produces for ``field.storage``; fields are grouped by this value.
    """
    serializer = DeconstructableSerializer(field.storage)
    serialized = serializer.serialize()
    return serialized[0]
def enumerate_database(field):
    """Return the file names stored in the database for ``field``.

    Rows that hold no file are skipped: the empty string when the field
    allows blank values, and NULL when the field is nullable. Without the
    NULL filter, ``None`` would be counted as a file name and reported as a
    multiply-referenced and missing file.

    The result is the flat ``values_list`` of the field's column, queried
    through the model's base manager.
    """
    objects = field.model._base_manager
    if field.blank:
        objects = objects.exclude(**{field.name: ''})
    if field.null:
        # NULL means "no file" just like the empty string does.
        objects = objects.exclude(**{field.name + '__isnull': True})
    return objects.values_list(field.name, flat=True)
def enumerate_storage(storage, root=''):
    """Yield the path of every file found in ``storage`` under ``root``.

    Walks the storage recursively with ``storage.listdir``: for each level,
    the files of all sub-directories are yielded first, then the files of
    the directory itself. The directory and file listings of each level are
    also printed to standard output as the walk proceeds.

    Paths are joined with forward slashes on every platform so that they
    compare equal to the names Django stores in the database; joining with
    ``os.path.join`` would produce backslashes on Windows and make every
    nested file look both missing and unreferenced.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import posixpath

    directories, files = storage.listdir(root)
    print('directories ' + root)
    print(directories)
    print('files' + root)
    print(files)
    print()
    for directory in directories:
        yield from enumerate_storage(storage, posixpath.join(root, directory))
    for file in files:
        yield posixpath.join(root, file)
# Every FileField (or subclass) declared on an installed model.
#
# include_parents=False restricts each model to the fields it declares
# itself. With the default, a field declared on a multi-table-inheritance
# parent or on a proxied model would be collected again for every subclass,
# and its database rows would then be counted several times and reported as
# multiple references.
file_fields = [
    field
    for model in apps.get_models()
    for field in model._meta.get_fields(include_parents=False)
    if isinstance(field, models.FileField)
]
# Produce one report per distinct storage backend.
#
# itertools.groupby() only merges *adjacent* items with equal keys, so the
# fields are sorted by the same key first. Without the sort, fields sharing
# a storage but separated in file_fields would form several groups, and the
# files referenced by one group would be reported as unreferenced by the
# others.
for storage_repr, fields in itertools.groupby(
    sorted(file_fields, key=storage_key), storage_key
):
    fields = list(fields)
    # All fields in the group share the same storage key, so any of them
    # can provide the storage instance.
    storage = fields[0].storage

    # Count how many database rows reference each file name.
    database_files = collections.Counter()
    for field in fields:
        database_files.update(enumerate_database(field))

    # Build the set once; it is used for both set differences below.
    storage_files = set(enumerate_storage(storage))

    print("Multiple references")
    print("-------------------")
    print()
    # most_common() is ordered by decreasing count, so the first entry with
    # fewer than two references ends the section.
    for filepath, count in database_files.most_common():
        if count < 2:
            break
        print(count, filepath)
    print()

    print("Missing files")
    print("-------------")
    print()
    # Referenced in the database but absent from the storage.
    for filepath in set(database_files) - storage_files:
        print(filepath)
    print()

    print("Unreferenced files")
    print("------------------")
    print()
    # Present in the storage but referenced by no field of this group.
    for filepath in storage_files - set(database_files):
        print(filepath)
    print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment