Created
July 16, 2022 14:03
-
-
Save giuseppe998e/9671be639311a5251e493064dada707e to your computer and use it in GitHub Desktop.
A Python3 script that asynchronously renames files (media) in a directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Copyright 2022 Giuseppe Eletto <[email protected]> | |
# | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
# REQUIRES: pip install aiofiles exif | |
from asyncio import run as asyncrun | |
from hashlib import md5 | |
from aiofiles import os as asyncos | |
from aiofiles import open as asyncopen | |
from exif import Image | |
from pathlib import Path | |
from random import randint | |
import re | |
# Settings
MEDIA_DIR = "."  # Directory whose content (searched recursively) gets renamed.

# Constants
# Pattern locating a date, optionally followed by a time, inside free text.
# Capture groups (the numbering is relied upon by the code using the pattern):
#   1: whole date   2: year (1000-2099)   3: month (01-12)   4: day (01-31)
#   5: whole time   6: hour (00-23)       7: minute (00-59)  8: second (00-59)
# Neighbouring fields may be separated by a single "-", "_", "." or ":".
# Month and day are range-checked so that impossible values such as month 13
# or day 00 are not mistaken for a date.
REX_STR = (
    r"(((?:1[0-9]|20)\d{2})[-_.:]?(0[1-9]|1[0-2])[-_.:]?(0[1-9]|[12][0-9]|3[01]))"
    r"(?:[-_\s\w]+?(([01][0-9]|2[0-3])[-_.:]?([0-5][0-9])[-_.:]?([0-5][0-9])))?"
)
# Functions | |
def get_regex_filename(string):
    """Derive a ``media_<date>_<time>`` base name from a date found in *string*.

    The text is searched with ``REX_STR``. When a date is found but no time
    follows it, the hour is set to ``00`` and the minute and second are drawn
    at random (0-59 each), so repeated calls may return different names.

    Returns the new base name (without extension), or ``None`` when no date
    is found.
    """
    found = re.search(REX_STR, string)
    if found is None or found.group(1) is None:
        return None

    year, month, day = found.group(2, 3, 4)
    if found.group(5) is None:
        # No time in the text: midnight plus a random, zero-padded offset.
        hour = "00"
        minute = f"{randint(0, 59):02d}"
        second = f"{randint(0, 59):02d}"
    else:
        hour, minute, second = found.group(6, 7, 8)
    return f"media_{year}{month}{day}_{hour}{minute}{second}"
def get_exif_filename(exif):
    """Derive a ``media_...`` base name from the date stored in *exif*.

    The tags ``datetime_original``, ``datetime_digitized`` and ``datetime``
    are queried through ``exif.get`` in that order; the first value that is
    not ``None`` is handed to ``get_regex_filename`` and its result returned.

    When none of the tags is set, ``None`` itself is handed over, which makes
    the regular-expression search raise ``TypeError``.
    """
    stamp = None
    for tag in ("datetime_original", "datetime_digitized", "datetime"):
        stamp = exif.get(tag)
        if stamp is not None:
            break
    return get_regex_filename(stamp)
# Async Functions | |
async def calc_md5(path):
    """Compute the MD5 hash of the file at *path* without loading it whole.

    The file is opened in binary mode and consumed in 8 KiB reads.

    Returns the ``hashlib`` MD5 object (not the hex string); callers call
    ``hexdigest()`` on it themselves.
    """
    block_size = 8 * 1024  # bytes per read (8 KiB)
    digest = md5()
    async with asyncopen(path, "rb") as stream:
        while True:
            block = await stream.read(block_size)
            if not block:  # empty read: end of file
                break
            digest.update(block)
        return digest
async def rename_using_metadata(path, exif):
    """Rename the file at *path* after the date stored in its EXIF data.

    The new base name comes from ``get_exif_filename(exif)``; the file stays
    in its directory and keeps its extension.

    Raises:
        ValueError: if an EXIF date value is present but no date can be
            recognised in it. Without this check the file would be renamed
            to the literal ``None<ext>``.
        Any exception raised by ``get_exif_filename`` or by the rename itself
        is propagated unchanged.
    """
    new_name = get_exif_filename(exif)
    if new_name is None:
        raise ValueError(f"no usable EXIF date found for '{path}'")
    # NOTE(review): the target is not checked for existence; two files that
    # resolve to the same name may collide - confirm the desired behaviour.
    await asyncos.rename(path, f"{path.parent}/{new_name}{path.suffix}")
async def rename_using_filename(path, bytes):
    """Rename the file at *path* after a date found in its own name.

    ``path.stem`` is passed to ``get_regex_filename``. When that yields no
    name, the file is named ``media_<md5>`` instead. The hash is taken from
    *bytes* (the already-loaded file content) when given, otherwise the file
    is read through ``calc_md5``.

    The file stays in its directory and keeps its extension.
    """
    stem = get_regex_filename(path.stem)
    if stem is None:
        if bytes is None:
            digest = await calc_md5(path)
        else:
            digest = md5(bytes)
        stem = f"media_{digest.hexdigest()}"
    target = f"{path.parent}/{stem}{path.suffix}"
    await asyncos.rename(path, target)
async def main():
    """Rename every file found (recursively) under ``MEDIA_DIR``.

    Files matching ``*.jp*g`` or ``*.png`` are read and renamed from their
    EXIF date; if that fails for any reason they are renamed from their file
    name (or content hash) instead. Errors while handling such a file are
    printed and the file is skipped. Every other file is renamed from its
    file name (or content hash) directly.
    """
    dir_path = Path(MEDIA_DIR)
    # Snapshot the listing first: the loop renames entries of the very
    # directories being walked.
    paths = [entry for entry in dir_path.glob("**/*") if not entry.is_dir()]

    for path in paths:
        # NOTE(review): on case-sensitive file systems these patterns do not
        # match upper-case extensions such as ".JPG" - confirm whether such
        # files should take the EXIF route as well.
        if not (path.match("*.jp*g") or path.match("*.png")):
            await rename_using_filename(path, None)
            continue

        try:
            async with asyncopen(path, mode="rb") as img:
                data = await img.read()
            # The file is closed before renaming it.
            try:
                await rename_using_metadata(path, Image(data))
            except Exception:
                # No (usable) EXIF data: fall back to the generic approach,
                # reusing the content that is already in memory.
                await rename_using_filename(path, data)
        except Exception as e:
            # Best effort: report the problem and go on with the next file.
            # KeyboardInterrupt and task cancellation are deliberately not
            # caught here, so the run can still be aborted.
            print(e)
# Script entry point: run the renaming coroutine to completion, but only
# when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncrun(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How it works
For "JPEG" and "PNG"
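1. The EXIF tags are tried in this order: `datetime_original` -> `datetime_digitized` -> `datetime`.
2. From the first tag found, the script extracts `year`, `month`, `day` and, if present, also `hour`, `minute` and `second`. Otherwise a random `minute` and `second` after midnight of that day is used.
3. The file is renamed to `media_{year}{month}{day}_{hour}{minute}{second}.ext`.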
N.B. If no exif data are present, the "generic" approach visible below is used.
For files other than "JPEG" or "PNG"
1. From the file name, the script extracts `year`, `month`, `day` and, if present, also `hour`, `minute` and `second`.

N.B. If this approach does not work, the script generates the MD5 hash of the file and renames it to `media_{md5}.ext`.