Created
October 7, 2025 20:42
-
-
Save petergi/edc682ac170489a012ba67f06c7536ca to your computer and use it in GitHub Desktop.
PDF file renaming utility based on metadata.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""PDF file renaming utility based on metadata.""" | |
import os | |
import re | |
from PyPDF2 import PdfReader | |
def get_pdf_metadata(pdf_path):
    """
    Extracts the title and author metadata from a PDF file.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        dict: A dictionary containing the 'title' and 'author' of the PDF.
            A field is 'Unknown' when the document carries no metadata, when
            the entry is missing, empty or whitespace-only, or when the file
            cannot be opened or parsed.

    Example:
        >>> get_pdf_metadata('sample.pdf')
        {'title': 'Sample Title', 'author': 'John Doe'}
    """
    result = {"title": "Unknown", "author": "Unknown"}
    try:
        info = PdfReader(pdf_path).metadata
        if info:
            for field, key in (("title", "/Title"), ("author", "/Author")):
                raw = info.get(key)
                # Strip first, then test: a whitespace-only entry must fall
                # back to 'Unknown' rather than produce an empty field.
                text = str(raw).strip() if raw else ""
                if text:
                    result[field] = text
    except Exception:
        # Deliberately best-effort: a missing, unreadable, encrypted or
        # corrupt file must not abort a batch run, so every failure is
        # reported to the caller as "no metadata available".
        return {"title": "Unknown", "author": "Unknown"}
    return result
def sanitize_filename(text):
    """
    Removes invalid characters from a string to make it safe for use as a filename.

    This function strips out characters that are not allowed in filenames on
    most operating systems, such as: < > : " / \\ | ? *
    ASCII control characters (NUL, tab, newline, DEL, ...) are removed as well:
    they can appear in PDF metadata, and an embedded NUL makes ``os.rename``
    raise ``ValueError``. Leading and trailing whitespace is trimmed.

    Args:
        text (str): The input string to sanitize.

    Returns:
        str: The sanitized string, safe for use as a filename. May be empty
            if every character of the input was invalid.

    Example:
        >>> sanitize_filename('my<invalid>:file?.txt')
        'myinvalidfile.txt'
    """
    # One character class covers both the reserved punctuation and the
    # control range, so the string is scanned only once.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f\x7f]', "", text).strip()
def _unique_pdf_name(directory, current_name, stem):
    """Return '<stem>.pdf', or '<stem> (n).pdf' if another file already uses that name."""
    current_path = os.path.join(directory, current_name)
    candidate = f"{stem}.pdf"
    counter = 1
    while candidate != current_name:
        candidate_path = os.path.join(directory, candidate)
        if not os.path.exists(candidate_path):
            break
        if os.path.samefile(candidate_path, current_path):
            # Same file under a different spelling (e.g. case-insensitive
            # file system): renaming onto it is safe.
            break
        counter += 1
        candidate = f"{stem} ({counter}).pdf"
    return candidate


def rename_pdfs(directory="."):
    """
    Renames all PDF files in the specified directory based on their metadata.

    For each PDF file found in the directory, this function extracts the 'author' and 'title'
    metadata using `get_pdf_metadata()`, sanitizes them for use as filenames using
    `sanitize_filename()`, and renames the file to the format: "<author> - <title>.pdf".
    If that name is already taken by a different file, " (2)", " (3)", ... is appended
    so that no existing file is ever overwritten.
    If the file is already named correctly, it is skipped.
    Any errors encountered during processing are printed to the console.

    Args:
        directory (str, optional): The path to the directory containing PDF files.
            Defaults to the current directory (".")

    Example:
        >>> rename_pdfs("/path/to/pdf/folder")
        Renamed: oldname.pdf -> Author - Title.pdf

    Notes:
        - Requires the `get_pdf_metadata` and `sanitize_filename` functions to be defined.
        - Only regular files with a ".pdf" extension (case-insensitive) are processed.
        - If metadata is missing, or nothing is left of it after sanitizing,
          'Unknown' is used for the author or title.
    """
    # Sorted so that collision suffixes are assigned deterministically.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".pdf"):
            continue
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            # A directory that merely ends in ".pdf" is not a document.
            continue
        try:
            metadata = get_pdf_metadata(filepath)
            author = sanitize_filename(metadata["author"]) or "Unknown"
            title = sanitize_filename(metadata["title"]) or "Unknown"
            # os.rename silently replaces an existing target on POSIX, so a
            # free name must be chosen before renaming.
            new_name = _unique_pdf_name(directory, filename, f"{author} - {title}")
            if new_name == filename:
                continue
            os.rename(filepath, os.path.join(directory, new_name))
            print(f"Renamed: {filename} -> {new_name}")
        except FileNotFoundError as e:
            print(f"File not found: {filename}: {e}")
        except PermissionError as e:
            print(f"Permission denied: {filename}: {e}")
        except OSError as e:
            print(f"OS error processing {filename}: {e}")
        except ValueError as e:
            # Raised by os.rename for names the OS layer rejects outright,
            # e.g. an embedded NUL byte taken from the metadata.
            print(f"Invalid target name for {filename}: {e}")
if __name__ == "__main__":
    # Script entry point: process the PDFs in the current working directory.
    rename_pdfs(directory=".")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment