Skip to content

Instantly share code, notes, and snippets.

@petergi
Created October 7, 2025 20:42
Show Gist options
  • Save petergi/edc682ac170489a012ba67f06c7536ca to your computer and use it in GitHub Desktop.
Save petergi/edc682ac170489a012ba67f06c7536ca to your computer and use it in GitHub Desktop.
PDF file renaming utility based on metadata.
#!/usr/bin/env python3
"""PDF file renaming utility based on metadata."""
import os
import re
from PyPDF2 import PdfReader
def _clean_metadata_value(value):
    """Return *value* as a stripped string, or 'Unknown' when nothing usable remains."""
    if not value:
        return "Unknown"
    # Strip first, then fall back: a whitespace-only field must not become an empty string.
    return str(value).strip() or "Unknown"


def get_pdf_metadata(pdf_path):
    """
    Extracts the title and author metadata from a PDF file.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        dict: A dictionary containing the 'title' and 'author' of the PDF.
        'Unknown' is used for a field that is missing, empty, or made up only of
        whitespace, and for both fields when the file cannot be read at all.

    Example:
        >>> get_pdf_metadata('sample.pdf')
        {'title': 'Sample Title', 'author': 'John Doe'}
    """
    try:
        metadata = PdfReader(pdf_path).metadata
        if not metadata:
            return {"title": "Unknown", "author": "Unknown"}
        return {
            "title": _clean_metadata_value(metadata.get("/Title")),
            "author": _clean_metadata_value(metadata.get("/Author")),
        }
    except Exception:
        # Deliberate best-effort: a missing file, a permission error, or any failure
        # while reading the document all map to the same placeholder result, so a
        # single unreadable PDF never aborts the caller.
        return {"title": "Unknown", "author": "Unknown"}
def sanitize_filename(text):
    r"""
    Removes invalid characters from a string to make it safe for use as a filename.

    This function strips out characters that are not allowed in filenames on most
    operating systems, such as: < > : " / \ | ? *
    Runs of tabs and line breaks are replaced by a single space, and every other
    ASCII control character (including NUL, which makes os.rename raise ValueError)
    is removed. Leading and trailing whitespace is trimmed from the result.

    Args:
        text (str): The input string to sanitize.

    Returns:
        str: The sanitized string, safe for use as a filename. May be empty when
        the input contains nothing but invalid characters.

    Example:
        >>> sanitize_filename('my<invalid>:file?.txt')
        'myinvalidfile.txt'
    """
    # Tabs and line breaks embedded in the text become one space so words stay separated.
    spaced = re.sub(r"[\t\n\r\x0b\x0c]+", " ", text)
    # Drop the reserved punctuation plus the remaining control characters.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", spaced).strip()
def _is_same_file(path_a, path_b):
    """Return True when both paths refer to the same existing file."""
    try:
        return os.path.samefile(path_a, path_b)
    except OSError:
        return False


def _available_path(directory, stem, current_path):
    """Return a '<stem>.pdf' path in *directory* that does not clobber another file."""
    candidate = os.path.join(directory, f"{stem}.pdf")
    counter = 1
    # A candidate that is the file being renamed counts as free; this keeps the
    # function idempotent and still allows case-only renames on case-insensitive
    # file systems.
    while os.path.exists(candidate) and not _is_same_file(candidate, current_path):
        candidate = os.path.join(directory, f"{stem} ({counter}).pdf")
        counter += 1
    return candidate


def rename_pdfs(directory="."):
    """
    Renames all PDF files in the specified directory based on their metadata.

    For each PDF file found in the directory, this function extracts the 'author' and
    'title' metadata using `get_pdf_metadata()`, sanitizes them for use as filenames
    using `sanitize_filename()`, and renames the file to the format:
    "<author> - <title>.pdf".
    If the file is already named correctly, it is skipped.
    If another file already owns the target name, a numeric suffix such as " (1)" is
    appended instead of overwriting that file.
    Any errors encountered during processing are printed to the console.

    Args:
        directory (str, optional): The path to the directory containing PDF files.
            Defaults to the current directory (".")

    Example:
        >>> rename_pdfs("/path/to/pdf/folder")
        Renamed: oldname.pdf -> Author - Title.pdf

    Notes:
        - Requires the `get_pdf_metadata` and `sanitize_filename` functions to be defined.
        - Only regular files with a ".pdf" extension (case-insensitive) are processed;
          directories whose names end in ".pdf" are left alone.
        - If metadata is missing, or nothing remains after sanitizing, 'Unknown' is
          used for the author or title.
        - The existence check and the rename are separate steps, so a file created by
          another process in between can still be overwritten on POSIX systems.
    """
    for filename in os.listdir(directory):
        if not filename.lower().endswith(".pdf"):
            continue
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            continue
        try:
            metadata = get_pdf_metadata(filepath)
            # An empty component would produce names like " - Title.pdf".
            author = sanitize_filename(metadata["author"]) or "Unknown"
            title = sanitize_filename(metadata["title"]) or "Unknown"
            new_path = _available_path(directory, f"{author} - {title}", filepath)
            if filepath == new_path:
                continue
            os.rename(filepath, new_path)
            print(f"Renamed: {filename} -> {os.path.basename(new_path)}")
        except FileNotFoundError as e:
            print(f"File not found: {filename}: {e}")
        except PermissionError as e:
            print(f"Permission denied: {filename}: {e}")
        except OSError as e:
            print(f"OS error processing {filename}: {e}")
        except ValueError as e:
            # os.rename raises ValueError for names containing an embedded NUL byte.
            print(f"Invalid target name for {filename}: {e}")
if __name__ == "__main__":
    # Script entry point: rename the PDFs found in the current working directory.
    rename_pdfs(directory=".")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment