petergi · October 7, 2025 20:42
diff --git a/rename_pdfs.py b/rename_pdfs.py
 #!/usr/bin/env python3
 """PDF file renaming utility based on metadata."""
 import os
 import re

 from PyPDF2 import PdfReader


 def get_pdf_metadata(pdf_path):
    """
    Extract the title and author metadata from a PDF file.

    Each field is read from the document information dictionary, converted to
    ``str`` and stripped of surrounding whitespace. A field that is missing,
    empty, or blank after stripping is reported as 'Unknown'.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        dict: A dictionary with the keys 'title' and 'author'. If the file
              cannot be opened or parsed for any reason, both are 'Unknown'.

    Example:
        >>> get_pdf_metadata('sample.pdf')
        {'title': 'Sample Title', 'author': 'John Doe'}
    """

    def clean(value):
        # A whitespace-only value must not survive as "": callers would then
        # build file names such as "Author - .pdf".
        text = str(value).strip() if value else ""
        return text or "Unknown"

    try:
        # ``metadata`` may be None/empty when the PDF has no info dictionary.
        info = PdfReader(pdf_path).metadata or {}
        return {
            "title": clean(info.get("/Title")),
            "author": clean(info.get("/Author")),
        }
    except Exception:
        # Deliberate best effort: a missing, unreadable or corrupt file is
        # reported as unknown metadata rather than aborting the caller.
        return {"title": "Unknown", "author": "Unknown"}


 def sanitize_filename(text):
    """
    Remove characters that are unsafe in file names from a string.

    The following are deleted: the reserved punctuation < > : " / | ? * and the
    backslash, plus the ASCII control characters (code points 0 through 31).
    Leading and trailing whitespace is stripped from the result.

    Args:
        text (str): The input string to sanitize.

    Returns:
        str: The sanitized string. It may be empty if every character of the
             input was removed.

    Example:
        >>> sanitize_filename('my<invalid>:file?.txt')
        'myinvalidfile.txt'
    """
    # Control characters are removed together with the reserved punctuation:
    # an embedded NUL makes os.rename() raise ValueError, and the rest of the
    # range is rejected in file names on Windows.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", text).strip()


 def _pick_available_name(directory, current_name, stem):
    """Return the first of '<stem>.pdf', '<stem> (1).pdf', ... that is unused or is the file itself."""
    current_path = os.path.join(directory, current_name)
    candidate = f"{stem}.pdf"
    suffix = 1
    while candidate != current_name:
        candidate_path = os.path.join(directory, candidate)
        if not os.path.exists(candidate_path):
            break
        if os.path.samefile(candidate_path, current_path):
            # Same file under another spelling (e.g. a case-only change on a
            # case-insensitive filesystem): renaming onto it is harmless.
            break
        candidate = f"{stem} ({suffix}).pdf"
        suffix += 1
    return candidate


 def rename_pdfs(directory="."):
    """
    Rename all PDF files in the specified directory based on their metadata.

    For each regular file whose name ends in ".pdf" (case-insensitive), the
    'author' and 'title' metadata are read with `get_pdf_metadata()`, sanitized
    with `sanitize_filename()`, and the file is renamed to
    "<author> - <title>.pdf". A field that is empty after sanitizing is
    replaced by 'Unknown'.

    An existing file is never overwritten: if the target name is already taken
    by a different file, " (1)", " (2)", ... is appended before the extension
    until a free name is found. Files that already carry their target name are
    skipped. Errors for individual files are printed and processing continues.

    Args:
        directory (str, optional): The path to the directory containing PDF files.
                                   Defaults to the current directory (".")

    Raises:
        OSError: If `directory` itself cannot be listed.

    Example:
        >>> rename_pdfs("/path/to/pdf/folder")
        Renamed: oldname.pdf -> Author - Title.pdf
    """
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        # Only regular files are touched; a directory that happens to be
        # called "something.pdf" is left alone.
        if not filename.lower().endswith(".pdf") or not os.path.isfile(filepath):
            continue
        try:
            metadata = get_pdf_metadata(filepath)
            author = sanitize_filename(metadata["author"]) or "Unknown"
            title = sanitize_filename(metadata["title"]) or "Unknown"
            # os.rename() silently replaces an existing destination on POSIX,
            # so a free name is chosen first to avoid destroying other PDFs.
            new_name = _pick_available_name(directory, filename, f"{author} - {title}")
            if new_name == filename:
                continue
            os.rename(filepath, os.path.join(directory, new_name))
            print(f"Renamed: {filename} -> {new_name}")
        except FileNotFoundError as e:
            print(f"File not found: {filename}: {e}")
        except PermissionError as e:
            print(f"Permission denied: {filename}: {e}")
        except OSError as e:
            print(f"OS error processing {filename}: {e}")
        except ValueError as e:
            # Raised by os.rename() for names it cannot encode (embedded NUL).
            print(f"Invalid file name for {filename}: {e}")


 # Script entry point: when the module is executed directly (not imported),
 # rename the PDFs in the current working directory (the default ".").
 if __name__ == "__main__":
    rename_pdfs()
	#!/usr/bin/env python3
	"""PDF file renaming utility based on metadata."""
	import os
	import re

	from PyPDF2 import PdfReader


	def get_pdf_metadata(pdf_path):
	    """
	    Extract the title and author metadata from a PDF file.

	    Each field is read from the document information dictionary, converted to
	    ``str`` and stripped of surrounding whitespace. A field that is missing,
	    empty, or blank after stripping is reported as 'Unknown'.

	    Args:
	        pdf_path (str): The file path to the PDF document.

	    Returns:
	        dict: A dictionary with the keys 'title' and 'author'. If the file
	              cannot be opened or parsed for any reason, both are 'Unknown'.

	    Example:
	        >>> get_pdf_metadata('sample.pdf')
	        {'title': 'Sample Title', 'author': 'John Doe'}
	    """

	    def clean(value):
	        # A whitespace-only value must not survive as "": callers would then
	        # build file names such as "Author - .pdf".
	        text = str(value).strip() if value else ""
	        return text or "Unknown"

	    try:
	        # ``metadata`` may be None/empty when the PDF has no info dictionary.
	        info = PdfReader(pdf_path).metadata or {}
	        return {
	            "title": clean(info.get("/Title")),
	            "author": clean(info.get("/Author")),
	        }
	    except Exception:
	        # Deliberate best effort: a missing, unreadable or corrupt file is
	        # reported as unknown metadata rather than aborting the caller.
	        return {"title": "Unknown", "author": "Unknown"}


	def sanitize_filename(text):
	    """
	    Remove characters that are unsafe in file names from a string.

	    The following are deleted: the reserved punctuation < > : " / | ? * and the
	    backslash, plus the ASCII control characters (code points 0 through 31).
	    Leading and trailing whitespace is stripped from the result.

	    Args:
	        text (str): The input string to sanitize.

	    Returns:
	        str: The sanitized string. It may be empty if every character of the
	             input was removed.

	    Example:
	        >>> sanitize_filename('my<invalid>:file?.txt')
	        'myinvalidfile.txt'
	    """
	    # Control characters are removed together with the reserved punctuation:
	    # an embedded NUL makes os.rename() raise ValueError, and the rest of the
	    # range is rejected in file names on Windows.
	    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", text).strip()


	def _pick_available_name(directory, current_name, stem):
	    """Return the first of '<stem>.pdf', '<stem> (1).pdf', ... that is unused or is the file itself."""
	    current_path = os.path.join(directory, current_name)
	    candidate = f"{stem}.pdf"
	    suffix = 1
	    while candidate != current_name:
	        candidate_path = os.path.join(directory, candidate)
	        if not os.path.exists(candidate_path):
	            break
	        if os.path.samefile(candidate_path, current_path):
	            # Same file under another spelling (e.g. a case-only change on a
	            # case-insensitive filesystem): renaming onto it is harmless.
	            break
	        candidate = f"{stem} ({suffix}).pdf"
	        suffix += 1
	    return candidate


	def rename_pdfs(directory="."):
	    """
	    Rename all PDF files in the specified directory based on their metadata.

	    For each regular file whose name ends in ".pdf" (case-insensitive), the
	    'author' and 'title' metadata are read with `get_pdf_metadata()`, sanitized
	    with `sanitize_filename()`, and the file is renamed to
	    "<author> - <title>.pdf". A field that is empty after sanitizing is
	    replaced by 'Unknown'.

	    An existing file is never overwritten: if the target name is already taken
	    by a different file, " (1)", " (2)", ... is appended before the extension
	    until a free name is found. Files that already carry their target name are
	    skipped. Errors for individual files are printed and processing continues.

	    Args:
	        directory (str, optional): The path to the directory containing PDF files.
	                                   Defaults to the current directory (".")

	    Raises:
	        OSError: If `directory` itself cannot be listed.

	    Example:
	        >>> rename_pdfs("/path/to/pdf/folder")
	        Renamed: oldname.pdf -> Author - Title.pdf
	    """
	    for filename in os.listdir(directory):
	        filepath = os.path.join(directory, filename)
	        # Only regular files are touched; a directory that happens to be
	        # called "something.pdf" is left alone.
	        if not filename.lower().endswith(".pdf") or not os.path.isfile(filepath):
	            continue
	        try:
	            metadata = get_pdf_metadata(filepath)
	            author = sanitize_filename(metadata["author"]) or "Unknown"
	            title = sanitize_filename(metadata["title"]) or "Unknown"
	            # os.rename() silently replaces an existing destination on POSIX,
	            # so a free name is chosen first to avoid destroying other PDFs.
	            new_name = _pick_available_name(directory, filename, f"{author} - {title}")
	            if new_name == filename:
	                continue
	            os.rename(filepath, os.path.join(directory, new_name))
	            print(f"Renamed: {filename} -> {new_name}")
	        except FileNotFoundError as e:
	            print(f"File not found: {filename}: {e}")
	        except PermissionError as e:
	            print(f"Permission denied: {filename}: {e}")
	        except OSError as e:
	            print(f"OS error processing {filename}: {e}")
	        except ValueError as e:
	            # Raised by os.rename() for names it cannot encode (embedded NUL).
	            print(f"Invalid file name for {filename}: {e}")


	# Script entry point: when the module is executed directly (not imported),
	# rename the PDFs in the current working directory (the default ".").
	# NOTE(review): this copy of the script has lost its indentation; the call
	# below belongs inside the `if` body.
	if __name__ == "__main__":
	rename_pdfs()