petergi · October 7, 2025 20:41
diff --git a/rename_epubs.py b/rename_epubs.py
 #!/usr/bin/env python3
 """
 The Python script extracts metadata (title and author) from EPUB files and renames them based on the extracted information.

 :param epub_path: The `epub_path` parameter in the `get_epub_metadata` function is the file path to the EPUB file from which you want to extract the title and author metadata. You should provide the full path to the EPUB file as a string when calling this function, for example "/path/to/book.epub".
 :return: Defines a Python script that extracts metadata (title and author) from EPUB files and renames the files based on this metadata. The `get_epub_metadata` function extracts the metadata from an EPUB file and returns a dictionary containing the title and author. The `rename_epubs` function renames EPUB files in a specified directory based on the extracted metadata.
 """
 import os
 import re
 import xml.etree.ElementTree as ET
 import zipfile


 def get_epub_metadata(epub_path):
    """
    Extract the title and author metadata from an EPUB file.

    The EPUB file (which is a ZIP archive) is opened, the OPF (Open Packaging
    Format) file is located by reading 'META-INF/container.xml', and the OPF
    file is parsed for the Dublin Core ``dc:title`` and ``dc:creator`` elements.

    Args:
        epub_path (str): The file path to the EPUB file.

    Returns:
        dict: A dictionary containing the 'title' and 'author' of the EPUB.
              If an element is missing, empty, or whitespace-only, 'Unknown'
              is returned for the respective field. Leading and trailing
              whitespace is stripped from the returned values.

    Raises:
        zipfile.BadZipFile: If the provided file is not a valid ZIP archive.
        ET.ParseError: If the XML files inside the EPUB are malformed.
        KeyError: If 'META-INF/container.xml' or the OPF file is missing from
            the archive, or if container.xml does not name an OPF file.

    Example:
        >>> get_epub_metadata("book.epub")
        {'title': 'Foundation', 'author': 'Isaac Asimov'}
    """
    with zipfile.ZipFile(epub_path, "r") as zip_file:
        # Read the EPUB's container.xml to find the OPF file path
        container = zip_file.read("META-INF/container.xml")
        rootfile = ET.fromstring(container).find(
            ".//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile"
        )
        opf_path = rootfile.get("full-path") if rootfile is not None else None
        if not opf_path:
            # Raise the documented KeyError rather than an AttributeError on
            # None, so callers that catch KeyError handle this case too.
            raise KeyError("no OPF rootfile declared in META-INF/container.xml")

        # Read and parse the OPF file for metadata
        opf_root = ET.fromstring(zip_file.read(opf_path))

    # Namespace for Dublin Core metadata
    ns = {"dc": "http://purl.org/dc/elements/1.1/"}
    metadata = {}
    for field, xpath in (("title", ".//dc:title"), ("author", ".//dc:creator")):
        element = opf_root.find(xpath, ns)
        # .text is None for an empty element such as <dc:title/>
        text = (element.text or "").strip() if element is not None else ""
        metadata[field] = text or "Unknown"
    return metadata


 def sanitize_filename(text):
    r"""
    Remove characters from a string that are invalid in filenames on most filesystems.

    The following characters, which are not allowed in filenames on Windows and
    are problematic on other operating systems as well, are stripped out:
        < > : " / \ | ? *
    ASCII control characters (code points 0-31, e.g. newline, tab, NUL) are
    removed too, since they are equally unusable in filenames.

    Args:
        text (str): The input string to be sanitized for use as a filename.

    Returns:
        str: The sanitized string with invalid filename characters removed and
             leading/trailing whitespace stripped.

    Example:
        >>> sanitize_filename('My:Book/Title?.epub')
        'MyBookTitle.epub'
    """
    # The docstring is raw so the literal backslash above is not parsed as an
    # (invalid) escape sequence.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", text).strip()


 def rename_epubs(directory="."):
    """
    Rename all EPUB files in the specified directory based on their internal metadata.

    For each `.epub` file found in the directory, this function:
      - Extracts the author and title metadata using `get_epub_metadata()`.
      - Sanitizes the author and title strings to ensure they are safe for filenames.
      - Renames the file to the format: "Author - Title.epub".
      - Skips renaming if the filename is already correct.
      - Skips renaming if a different file already has the target name, so
        that no existing file is overwritten.
      - Prints a message for each rename, skip, or error encountered.

    Args:
        directory (str, optional): The path to the directory containing EPUB files.
                                   Defaults to the current directory (".")

    Returns:
        None

    Example:
        >>> rename_epubs("/path/to/epub/files")
        Renamed: file000001.epub -> Isaac Asimov - Foundation.epub
        Renamed: file000002.epub -> Ursula K Le Guin - The Dispossessed.epub
        Error processing file000003.epub: Not a valid EPUB file

    Notes:
        - Requires the helper functions `get_epub_metadata(filepath)` and `sanitize_filename(text)`.
        - Per-file errors are printed and processing continues with the next file.
    """
    for filename in os.listdir(directory):
        # Process only files with .epub extension (case-insensitive)
        if not filename.lower().endswith(".epub"):
            continue

        filepath = os.path.join(directory, filename)
        try:
            # Extract metadata (author and title) from the EPUB file
            metadata = get_epub_metadata(filepath)
            # Sanitize metadata to create a safe filename
            new_name = f"{sanitize_filename(metadata['author'])} - {sanitize_filename(metadata['title'])}.epub"
            new_path = os.path.join(directory, new_name)

            # Only rename if the new name is different
            if filepath == new_path:
                continue

            # os.rename silently replaces an existing target on POSIX; refuse
            # to clobber another book. samefile() still allows a case-only
            # rename on case-insensitive filesystems.
            if os.path.exists(new_path) and not os.path.samefile(filepath, new_path):
                print(f"Skipped: {filename} -> {new_name} (target already exists)")
                continue

            os.rename(filepath, new_path)
            print(f"Renamed: {filename} -> {new_name}")
        except (
            zipfile.BadZipFile,
            ET.ParseError,
            KeyError,
            AttributeError,  # metadata element missing from the XML
            TypeError,  # metadata element present but without text
            OSError,  # unreadable file, directory named *.epub, failed rename
        ) as e:
            # Print error and continue with the next file
            print(f"Error processing {filename}: {e}")


 # Script entry point: when executed directly (not imported), rename the EPUB
 # files using rename_epubs() with its default directory argument.
 if __name__ == "__main__":
    rename_epubs()
	#!/usr/bin/env python3
	"""
	The Python script extracts metadata (title and author) from EPUB files and renames them based on the extracted information.

	:param epub_path: The `epub_path` parameter in the `get_epub_metadata` function is the file path to the EPUB file from which you want to extract the title and author metadata. You should provide the full path to the EPUB file as a string when calling this function, for example "/path/to/book.epub".
	:return: Defines a Python script that extracts metadata (title and author) from EPUB files and renames the files based on this metadata. The `get_epub_metadata` function extracts the metadata from an EPUB file and returns a dictionary containing the title and author. The `rename_epubs` function renames EPUB files in a specified directory based on the extracted metadata.
	"""
	import os
	import re
	import xml.etree.ElementTree as ET
	import zipfile


	def get_epub_metadata(epub_path):
	    """
	    Extract the title and author metadata from an EPUB file.

	    The EPUB file (which is a ZIP archive) is opened, the OPF (Open Packaging
	    Format) file is located by reading 'META-INF/container.xml', and the OPF
	    file is parsed for the Dublin Core ``dc:title`` and ``dc:creator`` elements.

	    Args:
	        epub_path (str): The file path to the EPUB file.

	    Returns:
	        dict: A dictionary containing the 'title' and 'author' of the EPUB.
	              If an element is missing, empty, or whitespace-only, 'Unknown'
	              is returned for the respective field. Leading and trailing
	              whitespace is stripped from the returned values.

	    Raises:
	        zipfile.BadZipFile: If the provided file is not a valid ZIP archive.
	        ET.ParseError: If the XML files inside the EPUB are malformed.
	        KeyError: If 'META-INF/container.xml' or the OPF file is missing from
	            the archive, or if container.xml does not name an OPF file.

	    Example:
	        >>> get_epub_metadata("book.epub")
	        {'title': 'Foundation', 'author': 'Isaac Asimov'}
	    """
	    with zipfile.ZipFile(epub_path, "r") as zip_file:
	        # Read the EPUB's container.xml to find the OPF file path
	        container = zip_file.read("META-INF/container.xml")
	        rootfile = ET.fromstring(container).find(
	            ".//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile"
	        )
	        opf_path = rootfile.get("full-path") if rootfile is not None else None
	        if not opf_path:
	            # Raise the documented KeyError rather than an AttributeError on
	            # None, so callers that catch KeyError handle this case too.
	            raise KeyError("no OPF rootfile declared in META-INF/container.xml")

	        # Read and parse the OPF file for metadata
	        opf_root = ET.fromstring(zip_file.read(opf_path))

	    # Namespace for Dublin Core metadata
	    ns = {"dc": "http://purl.org/dc/elements/1.1/"}
	    metadata = {}
	    for field, xpath in (("title", ".//dc:title"), ("author", ".//dc:creator")):
	        element = opf_root.find(xpath, ns)
	        # .text is None for an empty element such as <dc:title/>
	        text = (element.text or "").strip() if element is not None else ""
	        metadata[field] = text or "Unknown"
	    return metadata


	def sanitize_filename(text):
	    r"""
	    Remove characters from a string that are invalid in filenames on most filesystems.

	    The following characters, which are not allowed in filenames on Windows and
	    are problematic on other operating systems as well, are stripped out:
	        < > : " / \ | ? *
	    ASCII control characters (code points 0-31, e.g. newline, tab, NUL) are
	    removed too, since they are equally unusable in filenames.

	    Args:
	        text (str): The input string to be sanitized for use as a filename.

	    Returns:
	        str: The sanitized string with invalid filename characters removed and
	             leading/trailing whitespace stripped.

	    Example:
	        >>> sanitize_filename('My:Book/Title?.epub')
	        'MyBookTitle.epub'
	    """
	    # The docstring is raw so the literal backslash above is not parsed as an
	    # (invalid) escape sequence.
	    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "", text).strip()


	def rename_epubs(directory="."):
	    """
	    Rename all EPUB files in the specified directory based on their internal metadata.

	    For each `.epub` file found in the directory, this function:
	      - Extracts the author and title metadata using `get_epub_metadata()`.
	      - Sanitizes the author and title strings to ensure they are safe for filenames.
	      - Renames the file to the format: "Author - Title.epub".
	      - Skips renaming if the filename is already correct.
	      - Skips renaming if a different file already has the target name, so
	        that no existing file is overwritten.
	      - Prints a message for each rename, skip, or error encountered.

	    Args:
	        directory (str, optional): The path to the directory containing EPUB files.
	                                   Defaults to the current directory (".")

	    Returns:
	        None

	    Example:
	        >>> rename_epubs("/path/to/epub/files")
	        Renamed: file000001.epub -> Isaac Asimov - Foundation.epub
	        Renamed: file000002.epub -> Ursula K Le Guin - The Dispossessed.epub
	        Error processing file000003.epub: Not a valid EPUB file

	    Notes:
	        - Requires the helper functions `get_epub_metadata(filepath)` and `sanitize_filename(text)`.
	        - Per-file errors are printed and processing continues with the next file.
	    """
	    for filename in os.listdir(directory):
	        # Process only files with .epub extension (case-insensitive)
	        if not filename.lower().endswith(".epub"):
	            continue

	        filepath = os.path.join(directory, filename)
	        try:
	            # Extract metadata (author and title) from the EPUB file
	            metadata = get_epub_metadata(filepath)
	            # Sanitize metadata to create a safe filename
	            new_name = f"{sanitize_filename(metadata['author'])} - {sanitize_filename(metadata['title'])}.epub"
	            new_path = os.path.join(directory, new_name)

	            # Only rename if the new name is different
	            if filepath == new_path:
	                continue

	            # os.rename silently replaces an existing target on POSIX; refuse
	            # to clobber another book. samefile() still allows a case-only
	            # rename on case-insensitive filesystems.
	            if os.path.exists(new_path) and not os.path.samefile(filepath, new_path):
	                print(f"Skipped: {filename} -> {new_name} (target already exists)")
	                continue

	            os.rename(filepath, new_path)
	            print(f"Renamed: {filename} -> {new_name}")
	        except (
	            zipfile.BadZipFile,
	            ET.ParseError,
	            KeyError,
	            AttributeError,  # metadata element missing from the XML
	            TypeError,  # metadata element present but without text
	            OSError,  # unreadable file, directory named *.epub, failed rename
	        ) as e:
	            # Print error and continue with the next file
	            print(f"Error processing {filename}: {e}")


	# Script entry point: when executed directly (not imported), rename the EPUB
	# files using rename_epubs() with its default directory argument.
	if __name__ == "__main__":
	    rename_epubs()