Skip to content

Instantly share code, notes, and snippets.

@noaione
Created November 28, 2023 10:36
Show Gist options
  • Save noaione/6ad248e48e7a042b6d03ea5bf57555ac to your computer and use it in GitHub Desktop.
Save noaione/6ad248e48e7a042b6d03ea5bf57555ac to your computer and use it in GitHub Desktop.
quick and dirty script to repair epub that is not properly tagged as EPUB mimetype
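# Quick script to repair broken EPUB files.
# Requires libmagic and the python-magic package.
# Use case: scan a directory for *.epub files that are ZIP archives but are not
# detected as "application/epub+zip", then re-save each one with a proper,
# uncompressed "mimetype" entry written first.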
import argparse
import sys
from pathlib import Path
from typing import List
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
import magic
# Byte signature a file must start with to be treated as a ZIP container.
ZIP_MAGIC = b"PK\x03\x04"

# Command line: one positional directory plus an optional recursion switch.
parser = argparse.ArgumentParser(description="Repair broken epubs")
parser.add_argument(
    "path",
    type=str,
    help="Path to directory",
)
parser.add_argument(
    "-r",
    "--recursive",
    action="store_true",
    help="Recursive",
)
args = parser.parse_args()

# Normalise the parsed values into the module-level settings used later on.
recursive = bool(args.recursive)
path = Path(args.path)
# Gather every candidate under `path`: the whole tree with --recursive,
# otherwise only the directory's direct children.
_find_matches = path.rglob if recursive else path.glob
# Keep regular files only. A directory (or dangling symlink) whose name ends
# in ".epub" would otherwise make the later open("rb") call fail.
collect_all_epubs: List[Path] = [
    candidate for candidate in _find_matches("*.epub") if candidate.is_file()
]
# Select the files that are ZIP archives but are not reported as EPUB.
to_be_fixed_epubs: List[Path] = []
for candidate in collect_all_epubs:
    # Only the first 128 bytes are read and handed to libmagic.
    with candidate.open("rb") as handle:
        header = handle.read(128)

    if header.startswith(ZIP_MAGIC):
        detected_mime = magic.from_buffer(header, mime=True)
        # An empty detection result is ignored; anything that is not an
        # "epub+zip" MIME type marks the file as needing repair.
        if detected_mime and "epub+zip" not in detected_mime:
            print(f"Found broken epub: {candidate}")
            to_be_fixed_epubs.append(candidate)
# Report the scan result and ask for confirmation before rewriting any file.
print("===============================================")
print(f"Found {len(to_be_fixed_epubs)} broken epubs")
if not to_be_fixed_epubs:
    # Nothing to repair, so there is nothing to confirm.
    sys.exit(0)
# Strip surrounding whitespace so an answer such as "y " or " Y" is accepted;
# "yes" is accepted as well. Any other answer aborts without touching files.
continue_it = input("Continue? [y/n]: ").strip().lower()
if continue_it not in ("y", "yes"):
    sys.exit(0)
# Rewrite every broken archive so that it starts with a proper "mimetype" entry.
for epub in to_be_fixed_epubs:
    print(f"Fixing {epub}")
    # We fix it by re-saving into a sibling temp file with a proper mimetype,
    # then swapping that file over the original.
    save_target = epub.parent / f"{epub.stem}.temp.epub"
    try:
        # Both archives are context-managed so they are closed even on error.
        with ZipFile(save_target, "w", compression=ZIP_DEFLATED) as new_epub:
            # The mimetype entry is written first and stored uncompressed.
            new_epub.writestr(
                "mimetype", "application/epub+zip", compress_type=ZIP_STORED
            )
            with ZipFile(epub, "r") as original_epub:
                # Copy every other member, reusing its original ZipInfo.
                for member in original_epub.infolist():
                    # Skip only the root "mimetype" entry itself. A substring
                    # test would also drop unrelated members whose path merely
                    # contains the word "mimetype".
                    if member.filename == "mimetype":
                        continue
                    new_epub.writestr(member, original_epub.read(member))
    except BaseException:
        # Do not leave a half-written "*.temp.epub" behind (it would match the
        # "*.epub" scan on a later run); the original file is still untouched.
        save_target.unlink(missing_ok=True)
        raise
    # Replace the original in a single step rather than unlink + rename, so a
    # failure cannot leave the book deleted with only the temp file remaining.
    save_target.replace(epub)
    print(f" | Fixed {epub}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment