Created
November 28, 2023 10:36
-
-
Save noaione/6ad248e48e7a042b6d03ea5bf57555ac to your computer and use it in GitHub Desktop.
Quick and dirty script to repair EPUB files that are not properly tagged with the EPUB mimetype.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
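# Quick script to repair broken EPUB files.
# Requires libmagic and python-magic to work.
# Use case: files with a .epub extension that are ZIP archives but are not
# identified as application/epub+zip by libmagic.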
import argparse | |
import sys | |
from pathlib import Path | |
from typing import List | |
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile | |
import magic | |
# Leading bytes expected of a ZIP archive (the local file header signature).
ZIP_MAGIC = b"PK\x03\x04"

# Command line: a directory to scan, optionally descending into sub-directories.
parser = argparse.ArgumentParser(description="Repair broken epubs")
parser.add_argument("path", type=str, help="Path to directory")
parser.add_argument("-r", "--recursive", action="store_true", help="Recursive")
args = parser.parse_args()

path = Path(args.path)
recursive = bool(args.recursive)
if not path.is_dir():
    # Globbing a missing path or a plain file silently yields nothing, which
    # would be reported as "0 broken epubs"; fail loudly instead.
    parser.error(f"not a directory: {path}")

# Gather every "*.epub" entry under the directory. Only regular files are
# kept: a directory that happens to be named "something.epub" cannot be
# opened for reading and would abort the scan later on.
find_epubs = path.rglob if recursive else path.glob
collect_all_epubs: List[Path] = [p for p in find_epubs("*.epub") if p.is_file()]
# Pick out the candidates that are ZIP archives but that libmagic does not
# report as EPUB; those are the files that will be rewritten.
to_be_fixed_epubs: List[Path] = []
for epub in collect_all_epubs:
    # Only the first 128 bytes of the file are inspected.
    with epub.open("rb") as fp:
        read_meta = fp.read(128)
    if not read_meta.startswith(ZIP_MAGIC):
        # Not a ZIP container at all; re-zipping cannot repair it.
        continue
    mimetypes = magic.from_buffer(read_meta, mime=True)
    if not mimetypes:
        continue
    if "epub+zip" not in mimetypes:
        print(f"Found broken epub: {epub}")
        to_be_fixed_epubs.append(epub)

print("===============================================")
print(f"Found {len(to_be_fixed_epubs)} broken epubs")
if not to_be_fixed_epubs:
    # Nothing to repair, so there is nothing to confirm.
    sys.exit(0)

# Ask before touching any file; anything other than "y" aborts.
# Surrounding whitespace in the answer is ignored.
continue_it = input("Continue? [y/n]: ").strip().lower()
if continue_it != "y":
    sys.exit(0)
# Repair each flagged file by re-saving it as a new archive whose first entry
# is an uncompressed "mimetype" member, then swapping it in for the original.
for epub in to_be_fixed_epubs:
    print(f"Fixing {epub}")
    # The new archive is built next to the original under a temporary name.
    save_target = epub.parent / f"{epub.stem}.temp.epub"
    try:
        with ZipFile(epub, "r") as original_epub, ZipFile(
            save_target, "w", compression=ZIP_DEFLATED
        ) as new_epub:
            # Written first and stored (not deflated) so the MIME string sits
            # in plain bytes at the start of the file.
            new_epub.writestr(
                "mimetype", "application/epub+zip", compress_type=ZIP_STORED
            )
            # Copy every other member, reusing its ZipInfo.
            for member in original_epub.infolist():
                # Exact match only: a substring test would also drop unrelated
                # members such as "OEBPS/mimetypes.xml".
                if member.filename == "mimetype":
                    continue
                new_epub.writestr(member, original_epub.read(member))
        # Swap the rebuilt archive in with a single rename-over, so the
        # original path is never left without a file.
        save_target.replace(epub)
    finally:
        # After a successful replace the temp path no longer exists and this
        # is a no-op; after a failure it removes the partial archive.
        save_target.unlink(missing_ok=True)
    print(f" | Fixed {epub}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment