Created
January 2, 2026 04:04
-
-
Save saahityaedams/6f2f6a01c825cde7c243b9a58bf71c85 to your computer and use it in GitHub Desktop.
Utility script to scrub images from an epub
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "pillow", | |
| # ] | |
| # /// | |
| """Remove images from an EPUB file with interactive prompts.""" | |
| import zipfile | |
| import os | |
| import sys | |
| import re | |
| import base64 | |
| from pathlib import Path | |
| import tempfile | |
| from io import BytesIO | |
| from PIL import Image | |
def is_iterm() -> bool:
    """Report whether the terminal identifies itself as iTerm2.

    The decision rests solely on the ``TERM_PROGRAM`` environment variable
    being exactly ``iTerm.app``.
    """
    terminal_program = os.environ.get('TERM_PROGRAM')
    return terminal_program == 'iTerm.app'
def display_image_iterm(data: bytes, max_width: int = 40):
    """Print *data* to stdout wrapped in iTerm2's inline-image escape sequence.

    Args:
        data: Raw image bytes; they are base64-encoded before being sent.
        max_width: Value forwarded as the ``width`` argument of the sequence.
    """
    payload = base64.b64encode(data).decode('ascii')
    # Layout: ESC ] 1337 ; File=<args> : <base64 payload> BEL
    header = f"\033]1337;File=inline=1;width={max_width}"
    print(header + ":" + payload + "\a")
def display_image_preview(data: bytes, filename: str):
    """Show a terminal preview of an image.

    Under iTerm2 the image is sent with the inline-image escape sequence.
    Everywhere else, or when that attempt raises, a boxed grayscale ASCII
    rendering of at most 60x20 characters is printed instead.

    Args:
        data: Raw bytes of the image file.
        filename: Name of the image; only its extension is inspected, to
            detect SVG files.
    """
    if is_iterm():
        try:
            if filename.lower().endswith('.svg'):
                # Re-encode as PNG before sending.
                # NOTE(review): stock Pillow presumably cannot decode SVG, in
                # which case this raises and we fall through to the ASCII
                # path - confirm.
                img = Image.open(BytesIO(data))
                buf = BytesIO()
                img.save(buf, format='PNG')
                data = buf.getvalue()
            display_image_iterm(data)
            return
        except Exception:
            # Deliberate best effort: any failure falls back to ASCII below.
            pass

    # ASCII fallback
    try:
        img = Image.open(BytesIO(data))
        max_width = 60
        max_height = 20
        aspect = img.width / img.height
        preview_width = min(max_width, img.width)
        # Halve the height because terminal cells are taller than wide.
        preview_height = int(preview_width / aspect / 2)
        # Clamp to at least one row: very wide, short images (e.g. divider
        # rules) would otherwise compute a height of 0 and fail to resize.
        preview_height = max(1, min(max_height, preview_height))
        gray = img.convert('L').resize((preview_width, preview_height))

        chars = ' .:-=+*#%@'
        last_idx = len(chars) - 1
        border = "─" * preview_width
        print("\n┌" + border + "┐")
        for y in range(preview_height):
            # Map each 0-255 luminance value onto the character ramp.
            row = "".join(
                chars[min(int(gray.getpixel((x, y)) / 256 * len(chars)), last_idx)]
                for x in range(preview_width)
            )
            print("│" + row + "│")
        print("└" + border + "┘")
    except Exception as e:
        print(f" (Could not preview: {e})")
def get_image_info(data: bytes, filename: str) -> str:
    """Build a one-line description of an image.

    The result always contains the filename and the size in KB. Pixel
    dimensions and format are appended only when the bytes can be opened as
    an image; any failure there is ignored.
    """
    pieces = [f"{filename} ({len(data) / 1024:.1f} KB)"]
    try:
        picture = Image.open(BytesIO(data))
        pieces.append(f" - {picture.width}x{picture.height} {picture.format}")
    except Exception:
        # Undecodable data: report name and size only.
        pass
    return "".join(pieces)
def prompt_for_image(filename: str, data: bytes, show_preview: bool = True) -> bool | str:
    """Ask the user what to do with one image.

    Prints a header with the image's description, optionally shows a preview,
    then loops on ``input()`` until a terminal answer is given.

    Args:
        filename: Name of the image, used for display and preview.
        data: Raw bytes of the image.
        show_preview: Whether to show a preview before the first prompt.

    Returns:
        ``True`` to remove the image, ``False`` to keep it, or one of the
        command strings ``'remove_all'``, ``'keep_all'`` or ``'quit'``.
    """
    # Answers that end the prompt, mapped to the value handed to the caller.
    terminal_answers = {
        'y': True,
        'n': False,
        'a': 'remove_all',
        'k': 'keep_all',
        'q': 'quit',
    }

    print("\n" + "=" * 60)
    print(f"Image: {get_image_info(data, filename)}")
    if show_preview:
        display_image_preview(data, filename)

    while True:
        response = input("\nRemove this image? [y/n/v/a/k/q/?]: ").strip().lower()
        if response in terminal_answers:
            return terminal_answers[response]
        if response == 'v':
            display_image_preview(data, filename)
        elif response == '?':
            print("""
Options:
y - Yes, remove this image
n - No, keep this image
v - View preview again
a - Remove ALL remaining images (no more prompts)
k - Keep ALL remaining images (no more prompts)
q - Quit without saving
? - Show this help
""")
        else:
            print("Invalid option. Enter 'y', 'n', or '?' for help.")
def _reference_pattern(tag: str, attr: str, basenames: set[str]) -> re.Pattern[str]:
    """Compile a regex matching ``<tag ... attr="...">`` elements that point at one of *basenames*."""
    from urllib.parse import quote  # local import keeps this block self-contained

    # A reference may spell the name raw or percent-encoded (space -> %20).
    variants = sorted({form for name in basenames for form in (name, quote(name))})
    names = "|".join(re.escape(form) for form in variants)
    # The name must start the attribute value or follow a "/", so removing
    # "a.png" does not also strip references to a kept "data.png".
    return re.compile(
        rf"""<{tag}[^>]*{attr}=(["'])(?:[^"'>]*/)?(?:{names})\1[^>]*/?>""",
        re.IGNORECASE,
    )


def _strip_references(path: Path, patterns: list[re.Pattern[str]]) -> bool:
    """Delete every match of *patterns* from the UTF-8 file at *path*; return True if it changed."""
    try:
        original = path.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        print(f"Warning: skipped {path.name} (not valid UTF-8)")
        return False
    content = original
    for pattern in patterns:
        content = pattern.sub('', content)
    if content == original:
        return False
    path.write_text(content, encoding='utf-8')
    return True


def _write_epub(source_dir: Path, output_path: Path) -> None:
    """Zip *source_dir* into *output_path*, writing the root ``mimetype`` entry first and uncompressed."""
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        mimetype_path = source_dir / 'mimetype'
        if mimetype_path.exists():
            zf.write(mimetype_path, 'mimetype', compress_type=zipfile.ZIP_STORED)
        for root, _dirs, files in os.walk(source_dir):
            for filename in files:
                filepath = Path(root) / filename
                # Only the root mimetype was already written above.
                if filepath == mimetype_path:
                    continue
                zf.write(filepath, filepath.relative_to(source_dir).as_posix())


def scrub_images_from_epub(input_path: str, output_path: str | None = None, show_preview: bool = True) -> str:
    """Interactively remove images from an EPUB and write a new EPUB.

    Every archive member with an image extension is offered to the user.
    After a final confirmation the archive is unpacked to a temporary
    directory, the selected images are deleted, ``<img>``/``<image>`` elements
    in HTML/XHTML files and ``<item>`` entries in OPF files that reference
    them are stripped, and the result is re-zipped.

    References are matched by file basename only. If two images in different
    directories share a basename and only one is removed, references to both
    are stripped.

    Args:
        input_path: Path to the source ``.epub`` file.
        output_path: Destination path; defaults to ``<stem>_scrubbed.epub``
            next to the input file.
        show_preview: Whether to preview each image before prompting.

    Returns:
        The output path as a string, or the input path as a string when the
        EPUB has no images or none were selected (nothing is written then).

    Raises:
        FileNotFoundError: If *input_path* does not exist.
        ValueError: If *input_path* does not have an ``.epub`` suffix.
        SystemExit: With code 0 if the user quits or declines the final
            confirmation.
    """
    input_path = Path(input_path)
    if not input_path.exists():
        raise FileNotFoundError(f"File not found: {input_path}")
    if input_path.suffix.lower() != '.epub':
        raise ValueError("Input file must be an EPUB file")

    if output_path is None:
        output_path = input_path.parent / f"{input_path.stem}_scrubbed.epub"
    else:
        output_path = Path(output_path)

    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.tiff', '.tif'}
    images_to_remove: set[str] = set()
    remove_all = False
    keep_all = False

    print(f"\nScanning EPUB: {input_path.name}")
    if is_iterm():
        print("(iTerm2 detected - showing native image previews)")
    print("-" * 60)

    with zipfile.ZipFile(input_path, 'r') as zf:
        image_files = [
            name for name in zf.namelist()
            if Path(name).suffix.lower() in image_extensions
        ]
        print(f"Found {len(image_files)} images\n")
        if not image_files:
            print("No images found in EPUB.")
            return str(input_path)

        for i, name in enumerate(image_files, 1):
            print(f"[{i}/{len(image_files)}]", end=" ")
            # Once a bulk answer was given, stop prompting.
            if remove_all:
                print(f"Removing: {name}")
                images_to_remove.add(name)
                continue
            if keep_all:
                print(f"Keeping: {name}")
                continue

            data = zf.read(name)
            result = prompt_for_image(name, data, show_preview)
            if result == 'remove_all':
                print("Removing all remaining images...")
                remove_all = True
                images_to_remove.add(name)
            elif result == 'keep_all':
                print("Keeping all remaining images...")
                keep_all = True
            elif result == 'quit':
                print("\nQuitting without saving.")
                sys.exit(0)
            elif result:
                images_to_remove.add(name)

    if not images_to_remove:
        print("\nNo images selected for removal.")
        return str(input_path)

    print("\n" + "=" * 60)
    print(f"Summary: Removing {len(images_to_remove)} of {len(image_files)} images")
    print("=" * 60)
    confirm = input("\nProceed with removal? [y/n]: ").strip().lower()
    if confirm != 'y':
        print("Cancelled.")
        sys.exit(0)

    removed_basenames = {Path(name).name for name in images_to_remove}
    # Compile once up front instead of per file and per basename.
    html_patterns = [
        _reference_pattern('img', 'src', removed_basenames),
        _reference_pattern('image', 'href', removed_basenames),
    ]
    opf_patterns = [_reference_pattern('item', 'href', removed_basenames)]
    modified_files = []

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_root = Path(temp_dir)
        with zipfile.ZipFile(input_path, 'r') as zf:
            zf.extractall(temp_root)

        for root, _dirs, files in os.walk(temp_root):
            for filename in files:
                filepath = Path(root) / filename
                # Archive member names use "/" regardless of platform.
                if filepath.relative_to(temp_root).as_posix() in images_to_remove:
                    filepath.unlink()
                    continue

                ext = filepath.suffix.lower()
                if ext in {'.html', '.xhtml', '.htm'}:
                    patterns = html_patterns
                elif ext == '.opf':
                    patterns = opf_patterns
                else:
                    continue
                if _strip_references(filepath, patterns):
                    modified_files.append(filename)

        _write_epub(temp_root, output_path)

    print(f"\n✓ Removed {len(images_to_remove)} images")
    print(f"✓ Modified {len(modified_files)} content files")
    print(f"✓ Output saved to: {output_path}")
    return str(output_path)
_USAGE = """
EPUB Image Scrubber - Interactive Mode
======================================
Usage: uv run scrub_epub_images.py <input.epub> [output.epub] [--no-preview]
Options:
--no-preview Don't show image previews
-h, --help Show this help and exit
Interactive commands:
y - Remove the image
n - Keep the image
v - View preview again
a - Remove ALL remaining images
k - Keep ALL remaining images
q - Quit without saving
? - Show help
"""


def main():
    """Command-line entry point: parse ``sys.argv`` and run the scrubber.

    The first positional argument is the input EPUB and the optional second
    one is the output path. Flags may appear anywhere.

    Exits with status 1 when no input file is given, on an unknown option or
    too many positional arguments, on Ctrl-C, and on any error raised while
    scrubbing. ``-h``/``--help`` prints the usage text and exits with status 0.
    """
    args = sys.argv[1:]
    if not args:
        print(_USAGE)
        sys.exit(1)
    if '-h' in args or '--help' in args:
        print(_USAGE)
        sys.exit(0)

    show_preview = True
    positionals = []
    for arg in args:
        if arg == '--no-preview':
            show_preview = False
        elif arg.startswith('-'):
            # A mistyped flag must not be silently ignored.
            print(f"Error: unknown option: {arg}", file=sys.stderr)
            sys.exit(1)
        else:
            positionals.append(arg)

    if not positionals:
        print(_USAGE)
        sys.exit(1)
    if len(positionals) > 2:
        print(f"Error: too many arguments: {' '.join(positionals[2:])}", file=sys.stderr)
        sys.exit(1)

    input_file = positionals[0]
    output_file = positionals[1] if len(positionals) == 2 else None

    try:
        scrub_images_from_epub(input_file, output_file, show_preview)
    except KeyboardInterrupt:
        print("\n\nInterrupted. Exiting without saving.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment