saahityaedams · January 2, 2026 04:04
diff --git a/scrub_epub_images.py b/scrub_epub_images.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.10"
 # dependencies = [
 #     "pillow",
 # ]
 # ///
 """Remove images from an EPUB file with interactive prompts."""

 import zipfile
 import os
 import sys
 import re
 import base64
 from pathlib import Path
 import tempfile
 from io import BytesIO
 from PIL import Image


 def is_iterm() -> bool:
    """Report whether the process appears to be running inside iTerm2.

    The decision is based solely on the ``TERM_PROGRAM`` environment
    variable, which must equal ``iTerm.app``.
    """
    terminal_program = os.environ.get('TERM_PROGRAM')
    return terminal_program == 'iTerm.app'


 def display_image_iterm(data: bytes, max_width: int = 40):
    """Print raw image bytes as an iTerm2 inline image.

    The bytes are base64-encoded and wrapped in the OSC 1337 ``File``
    escape sequence, with ``max_width`` passed as its ``width`` argument.
    """
    encoded = str(base64.b64encode(data), 'ascii')
    print(f"\033]1337;File=inline=1;width={max_width}:{encoded}\a")


 def display_image_preview(data: bytes, filename: str):
    """Show a terminal preview of an image.

    When ``is_iterm()`` is true the bytes are handed to
    ``display_image_iterm``; if that path raises, or outside iTerm2, a
    boxed grayscale ASCII rendering (at most 60 columns by 20 rows) is
    printed instead.  Failures in the ASCII path are reported with a
    one-line message rather than raised.

    Args:
        data: Encoded image bytes.
        filename: Name of the image; only its extension is inspected.
    """
    if is_iterm():
        try:
            # For SVG, convert to PNG first.
            # NOTE(review): this relies on Image.open() being able to decode
            # SVG data; when it cannot, the exception below routes the call
            # to the ASCII fallback.
            if filename.lower().endswith('.svg'):
                img = Image.open(BytesIO(data))
                buf = BytesIO()
                img.save(buf, format='PNG')
                data = buf.getvalue()

            display_image_iterm(data)
            return
        except Exception:
            # Best effort: any failure here falls through to the ASCII path.
            pass

    # ASCII fallback
    try:
        img = Image.open(BytesIO(data))

        max_width = 60
        max_height = 20

        aspect = img.width / img.height
        preview_width = min(max_width, img.width)
        # The extra "/ 2" presumably compensates for character cells being
        # taller than they are wide.
        scaled_height = int(preview_width / aspect / 2)
        # Very wide images would otherwise round down to zero rows, which
        # resize() rejects; always keep at least one row.
        preview_height = max(1, min(max_height, scaled_height))

        img = img.convert('L').resize((preview_width, preview_height))

        # Ordered from darkest (space) to brightest glyph.
        chars = ' .:-=+*#%@'

        print("\n┌" + "─" * preview_width + "┐")
        for y in range(preview_height):
            row = "│"
            for x in range(preview_width):
                pixel = img.getpixel((x, y))
                # Map the 0-255 gray level onto an index into ``chars``.
                char_idx = int(pixel / 256 * len(chars))
                char_idx = min(char_idx, len(chars) - 1)
                row += chars[char_idx]
            row += "│"
            print(row)
        print("└" + "─" * preview_width + "┘")
    except Exception as e:
        print(f"  (Could not preview: {e})")


 def get_image_info(data: bytes, filename: str) -> str:
    """Build a one-line description of an image.

    The result always contains the file name and the payload size in KB;
    pixel dimensions and format are appended when the bytes can be opened
    as an image, and silently omitted otherwise.
    """
    parts = [f"{filename} ({len(data) / 1024:.1f} KB)"]

    try:
        opened = Image.open(BytesIO(data))
        parts.append(f" - {opened.width}x{opened.height} {opened.format}")
    except Exception:
        pass

    return "".join(parts)


 def prompt_for_image(filename: str, data: bytes, show_preview: bool = True) -> bool | str:
    """Ask the user what to do with one image.

    Prints a header with the image details, optionally a preview, then
    loops on the prompt until a terminating answer is given.

    Args:
        filename: Name of the image, shown in the header.
        data: Encoded image bytes, used for the info line and the preview.
        show_preview: Whether to display a preview before prompting.

    Returns:
        ``True`` to remove the image, ``False`` to keep it, or one of the
        command strings ``'remove_all'``, ``'keep_all'`` or ``'quit'``.
    """
    # Answers that end the prompt, mapped to the value handed back.
    decisions = {
        'y': True,
        'n': False,
        'a': 'remove_all',
        'k': 'keep_all',
        'q': 'quit',
    }

    print("\n" + "=" * 60)
    print(f"Image: {get_image_info(data, filename)}")

    if show_preview:
        display_image_preview(data, filename)

    while True:
        response = input("\nRemove this image? [y/n/v/a/k/q/?]: ").strip().lower()

        if response in decisions:
            return decisions[response]

        if response == 'v':
            display_image_preview(data, filename)
        elif response == '?':
            print("""
 Options:
  y - Yes, remove this image
  n - No, keep this image
  v - View preview again
  a - Remove ALL remaining images (no more prompts)
  k - Keep ALL remaining images (no more prompts)
  q - Quit without saving
  ? - Show this help
            """)
        else:
            print("Invalid option. Enter 'y', 'n', or '?' for help.")

 def scrub_images_from_epub(input_path: str, output_path: str | None = None, show_preview: bool = True) -> str:
    """Interactively remove images from an EPUB and write a scrubbed copy.

    Every archive member with an image extension is offered to the user
    via ``prompt_for_image``.  After a final confirmation the archive is
    unpacked to a temporary directory, the chosen images are deleted,
    ``<img>``/``<image>`` tags in HTML files and ``<item>`` entries in OPF
    files that point at them are stripped, and the remainder is zipped
    to ``output_path`` with ``mimetype`` written first and uncompressed.

    Args:
        input_path: Path to an existing ``.epub`` file.
        output_path: Destination path; defaults to
            ``<stem>_scrubbed.epub`` next to the input.
        show_preview: Forwarded to ``prompt_for_image``.

    Returns:
        The output path as a string, or the input path when the EPUB has
        no images or none were selected (nothing is written then).

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If ``input_path`` does not end in ``.epub``.

    Note:
        Calls ``sys.exit(0)`` when the user quits or declines the final
        confirmation.
    """
    input_path = Path(input_path)

    if not input_path.exists():
        raise FileNotFoundError(f"File not found: {input_path}")

    if input_path.suffix.lower() != '.epub':
        raise ValueError("Input file must be an EPUB file")

    if output_path is None:
        output_path = input_path.parent / f"{input_path.stem}_scrubbed.epub"
    else:
        output_path = Path(output_path)

    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.tiff', '.tif'}

    images_to_remove = set()
    remove_all = False
    keep_all = False

    print(f"\nScanning EPUB: {input_path.name}")
    if is_iterm():
        print("(iTerm2 detected - showing native image previews)")
    print("-" * 60)

    with zipfile.ZipFile(input_path, 'r') as zf:
        image_files = [
            name for name in zf.namelist()
            if Path(name).suffix.lower() in image_extensions
        ]

        print(f"Found {len(image_files)} images\n")

        if not image_files:
            print("No images found in EPUB.")
            return str(input_path)

        for i, name in enumerate(image_files, 1):
            print(f"[{i}/{len(image_files)}]", end=" ")

            # Once a bulk answer was given, stop prompting.
            if remove_all:
                print(f"Removing: {name}")
                images_to_remove.add(name)
                continue

            if keep_all:
                print(f"Keeping: {name}")
                continue

            data = zf.read(name)
            result = prompt_for_image(name, data, show_preview)

            if result == 'remove_all':
                print("Removing all remaining images...")
                remove_all = True
                # The image being shown is part of "all remaining".
                images_to_remove.add(name)
            elif result == 'keep_all':
                print("Keeping all remaining images...")
                keep_all = True
            elif result == 'quit':
                print("\nQuitting without saving.")
                sys.exit(0)
            elif result:
                images_to_remove.add(name)

    if not images_to_remove:
        print("\nNo images selected for removal.")
        return str(input_path)

    print("\n" + "=" * 60)
    print(f"Summary: Removing {len(images_to_remove)} of {len(image_files)} images")
    print("=" * 60)

    confirm = input("\nProceed with removal? [y/n]: ").strip().lower()
    if confirm != 'y':
        print("Cancelled.")
        sys.exit(0)

    # References are matched by file name only, so an image that shares
    # its name with a removed one in another folder loses its tags too.
    removed_basenames = {Path(name).name for name in images_to_remove}
    markup_patterns, manifest_patterns = _build_reference_patterns(removed_basenames)
    modified_files = []

    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(input_path, 'r') as zf:
            zf.extractall(temp_dir)

        for root, _dirs, files in os.walk(temp_dir):
            for filename in files:
                filepath = Path(root) / filename
                ext = filepath.suffix.lower()

                # Archive names use "/", extracted paths may use "\\".
                rel_path = str(filepath.relative_to(temp_dir))
                if rel_path in images_to_remove or rel_path.replace('\\', '/') in images_to_remove:
                    filepath.unlink()
                    continue

                if ext in {'.html', '.xhtml', '.htm'}:
                    patterns = markup_patterns
                elif ext == '.opf':
                    patterns = manifest_patterns
                else:
                    continue

                if _strip_references(filepath, patterns):
                    modified_files.append(filename)

        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            # The mimetype entry is written first and uncompressed.
            mimetype_path = Path(temp_dir) / 'mimetype'
            if mimetype_path.exists():
                zf.write(mimetype_path, 'mimetype', compress_type=zipfile.ZIP_STORED)

            for root, _dirs, files in os.walk(temp_dir):
                for filename in files:
                    filepath = Path(root) / filename
                    # Skip only the root entry already written above, not
                    # unrelated nested files that happen to share the name.
                    if filepath == mimetype_path:
                        continue
                    arcname = filepath.relative_to(temp_dir)
                    zf.write(filepath, arcname)

    print(f"\n✓ Removed {len(images_to_remove)} images")
    print(f"✓ Modified {len(modified_files)} content files")
    print(f"✓ Output saved to: {output_path}")

    return str(output_path)


 def _build_reference_patterns(basenames):
    """Compile, once, the regexes that strip tags pointing at ``basenames``.

    Returns a ``(markup_patterns, manifest_patterns)`` pair of lists.
    """
    alternatives = '|'.join(re.escape(name) for name in sorted(basenames))
    # The optional "anything up to a slash" prefix anchors the match on a
    # whole path component, so removing "cover.png" leaves a reference to
    # "bookcover.png" untouched.
    target = rf'"(?:[^"]*/)?(?:{alternatives})"'
    markup_patterns = [
        re.compile(rf'<img[^>]*src={target}[^>]*/?>', re.IGNORECASE),
        re.compile(rf'<image[^>]*href={target}[^>]*/?>', re.IGNORECASE),
    ]
    manifest_patterns = [
        re.compile(rf'<item[^>]*href={target}[^>]*/?>', re.IGNORECASE),
    ]
    return markup_patterns, manifest_patterns


 def _strip_references(filepath, patterns):
    """Delete every match of ``patterns`` from a UTF-8 file; return True if it changed."""
    try:
        original = filepath.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Files that are not valid UTF-8 are left exactly as extracted.
        return False

    content = original
    for pattern in patterns:
        content = pattern.sub('', content)

    if content == original:
        return False

    filepath.write_text(content, encoding='utf-8')
    return True


 def main():
    """Command-line entry point: parse ``sys.argv`` and run the scrubber.

    With no arguments the usage text is printed and the process exits
    with status 1.  Otherwise the first argument is the input EPUB,
    ``--no-preview`` disables previews, and the last remaining argument
    that does not start with ``-`` is used as the output path.  Any
    error or Ctrl-C is reported and also exits with status 1.
    """
    if not sys.argv[1:]:
        print("""
 EPUB Image Scrubber - Interactive Mode
 ======================================

 Usage: uv run scrub_epub_images.py <input.epub> [output.epub] [--no-preview]

 Options:
  --no-preview    Don't show image previews

 Interactive commands:
  y - Remove the image
  n - Keep the image
  v - View preview again
  a - Remove ALL remaining images
  k - Keep ALL remaining images
  q - Quit without saving
  ? - Show help
        """)
        sys.exit(1)

    input_file, extra_args = sys.argv[1], sys.argv[2:]
    show_preview = '--no-preview' not in extra_args
    # Other dash-prefixed arguments are ignored; the last plain one wins.
    positional = [arg for arg in extra_args if not arg.startswith('-')]
    output_file = positional[-1] if positional else None

    try:
        scrub_images_from_epub(input_file, output_file, show_preview)
    except KeyboardInterrupt:
        print("\n\nInterrupted. Exiting without saving.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


 # Run the command-line interface only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env -S uv run --script
	# /// script
	# requires-python = ">=3.10"
	# dependencies = [
	# "pillow",
	# ]
	# ///
	"""Remove images from an EPUB file with interactive prompts."""

	import zipfile
	import os
	import sys
	import re
	import base64
	from pathlib import Path
	import tempfile
	from io import BytesIO
	from PIL import Image


	def is_iterm() -> bool:
	    """Report whether the process appears to be running inside iTerm2.

	    The decision is based solely on the ``TERM_PROGRAM`` environment
	    variable, which must equal ``iTerm.app``.
	    """
	    terminal_program = os.environ.get('TERM_PROGRAM')
	    return terminal_program == 'iTerm.app'


	def display_image_iterm(data: bytes, max_width: int = 40):
	    """Print raw image bytes as an iTerm2 inline image.

	    The bytes are base64-encoded and wrapped in the OSC 1337 ``File``
	    escape sequence, with ``max_width`` passed as its ``width`` argument.
	    """
	    encoded = str(base64.b64encode(data), 'ascii')
	    print(f"\033]1337;File=inline=1;width={max_width}:{encoded}\a")


	def display_image_preview(data: bytes, filename: str):
	    """Show a terminal preview of an image.

	    When ``is_iterm()`` is true the bytes are handed to
	    ``display_image_iterm``; if that path raises, or outside iTerm2, a
	    boxed grayscale ASCII rendering (at most 60 columns by 20 rows) is
	    printed instead.  Failures in the ASCII path are reported with a
	    one-line message rather than raised.

	    Args:
	        data: Encoded image bytes.
	        filename: Name of the image; only its extension is inspected.
	    """
	    if is_iterm():
	        try:
	            # For SVG, convert to PNG first.
	            # NOTE(review): this relies on Image.open() being able to decode
	            # SVG data; when it cannot, the exception below routes the call
	            # to the ASCII fallback.
	            if filename.lower().endswith('.svg'):
	                img = Image.open(BytesIO(data))
	                buf = BytesIO()
	                img.save(buf, format='PNG')
	                data = buf.getvalue()

	            display_image_iterm(data)
	            return
	        except Exception:
	            # Best effort: any failure here falls through to the ASCII path.
	            pass

	    # ASCII fallback
	    try:
	        img = Image.open(BytesIO(data))

	        max_width = 60
	        max_height = 20

	        aspect = img.width / img.height
	        preview_width = min(max_width, img.width)
	        # The extra "/ 2" presumably compensates for character cells being
	        # taller than they are wide.
	        scaled_height = int(preview_width / aspect / 2)
	        # Very wide images would otherwise round down to zero rows, which
	        # resize() rejects; always keep at least one row.
	        preview_height = max(1, min(max_height, scaled_height))

	        img = img.convert('L').resize((preview_width, preview_height))

	        # Ordered from darkest (space) to brightest glyph.
	        chars = ' .:-=+*#%@'

	        print("\n┌" + "─" * preview_width + "┐")
	        for y in range(preview_height):
	            row = "│"
	            for x in range(preview_width):
	                pixel = img.getpixel((x, y))
	                # Map the 0-255 gray level onto an index into ``chars``.
	                char_idx = int(pixel / 256 * len(chars))
	                char_idx = min(char_idx, len(chars) - 1)
	                row += chars[char_idx]
	            row += "│"
	            print(row)
	        print("└" + "─" * preview_width + "┘")
	    except Exception as e:
	        print(f" (Could not preview: {e})")


	def get_image_info(data: bytes, filename: str) -> str:
	    """Build a one-line description of an image.

	    The result always contains the file name and the payload size in KB;
	    pixel dimensions and format are appended when the bytes can be opened
	    as an image, and silently omitted otherwise.
	    """
	    parts = [f"{filename} ({len(data) / 1024:.1f} KB)"]

	    try:
	        opened = Image.open(BytesIO(data))
	        parts.append(f" - {opened.width}x{opened.height} {opened.format}")
	    except Exception:
	        # Not a decodable image: report name and size only.
	        pass

	    return "".join(parts)


	def prompt_for_image(filename: str, data: bytes, show_preview: bool = True) -> bool | str:
	    """Ask the user what to do with one image.

	    Prints a header with the image details, optionally a preview, then
	    loops on the prompt until a terminating answer is given.

	    Args:
	        filename: Name of the image, shown in the header.
	        data: Encoded image bytes, used for the info line and the preview.
	        show_preview: Whether to display a preview before prompting.

	    Returns:
	        ``True`` to remove the image, ``False`` to keep it, or one of the
	        command strings ``'remove_all'``, ``'keep_all'`` or ``'quit'``.
	    """
	    # Answers that end the prompt, mapped to the value handed back.
	    decisions = {
	        'y': True,
	        'n': False,
	        'a': 'remove_all',
	        'k': 'keep_all',
	        'q': 'quit',
	    }

	    print("\n" + "=" * 60)
	    print(f"Image: {get_image_info(data, filename)}")

	    if show_preview:
	        display_image_preview(data, filename)

	    while True:
	        response = input("\nRemove this image? [y/n/v/a/k/q/?]: ").strip().lower()

	        if response in decisions:
	            return decisions[response]

	        if response == 'v':
	            display_image_preview(data, filename)
	        elif response == '?':
	            print("""
	Options:
	y - Yes, remove this image
	n - No, keep this image
	v - View preview again
	a - Remove ALL remaining images (no more prompts)
	k - Keep ALL remaining images (no more prompts)
	q - Quit without saving
	? - Show this help
	""")
	        else:
	            print("Invalid option. Enter 'y', 'n', or '?' for help.")


	def scrub_images_from_epub(input_path: str, output_path: str | None = None, show_preview: bool = True) -> str:
	    """Interactively remove images from an EPUB and write a scrubbed copy.

	    Every archive member with an image extension is offered to the user
	    via ``prompt_for_image``.  After a final confirmation the archive is
	    unpacked to a temporary directory, the chosen images are deleted,
	    ``<img>``/``<image>`` tags in HTML files and ``<item>`` entries in OPF
	    files that point at them are stripped, and the remainder is zipped
	    to ``output_path`` with ``mimetype`` written first and uncompressed.

	    Args:
	        input_path: Path to an existing ``.epub`` file.
	        output_path: Destination path; defaults to
	            ``<stem>_scrubbed.epub`` next to the input.
	        show_preview: Forwarded to ``prompt_for_image``.

	    Returns:
	        The output path as a string, or the input path when the EPUB has
	        no images or none were selected (nothing is written then).

	    Raises:
	        FileNotFoundError: If ``input_path`` does not exist.
	        ValueError: If ``input_path`` does not end in ``.epub``.

	    Note:
	        Calls ``sys.exit(0)`` when the user quits or declines the final
	        confirmation.
	    """
	    input_path = Path(input_path)

	    if not input_path.exists():
	        raise FileNotFoundError(f"File not found: {input_path}")

	    if input_path.suffix.lower() != '.epub':
	        raise ValueError("Input file must be an EPUB file")

	    if output_path is None:
	        output_path = input_path.parent / f"{input_path.stem}_scrubbed.epub"
	    else:
	        output_path = Path(output_path)

	    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.tiff', '.tif'}

	    images_to_remove = set()
	    remove_all = False
	    keep_all = False

	    print(f"\nScanning EPUB: {input_path.name}")
	    if is_iterm():
	        print("(iTerm2 detected - showing native image previews)")
	    print("-" * 60)

	    with zipfile.ZipFile(input_path, 'r') as zf:
	        image_files = [
	            name for name in zf.namelist()
	            if Path(name).suffix.lower() in image_extensions
	        ]

	        print(f"Found {len(image_files)} images\n")

	        if not image_files:
	            print("No images found in EPUB.")
	            return str(input_path)

	        for i, name in enumerate(image_files, 1):
	            print(f"[{i}/{len(image_files)}]", end=" ")

	            # Once a bulk answer was given, stop prompting.
	            if remove_all:
	                print(f"Removing: {name}")
	                images_to_remove.add(name)
	                continue

	            if keep_all:
	                print(f"Keeping: {name}")
	                continue

	            data = zf.read(name)
	            result = prompt_for_image(name, data, show_preview)

	            if result == 'remove_all':
	                print("Removing all remaining images...")
	                remove_all = True
	                # The image being shown is part of "all remaining".
	                images_to_remove.add(name)
	            elif result == 'keep_all':
	                print("Keeping all remaining images...")
	                keep_all = True
	            elif result == 'quit':
	                print("\nQuitting without saving.")
	                sys.exit(0)
	            elif result:
	                images_to_remove.add(name)

	    if not images_to_remove:
	        print("\nNo images selected for removal.")
	        return str(input_path)

	    print("\n" + "=" * 60)
	    print(f"Summary: Removing {len(images_to_remove)} of {len(image_files)} images")
	    print("=" * 60)

	    confirm = input("\nProceed with removal? [y/n]: ").strip().lower()
	    if confirm != 'y':
	        print("Cancelled.")
	        sys.exit(0)

	    # References are matched by file name only, so an image that shares
	    # its name with a removed one in another folder loses its tags too.
	    removed_basenames = {Path(name).name for name in images_to_remove}
	    markup_patterns, manifest_patterns = _build_reference_patterns(removed_basenames)
	    modified_files = []

	    with tempfile.TemporaryDirectory() as temp_dir:
	        with zipfile.ZipFile(input_path, 'r') as zf:
	            zf.extractall(temp_dir)

	        for root, _dirs, files in os.walk(temp_dir):
	            for filename in files:
	                filepath = Path(root) / filename
	                ext = filepath.suffix.lower()

	                # Archive names use "/", extracted paths may use "\\".
	                rel_path = str(filepath.relative_to(temp_dir))
	                if rel_path in images_to_remove or rel_path.replace('\\', '/') in images_to_remove:
	                    filepath.unlink()
	                    continue

	                if ext in {'.html', '.xhtml', '.htm'}:
	                    patterns = markup_patterns
	                elif ext == '.opf':
	                    patterns = manifest_patterns
	                else:
	                    continue

	                if _strip_references(filepath, patterns):
	                    modified_files.append(filename)

	        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
	            # The mimetype entry is written first and uncompressed.
	            mimetype_path = Path(temp_dir) / 'mimetype'
	            if mimetype_path.exists():
	                zf.write(mimetype_path, 'mimetype', compress_type=zipfile.ZIP_STORED)

	            for root, _dirs, files in os.walk(temp_dir):
	                for filename in files:
	                    filepath = Path(root) / filename
	                    # Skip only the root entry already written above, not
	                    # unrelated nested files that happen to share the name.
	                    if filepath == mimetype_path:
	                        continue
	                    arcname = filepath.relative_to(temp_dir)
	                    zf.write(filepath, arcname)

	    print(f"\n✓ Removed {len(images_to_remove)} images")
	    print(f"✓ Modified {len(modified_files)} content files")
	    print(f"✓ Output saved to: {output_path}")

	    return str(output_path)


	def _build_reference_patterns(basenames):
	    """Compile, once, the regexes that strip tags pointing at ``basenames``.

	    Returns a ``(markup_patterns, manifest_patterns)`` pair of lists.
	    """
	    alternatives = '|'.join(re.escape(name) for name in sorted(basenames))
	    # The optional "anything up to a slash" prefix anchors the match on a
	    # whole path component, so removing "cover.png" leaves a reference to
	    # "bookcover.png" untouched.
	    target = rf'"(?:[^"]*/)?(?:{alternatives})"'
	    markup_patterns = [
	        re.compile(rf'<img[^>]*src={target}[^>]*/?>', re.IGNORECASE),
	        re.compile(rf'<image[^>]*href={target}[^>]*/?>', re.IGNORECASE),
	    ]
	    manifest_patterns = [
	        re.compile(rf'<item[^>]*href={target}[^>]*/?>', re.IGNORECASE),
	    ]
	    return markup_patterns, manifest_patterns


	def _strip_references(filepath, patterns):
	    """Delete every match of ``patterns`` from a UTF-8 file; return True if it changed."""
	    try:
	        original = filepath.read_text(encoding='utf-8')
	    except UnicodeDecodeError:
	        # Files that are not valid UTF-8 are left exactly as extracted.
	        return False

	    content = original
	    for pattern in patterns:
	        content = pattern.sub('', content)

	    if content == original:
	        return False

	    filepath.write_text(content, encoding='utf-8')
	    return True


	def main():
	    """Command-line entry point: parse ``sys.argv`` and run the scrubber.

	    With no arguments the usage text is printed and the process exits
	    with status 1.  Otherwise the first argument is the input EPUB,
	    ``--no-preview`` disables previews, and the last remaining argument
	    that does not start with ``-`` is used as the output path.  Any
	    error or Ctrl-C is reported and also exits with status 1.
	    """
	    if not sys.argv[1:]:
	        print("""
	EPUB Image Scrubber - Interactive Mode
	======================================

	Usage: uv run scrub_epub_images.py <input.epub> [output.epub] [--no-preview]

	Options:
	--no-preview Don't show image previews

	Interactive commands:
	y - Remove the image
	n - Keep the image
	v - View preview again
	a - Remove ALL remaining images
	k - Keep ALL remaining images
	q - Quit without saving
	? - Show help
	""")
	        sys.exit(1)

	    input_file, extra_args = sys.argv[1], sys.argv[2:]
	    show_preview = '--no-preview' not in extra_args
	    # Other dash-prefixed arguments are ignored; the last plain one wins.
	    positional = [arg for arg in extra_args if not arg.startswith('-')]
	    output_file = positional[-1] if positional else None

	    try:
	        scrub_images_from_epub(input_file, output_file, show_preview)
	    except KeyboardInterrupt:
	        print("\n\nInterrupted. Exiting without saving.")
	        sys.exit(1)
	    except Exception as e:
	        print(f"Error: {e}")
	        sys.exit(1)


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == '__main__':
	    main()
No results found