Last active
March 10, 2024 12:23
-
-
Save jeryjs/256e06db532c81381565143d00c97baa to your computer and use it in GitHub Desktop.
Compress EPUB files in batch. The script walks the input directory recursively and reproduces its directory structure in the output directory. Usage: py ./compress_epubs.py 'path/to/epubs/dir' 'path/to/output/dir'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import shutil
import tempfile
import zipfile

from PIL import Image
# Minimum EPUB size, in MB, for a file to be processed; smaller files are skipped.
FILE_THRESHOLD = 1.5

# Quality setting passed to the WebP encoder when images are re-saved
# (higher means better quality and larger files).
COMPRESSION_QUALITY = 80
def compress_image(image_path, max_dimension=1080, quality=None):
    """Downscale an image and re-encode it in place as WebP.

    The image is scaled (keeping its aspect ratio) so that its longest side is
    at most ``max_dimension`` pixels, then written back to ``image_path`` in
    WebP format. The file name is not changed, so the file keeps its original
    extension even though its content is WebP afterwards.

    The size in bytes before and after is printed as a progress line.

    Args:
        image_path: Path of the image file to overwrite.
        max_dimension: Largest allowed width/height in pixels.
        quality: WebP quality setting; ``None`` uses ``COMPRESSION_QUALITY``.
    """
    if quality is None:
        quality = COMPRESSION_QUALITY

    # Show only the last three path components. Splitting on os.sep instead of
    # a hard-coded backslash works on every platform, and keeping the
    # expression out of the f-string avoids a backslash inside the replacement
    # field, which is a SyntaxError before Python 3.12.
    path_tail = os.path.normpath(image_path).split(os.sep)[-3:]
    print(f"\t\t\t{path_tail} ({os.path.getsize(image_path)})", end=' -> ')

    # The context manager guarantees the source file handle is released.
    with Image.open(image_path) as source:
        width, height = source.size
        longest_side = max(width, height)
        result = source
        if longest_side > max_dimension:
            ratio = max_dimension / longest_side
            # Clamp to 1 px so extremely elongated images never get a 0-sized side.
            new_size = (max(1, int(ratio * width)), max(1, int(ratio * height)))
            result = source.resize(new_size, Image.LANCZOS)
        result.save(image_path, "WEBP", quality=quality)

    print(f"({os.path.getsize(image_path)})")
def compress_epub(epub_path, output_dir):
    """Write a copy of an EPUB with recompressed images into ``output_dir``.

    The EPUB is unpacked into a temporary working directory, every
    ``.png``/``.jpg``/``.jpeg`` file inside it is passed to ``compress_image``,
    and the content is zipped again under the original file name in
    ``output_dir``. The working directory is always removed, even on error.

    Args:
        epub_path: Path of the source ``.epub`` file.
        output_dir: Directory that receives the compressed EPUB (created if
            it does not exist).
    """
    epub_name = os.path.basename(epub_path)
    print(f"\t\tCompressing: {epub_name}")

    os.makedirs(output_dir, exist_ok=True)
    target_path = os.path.join(output_dir, epub_name)

    # A uniquely named working directory cannot collide with a real output
    # sub-folder that happens to share the book's name.
    with tempfile.TemporaryDirectory(dir=output_dir) as work_dir:
        extract_dir = os.path.join(work_dir, "content")
        os.makedirs(extract_dir)

        # An EPUB is a zip archive, so it can be opened directly.
        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        # Compress images
        print(' ->', end=" ")
        for root, _dirs, files in os.walk(extract_dir):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    compress_image(os.path.join(root, file))

        # Re-zip. The archive is built outside extract_dir so it can never
        # end up containing itself.
        print("\t\tRe-Zipping: ", end='')
        rebuilt_path = os.path.join(work_dir, epub_name)
        with zipfile.ZipFile(rebuilt_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zipf:
            # The EPUB container format requires "mimetype" to be the first
            # entry in the archive and to be stored without compression.
            mimetype_path = os.path.join(extract_dir, 'mimetype')
            if os.path.isfile(mimetype_path):
                print('mimetype', end=' | ')
                zipf.write(mimetype_path, arcname='mimetype', compress_type=zipfile.ZIP_STORED)

            for root, _dirs, files in os.walk(extract_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, extract_dir)
                    if arcname == 'mimetype':
                        continue  # already written as the first entry
                    print(file, end=' | ')
                    zipf.write(file_path, arcname=arcname)

        # Put the finished archive in place, overwriting any previous result.
        os.replace(rebuilt_path, target_path)
def traverse_and_compress(dir_path, output_dir, threshold_mb=None):
    """Compress every sufficiently large EPUB found under ``dir_path``.

    The directory tree is walked recursively. Each ``.epub`` file whose size
    is at least the threshold is handed to ``compress_epub``; the result is
    written to the sub-directory of ``output_dir`` that mirrors the file's
    location relative to ``dir_path``.

    Args:
        dir_path: Root directory to search for EPUB files.
        output_dir: Root directory for the compressed copies.
        threshold_mb: Minimum file size in MB for a file to be processed;
            ``None`` uses ``FILE_THRESHOLD``.
    """
    if threshold_mb is None:
        threshold_mb = FILE_THRESHOLD

    output_abs = os.path.normcase(os.path.abspath(output_dir))

    for root, dirs, files in os.walk(dir_path):
        # The output directory may live inside the input tree (it does with
        # the script's default arguments). Prune it in place so os.walk never
        # descends into it and re-processes already compressed books.
        dirs[:] = [
            name for name in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, name))) != output_abs
        ]

        print(f"Traversing and compressing directory: {root}")
        for file in files:
            if not file.lower().endswith('.epub'):
                continue

            epub_path = os.path.join(root, file)
            file_size_mb = os.path.getsize(epub_path) / (1024 * 1024)
            if file_size_mb < threshold_mb:
                continue

            print(f"\n\tProcessing: {file}")
            # Mirror the file's relative location under the output root.
            epub_output_dir = os.path.join(output_dir, os.path.relpath(root, dir_path))
            os.makedirs(epub_output_dir, exist_ok=True)
            compress_epub(epub_path, epub_output_dir)
def parse_cli_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        A namespace with ``dir`` (input directory, default ``'.'``) and
        ``output`` (output directory, default ``'./Compressed_Epubs'``).
    """
    parser = argparse.ArgumentParser(description='Compress epub files in a directory.')
    parser.add_argument('dir', nargs='?', default='.', help='Directory to compress files from.')
    parser.add_argument('output', nargs='?', default='./Compressed_Epubs', help='Directory to output compressed files to.')
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_cli_args()
    # Walk the input directory and write compressed copies to the output directory.
    traverse_and_compress(args.dir, args.output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment