Skip to content

Instantly share code, notes, and snippets.

@quxf2012
Created October 27, 2023 07:46
Show Gist options
  • Save quxf2012/832bee40b585ec534f97fdfed847fbec to your computer and use it in GitHub Desktop.
Save quxf2012/832bee40b585ec534f97fdfed847fbec to your computer and use it in GitHub Desktop.
压缩图片到指定的大小 MB 默认2MB; 原文件过大可能达不到指定的效果; 可以拿中间产物再跑一次脚本试试;
# pip install "pypdf[image]"
# https://pypdf.readthedocs.io/en/stable/user/file-size.html?highlight=size
# Compress the images in a PDF so the file fits a target size in MB (default: 2 MB).
# python reduce_pdf.py source.pdf 2 #2MB
import io
import os
import sys # NOQA
from pypdf import PdfReader, PdfWriter
# Command line: python reduce_pdf.py <source.pdf> [target size in MB]
DEFAULT_TARGET_MB = 2

if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit("usage: python reduce_pdf.py source.pdf [size_mb]")

source_pdf = sys.argv[1]
source_size = os.path.getsize(source_pdf)  # size of the input file in bytes

# Target size in bytes. It stays at the default when the optional second
# argument is missing or cannot be parsed as a number.
require_size = DEFAULT_TARGET_MB * 1024 * 1024
try:
    # float() parses whole numbers as before and also allows fractional
    # sizes such as "1.5", which int() used to reject (silently falling
    # back to the default).
    require_size = int(float(sys.argv[2]) * 1024 * 1024)
except (IndexError, ValueError, OverflowError):
    # Only "argument absent" / "not a usable number" are expected here;
    # anything else (e.g. KeyboardInterrupt) should propagate.
    pass

print(f"file:{source_pdf},require size={require_size / 1024 / 1024:.2f}; file size={source_size / 1024 / 1024:.2f}")

if require_size > source_size:
    # The target is larger than the file itself, so no compression is
    # attempted. Exit status 1 is kept from the original script.
    sys.exit(1)
# Output goes next to the source file, prefixed with "reduce-".
# os.path.join keeps the path relative when the source has no directory
# part; plain "/" concatenation would yield "/reduce-<name>" in that case.
_dir, _filename = os.path.split(source_pdf)
dest_pdf = os.path.join(_dir, f"reduce-{_filename}")

# First guess for the image quality: the ratio target size / source size,
# expressed as a percentage.
quality = round((require_size / source_size) * 100)
reader = PdfReader(source_pdf)

# Try up to 10 times, lowering the quality by 4 after every attempt whose
# result is still larger than the target.
for i in range(10):
    if quality < 1:
        print(f"压缩失败,{quality=}")
        break

    # Start every attempt from an empty writer filled from the source
    # pages. The output path is deliberately NOT passed to PdfWriter: its
    # first positional parameter is `fileobj`, and the result is written
    # explicitly below.
    # NOTE(review): recent pypdf releases appear to clone an existing file
    # given as `fileobj`, which would mix in a stale output from an earlier
    # run - confirm against the installed pypdf version.
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Re-encode every image on every page at the current quality.
    for page in writer.pages:
        for img in page.images:
            img.replace(img.image, quality=quality)

    # Serialize to memory first so the size can be checked before touching
    # the disk.
    buffer = io.BytesIO()
    writer.write(buffer)
    dest_size = buffer.getbuffer().nbytes
    print(f"{i}: {quality=},current_size={dest_size / 1024 / 1024:.2f}")

    if dest_size <= require_size:
        # Save exactly the bytes that were measured instead of serializing
        # the document a second time.
        with open(dest_pdf, "wb") as f:
            f.write(buffer.getbuffer())
        print(f"out file: < {dest_pdf} > ;size:{(os.path.getsize(dest_pdf) / 1024 / 1024):.2f}MB,Run Count:{i},{quality=}")
        break

    quality -= 4
else:
    # Every attempt was used up without reaching the target size; report
    # it instead of ending silently.
    print(f"压缩失败,Run Count:{i + 1},{quality=}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment