partrita · June 22, 2025 05:09
diff --git a/pdf_compressor.py b/pdf_compressor.py
 from pypdf import PdfReader, PdfWriter
 import argparse
 import os
 import sys
 import logging

 # Configure the root logger once, at import time. Messages are printed as
 # "LEVEL: message". The threshold is INFO, so the per-page logging.debug()
 # calls in the compression functions are suppressed unless this level is
 # lowered to DEBUG.
 logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

 def compress_pdf_lossless(input_path: str, output_path: str) -> None:
    """
    Losslessly compress a PDF by compressing the content streams of each page.

    Every page of the input is copied into a fresh ``PdfWriter``, the content
    streams of each copied page are compressed at level 9, and the result is
    written to ``output_path``. The original size, compressed size and
    percentage reduction are logged afterwards.

    Args:
        input_path (str): Path to the input PDF file.
        output_path (str): Path where the compressed PDF will be saved.

    Raises:
        SystemExit: With exit code 1 if the input file is missing, the output
            file cannot be created, or any other error occurs. The cause is
            logged before exiting.
    """
    logging.info(f"Starting lossless compression for '{input_path}'...")
    try:
        reader = PdfReader(input_path)
        writer = PdfWriter()

        for page in reader.pages:
            writer.add_page(page)

        # Compress the writer's copies of the pages (not the reader's).
        # Level 9 is the highest compression level (most CPU intensive).
        for page_number, page in enumerate(writer.pages, start=1):
            logging.debug(f"Compressing content streams for page {page_number}...")
            page.compress_content_streams(level=9)

        with open(output_path, "wb") as f:
            writer.write(f)

        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        # Guard against a zero-byte input to avoid dividing by zero.
        reduction_percent = ((original_size - compressed_size) / original_size) * 100 if original_size > 0 else 0

        logging.info(f"Lossless compression complete! Saved to '{output_path}'.")
        logging.info(f"Original size: {original_size / (1024*1024):.2f} MB")
        logging.info(f"Compressed size: {compressed_size / (1024*1024):.2f} MB")
        logging.info(f"Size reduction: {reduction_percent:.2f}%")

    except FileNotFoundError as e:
        # open(output_path, "wb") also raises FileNotFoundError when the
        # destination directory does not exist, so only blame the input file
        # when it is really the one that is missing.
        if not os.path.exists(input_path):
            logging.error(f"Error: Input file not found at '{input_path}'. Please check the path.")
        else:
            logging.error(f"Error: Could not write output file '{output_path}': {e}")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary for this CLI tool: log the traceback and exit.
        logging.error(f"An unexpected error occurred during lossless compression: {e}", exc_info=True)
        sys.exit(1)

 def compress_pdf_lossy(input_path: str, output_path: str, quality: int = 80) -> None:
    """
    Apply lossy compression to a PDF by re-encoding the images on each page.

    Every page of the input is copied into a fresh ``PdfWriter``; each image
    found on the copied pages is replaced by itself re-saved at the requested
    quality, and the result is written to ``output_path``. The original size,
    compressed size and percentage reduction are logged afterwards. A warning
    is logged when the document contains no images.

    Args:
        input_path (str): Path to the input PDF file.
        output_path (str): Path where the compressed PDF will be saved.
        quality (int): Image compression quality (0-100). Higher values give
            larger files but better image quality. Default is 80.

    Raises:
        SystemExit: With exit code 1 if ``quality`` is outside 0-100, the
            input file is missing, the output file cannot be created, or any
            other error occurs. The cause is logged before exiting.
    """
    if not (0 <= quality <= 100):
        logging.error("Error: Image quality must be between 0 and 100.")
        sys.exit(1)

    logging.info(f"Starting lossy compression (quality={quality}) for '{input_path}'...")
    try:
        reader = PdfReader(input_path)
        writer = PdfWriter()

        for page in reader.pages:
            writer.add_page(page)

        # Re-encode every image on the writer's copies of the pages.
        images_compressed = 0
        for page_number, page in enumerate(writer.pages, start=1):
            logging.debug(f"Processing images on page {page_number}...")
            for img in page.images:
                # Replace the image with a re-compressed version of itself;
                # 'quality' controls how aggressively it is re-encoded.
                img.replace(img.image, quality=quality)
                images_compressed += 1

        if images_compressed == 0:
            logging.warning("No images found for lossy compression. The file size might not change significantly.")

        with open(output_path, "wb") as f:
            writer.write(f)

        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        # Guard against a zero-byte input to avoid dividing by zero.
        reduction_percent = ((original_size - compressed_size) / original_size) * 100 if original_size > 0 else 0

        logging.info(f"Lossy compression complete! Saved to '{output_path}'.")
        logging.info(f"Original size: {original_size / (1024*1024):.2f} MB")
        logging.info(f"Compressed size: {compressed_size / (1024*1024):.2f} MB")
        logging.info(f"Size reduction: {reduction_percent:.2f}%")

    except FileNotFoundError as e:
        # open(output_path, "wb") also raises FileNotFoundError when the
        # destination directory does not exist, so only blame the input file
        # when it is really the one that is missing.
        if not os.path.exists(input_path):
            logging.error(f"Error: Input file not found at '{input_path}'. Please check the path.")
        else:
            logging.error(f"Error: Could not write output file '{output_path}': {e}")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary for this CLI tool: log the traceback and exit.
        logging.error(f"An unexpected error occurred during lossy compression: {e}", exc_info=True)
        sys.exit(1)

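 # ---------------------------------------------------------------------------
 # Command-Line Interface (CLI) Setup
 # ---------------------------------------------------------------------------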
 def main(argv=None) -> None:
    """
    Parse command-line arguments and run the requested PDF compression.

    Exactly one of ``--lossless`` or ``--quality`` must be given; argparse
    enforces this through a required mutually exclusive group.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is the
            behavior of calling ``main()`` with no arguments.

    Raises:
        SystemExit: With exit code 1 if the input path is not an existing
            file or if input and output refer to the same file. argparse
            itself exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Compress PDF files using lossless or lossy methods.",
        formatter_class=argparse.RawTextHelpFormatter # For better formatting of help message
    )

    # Required arguments
    parser.add_argument("input_file", type=str, help="Path to the input PDF file.")
    parser.add_argument("output_file", type=str, help="Path for the output compressed PDF file.")

    # Exactly one compression mode must be selected.
    compression_group = parser.add_mutually_exclusive_group(required=True)
    compression_group.add_argument(
        "-l", "--lossless", action="store_true",
        help="Perform lossless compression (compresses content streams)."
    )
    compression_group.add_argument(
        "-q", "--quality", type=int, choices=range(0, 101), metavar="[0-100]",
        help="Perform lossy compression on images with specified quality (0-100). Lower is smaller file, worse quality."
    )

    args = parser.parse_args(argv)

    # Reject missing paths and directories up front; a directory would pass a
    # plain existence check and only fail later with an opaque error.
    if not os.path.isfile(args.input_file):
        logging.error(f"Input file '{args.input_file}' does not exist or is not a file.")
        sys.exit(1)

    # Refuse to overwrite the original. realpath() resolves symlinks and
    # relative segments; samefile() additionally catches hard links and
    # case-insensitive filesystems when the output path already exists.
    same_file = os.path.realpath(args.input_file) == os.path.realpath(args.output_file)
    if not same_file and os.path.exists(args.output_file):
        same_file = os.path.samefile(args.input_file, args.output_file)
    if same_file:
        logging.error("Input and output file paths cannot be the same. This would overwrite the original file.")
        sys.exit(1)

    if args.lossless:
        compress_pdf_lossless(args.input_file, args.output_file)
    else:
        # The required mutually exclusive group guarantees --quality was
        # supplied whenever --lossless was not.
        compress_pdf_lossy(args.input_file, args.output_file, args.quality)

 # Run the CLI only when this file is executed as a script, not when it is
 # imported as a module.
 if __name__ == "__main__":
    main()
	from pypdf import PdfReader, PdfWriter
	import argparse
	import os
	import sys
	import logging

	# Configure the root logger once, at import time. Messages are printed as
	# "LEVEL: message". The threshold is INFO, so the per-page logging.debug()
	# calls in the compression functions are suppressed unless this level is
	# lowered to DEBUG.
	logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

	def compress_pdf_lossless(input_path: str, output_path: str) -> None:
	    """
	    Losslessly compress a PDF by compressing the content streams of each page.

	    Every page of the input is copied into a fresh ``PdfWriter``, the content
	    streams of each copied page are compressed at level 9, and the result is
	    written to ``output_path``. The original size, compressed size and
	    percentage reduction are logged afterwards.

	    Args:
	        input_path (str): Path to the input PDF file.
	        output_path (str): Path where the compressed PDF will be saved.

	    Raises:
	        SystemExit: With exit code 1 if the input file is missing, the output
	            file cannot be created, or any other error occurs. The cause is
	            logged before exiting.
	    """
	    logging.info(f"Starting lossless compression for '{input_path}'...")
	    try:
	        reader = PdfReader(input_path)
	        writer = PdfWriter()

	        for page in reader.pages:
	            writer.add_page(page)

	        # Compress the writer's copies of the pages (not the reader's).
	        # Level 9 is the highest compression level (most CPU intensive).
	        for page_number, page in enumerate(writer.pages, start=1):
	            logging.debug(f"Compressing content streams for page {page_number}...")
	            page.compress_content_streams(level=9)

	        with open(output_path, "wb") as f:
	            writer.write(f)

	        original_size = os.path.getsize(input_path)
	        compressed_size = os.path.getsize(output_path)
	        # Guard against a zero-byte input to avoid dividing by zero.
	        reduction_percent = ((original_size - compressed_size) / original_size) * 100 if original_size > 0 else 0

	        logging.info(f"Lossless compression complete! Saved to '{output_path}'.")
	        logging.info(f"Original size: {original_size / (1024*1024):.2f} MB")
	        logging.info(f"Compressed size: {compressed_size / (1024*1024):.2f} MB")
	        logging.info(f"Size reduction: {reduction_percent:.2f}%")

	    except FileNotFoundError as e:
	        # open(output_path, "wb") also raises FileNotFoundError when the
	        # destination directory does not exist, so only blame the input file
	        # when it is really the one that is missing.
	        if not os.path.exists(input_path):
	            logging.error(f"Error: Input file not found at '{input_path}'. Please check the path.")
	        else:
	            logging.error(f"Error: Could not write output file '{output_path}': {e}")
	        sys.exit(1)
	    except Exception as e:
	        # Top-level boundary for this CLI tool: log the traceback and exit.
	        logging.error(f"An unexpected error occurred during lossless compression: {e}", exc_info=True)
	        sys.exit(1)

	def compress_pdf_lossy(input_path: str, output_path: str, quality: int = 80) -> None:
	    """
	    Apply lossy compression to a PDF by re-encoding the images on each page.

	    Every page of the input is copied into a fresh ``PdfWriter``; each image
	    found on the copied pages is replaced by itself re-saved at the requested
	    quality, and the result is written to ``output_path``. The original size,
	    compressed size and percentage reduction are logged afterwards. A warning
	    is logged when the document contains no images.

	    Args:
	        input_path (str): Path to the input PDF file.
	        output_path (str): Path where the compressed PDF will be saved.
	        quality (int): Image compression quality (0-100). Higher values give
	            larger files but better image quality. Default is 80.

	    Raises:
	        SystemExit: With exit code 1 if ``quality`` is outside 0-100, the
	            input file is missing, the output file cannot be created, or any
	            other error occurs. The cause is logged before exiting.
	    """
	    if not (0 <= quality <= 100):
	        logging.error("Error: Image quality must be between 0 and 100.")
	        sys.exit(1)

	    logging.info(f"Starting lossy compression (quality={quality}) for '{input_path}'...")
	    try:
	        reader = PdfReader(input_path)
	        writer = PdfWriter()

	        for page in reader.pages:
	            writer.add_page(page)

	        # Re-encode every image on the writer's copies of the pages.
	        images_compressed = 0
	        for page_number, page in enumerate(writer.pages, start=1):
	            logging.debug(f"Processing images on page {page_number}...")
	            for img in page.images:
	                # Replace the image with a re-compressed version of itself;
	                # 'quality' controls how aggressively it is re-encoded.
	                img.replace(img.image, quality=quality)
	                images_compressed += 1

	        if images_compressed == 0:
	            logging.warning("No images found for lossy compression. The file size might not change significantly.")

	        with open(output_path, "wb") as f:
	            writer.write(f)

	        original_size = os.path.getsize(input_path)
	        compressed_size = os.path.getsize(output_path)
	        # Guard against a zero-byte input to avoid dividing by zero.
	        reduction_percent = ((original_size - compressed_size) / original_size) * 100 if original_size > 0 else 0

	        logging.info(f"Lossy compression complete! Saved to '{output_path}'.")
	        logging.info(f"Original size: {original_size / (1024*1024):.2f} MB")
	        logging.info(f"Compressed size: {compressed_size / (1024*1024):.2f} MB")
	        logging.info(f"Size reduction: {reduction_percent:.2f}%")

	    except FileNotFoundError as e:
	        # open(output_path, "wb") also raises FileNotFoundError when the
	        # destination directory does not exist, so only blame the input file
	        # when it is really the one that is missing.
	        if not os.path.exists(input_path):
	            logging.error(f"Error: Input file not found at '{input_path}'. Please check the path.")
	        else:
	            logging.error(f"Error: Could not write output file '{output_path}': {e}")
	        sys.exit(1)
	    except Exception as e:
	        # Top-level boundary for this CLI tool: log the traceback and exit.
	        logging.error(f"An unexpected error occurred during lossy compression: {e}", exc_info=True)
	        sys.exit(1)

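	# ---------------------------------------------------------------------------
	# Command-Line Interface (CLI) Setup
	# ---------------------------------------------------------------------------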
	def main(argv=None) -> None:
	    """
	    Parse command-line arguments and run the requested PDF compression.

	    Exactly one of ``--lossless`` or ``--quality`` must be given; argparse
	    enforces this through a required mutually exclusive group.

	    Args:
	        argv: Optional list of argument strings to parse. When ``None``
	            (the default) argparse reads the process arguments, which is the
	            behavior of calling ``main()`` with no arguments.

	    Raises:
	        SystemExit: With exit code 1 if the input path is not an existing
	            file or if input and output refer to the same file. argparse
	            itself exits on invalid arguments.
	    """
	    parser = argparse.ArgumentParser(
	        description="Compress PDF files using lossless or lossy methods.",
	        formatter_class=argparse.RawTextHelpFormatter # For better formatting of help message
	    )

	    # Required arguments
	    parser.add_argument("input_file", type=str, help="Path to the input PDF file.")
	    parser.add_argument("output_file", type=str, help="Path for the output compressed PDF file.")

	    # Exactly one compression mode must be selected.
	    compression_group = parser.add_mutually_exclusive_group(required=True)
	    compression_group.add_argument(
	        "-l", "--lossless", action="store_true",
	        help="Perform lossless compression (compresses content streams)."
	    )
	    compression_group.add_argument(
	        "-q", "--quality", type=int, choices=range(0, 101), metavar="[0-100]",
	        help="Perform lossy compression on images with specified quality (0-100). Lower is smaller file, worse quality."
	    )

	    args = parser.parse_args(argv)

	    # Reject missing paths and directories up front; a directory would pass a
	    # plain existence check and only fail later with an opaque error.
	    if not os.path.isfile(args.input_file):
	        logging.error(f"Input file '{args.input_file}' does not exist or is not a file.")
	        sys.exit(1)

	    # Refuse to overwrite the original. realpath() resolves symlinks and
	    # relative segments; samefile() additionally catches hard links and
	    # case-insensitive filesystems when the output path already exists.
	    same_file = os.path.realpath(args.input_file) == os.path.realpath(args.output_file)
	    if not same_file and os.path.exists(args.output_file):
	        same_file = os.path.samefile(args.input_file, args.output_file)
	    if same_file:
	        logging.error("Input and output file paths cannot be the same. This would overwrite the original file.")
	        sys.exit(1)

	    if args.lossless:
	        compress_pdf_lossless(args.input_file, args.output_file)
	    else:
	        # The required mutually exclusive group guarantees --quality was
	        # supplied whenever --lossless was not.
	        compress_pdf_lossy(args.input_file, args.output_file, args.quality)

	# Run the CLI only when this file is executed as a script, not when it is
	# imported as a module.
	if __name__ == "__main__":
	    main()
No results found