Skip to content

Instantly share code, notes, and snippets.

@v--
Last active September 26, 2024 20:59
Show Gist options
  • Save v--/d87f71cd8736232bd3f66b55aefd21c8 to your computer and use it in GitHub Desktop.
Save v--/d87f71cd8736232bd3f66b55aefd21c8 to your computer and use it in GitHub Desktop.
A script to rescale all pages of a PDF file that are larger than the given reference page. Running it (i.e. `python -m scalepdf`) requires two packages, which can be obtained via `pip install click pypdf`.
import pathlib
import click
from pypdf import PdfReader, PdfWriter
class ScalePdfError(click.ClickException):
    """Error raised by the scalepdf command.

    Subclasses click.ClickException and adds nothing of its own; it exists so
    that failures specific to this script have a dedicated exception type.
    """
@click.argument('dest', type=click.Path(exists=False, resolve_path=True, path_type=pathlib.Path))
@click.argument('src', type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path))
@click.option('-r', '--reference-page', type=click.IntRange(min=1), required=True, help='A page to use as a reference')
@click.option('-v', '--verbose', is_flag=True, help='Display information for every page')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite the destination file if it exists')
@click.option('-w', '--width', 'use_width', is_flag=True, help='Scale the width to the width of the reference page')
@click.option('-h', '--height', 'use_height', is_flag=True, help='Scale the height to the height of the reference page')
@click.option('-d', '--downscale', is_flag=True, help='Allow downscaling pages')
@click.option('-u', '--upscale', is_flag=True, help='Allow upscaling pages')
@click.command()
def scalepdf(src: pathlib.Path, dest: pathlib.Path, reference_page: int, *, use_width: bool, use_height: bool, upscale: bool, downscale: bool, overwrite: bool, verbose: bool) -> None:
    """Rescale all pages of a PDF file that are larger than the given reference page.

    Oftentimes publishers add a title page that is considerably larger than the rest. This script tries to deal with that annoyance.
    """
    # NOTE: the docstring above doubles as the command's --help text, so the
    # implementation notes are kept in comments instead.
    #
    # Parameters (all supplied by click from the command line):
    #   src            -- existing PDF file to read.
    #   dest           -- path the rescaled PDF is written to.
    #   reference_page -- 1-based number of the page whose size is the target.
    #   use_width      -- fit each page's width to the reference width.
    #   use_height     -- fit each page's height to the reference height.
    #   upscale        -- permit scale factors above 1.
    #   downscale      -- permit scale factors below 1.
    #   overwrite      -- replace an existing dest without asking.
    #   verbose        -- also report pages that need no rescaling.
    #
    # Raises ScalePdfError when no dimension was selected or when the
    # reference page does not exist in the source document.

    # Without a dimension to fit there is no finite scale factor to compute.
    if not (use_width or use_height):
        raise ScalePdfError('At least one of --width and --height must be given')

    if dest.exists() and not (overwrite or click.confirm(f'File already exists: {dest.as_posix()}. Overwrite?')):
        return

    reader = PdfReader(src)

    # reference_page is 1-based, so the page count itself is a valid value.
    if reference_page > len(reader.pages):
        raise ScalePdfError(f'Page {reference_page} not found')

    writer = PdfWriter()
    writer.clone_reader_document_root(reader)

    # Use the box's width/height rather than its upper-right coordinates, so
    # pages whose media box does not start at the origin are measured correctly.
    ref_box = reader.pages[reference_page - 1].mediabox
    ref_width = float(ref_box.width)
    ref_height = float(ref_box.height)

    for i, page in enumerate(writer.pages, start=1):
        box = page.mediabox

        # Collect one ratio per selected dimension; the smallest one makes the
        # page fit the reference in every selected dimension.
        ratios = []
        if use_width:
            ratios.append(ref_width / float(box.width))
        if use_height:
            ratios.append(ref_height / float(box.height))

        # Rounding to three decimals treats near-identical page sizes as equal.
        factor = round(min(ratios), 3)

        if factor == 1:
            if verbose:
                click.echo(f'No need to rescale page {i}')
        elif factor > 1 and not upscale:
            click.echo(f'Page {i} needs to be scaled to {100 * factor:.1f}%, but the --upscale option was not given')
        elif factor < 1 and not downscale:
            click.echo(f'Page {i} needs to be scaled to {100 * factor:.1f}%, but the --downscale option was not given')
        else:
            click.echo(f'Scaling page {i} to {100 * factor:.1f}%')
            page.scale_by(factor)

    # A PDF is binary data: open the destination once, in binary mode, and
    # hand the open stream to the writer.
    with open(dest, 'wb') as stream:
        writer.write(stream)
# Run the click command only when this file is executed as a script
# (for example via `python -m scalepdf`), not when it is imported.
if __name__ == '__main__':
    scalepdf()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment