Last active
September 26, 2024 20:59
-
-
Save v--/d87f71cd8736232bd3f66b55aefd21c8 to your computer and use it in GitHub Desktop.
A script to rescale all pages of a PDF file that are larger than the given reference page. Running it (i.e. `python -m scalepdf`) requires two packages, which can be obtained via `pip install click pypdf`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib | |
import click | |
from pypdf import PdfReader, PdfWriter | |
class ScalePdfError(click.ClickException):
    """Error raised by the scalepdf command for invalid user requests.

    Deriving from ``click.ClickException`` lets click handle the exception
    when it propagates out of the command.
    """
# NOTE: ``@click.command()`` is intentionally the innermost decorator here.
# Parameters are attached to the already-created command in application order
# (bottom-up), which gives the positional order ``SRC DEST``. Reordering these
# decorators may swap the positional arguments, so keep the order as is.
@click.argument('dest', type=click.Path(exists=False, resolve_path=True, path_type=pathlib.Path))
@click.argument('src', type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True, path_type=pathlib.Path))
@click.option('-r', '--reference-page', type=click.IntRange(min=1), required=True, help='A page to use as a reference')
@click.option('-v', '--verbose', is_flag=True, help='Display information for every page')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite the destination file if it exists')
@click.option('-w', '--width', 'use_width', is_flag=True, help='Scale the width to the width of the reference page')
@click.option('-h', '--height', 'use_height', is_flag=True, help='Scale the height to the height of the reference page')
@click.option('-d', '--downscale', is_flag=True, help='Allow downscaling pages')
@click.option('-u', '--upscale', is_flag=True, help='Allow upscaling pages')
@click.command()
def scalepdf(src: pathlib.Path, dest: pathlib.Path, reference_page: int, *, use_width: bool, use_height: bool, upscale: bool, downscale: bool, overwrite: bool, verbose: bool) -> None:
    """Rescale all pages of a PDF file that are larger than the given reference page.

    Oftentimes publishers add a title page that is considerably larger than the rest. This script tries to deal with that annoyance.

    SRC is the PDF to read and DEST is the file to write. The reference page
    number is 1-based. At least one of --width / --height must be given; when
    both are given, the smaller of the two ratios is used so that the scaled
    page fits the reference page in both dimensions.
    """
    # Without a dimension to match there is no finite scale factor.
    if not (use_width or use_height):
        raise click.UsageError('At least one of --width or --height must be given')

    if dest.exists() and not (overwrite or click.confirm(f'File already exists: {dest.as_posix()}. Overwrite?')):
        return

    reader = PdfReader(src)

    # reference_page is 1-based, so the last valid value equals the page count.
    if reference_page > len(reader.pages):
        raise ScalePdfError(f'Page {reference_page} not found')

    writer = PdfWriter()
    writer.clone_reader_document_root(reader)

    # Use the box's width/height rather than its upper-right coordinates, so
    # that media boxes whose lower-left corner is not the origin are handled.
    ref_box = reader.pages[reference_page - 1].mediabox
    ref_width = float(ref_box.width)
    ref_height = float(ref_box.height)

    for i, page in enumerate(writer.pages, start=1):
        width = float(page.mediabox.width)
        height = float(page.mediabox.height)

        if (use_width and width == 0) or (use_height and height == 0):
            raise ScalePdfError(f'Page {i} has an empty media box and cannot be scaled')

        ratios = []
        if use_width:
            ratios.append(ref_width / width)
        if use_height:
            ratios.append(ref_height / height)

        # Rounding absorbs tiny float differences between equally sized pages.
        factor = round(min(ratios), 3)

        if factor == 1:
            if verbose:
                click.echo(f'No need to rescale page {i}')
        elif factor > 1 and not upscale:
            click.echo(f'Page {i} needs to be scaled to {100 * factor:.1f}%, but the --upscale option was not given')
        elif factor < 1 and not downscale:
            click.echo(f'Page {i} needs to be scaled to {100 * factor:.1f}%, but the --downscale option was not given')
        else:
            click.echo(f'Scaling page {i} to {100 * factor:.1f}%')
            page.scale_by(factor)

    # Open the destination once, in binary mode, and hand the stream to pypdf.
    with open(dest, 'wb') as output:
        writer.write(output)
# Run the command-line interface only when executed as a script or via
# ``python -m``, not when the module is imported.
if '__main__' == __name__:
    scalepdf()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment