Created
May 13, 2023 13:19
-
-
Save boki1/c0a28e94c1806bc1ff94fdc2ea026157 to your computer and use it in GitHub Desktop.
Given a PDF file of scanned "split-page" views (each PDF page shows a pair of book pages side by side), this script reformats the document so that every book page becomes a separate PDF page.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MIT License, Kristiyan Stoimenov 2023
#
# Use `--help` for a nice overview of the options.
import argparse

import PyPDF2
def build_arg_parser():
    """Return the command-line parser for the split-page tool.

    The input and output paths are mandatory: without them the script has
    nothing to read or write, so argparse reports a usage error instead of
    letting ``open(None)`` fail later.
    """
    argparser = argparse.ArgumentParser(
        description="Split every two-page spread of a scanned PDF into two separate pages."
    )
    argparser.add_argument("-o", "--output_file", required=True, help="output file")
    argparser.add_argument("-i", "--input_file", required=True, help="input file")
    argparser.add_argument('-s', '--skip-first-page', action='store_true', help='skip first page of the input file', default=False)
    return argparser


def half_page_bounds(left, right):
    """Return the horizontal bounds of the two halves of a page.

    Args:
        left: x coordinate of the page's left edge.
        right: x coordinate of the page's right edge.

    Returns:
        ``((left, middle), (middle, right))`` as floats. The middle is not
        truncated to an integer, so both halves have exactly the same width,
        and the left edge is honoured even when it is not zero.
    """
    left = float(left)
    right = float(right)
    middle = (left + right) / 2
    return (left, middle), (middle, right)


def main(argv=None):
    """Read the input PDF, split each page vertically in two, write the result.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    With ``--skip-first-page`` the left half of the first input page is left
    out of the output; its right half is still written.
    """
    args = build_arg_parser().parse_args(argv)
    pdf_writer = PyPDF2.PdfWriter()

    # The reader keeps using the input stream, so the output is written
    # while the input file is still open; `with` closes it even on errors.
    with open(args.input_file, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        for index, page in enumerate(pdf_reader.pages):
            left_half, right_half = half_page_bounds(
                page.mediabox.left, page.mediabox.right
            )

            # NOTE(review): the same page object is added twice with different
            # media boxes. This yields two distinct halves only if add_page
            # stores a copy of the page at call time -- confirm for the
            # installed PyPDF2 version.
            if not (args.skip_first_page and index == 0):
                page.mediabox.left, page.mediabox.right = left_half
                pdf_writer.add_page(page)

            page.mediabox.left, page.mediabox.right = right_half
            pdf_writer.add_page(page)

        with open(args.output_file, 'wb') as output_file:
            pdf_writer.write(output_file)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment