-
-
Save bskinn/6f1b769d9ca0338c5056c6878c70be62 to your computer and use it in GitHub Desktop.
# | |
# Helper script to properly merge two one-sided scans of a PDF, | |
# where the odd-numbered pages are scanned in order but the | |
# even-numbered pages are scanned in reverse order. | |
# | |
# Requires PyPDF2 ('pip install PyPDF2') | |
# | |
# Input PDFs should be named the same thing, with _odd and _even as | |
# suffixes to the filename (e.g., doc_odd.pdf and doc_even.pdf). | |
# Pass the base filename into the script, | |
# WITHOUT the .pdf extension (e.g., 'python scanmerge.py doc'). | |
# | |
import itertools as itt | |
import sys | |
import PyPDF2 as PDF | |
def main():
    """Interleave the odd- and even-page scans into a single PDF.

    The base filename is read from ``sys.argv[1]``.  Pages are taken from
    ``<base>_odd.pdf`` in file order and from ``<base>_even.pdf`` in
    reverse file order (the even pages were scanned back-to-front), then
    alternated odd/even and written to ``<base>.pdf``.

    Returns:
        0, intended for use as the process exit status.
    """
    fbase = sys.argv[1]
    odd_path = fbase + "_odd.pdf"
    even_path = fbase + "_even.pdf"

    pdf_out = PDF.PdfWriter()

    with open(odd_path, "rb") as f_odd, open(even_path, "rb") as f_even:
        pdf_odd = PDF.PdfReader(f_odd)
        pdf_even = PDF.PdfReader(f_even)

        # Pair odd page k with the k-th even page counted from the end.
        page_pairs = itt.zip_longest(pdf_odd.pages, reversed(pdf_even.pages))

        for page in itt.chain.from_iterable(page_pairs):
            # zip_longest pads the shorter scan with None; test identity
            # rather than truthiness so only that padding is skipped.
            if page is not None:
                pdf_out.add_page(page)

        # Write the result before the input files are closed.
        # NOTE(review): assumes the added pages may still read from the
        # open input streams at write time -- confirm against PyPDF2.
        with open(fbase + ".pdf", "wb") as f_out:
            pdf_out.write(f_out)

    return 0
if __name__ == "__main__":
    # Exactly one argument (the base filename) must follow the script name.
    if len(sys.argv) == 2:
        sys.exit(main())

    print("Wrong number of arguments!")
    sys.exit(1)
Cannot commit to your repository.
Here is an upgrade to ensure compatibility with PyPDF2 3.0.0.
#
# Helper script to properly merge two one-sided scans of a PDF,
# where the odd-numbered pages are scanned in order but the
# even-numbered pages are scanned in reverse order.
#
# Requires PyPDF2 ('pip install PyPDF2')
#
# Input PDFs should be named the same thing, with _odd and _even as
# suffixes to the filename (e.g., doc_odd.pdf and doc_even.pdf).
# Pass the base filename into the script,
# WITHOUT the .pdf extension (e.g., 'python scanmerge.py doc').
#
import itertools as itt
import sys
import PyPDF2 as PDF
def main():
    """Interleave the odd- and even-page scans into a single PDF.

    The base filename is read from ``sys.argv[1]``.  Pages are taken from
    ``<base>_odd.pdf`` in file order and from ``<base>_even.pdf`` in
    reverse file order (the even pages were scanned back-to-front), then
    alternated odd/even and written to ``<base>.pdf``.

    Returns:
        0, intended for use as the process exit status.
    """
    fbase = sys.argv[1]
    odd_path = fbase + "_odd.pdf"
    even_path = fbase + "_even.pdf"

    pdf_out = PDF.PdfWriter()

    with open(odd_path, "rb") as f_odd, open(even_path, "rb") as f_even:
        pdf_odd = PDF.PdfReader(f_odd)
        pdf_even = PDF.PdfReader(f_even)

        # Pair odd page k with the k-th even page counted from the end.
        page_pairs = itt.zip_longest(pdf_odd.pages, reversed(pdf_even.pages))

        for page in itt.chain.from_iterable(page_pairs):
            # zip_longest pads the shorter scan with None; test identity
            # rather than truthiness so only that padding is skipped.
            if page is not None:
                pdf_out.add_page(page)

        # Write the result before the input files are closed.
        # NOTE(review): assumes the added pages may still read from the
        # open input streams at write time -- confirm against PyPDF2.
        with open(fbase + ".pdf", "wb") as f_out:
            pdf_out.write(f_out)

    return 0
if __name__ == "__main__":
    # Exactly one argument (the base filename) must follow the script name.
    if len(sys.argv) == 2:
        sys.exit(main())

    print("Wrong number of arguments!")
    sys.exit(1)
Glad it's been useful for you!
It looks like the only change is from `pdf_out = PDF.PdfFileWriter()` to `pdf_out = PDF.PdfWriter()`?
Is this backward-compatible with PyPDF2<3?
Sorry, only understood now that a gist is not a repository, so I cannot commit.
There are 3 changes of this nature needed to make it compatible with PyPDF2 v3.0.1:
- pdf_out = PDF.PdfFileWriter()
+ pdf_out = PDF.PdfWriter()
- pdf_odd = PDF.PdfFileReader(f_odd)
- pdf_even = PDF.PdfFileReader(f_even)
+ pdf_odd = PDF.PdfReader(f_odd)
+ pdf_even = PDF.PdfReader(f_even)
- pdf_out.addPage(p)
+ pdf_out.add_page(p)
Tested: code still works with PyPDF2-2.12.1
Thanks for this script and thanks for the updates @evolunis-ws. It's nice to have a way to do this offline without uploading your PDFs to an online service.
Glad it was helpful, @dprothero!
@evolunis-ws, finally got around to making the compat edits you suggested -- thank you!
Works like a charm, thanks a lot man, I appreciate your effort.
Works like a charm!