Skip to content

Instantly share code, notes, and snippets.

@aqzlpm11
Created June 25, 2018 13:52
Show Gist options
  • Save aqzlpm11/2cd70b15b69a4a05a37cbbe8053ac91b to your computer and use it in GitHub Desktop.
Save aqzlpm11/2cd70b15b69a4a05a37cbbe8053ac91b to your computer and use it in GitHub Desktop.
pdf 重排
#!/usr/bin/env python
import os
import shutil
import sys
import tempfile

try:
    from PyPDF2 import PdfFileReader, PdfFileWriter
except ImportError:
    # Fall back to pyPdf when PyPDF2 is not installed.
    from pyPdf import PdfFileReader, PdfFileWriter
def concatenate(input_files, output_file):
    """Concatenate several PDF files into a single PDF.

    Pages are appended in the order the files are listed, keeping each
    file's own page order.

    Args:
        input_files: Iterable of paths of the PDFs to join, in order.
        output_file: Path the combined PDF is written to.

    The output file is opened only after every input has been opened and
    parsed, so a failure on an input does not create or truncate the
    output file.
    """
    input_streams = []
    try:
        for input_file in input_files:
            input_streams.append(open(input_file, 'rb'))

        writer = PdfFileWriter()
        for stream in input_streams:
            reader = PdfFileReader(stream)
            for n in range(reader.getNumPages()):
                writer.addPage(reader.getPage(n))

        # Keep the inputs open until the write is finished: the writer may
        # still need to read page data from the source streams.
        with open(output_file, 'wb') as output_stream:
            writer.write(output_stream)
    finally:
        for f in input_streams:
            f.close()
def reverse(input_file, output_file):
    """Write a copy of a PDF with its pages in reverse order.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path the reversed PDF is written to.
    """
    with open(input_file, 'rb') as src, open(output_file, 'wb') as dst:
        source_pdf = PdfFileReader(src)
        reversed_pdf = PdfFileWriter()
        page_count = source_pdf.getNumPages()
        # Walk the page indices from the last page down to the first.
        for page_index in reversed(range(page_count)):
            reversed_pdf.addPage(source_pdf.getPage(page_index))
        reversed_pdf.write(dst)
def cross(fileA, fileB, output_file):
    """Interleave the pages of two PDFs as A1, B1, A2, B2, ...

    Args:
        fileA: Path of the PDF that supplies the first page of each pair.
        fileB: Path of the PDF that supplies the second page of each pair.
        output_file: Path the interleaved PDF is written to.

    Raises:
        ValueError: If the two files do not have the same number of pages.
            The output file is not created or truncated in that case.
    """
    with open(fileA, 'rb') as fA, open(fileB, 'rb') as fB:
        reader_A = PdfFileReader(fA)
        reader_B = PdfFileReader(fB)
        pages_A = reader_A.getNumPages()
        pages_B = reader_B.getNumPages()
        if pages_A != pages_B:
            raise ValueError("Pages should be equal, now {} != {}".format(pages_A, pages_B))

        writer = PdfFileWriter()
        for i in range(pages_A):
            writer.addPage(reader_A.getPage(i))
            writer.addPage(reader_B.getPage(i))

        # Open the output only after validation succeeded, while both inputs
        # are still open for the writer to read from.
        with open(output_file, 'wb') as fout:
            writer.write(fout)
def double_scan_pdf_merge(front_side_pdfs, reverse_side_pdfs, output_file):
    """Merge the PDFs of a double-sided scan into one PDF.

    The front-side PDFs are concatenated in the given order. The
    reverse-side PDFs are concatenated in the given order and the resulting
    page sequence is then reversed as a whole. Finally the two sequences are
    interleaved: front 1, reverse 1, front 2, reverse 2, ...

    Args:
        front_side_pdfs: Path of the front-side scan, or a list of paths
            when the scan is split into sections (passed in order).
        reverse_side_pdfs: Path of the reverse-side scan, or a list of
            paths when the scan is split into sections (passed in order).
        output_file: Path the merged PDF is written to.

    Raises:
        ValueError: Propagated from ``cross`` when the front and reverse
            sides do not have the same total number of pages.
    """
    if isinstance(front_side_pdfs, str):
        front_side_pdfs = [front_side_pdfs]
    if isinstance(reverse_side_pdfs, str):
        reverse_side_pdfs = [reverse_side_pdfs]

    # Work inside a private temporary directory so intermediate files cannot
    # clobber files in the current directory, and are removed even when a
    # step fails.
    tmp_dir = tempfile.mkdtemp()
    try:
        fronts_path = os.path.join(tmp_dir, 'front.pdf')
        backs_path = os.path.join(tmp_dir, 'back.pdf')
        backs_reversed_path = os.path.join(tmp_dir, 'back_reversed.pdf')

        concatenate(front_side_pdfs, fronts_path)
        concatenate(reverse_side_pdfs, backs_path)
        reverse(backs_path, backs_reversed_path)
        cross(fronts_path, backs_reversed_path, output_file)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
if __name__ == '__main__':
    # Example run: merge the front-side scan 'a.pdf' with the reverse-side
    # scan 'b.pdf' into 'output.pdf'.
    double_scan_pdf_merge(
        front_side_pdfs='a.pdf',
        reverse_side_pdfs='b.pdf',
        output_file='output.pdf',
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment