Created
June 25, 2018 13:52
-
-
Save aqzlpm11/2cd70b15b69a4a05a37cbbe8053ac91b to your computer and use it in GitHub Desktop.
pdf 重排
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import os
import shutil
import sys
import tempfile

try:
    from PyPDF2 import PdfFileReader, PdfFileWriter
except ImportError:
    from pyPdf import PdfFileReader, PdfFileWriter
def concatenate(input_files, output_file):
    """Concatenate several PDF files into a single PDF.

    Pages are appended in the order the files are listed; each file
    contributes all of its pages in their original order.

    Param:
        input_files: iterable of paths of the PDFs to join.
        output_file: path of the PDF to write.

    The output file is opened only after every input has been opened and
    all pages have been collected, so a missing or unreadable input does
    not leave an empty (or truncated) output file behind.
    """
    input_streams = []
    try:
        for input_file in input_files:
            input_streams.append(open(input_file, 'rb'))
        writer = PdfFileWriter()
        for stream in input_streams:
            reader = PdfFileReader(stream)
            for n in range(reader.getNumPages()):
                writer.addPage(reader.getPage(n))
        # Write while the inputs are still open, as the original did --
        # presumably the writer pulls page data from the source streams
        # at write time.
        with open(output_file, 'wb') as output_stream:
            writer.write(output_stream)
    finally:
        for f in input_streams:
            f.close()
def reverse(input_file, output_file):
    """Write a copy of a PDF with its pages in reverse order.

    Param:
        input_file: path of the PDF to read.
        output_file: path of the PDF to write; its first page is the last
            page of ``input_file``.
    """
    with open(input_file, 'rb') as src, open(output_file, 'wb') as dst:
        pdf_in = PdfFileReader(src)
        pdf_out = PdfFileWriter()
        page_count = pdf_in.getNumPages()
        # Walk the page indices from last to first.
        for index in reversed(range(page_count)):
            pdf_out.addPage(pdf_in.getPage(index))
        pdf_out.write(dst)
def cross(fileA, fileB, output_file):
    """Interleave the pages of two PDFs: A1, B1, A2, B2, ...

    Param:
        fileA: path of the PDF whose pages come first in each pair.
        fileB: path of the PDF whose pages come second in each pair.
        output_file: path of the interleaved PDF to write.

    Raises:
        ValueError: if the two inputs do not have the same number of pages.
            The check runs before the output file is opened, so a failed
            call leaves any existing ``output_file`` untouched.
    """
    with open(fileA, 'rb') as fA, open(fileB, 'rb') as fB:
        reader_A = PdfFileReader(fA)
        reader_B = PdfFileReader(fB)
        pages_A = reader_A.getNumPages()
        pages_B = reader_B.getNumPages()
        if pages_A != pages_B:
            raise ValueError("Pages should be equal, now {} != {}".format(pages_A, pages_B))
        writer = PdfFileWriter()
        for i in range(pages_A):
            writer.addPage(reader_A.getPage(i))
            writer.addPage(reader_B.getPage(i))
        # Open the output last, and write while both inputs are still open.
        with open(output_file, 'wb') as fout:
            writer.write(fout)
def double_scan_pdf_merge(front_side_pdfs, reverse_side_pdfs, output_file):
    """Merge the PDFs produced by a double-sided scan into one document.

    The front-side files are concatenated in the given order; the
    reverse-side files are concatenated in the given order and the page
    order of the result is then reversed (presumably because the paper
    stack is flipped before the back sides are scanned -- confirm against
    your scanner workflow). The two page sequences are finally interleaved
    front, back, front, back, ...

    Param:
        front_side_pdfs: a path, or a list of paths, of the front-side scans.
            If the scan is split into sections, pass them in order as a list.
        reverse_side_pdfs: a path, or a list of paths, of the reverse-side
            scans, passed the same way.
        output_file: the output file.

    Raises:
        ValueError: if the front and reverse sides do not contain the same
            number of pages in total (raised by ``cross``).
    """
    if isinstance(front_side_pdfs, str):
        front_side_pdfs = [front_side_pdfs]
    if isinstance(reverse_side_pdfs, str):
        reverse_side_pdfs = [reverse_side_pdfs]

    # Keep intermediates in a private scratch directory so that files named
    # tmp1.pdf/tmp2.pdf/tmp3.pdf in the working directory are never
    # overwritten or deleted, and clean up even when a step fails.
    tmp_dir = tempfile.mkdtemp()
    try:
        fronts = os.path.join(tmp_dir, 'fronts.pdf')
        backs = os.path.join(tmp_dir, 'backs.pdf')
        backs_reversed = os.path.join(tmp_dir, 'backs_reversed.pdf')

        concatenate(front_side_pdfs, fronts)
        concatenate(reverse_side_pdfs, backs)
        reverse(backs, backs_reversed)
        cross(fronts, backs_reversed, output_file)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
# Script entry point: merge the sample front/back scans found in the
# current working directory.
if __name__ == '__main__':
    double_scan_pdf_merge(
        front_side_pdfs='a.pdf',
        reverse_side_pdfs='b.pdf',
        output_file='output.pdf',
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment