Created
March 25, 2018 14:36
-
-
Save lukas2511/850425d9da51c45b9aeab4d989015dc8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import collections | |
import PyPDF2 | |
def _last_page_per_slide(label_starts, num_pages):
    """Map each slide's first page index to the index of its last page.

    label_starts: collection of page indices at which a new page-label
        range begins (the even-positioned entries of ``/Nums``).
    num_pages: total number of pages in the input document.

    Returns an OrderedDict keyed by the first page index of each slide, in
    ascending page order, whose value is the last page index belonging to
    that slide.
    """
    last_page = collections.OrderedDict()
    current_start = None
    for index in range(num_pages):
        if index in label_starts:
            current_start = index
        if current_start is None:
            # No label range covers this page (malformed label tree with no
            # entry at or before it). Keep the page as its own slide rather
            # than scanning backwards forever or silently dropping it.
            last_page[index] = index
        else:
            # Later pages of the same range overwrite earlier ones, so the
            # final value is the last overlay step of the slide.
            last_page[current_start] = index
    return last_page


def main():
    """Merge the overlay steps of a presentation PDF into one page per slide.

    Reads the ``infile`` and ``outfile`` command-line arguments, looks up the
    page-label ranges stored in the input PDF, keeps only the last page of
    every range and writes those pages (plus the document metadata, if any)
    to ``outfile``.

    Raises SystemExit with an explanatory message when the input has no
    page labels or when ``outfile`` refers to the same file as ``infile``.
    """
    import os  # local import: only needed for the same-file safety check

    argp = argparse.ArgumentParser(description="Merge PDF presentation slide overlay foo")
    argp.add_argument('infile', type=str, metavar="infile.pdf", help="source pdf")
    argp.add_argument('outfile', type=str, metavar="outfile.pdf", help="target pdf")
    args = argp.parse_args()

    # The reader fetches page data from the stream on demand, so the input
    # file has to stay open until the output has been written completely.
    with open(args.infile, "rb") as infile:
        # Opening the target with "wb" truncates it; refuse to do that to
        # the very file that is still being read.
        if os.path.exists(args.outfile) and os.path.samefile(args.infile, args.outfile):
            raise SystemExit("outfile must not be the same file as infile")

        reader = PyPDF2.PdfFileReader(infile)
        num_pages = reader.getNumPages()

        # getDocumentInfo() returns None for PDFs without an /Info dictionary.
        info = reader.getDocumentInfo()
        print("Document Title: %s" % (info.title if info is not None else None))
        print("Input file has %d slides" % num_pages)

        # Collect the page indices at which a new page label range starts.
        # /Nums is a flat array of (start index, label dictionary) pairs;
        # only the start indices are needed to find the slide boundaries.
        try:
            nums = reader.trailer["/Root"]["/PageLabels"]["/Nums"]
        except KeyError:
            raise SystemExit(
                "Input file has no /PageLabels /Nums array; cannot detect slide overlays"
            ) from None
        label_starts = set(nums[0::2])

        # Find last page for each page-number-change
        pages = _last_page_per_slide(label_starts, num_pages)

        # Write new pdf
        writer = PyPDF2.PdfFileWriter()
        if info is not None:
            writer.addMetadata(info)
        for page in pages.values():
            writer.addPage(reader.getPage(page))
        print("Output file has %d slides" % writer.getNumPages())

        with open(args.outfile, "wb") as outfile:
            writer.write(outfile)

    print("All done :)")
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment