Skip to content

Instantly share code, notes, and snippets.

@lukas2511
Created March 25, 2018 14:36
Show Gist options
  • Save lukas2511/850425d9da51c45b9aeab4d989015dc8 to your computer and use it in GitHub Desktop.
Save lukas2511/850425d9da51c45b9aeab4d989015dc8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import collections
import PyPDF2
def main(argv=None):
    """Collapse the overlay steps of a PDF presentation into one page per slide.

    The input PDF's /PageLabels number tree marks the page index at which each
    page label begins. All pages from one label start up to (but excluding)
    the next are treated as one slide, and only the last page of each such run
    is copied to the output file.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid command-line arguments, or when the input PDF
            has no /PageLabels /Nums entry.
    """
    argp = argparse.ArgumentParser(description="Merge PDF presentation slide overlay foo")
    argp.add_argument('infile', type=str, metavar="infile.pdf", help="source pdf")
    argp.add_argument('outfile', type=str, metavar="outfile.pdf", help="target pdf")
    args = argp.parse_args(argv)

    # Keep the input open until the output has been written, in case the
    # reader still needs the stream while the selected pages are serialized.
    with open(args.infile, "rb") as infile:
        reader = PyPDF2.PdfFileReader(infile)

        # The document info may be absent; guard before touching it.
        info = reader.getDocumentInfo()
        title = info.title if info is not None else None
        num_pages = reader.getNumPages()
        print("Document Title: %s" % title)
        print("Input file has %d slides" % num_pages)

        # Build dictionary of page number labels
        try:
            nums = reader.trailer["/Root"]["/PageLabels"]["/Nums"]
        except KeyError:
            # argp.error() prints the message and exits; it does not return.
            argp.error("%s has no /PageLabels /Nums entry, cannot detect slide overlays"
                       % args.infile)
        # /Nums is a flat [index, label, index, label, ...] array. Only the
        # keys (start indices) are consulted below; the int() conversion of
        # the /P prefix is kept so labels without a numeric prefix still fail.
        numdict = {nums[i]: int(nums[i + 1]['/P']) for i in range(0, len(nums), 2)}

        # Find last page for each page-number-change. Walking forward and
        # remembering the most recent label start avoids scanning backwards,
        # which would never terminate if no label range started at page 0;
        # such leading pages are grouped under an implicit start at index 0.
        pages = collections.OrderedDict()
        label_start = 0
        for i in range(num_pages):
            if i in numdict:
                label_start = i
            pages[label_start] = i

        # Write new pdf
        writer = PyPDF2.PdfFileWriter()
        if info is not None:
            writer.addMetadata(info)
        for page in pages.values():
            writer.addPage(reader.getPage(page))
        print("Output file has %d slides" % writer.getNumPages())
        with open(args.outfile, "wb") as outfile:
            writer.write(outfile)

    print("All done :)")
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment