Skip to content

Instantly share code, notes, and snippets.

@quantenProjects
Last active February 9, 2019 12:15
Show Gist options
  • Save quantenProjects/7e77ce93aa22eaaeedbeaf35e31f0fd3 to your computer and use it in GitHub Desktop.
Save quantenProjects/7e77ce93aa22eaaeedbeaf35e31f0fd3 to your computer and use it in GitHub Desktop.
Combine the annotated pages of many PDFs into one file
import poppler, os.path
import os
# Build (and print) a pdftk command line that concatenates, from every PDF in
# the current directory, the page range spanning its ink-annotated pages.
# The command is only printed, never executed.

try:
    from shlex import quote as shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as shell_quote  # Python 2 fallback

# Alphabet used to build pdftk input handles (name kept as originally spelled).
handels = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# File the generated pdftk command writes to.
OUTPUT_NAME = "merged.pdf"


def pdftk_handle(index):
    """Return the pdftk input handle for the file at zero-based *index*.

    The first 26 files get ``A`` .. ``Z``; later files continue with ``AA``,
    ``AB``, ... so that more than 26 PDFs no longer raise ``IndexError``.

    Raises:
        ValueError: if *index* is negative.
    """
    if index < 0:
        raise ValueError("index must be non-negative")
    letters = []
    remaining = index + 1
    # Bijective base-26 conversion (there is no "zero" digit: A=1 .. Z=26).
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, len(handels))
        letters.append(handels[digit])
    return "".join(reversed(letters))


def ink_page_span(doc):
    """Return the zero-based ``(first, last)`` ink-annotated pages of *doc*.

    *doc* is a poppler document. A page counts as annotated when at least one
    of its annotation mappings has type ``POPPLER_ANNOT_INK``; each such page
    number is printed. Returns ``None`` when no page qualifies.
    """
    annotated = []
    for page_no in range(doc.get_n_pages()):
        for mapping in doc.get_page(page_no).get_annot_mapping():
            if mapping is None:
                continue
            if mapping.annot.get_annot_type().value_name == 'POPPLER_ANNOT_INK':
                print(page_no)
                annotated.append(page_no)
                break  # one ink annotation is enough to mark the page
    if not annotated:
        return None
    return min(annotated), max(annotated)


def build_pdftk_command(names, spans):
    """Return the pdftk command line for *names* and their page *spans*.

    Args:
        names: input PDF file names, in handle order.
        spans: one entry per name, either ``None`` (file contributes no
            pages) or a zero-based ``(first, last)`` tuple. The whole range
            is emitted, including unannotated pages in between.

    File names are shell-quoted so that spaces and metacharacters survive
    pasting the command into a shell.
    """
    parts = ["pdftk "]
    for index, name in enumerate(names):
        parts.append(pdftk_handle(index) + "=" + shell_quote(name) + " ")
    parts.append("cat ")
    for index, span in enumerate(spans):
        if span is None:
            continue
        first, last = span
        # pdftk page numbers are one-based.
        parts.append(pdftk_handle(index) + str(first + 1) + "-" + str(last + 1) + " ")
    parts.append(" output " + OUTPUT_NAME)
    return "".join(parts)


# Skip the output of a previous run: it would otherwise be listed as an input
# of the very command that writes it.
filenames = sorted(
    name for name in os.listdir('.')
    if name.endswith(".pdf") and name != OUTPUT_NAME
)

page_spans = []
for filename in filenames:
    print(filename)
    # NOTE(review): the path is not percent-encoded; confirm poppler accepts
    # such URIs for names containing spaces or '%'.
    path = 'file://%s' % os.path.realpath(filename)
    doc = poppler.document_new_from_file(path, None)
    page_spans.append(ink_page_span(doc))

pdfjam_str = build_pdftk_command(filenames, page_spans)
print(pdfjam_str)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment