Skip to content

Instantly share code, notes, and snippets.

@justpeanuts
Forked from Samathy/dumppdfcomments.py
Created April 4, 2021 13:38

Revisions

  1. @Samathy Samathy created this gist Jan 5, 2018.
    40 changes: 40 additions & 0 deletions dumppdfcomments.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    import popplerqt4
    import sys
    import PyQt4


    def _highlighted_text(page, annotation, page_width, page_height):
        """Return the page text lying under a highlight annotation.

        Each quad of the annotation is turned into a rectangle by scaling
        ``points[0]`` and ``points[2]`` by the page size (the quad coordinates
        are presumably page-normalised, hence the multiplication) and the text
        inside that rectangle is requested from the page.

        Args:
            page: The poppler page the annotation belongs to.
            annotation: A ``Poppler.HighlightAnnotation``.
            page_width: Width of the page, as reported by ``page.pageSize()``.
            page_height: Height of the page, as reported by ``page.pageSize()``.

        Returns:
            The text of every quad, each followed by a single space (the
            trailing space is kept so the printed output does not change).
        """
        pieces = []
        for quad in annotation.highlightQuads():
            first_corner, opposite_corner = quad.points[0], quad.points[2]
            region = PyQt4.QtCore.QRectF()
            region.setCoords(first_corner.x() * page_width,
                             first_corner.y() * page_height,
                             opposite_corner.x() * page_width,
                             opposite_corner.y() * page_height)
            pieces.append(str(page.text(region)) + ' ')
        # Join once instead of growing a string inside the loop.
        return "".join(pieces)


    def main():
        """Print the text under each highlight annotation of a PDF.

        The PDF path is taken from ``sys.argv[1]``. Every annotation on every
        page is counted; for highlight annotations the covered text is printed
        on its own line. A summary line with the annotation count is printed
        at the end.

        Raises:
            SystemExit: If no path was given on the command line or the
                document could not be loaded (message goes to stderr,
                exit status is non-zero).
        """
        # Fail with a usage message instead of an IndexError traceback.
        if len(sys.argv) < 2:
            prog = sys.argv[0] if sys.argv else "dumppdfcomments.py"
            sys.exit("usage: {} FILE.pdf".format(prog))

        path = sys.argv[1]
        doc = popplerqt4.Poppler.Document.load(path)
        # load() yields None when the file is missing or not a readable PDF.
        if doc is None:
            sys.exit("could not load PDF: {}".format(path))

        total_annotations = 0
        for page_index in range(doc.numPages()):
            page = doc.page(page_index)
            if page is None:
                # NOTE(review): poppler may fail to build a single page;
                # skip it rather than abort the whole run.
                continue

            size = page.pageSize()
            page_width, page_height = size.width(), size.height()

            for annotation in page.annotations():
                if not isinstance(annotation, popplerqt4.Poppler.Annotation):
                    continue
                total_annotations += 1
                if isinstance(annotation,
                              popplerqt4.Poppler.HighlightAnnotation):
                    print(_highlighted_text(page, annotation,
                                            page_width, page_height))

        if total_annotations > 0:
            print("{} annotation(s) found".format(total_annotations))
        else:
            print("no annotations found")

    # Script entry point: run the dump only when this file is executed
    # directly, not when it is imported as a module.
    if __name__ == "__main__":
        main()