retrography · November 20, 2024 14:45
diff --git a/annotex.py b/annotex.py
 #!/usr/bin/env python

 __author__ = 'Mahmood S. Zargar'

 import poppler
 import sys
 import urllib
 import os

 def main():
     """Print the annotations found in each PDF named on the command line.

     For every input file the base name and (when set) the document title
     are printed, followed by one entry per supported annotation: the text
     selected under the annotation's area and/or the annotation's own
     contents, each prefixed with the page number (and the page label when
     it differs). A per-file count of supported annotations ends the report.

     Exits with status 1 after printing a usage message when no input file
     is given on the command line.
     """
     # NOTE: print(...) with a single argument produces exactly the same
     # output as a Python 2 print statement.
     supported_types = ('Underline', 'Highlight', 'Strike-out', 'Squiggly',
                        'Text', 'Free-text', 'Caret')

     if len(sys.argv) < 2:
         print('Input file required. Please mention at least one.')
         print('Syntax: annotex input_file1.pdf [input_file2.pdf ...]')
         sys.exit(1)

     for file_name in sys.argv[1:]:
         print('')

         # The document loader is given a file:// URI built from the
         # absolute path of the input file.
         file_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(file_name))
         document = poppler.document_new_from_file(file_uri, None)
         print(os.path.basename(file_name))
         doc_title = document.get_property('title')
         if doc_title:
             print('(' + doc_title + ')')
         print('-----\n')

         all_annots = 0

         for i in range(document.get_n_pages()):
             page = document.get_page(i)
             for annot_mapping in page.get_annot_mapping():
                 annot = annot_mapping.annot
                 annot_type = annot.get_annot_type().value_nick
                 # Upper-case only the first character; slicing (rather than
                 # indexing) keeps an empty nick from raising IndexError.
                 annot_type = annot_type[:1].upper() + annot_type[1:]
                 if annot_type not in supported_types:
                     continue

                 all_annots += 1
                 page_no = str(page.get_index() + 1)
                 page_label = page.props.label
                 page_prompt = '[p. ' + page_no + ']'
                 # Show the label only when it adds information; a missing
                 # label is skipped instead of breaking the concatenation.
                 if page_label is not None and page_no != page_label:
                     page_prompt += '(' + page_label + ')'
                 page_prompt += ': \n'

                 # Swap the rectangle's corners and mirror it vertically
                 # against the page height before using it as a selection.
                 # NOTE(review): presumably the annotation area and the text
                 # selection use opposite vertical origins - confirm against
                 # the Poppler documentation.
                 area = annot_mapping.area.copy()
                 _, height = page.get_size()
                 area.x1, area.x2 = area.x2, area.x1
                 area.y1, area.y2 = area.y2, area.y1
                 area.y1 = height - area.y1
                 area.y2 = height - area.y2

                 # Strip only after the None check so a missing selection
                 # does not raise AttributeError.
                 annot_text = page.get_selected_text("POPPLER_SELECTION_WORD", area)
                 if annot_text is not None:
                     annot_text = annot_text.strip()
                 annot_cont = annot.get_contents()

                 # Selections that are empty or merely repeat the page
                 # number/label are not reported.
                 if annot_text is not None and annot_text not in ['', page_no, page_label]:
                     print(annot_type + ' Text ' + page_prompt + annot_text + '\n')
                 if annot_cont:
                     print(annot_type + ' Note ' + page_prompt + annot_cont + '\n')

         print('-----\n' + str(all_annots) + " annotation(s) found" + '\n\n')

 # Run the extractor only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python

	__author__ = 'Mahmood S. Zargar'

	import poppler
	import sys
	import urllib
	import os

	def main():
	    """Print the annotations found in each PDF named on the command line.

	    For every input file the base name and (when set) the document title
	    are printed, followed by one entry per supported annotation: the text
	    selected under the annotation's area and/or the annotation's own
	    contents, each prefixed with the page number (and the page label when
	    it differs). A per-file count of supported annotations ends the report.

	    Exits with status 1 after printing a usage message when no input file
	    is given on the command line.
	    """
	    # NOTE: print(...) with a single argument produces exactly the same
	    # output as a Python 2 print statement.
	    supported_types = ('Underline', 'Highlight', 'Strike-out', 'Squiggly',
	                       'Text', 'Free-text', 'Caret')

	    if len(sys.argv) < 2:
	        print('Input file required. Please mention at least one.')
	        print('Syntax: annotex input_file1.pdf [input_file2.pdf ...]')
	        sys.exit(1)

	    for file_name in sys.argv[1:]:
	        print('')

	        # The document loader is given a file:// URI built from the
	        # absolute path of the input file.
	        file_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(file_name))
	        document = poppler.document_new_from_file(file_uri, None)
	        print(os.path.basename(file_name))
	        doc_title = document.get_property('title')
	        if doc_title:
	            print('(' + doc_title + ')')
	        print('-----\n')

	        all_annots = 0

	        for i in range(document.get_n_pages()):
	            page = document.get_page(i)
	            for annot_mapping in page.get_annot_mapping():
	                annot = annot_mapping.annot
	                annot_type = annot.get_annot_type().value_nick
	                # Upper-case only the first character; slicing (rather than
	                # indexing) keeps an empty nick from raising IndexError.
	                annot_type = annot_type[:1].upper() + annot_type[1:]
	                if annot_type not in supported_types:
	                    continue

	                all_annots += 1
	                page_no = str(page.get_index() + 1)
	                page_label = page.props.label
	                page_prompt = '[p. ' + page_no + ']'
	                # Show the label only when it adds information; a missing
	                # label is skipped instead of breaking the concatenation.
	                if page_label is not None and page_no != page_label:
	                    page_prompt += '(' + page_label + ')'
	                page_prompt += ': \n'

	                # Swap the rectangle's corners and mirror it vertically
	                # against the page height before using it as a selection.
	                # NOTE(review): presumably the annotation area and the text
	                # selection use opposite vertical origins - confirm against
	                # the Poppler documentation.
	                area = annot_mapping.area.copy()
	                _, height = page.get_size()
	                area.x1, area.x2 = area.x2, area.x1
	                area.y1, area.y2 = area.y2, area.y1
	                area.y1 = height - area.y1
	                area.y2 = height - area.y2

	                # Strip only after the None check so a missing selection
	                # does not raise AttributeError.
	                annot_text = page.get_selected_text("POPPLER_SELECTION_WORD", area)
	                if annot_text is not None:
	                    annot_text = annot_text.strip()
	                annot_cont = annot.get_contents()

	                # Selections that are empty or merely repeat the page
	                # number/label are not reported.
	                if annot_text is not None and annot_text not in ['', page_no, page_label]:
	                    print(annot_type + ' Text ' + page_prompt + annot_text + '\n')
	                if annot_cont:
	                    print(annot_type + ' Note ' + page_prompt + annot_cont + '\n')

	        print('-----\n' + str(all_annots) + " annotation(s) found" + '\n\n')

	# Run the extractor only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()