lizettepreiss · January 20, 2023 03:59
diff --git a/FDFParser.py b/FDFParser.py

 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdftypes import resolve1

 # I exported the comments I had made in an Adobe Reader DC document to F:/temp/stn.fdf.
 # Now I wanted to access those comments outside of the Adobe Reader. Here is how I extracted the comments.

 # Extract the comments (annotations) from an exported FDF file and print, for each comment that
 # has text, the page it was made on and its contents.
 #
 # The file is opened in a `with` block so the handle is closed even if parsing raises.
 # All work stays inside the block because the objects are resolved from the open file.
 with open("F:/temp/stn.fdf", 'rb') as fdf_file:
     parser = PDFParser(fdf_file)
     doc = PDFDocument(parser)

     # Note re the next line in this code:
     # You might need to put a breakpoint here and actually look at the 'doc.catalog' variable to see what
     # the catalog value is and replace the one below with yours. 'FDF' and 'Annots' were what they were in
     # my case, but I've seen other example source code online of how to parse FDF files that had different
     # catalog values. I have no idea whether the catalog values vary between different PDF readers and even
     # between versions.
     # The fdf file I used in this example originated from exporting my comments I made in a PDF when using
     # Adobe Reader DC version 2020.006.20042
     fields = resolve1(doc.catalog['FDF'])['Annots']

     for i in fields:
         field = resolve1(i)

         # Note re the next line in this code:
         # You might need to put a breakpoint here and actually look at the 'field' variable to see what
         # the field names are that you want to extract. 'Page' and 'Contents' were what they were in my
         # case, but I've seen other example source code online showing how to parse FDF files that had
         # different field names. I have no idea whether the field names vary between different PDF readers
         # and even between versions.
         # The fdf file I used in this example originated from exporting my comments I made in a PDF when
         # using Adobe Reader DC version 2020.006.20042
         page, b_contents = field.get('Page'), field.get('Contents')

         # Annotations without a 'Contents' entry carry no comment text; skip them.
         if b_contents is None:
             continue

         # Always bind `contents` for this annotation. Previously a failed decode was swallowed,
         # which either raised NameError below or printed the previous annotation's text.
         if isinstance(b_contents, bytes):
             if b_contents.startswith(b'\xfe\xff'):
                 # A leading FE FF byte-order mark indicates UTF-16 text, which the default
                 # UTF-8 decode() cannot read.
                 contents = b_contents.decode('utf-16', errors='replace')
             else:
                 try:
                     contents = b_contents.decode()
                 except UnicodeDecodeError:
                     # Best effort: keep the readable part instead of dropping the comment.
                     contents = b_contents.decode(errors='replace')
         else:
             # Not a byte string (e.g. already text): fall back to its string form.
             contents = str(b_contents)

         print("The page number where the comment was made is " + str(page))
         print("The contents of your comment is " + contents)

 # I then wrote these values to a .xlsx file so that I could use them elsewhere 
 # (Writing to .xlsx will follow in another gist in due course).

	from pdfminer.pdfparser import PDFParser
	from pdfminer.pdfdocument import PDFDocument
	from pdfminer.pdftypes import resolve1

	# I exported the comments I had made in an Adobe Reader DC document to F:/temp/stn.fdf.
	# Now I wanted to access those comments outside of the Adobe Reader. Here is how I extracted the comments.

	# Extract the comments (annotations) from an exported FDF file and print, for each comment that
	# has text, the page it was made on and its contents.
	#
	# The file is opened in a `with` block so the handle is closed even if parsing raises.
	# All work stays inside the block because the objects are resolved from the open file.
	with open("F:/temp/stn.fdf", 'rb') as fdf_file:
	    parser = PDFParser(fdf_file)
	    doc = PDFDocument(parser)

	    # Note re the next line in this code:
	    # You might need to put a breakpoint here and actually look at the 'doc.catalog' variable to see what
	    # the catalog value is and replace the one below with yours. 'FDF' and 'Annots' were what they were in
	    # my case, but I've seen other example source code online of how to parse FDF files that had different
	    # catalog values. I have no idea whether the catalog values vary between different PDF readers and even
	    # between versions.
	    # The fdf file I used in this example originated from exporting my comments I made in a PDF when using
	    # Adobe Reader DC version 2020.006.20042
	    fields = resolve1(doc.catalog['FDF'])['Annots']

	    for i in fields:
	        field = resolve1(i)

	        # Note re the next line in this code:
	        # You might need to put a breakpoint here and actually look at the 'field' variable to see what
	        # the field names are that you want to extract. 'Page' and 'Contents' were what they were in my
	        # case, but I've seen other example source code online showing how to parse FDF files that had
	        # different field names. I have no idea whether the field names vary between different PDF readers
	        # and even between versions.
	        # The fdf file I used in this example originated from exporting my comments I made in a PDF when
	        # using Adobe Reader DC version 2020.006.20042
	        page, b_contents = field.get('Page'), field.get('Contents')

	        # Annotations without a 'Contents' entry carry no comment text; skip them.
	        if b_contents is None:
	            continue

	        # Always bind `contents` for this annotation. Previously a failed decode was swallowed,
	        # which either raised NameError below or printed the previous annotation's text.
	        if isinstance(b_contents, bytes):
	            if b_contents.startswith(b'\xfe\xff'):
	                # A leading FE FF byte-order mark indicates UTF-16 text, which the default
	                # UTF-8 decode() cannot read.
	                contents = b_contents.decode('utf-16', errors='replace')
	            else:
	                try:
	                    contents = b_contents.decode()
	                except UnicodeDecodeError:
	                    # Best effort: keep the readable part instead of dropping the comment.
	                    contents = b_contents.decode(errors='replace')
	        else:
	            # Not a byte string (e.g. already text): fall back to its string form.
	            contents = str(b_contents)

	        print("The page number where the comment was made is " + str(page))
	        print("The contents of your comment is " + contents)

	# I then wrote these values to a .xlsx file so that I could use them elsewhere
	# (Writing to .xlsx will follow in another gist in due course).
No results found