Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save documentprocessing/9aa38bf170ac59f13c84b3d76282e2d8 to your computer and use it in GitHub Desktop.
Save documentprocessing/9aa38bf170ac59f13c84b3d76282e2d8 to your computer and use it in GitHub Desktop.
Add and extract annotations from PDF documents using the pypdf library. Check https://products.documentprocessing.com/annotation/python/pypdf/ for more details.
# Import necessary modules from pypdf library
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Rectangle
# Example: add a rectangle annotation to the first page of a PDF.

# Build the annotation up front; it does not depend on the input file.
# NOTE(review): rect is presumably (x_lower_left, y_lower_left,
# x_upper_right, y_upper_right) in PDF units -- confirm against the pypdf docs.
annotation = Rectangle(
    rect=(35, 690, 170, 720),
)

# Load the source document and keep a handle on its first page.
reader = PdfReader("documentprocessing.pdf")
page = reader.pages[0]

# The output document starts empty and receives only that first page,
# so the annotation below targets page index 0 of the writer.
writer = PdfWriter()
writer.add_page(page)
writer.add_annotation(page_number=0, annotation=annotation)

# Persist the annotated copy; the file is opened in binary write mode.
with open("annotated-pdf.pdf", "wb") as output_stream:
    writer.write(output_stream)
# Import the necessary classes from the pypdf library
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text
# Example: attach a text (note) annotation to the first page of a PDF.

# Describe the note first: the message it carries and the rectangle it
# is anchored to. Both values are passed to pypdf unchanged.
annotation = Text(
    rect=(180, 705, 240, 755),
    text="Hi there,\nWelcome to DocumentProcessing.com",
)

# Read the source document and select its first page.
reader = PdfReader("documentprocessing.pdf")
page = reader.pages[0]

# Copy that page into a fresh writer, then add the note to it
# (page index 0 in the writer is the page that was just added).
writer = PdfWriter()
writer.add_page(page)
writer.add_annotation(page_number=0, annotation=annotation)

# Save the result to "annotated.pdf" using binary write mode.
with open("annotated.pdf", "wb") as output_stream:
    writer.write(output_stream)
# Import necessary modules from pypdf library
from pypdf import PdfReader
# Example: list the subtype and rectangle of every annotation in a PDF.

# Open the document whose annotations will be listed.
reader = PdfReader("documentprocessing.pdf")

for page in reader.pages:
    # A page without an "/Annots" entry has nothing to report.
    if "/Annots" not in page:
        continue

    for annot_ref in page["/Annots"]:
        # Resolve the entry to the underlying annotation dictionary.
        annot_obj = annot_ref.get_object()

        # Keep only the two fields of interest:
        #   "subtype"  -> value stored under "/Subtype"
        #   "location" -> value stored under "/Rect"
        annotation = {
            "subtype": annot_obj["/Subtype"],
            "location": annot_obj["/Rect"],
        }
        print(annotation)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment