Last active
November 28, 2023 09:14
-
-
Save documentprocessing/9aa38bf170ac59f13c84b3d76282e2d8 to your computer and use it in GitHub Desktop.
Add annotations to, and extract annotations from, PDF documents using the pypdf library. See https://products.documentprocessing.com/annotation/python/pypdf/ for more details.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add a clickable Link annotation and a Rectangle annotation to the first
# page of "documentprocessing.pdf" and save the result as a new PDF.

# Import necessary classes and modules from the pypdf library
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Link, Rectangle

# Read the existing PDF file named "documentprocessing.pdf"
reader = PdfReader("documentprocessing.pdf")

# Get the first page of the PDF
page = reader.pages[0]

# Create a new PDF writer and add the page to it; only this one page is
# copied, so it becomes page index 0 of the output document.
writer = PdfWriter()
writer.add_page(page)

# Create a Link annotation
annotation = Link(
    # Provide the URL
    url="https://example.com/",
    # Provide the rectangle (size and coordinates) for the link annotation
    rect=(50, 600, 200, 650),
)

# Create a Rectangle annotation; it uses the same rect as the link above,
# so the two annotations cover the same area of the page.
annotation2 = Rectangle(
    rect=(50, 600, 200, 650),
)

# Add the Link and Rectangle annotations to the first page of the new PDF
writer.add_annotation(page_number=0, annotation=annotation)
writer.add_annotation(page_number=0, annotation=annotation2)

# Write the annotated PDF to a new file named "link-annotated-pdf.pdf"
with open("link-annotated-pdf.pdf", "wb") as fp:
    writer.write(fp)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add a single Rectangle annotation to the first page of
# "documentprocessing.pdf" and save the result as "annotated-pdf.pdf".

# Import necessary modules from pypdf library
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Rectangle

# Read the PDF file using PdfReader
reader = PdfReader("documentprocessing.pdf")

# Get the first page of the PDF
page = reader.pages[0]

# Create a new PdfWriter object and add the page from the original PDF;
# only this page is copied, so it is page index 0 in the output.
writer = PdfWriter()
writer.add_page(page)

# Define the rectangle annotation with coordinates (35, 690, 170, 720)
annotation = Rectangle(rect=(35, 690, 170, 720))

# Add the rectangle annotation to the first page of the new PDF
writer.add_annotation(page_number=0, annotation=annotation)

# Write the modified PDF to a new file ("annotated-pdf.pdf")
with open("annotated-pdf.pdf", "wb") as fp:
    writer.write(fp)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add a Text annotation to the first page of "documentprocessing.pdf"
# and save the result as "annotated.pdf".

# Import necessary classes and modules from the pypdf library
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Text

# Open the original PDF file for reading
reader = PdfReader("documentprocessing.pdf")

# Get the first page of the original PDF
page = reader.pages[0]

# Create a new PDF writer object and add the first page of the original
# PDF to it; it becomes page index 0 of the new document.
writer = PdfWriter()
writer.add_page(page)

# Create a Text annotation with specified text and rectangle coordinates
annotation = Text(
    text="Hi there,\nWelcome to DocumentProcessing.com",
    rect=(180, 705, 240, 755),
)

# Add the annotation to the first page of the new PDF
writer.add_annotation(page_number=0, annotation=annotation)

# Open the new PDF file for writing in binary mode and write the
# content of the new PDF to it
with open("annotated.pdf", "wb") as fp:
    writer.write(fp)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the subtype and rectangle of every annotation found in
# "documentprocessing.pdf".

# Import necessary modules from pypdf library
from pypdf import PdfReader

# Create a PdfReader object to read the PDF file
reader = PdfReader("documentprocessing.pdf")

# Iterate through all pages in the PDF
for page in reader.pages:
    # Pages without annotations have no "/Annots" key, so check first
    if "/Annots" in page:
        # Iterate through each annotation on the page
        for annot in page["/Annots"]:
            # Resolve the entry to the actual annotation object
            obj = annot.get_object()
            # Extract relevant information from the annotation object
            # - "subtype" represents the type of annotation (e.g., text, link)
            # - "location" represents the rectangle coordinates of the annotation
            annotation = {"subtype": obj["/Subtype"], "location": obj["/Rect"]}
            # Print the extracted annotation information
            print(annotation)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment