Created
October 30, 2024 17:01
-
-
Save sskanishk/575b101254d1c7c536970b7a1c6ab993 to your computer and use it in GitHub Desktop.
Highlight specified text strings in a PDF using Python and PyMuPDF.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the required library:
#   pip install pymupdf
# --------------------------------------------
import fitz # PyMuPDF | |
def highlight_text_in_pdf(pdf_path, output_path, targets):
    """Highlight every occurrence of the given strings in a PDF and save a copy.

    Each page of the document at ``pdf_path`` is searched for each target
    with ``page.search_for``; every returned area gets its own highlight
    annotation. The annotated document is then written to ``output_path``.
    Progress is reported on stdout with ``print``.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path the highlighted PDF is written to.
        targets: Iterable of values to search for. Each value is converted
            with ``str()``, so numbers may be passed directly. A single bare
            string is treated as one target rather than being iterated
            character by character.

    Returns:
        The total number of highlight annotations added across all pages.
    """
    # A lone string is itself iterable; without this guard "2024" would be
    # searched as the four separate targets "2", "0", "2", "4".
    if isinstance(targets, str):
        targets = [targets]
    search_terms = [str(target) for target in targets]

    highlight_count = 0
    # The context manager closes the document even if searching, annotating
    # or saving raises, which the explicit close() call did not guarantee.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            for term in search_terms:
                areas = page.search_for(term)
                if not areas:
                    print(f"Target not found on page {page_number}: '{term}'")
                    continue
                for area in areas:
                    highlight = page.add_highlight_annot(area)
                    highlight.update()
                    highlight_count += 1
                # Reported once per target and page, not once per match.
                print(f"Highlighted: '{term}' on page {page_number}")
        pdf_document.save(output_path)

    print(f"Saved highlighted PDF as: {output_path}")
    return highlight_count
if __name__ == "__main__":
    # Example invocation. Guarded so that importing this module does not
    # immediately try to open "input_file.pdf" and write an output file.
    targets = ["123456", "Important Note", "RN789012", "2024"]
    highlight_text_in_pdf("input_file.pdf", "output_highlighted_file.pdf", targets)
# -----------------------------------------------
# To run:
#   python3 highlight_text_in_pdf.py   (or `python`, depending on your system)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment