Last active
March 22, 2024 10:58
-
-
Save satish860/e1813cf5e227b64be50ba42ca0764c58 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Render OCR results stored in results.json into output.pdf: every recognised
# text box is drawn as a string, and any detected vertical/horizontal ruling
# lines are drawn as straight strokes, one PDF page per OCR page.

# Load the OCR results from the JSON file. The encoding is given explicitly so
# non-ASCII OCR text decodes the same way on every platform instead of
# depending on the locale default.
with open('results.json', encoding='utf-8') as f:
    ocr_results = json.load(f)

# Create a new PDF document.
pdf_filename = 'output.pdf'
c = canvas.Canvas(pdf_filename, pagesize=letter)

# NOTE(review): bbox values are passed to ReportLab unchanged. ReportLab's
# origin is the bottom-left corner and its unit is the point; if the OCR tool
# reports pixel coordinates with a top-left origin, the output will be
# vertically mirrored and mis-scaled -- confirm against the OCR output format.
# NOTE(review): presumably results.json maps a source file name to a list of
# page dicts; only the page lists are used here.
for pages in ocr_results.values():
    for page_data in pages:
        # The font is set for every page because showPage() resets the
        # canvas state (including the current font).
        c.setFont("Helvetica", 12)

        # Draw each recognised text at the first two coordinates of its box.
        for bbox_data in page_data['bboxes']:
            bbox = bbox_data['bbox']
            text = bbox_data['text']
            x, y = bbox[0], bbox[1]
            c.drawString(x, y, text)

        # Ruling lines are optional: a page without these keys simply gets
        # no lines instead of aborting the whole export with a KeyError.
        for line_data in page_data.get('vertical_lines', []):
            line = line_data['bbox']
            # Vertical stroke: x stays fixed at bbox[0], y runs bbox[1]..bbox[3].
            c.line(line[0], line[1], line[0], line[3])
        for line_data in page_data.get('horizontal_lines', []):
            line = line_data['bbox']
            # Horizontal stroke: y stays fixed at bbox[1], x runs bbox[0]..bbox[2].
            c.line(line[0], line[1], line[2], line[1])

        # Finish the current page and start a new one.
        c.showPage()

# Write the PDF document to disk.
c.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment