Last active
January 17, 2024 06:11
-
-
Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
Python code for marking regions of interest in an image for OCR
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install OpenCV (provides the cv2 module) with:
#   pip install opencv-python
# Install PIL (provided by the Pillow package) with:
#   pip install Pillow
import cv2 | |
from PIL import Image | |
def mark_region(image_path):
    """Draw boxes around candidate text regions of an image and collect them.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated so that adjacent text contours merge. Each external
    contour of the dilated mask is then filtered with hard-coded pixel
    thresholds, and every accepted contour is outlined on the image with a
    rectangle running from the contour's top-left corner to x = 2200.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A ``(image, line_items_coordinates)`` tuple. ``image`` is the loaded
        image with the rectangles drawn on it (unmodified when no contour
        qualifies). ``line_items_coordinates`` is a list of
        ``[(x, y), (2200, y + h)]`` corner pairs, one per rectangle drawn.

    Raises:
        ValueError: If ``cv2.imread`` cannot load an image from
            ``image_path``.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cv2.imread could not load an image from {image_path!r}")

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs.
    # findContours returns 2 values on some OpenCV versions and 3 on others;
    # pick whichever element holds the contour list.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, _, h = cv2.boundingRect(c)

        # NOTE(review): the indentation of this snippet was lost; the nesting
        # below (second check as a sibling of the area check) is a
        # reconstruction. With it, a contour satisfying both checks is
        # recorded twice -- confirm against the intended behaviour.
        if y >= 600 and x <= 1000:
            if area > 10000:
                # cv2.rectangle draws on `im` in place.
                cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
                line_items_coordinates.append([(x, y), (2200, y + h)])

            if y >= 2400 and x <= 2000:
                cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
                line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return `im` itself so the result is defined even when nothing was drawn.
    return im, line_items_coordinates
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If we want to optimize this code:
import cv2
from PIL import Image
from pdf2image import convert_from_path
def mark_region(im):
    """Draw boxes around candidate text regions of an image and collect them.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated; each external contour of the dilated mask is then
    filtered with hard-coded pixel thresholds. Every accepted contour is
    outlined on ``im`` with a rectangle running from the contour's top-left
    corner to x = 2200.

    Args:
        im: Image array as accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``. It is drawn on in place.

    Returns:
        A ``(image, line_items_coordinates)`` tuple. ``image`` is ``im`` with
        the rectangles drawn on it (unmodified when no contour qualifies).
        ``line_items_coordinates`` is a list of ``[(x, y), (2200, y + h)]``
        corner pairs, one per rectangle drawn.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate so that neighbouring contours merge into larger blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # findContours returns 2 values on some OpenCV versions and 3 on others,
    # so a fixed [0] index is not portable; pick the contour list explicitly.
    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, _, h = cv2.boundingRect(c)

        # Both branches of the original if/elif did the same work, so the two
        # conditions are combined; each contour is still recorded at most once.
        large_upper_left = y >= 600 and x <= 1000 and area > 10000
        lower_band = y >= 2400 and x <= 2000
        if large_upper_left or lower_band:
            # cv2.rectangle draws on `im` in place.
            cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return `im` itself so the result is defined even when nothing was drawn.
    return im, line_items_coordinates
# Driver: render every page of a PDF to a JPEG file, mark the regions of
# interest on each page, and write the annotated page next to it.

# Placeholder: folder containing the Poppler binaries, handed to pdf2image.
# (The raw-string prefix must touch the quote; `r '...'` is a syntax error.)
poppler_path = r'you should write poppler bin folder path '

# Placeholder PDF path; 480 is passed as the second positional argument
# (presumably the rendering DPI -- confirm against the pdf2image docs).
pages = convert_from_path("you should write here pdf path", 480, poppler_path=poppler_path)

for i, page in enumerate(pages):
    # Save the rendered page to disk, then load it back with OpenCV.
    image_name = f"Deneme_{i}.JPEG"
    page.save(image_name, "JPEG")
    im = cv2.imread(image_name)

    # `coordinates` holds the marked boxes; it is not used further here.
    marked_image, coordinates = mark_region(im)
    cv2.imwrite(f"Marked_{image_name}", marked_image)