-
-
Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
# Install OpenCV (imported as cv2) with:
# pip install opencv-python
# Install PIL (Pillow) with:
# pip install Pillow
import cv2 | |
from PIL import Image | |
def mark_region(image_path):
    """Draw boxes around detected regions of an image and collect their corners.

    The file at ``image_path`` is read with OpenCV, converted to grayscale,
    Gaussian-blurred, adaptively thresholded (inverted binary) and dilated so
    that adjacent contours merge. The bounding rectangle of every external
    contour that passes the position/area filters below is drawn onto the
    image in magenta and recorded.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the image with the
        rectangles drawn on it, and a list with one
        ``[(x, y), (2200, y + h)]`` entry per drawn rectangle. When no
        contour qualifies, the unmarked image and an empty list are returned.
    """
    # NOTE: cv2.imread returns None (it does not raise) when the file cannot
    # be read; in that case the cvtColor call below is what fails.
    im = cv2.imread(image_path)

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs.
    # findContours returns either 2 or 3 values depending on the OpenCV
    # release; the contour list is the first of 2 or the second of 3.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # cv2.rectangle draws on ``im`` in place and returns it, so binding
    # ``image`` up front keeps the return value defined even when no contour
    # passes the filters (previously an UnboundLocalError).
    image = im
    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, w, h = cv2.boundingRect(c)

        # NOTE(review): the pixel thresholds (600, 1000, 10000, 2400, 2000)
        # and the fixed right edge 2200 look tuned to one page layout and
        # resolution - confirm before reusing on other documents.
        # NOTE(review): indentation was lost in this copy of the code; the
        # two tests below are reconstructed as independent checks, so a
        # contour satisfying both is recorded twice - confirm against the
        # original.
        if y >= 600 and x <= 1000:
            if area > 10000:
                image = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
                line_items_coordinates.append([(x, y), (2200, y + h)])

        if y >= 2400 and x <= 2000:
            image = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    return image, line_items_coordinates
If we want to optimize this code, we can write it like this:
import cv2
from PIL import Image
from pdf2image import convert_from_path
def mark_region(im):
    """Draw boxes around detected regions of an image and collect their corners.

    The image is converted to grayscale, Gaussian-blurred, adaptively
    thresholded (inverted binary) and dilated so that adjacent contours
    merge. The bounding rectangle of every external contour that passes the
    position/area filters below is drawn in magenta and recorded.

    Args:
        im: Image as loaded by ``cv2.imread`` (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``). The rectangles are
            drawn onto this array in place.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the marked image and a
        list with one ``[(x, y), (2200, y + h)]`` entry per drawn rectangle.
        When no contour qualifies, ``im`` itself and an empty list are
        returned.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate so that neighbouring contours fuse into larger blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # findContours returns either 2 or 3 values depending on the OpenCV
    # release; taking index 0 unconditionally picks the image instead of the
    # contour list when 3 values come back.
    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    # cv2.rectangle draws on ``im`` in place and returns it, so binding
    # ``image`` up front keeps the return value defined even when no contour
    # passes the filters (previously an UnboundLocalError).
    image = im
    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, w, h = cv2.boundingRect(c)

        # NOTE(review): the pixel thresholds and the fixed right edge 2200
        # look tuned to one page layout and resolution - confirm before
        # reusing on other documents.
        if y >= 600 and x <= 1000 and area > 10000:
            image = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])
        elif y >= 2400 and x <= 2000:
            image = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    return image, line_items_coordinates
# Placeholder: replace with the path of the Poppler "bin" folder.
# (The raw-string prefix must touch the opening quote; ``r '...'`` is a
# syntax error.)
poppler_path = r'you should write poppler bin folder path '

# Placeholder: replace the first argument with the PDF path. The second
# positional argument of convert_from_path is the rendering DPI.
pages = convert_from_path("you should write here pdf path", 480, poppler_path=poppler_path)

# Save every page as a JPEG, read it back with OpenCV, mark the detected
# regions and write the marked copy next to it with a "Marked_" prefix.
for i, page in enumerate(pages):
    image_name = f"Deneme_{i}.JPEG"
    page.save(image_name, "JPEG")
    im = cv2.imread(image_name)
    marked_image, coordinates = mark_region(im)
    cv2.imwrite(f"Marked_{image_name}", marked_image)
Thanks a lot, man. Before, only a few areas were getting marked, but after revamping and using your code it's marking every single area.
Thank you once again. Keep up the good work!