-
-
Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
# use this command to install open cv2
# pip install opencv-python

# use this command to install PIL
# pip install Pillow

import cv2
from PIL import Image


def mark_region(image_path):
    """Draw boxes around selected contours of an image and return their coordinates.

    The image is converted to grayscale, blurred, turned into an inverted
    binary image with an adaptive threshold, and dilated so that adjacent
    contours merge. Each external contour of the dilated image is then
    checked against two hard-coded position/size rules; for every rule it
    satisfies, a rectangle is drawn on the image and recorded.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        the rectangles drawn on it, and a list of
        ``[(x, y), (2200, y + h)]`` entries, one per rectangle drawn. The
        list is empty when no contour satisfies either rule.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV releases and 3 in others;
    # the contour list is the first of two or the second of three.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Start from the unmodified image so the return value is always defined,
    # even when no contour passes the rules below.
    image = im
    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, w, h = cv2.boundingRect(c)

        # The pixel limits below (600, 1000, 2200, 2400, 2000, 10000) are
        # hard-coded; presumably tuned to one page layout and scan
        # resolution -- TODO confirm before using on other documents.
        if y >= 600 and x <= 1000:
            if area > 10000:
                image = cv2.rectangle(image, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
                line_items_coordinates.append([(x, y), (2200, y + h)])

        # NOTE(review): the published listing lost its indentation; this
        # check is assumed to sit at loop level rather than inside the
        # previous ``if`` -- confirm. A contour that passes both rules is
        # drawn and recorded twice.
        if y >= 2400 and x <= 2000:
            image = cv2.rectangle(image, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    return image, line_items_coordinates
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call python3 Marking_ROI.py
to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post, the mark_region()
code can be used the following way:

# use this command to install open cv2
# pip install opencv-python
# use this command to install matplotlib
# pip install matplotlib
import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit
FILENAME
to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).

Of course, thank you for the time spent on “improving” and “correcting” the original author’s scripts. The way I work, and pretty much everyone in my field works, is: if you’re going to do the work, do it well.
“Improved” details would be helpful for knowing what someone is getting themselves into. A simple one-line note saying to expect to modify the code, because it is not complete, would help users (or myself) know that it needs to be modified to work.
I’ll try this out again; see if it works.
EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.
Thank you for the constructive feedback. My article on Medium was written particularly for users exploring the document intelligence domain. As mentioned by @jlumbroso, it was just an introduction to a concept which I thought might be worth sharing with the community. I don't know about your ways of working, but for me it's all about getting the concept first and then jumping on to the code.
The gists here on Github were specifically made as a supporting material for the original article. Again @jlumbroso thank you for helping out with the spell checks and typos.
Hope this answers your questions about the article intent and typos in code.
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call
python3 Marking_ROI.py
to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post, the mark_region()
code can be used the following way:

# use this command to install open cv2
# pip install opencv-python
# use this command to install matplotlib
# pip install matplotlib
import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit
FILENAME
to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).
@jlumbroso - Thank you for the kind words 👍
Dear @akash-ch2812 or @jlumbroso,
I'm having some trouble using this code on some PDFs I'm working on, I thought either you might have some ideas on what's going wrong.
I have a single PDF called, "DNR_WFH.pdf". I was able to use the first section of code below to split the PDF into separate pages and name them accordingly.
from pdf2image import convert_from_path

pdfs = r"C:\Users\mhiebing\Desktop\DNR_WFH.pdf"
# 350 is passed as the second positional argument of convert_from_path
# (the rendering DPI in pdf2image's documented signature).
pages = convert_from_path(pdfs, 350)

# Save every page as Page_1.jpg, Page_2.jpg, ... in the current directory.
# enumerate(start=1) replaces the hand-maintained page counter.
for i, page in enumerate(pages, start=1):
    image_name = f"Page_{i}.jpg"
    page.save(image_name, "JPEG")
For the second section, I'm only looking at the first JPEG to keep things simple. When I call the mark_region(image_path)
function nothing happens. Is there supposed to be a window where I outline the boxes we want to extract text from? Here's what I have for the second section:
import cv2
import matplotlib.pyplot as plt
def mark_region(image_path):
#define the mark_region method
FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image
image = cv2.imread(image_path)
# define threshold of regions to ignore
THRESHOLD_REGION_IGNORE = 40
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,30)
# Dilate to combine adjacent text contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
dilate = cv2.dilate(thresh, kernel, iterations=4)
# Find contours, highlight text areas, and extract ROIs
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
line_items_coordinates = []
for c in cnts:
area = cv2.contourArea(c)
x, y, w, h = cv2.boundingRect(c)
if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE:
continue
image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3)
line_items_coordinates.append([(x,y), (x+w, y+h)])
return image, line_items_coordinates
mark_region
Thank you for putting up the article and supplying the code!
@Matthew-Hiebing
I'm just trying to adapt the existing fantastic code and I found your question.
This snippet of your code is inside the function mark_region but should be outside it, because it is the part that calls mark_region(FILENAME)
and passes it a FILENAME:
#define the mark_region method
FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image
Thank you so much for your work.
There were problems with opencv-python-4.5.5.62
.
Use opencv-python==4.1.2.30
.
if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE: continue image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3) line_items_coordinates.append([(x,y), (x+w, y+h)])
Thanks a lot man, Before I was only getting few area marked but after revamp and using your code it's marking every single area
Thank you once again. Keep up the good work
if we want to optimize this code:
import cv2
from PIL import Image
from pdf2image import convert_from_path
def mark_region(im):
    """Draw boxes around selected contours of an image and return their coordinates.

    The image is converted to grayscale, blurred, turned into an inverted
    binary image with an adaptive threshold, and dilated so that adjacent
    contours merge. Each external contour of the dilated image that
    satisfies one of two hard-coded position/size rules gets one rectangle
    drawn on ``im`` and one entry in the returned list.

    Args:
        im: Image as accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``
            (the caller in this file obtains it from ``cv2.imread``).

    Returns:
        A tuple ``(image, line_items_coordinates)``: the image with the
        rectangles drawn on it, and a list of ``[(x, y), (2200, y + h)]``
        entries, one per rectangle. The list is empty when no contour
        satisfies either rule.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate so that neighbouring contours merge into larger blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # findContours returns 2 values in some OpenCV releases and 3 in others;
    # the contour list is the first of two or the second of three, so a
    # plain ``[0]`` would pick the wrong element on the 3-value variant.
    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    # Start from the input image so the return value is always defined,
    # even when no contour passes the rules below.
    image = im
    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, _, h = cv2.boundingRect(c)

        # Hard-coded pixel limits; presumably tuned to one page layout and
        # rendering resolution -- TODO confirm before reusing elsewhere.
        passes_area_rule = y >= 600 and x <= 1000 and area > 10000
        passes_lower_rule = y >= 2400 and x <= 2000

        # Both rules draw and record the same rectangle, so a contour is
        # handled once if it passes either of them.
        if passes_area_rule or passes_lower_rule:
            image = cv2.rectangle(image, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    return image, line_items_coordinates
# Placeholder values: replace both strings with real paths before running.
# The raw-string prefix must touch the opening quote (``r'...'``); a space
# between them is a syntax error.
poppler_path = r'you should write poppler bin folder path '
pages = convert_from_path("you should write here pdf path", 480, poppler_path=poppler_path)

# For every page: save it as a JPEG, read it back with OpenCV, mark the
# regions, and write the annotated copy next to it as Marked_<name>.
for i, page in enumerate(pages):
    image_name = f"Deneme_{i}.JPEG"
    page.save(image_name, "JPEG")
    im = cv2.imread(image_name)
    marked_image, coordinates = mark_region(im)
    cv2.imwrite(f"Marked_{image_name}", marked_image)
You're welcome.