-
-
Save lpillmann/d76eb4f4eea0320bb35dcd1b2a4575ee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
'''Crop an image to just the portions containing text. | |
Usage: | |
./crop_morphology.py path/to/image.jpg | |
This will place the cropped image in path/to/image.crop.png. | |
For details on the methodology, see | |
http://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html | |
Script created by Dan Vanderkam (https://github.com/danvk) | |
Adapted to Python 3 by Lui Pillmann (https://github.com/luipillmann) | |
''' | |
import glob | |
import os | |
import random | |
import sys | |
import random | |
import math | |
import json | |
from collections import defaultdict | |
import cv2 | |
from PIL import Image, ImageDraw | |
import numpy as np | |
from scipy.ndimage.filters import rank_filter | |
def dilate(ary, N, iterations): | |
"""Dilate using an NxN '+' sign shape. ary is np.uint8.""" | |
kernel = np.zeros((N,N), dtype=np.uint8) | |
kernel[(N-1)//2,:] = 1 # Bug solved with // (integer division) | |
dilated_image = cv2.dilate(ary / 255, kernel, iterations=iterations) | |
kernel = np.zeros((N,N), dtype=np.uint8) | |
kernel[:,(N-1)//2] = 1 # Bug solved with // (integer division) | |
dilated_image = cv2.dilate(dilated_image, kernel, iterations=iterations) | |
return dilated_image | |
def props_for_contours(contours, ary): | |
"""Calculate bounding box & the number of set pixels for each contour.""" | |
c_info = [] | |
for c in contours: | |
x,y,w,h = cv2.boundingRect(c) | |
c_im = np.zeros(ary.shape) | |
cv2.drawContours(c_im, [c], 0, 255, -1) | |
c_info.append({ | |
'x1': x, | |
'y1': y, | |
'x2': x + w - 1, | |
'y2': y + h - 1, | |
'sum': np.sum(ary * (c_im > 0))/255 | |
}) | |
return c_info | |
def union_crops(crop1, crop2): | |
"""Union two (x1, y1, x2, y2) rects.""" | |
x11, y11, x21, y21 = crop1 | |
x12, y12, x22, y22 = crop2 | |
return min(x11, x12), min(y11, y12), max(x21, x22), max(y21, y22) | |
def intersect_crops(crop1, crop2): | |
x11, y11, x21, y21 = crop1 | |
x12, y12, x22, y22 = crop2 | |
return max(x11, x12), max(y11, y12), min(x21, x22), min(y21, y22) | |
def crop_area(crop): | |
x1, y1, x2, y2 = crop | |
return max(0, x2 - x1) * max(0, y2 - y1) | |
def find_border_components(contours, ary): | |
borders = [] | |
area = ary.shape[0] * ary.shape[1] | |
for i, c in enumerate(contours): | |
x,y,w,h = cv2.boundingRect(c) | |
if w * h > 0.5 * area: | |
borders.append((i, x, y, x + w - 1, y + h - 1)) | |
return borders | |
def angle_from_right(deg): | |
return min(deg % 90, 90 - (deg % 90)) | |
def remove_border(contour, ary): | |
"""Remove everything outside a border contour.""" | |
# Use a rotated rectangle (should be a good approximation of a border). | |
# If it's far from a right angle, it's probably two sides of a border and | |
# we should use the bounding box instead. | |
c_im = np.zeros(ary.shape) | |
r = cv2.minAreaRect(contour) | |
degs = r[2] | |
if angle_from_right(degs) <= 10.0: | |
box = cv2.boxPoints(r) | |
box = np.int0(box) | |
cv2.drawContours(c_im, [box], 0, 255, -1) | |
cv2.drawContours(c_im, [box], 0, 0, 4) | |
else: | |
x1, y1, x2, y2 = cv2.boundingRect(contour) | |
cv2.rectangle(c_im, (x1, y1), (x2, y2), 255, -1) | |
cv2.rectangle(c_im, (x1, y1), (x2, y2), 0, 4) | |
return np.minimum(c_im, ary) | |
def find_components(edges, max_components=16): | |
"""Dilate the image until there are just a few connected components. | |
Returns contours for these components.""" | |
# Perform increasingly aggressive dilation until there are just a few | |
# connected components. | |
count = 21 | |
dilation = 5 | |
n = 1 | |
while count > 16: | |
n += 1 | |
dilated_image = dilate(edges, N=3, iterations=n) | |
dilated_image = np.uint8(dilated_image) | |
_, contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) | |
count = len(contours) | |
#print dilation | |
#Image.fromarray(edges).show() | |
#Image.fromarray(255 * dilated_image).show() | |
return contours | |
def find_optimal_components_subset(contours, edges): | |
"""Find a crop which strikes a good balance of coverage/compactness. | |
Returns an (x1, y1, x2, y2) tuple. | |
""" | |
c_info = props_for_contours(contours, edges) | |
c_info.sort(key=lambda x: -x['sum']) | |
total = np.sum(edges) / 255 | |
area = edges.shape[0] * edges.shape[1] | |
c = c_info[0] | |
del c_info[0] | |
this_crop = c['x1'], c['y1'], c['x2'], c['y2'] | |
crop = this_crop | |
covered_sum = c['sum'] | |
while covered_sum < total: | |
changed = False | |
recall = 1.0 * covered_sum / total | |
prec = 1 - 1.0 * crop_area(crop) / area | |
f1 = 2 * (prec * recall / (prec + recall)) | |
#print '----' | |
for i, c in enumerate(c_info): | |
this_crop = c['x1'], c['y1'], c['x2'], c['y2'] | |
new_crop = union_crops(crop, this_crop) | |
new_sum = covered_sum + c['sum'] | |
new_recall = 1.0 * new_sum / total | |
new_prec = 1 - 1.0 * crop_area(new_crop) / area | |
new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall) | |
# Add this crop if it improves f1 score, | |
# _or_ it adds 25% of the remaining pixels for <15% crop expansion. | |
# ^^^ very ad-hoc! make this smoother | |
remaining_frac = c['sum'] / (total - covered_sum) | |
new_area_frac = 1.0 * crop_area(new_crop) / crop_area(crop) - 1 | |
if new_f1 > f1 or ( | |
remaining_frac > 0.25 and new_area_frac < 0.15): | |
print('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % ( | |
i, covered_sum, new_sum, total, remaining_frac, | |
crop_area(crop), crop_area(new_crop), area, new_area_frac, | |
f1, new_f1)) | |
crop = new_crop | |
covered_sum = new_sum | |
del c_info[i] | |
changed = True | |
break | |
if not changed: | |
break | |
return crop | |
def pad_crop(crop, contours, edges, border_contour, pad_px=15): | |
"""Slightly expand the crop to get full contours. | |
This will expand to include any contours it currently intersects, but will | |
not expand past a border. | |
""" | |
bx1, by1, bx2, by2 = 0, 0, edges.shape[0], edges.shape[1] | |
if border_contour is not None and len(border_contour) > 0: | |
c = props_for_contours([border_contour], edges)[0] | |
bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5 | |
def crop_in_border(crop): | |
x1, y1, x2, y2 = crop | |
x1 = max(x1 - pad_px, bx1) | |
y1 = max(y1 - pad_px, by1) | |
x2 = min(x2 + pad_px, bx2) | |
y2 = min(y2 + pad_px, by2) | |
return crop | |
crop = crop_in_border(crop) | |
c_info = props_for_contours(contours, edges) | |
changed = False | |
for c in c_info: | |
this_crop = c['x1'], c['y1'], c['x2'], c['y2'] | |
this_area = crop_area(this_crop) | |
int_area = crop_area(intersect_crops(crop, this_crop)) | |
new_crop = crop_in_border(union_crops(crop, this_crop)) | |
if 0 < int_area < this_area and crop != new_crop: | |
print('%s -> %s' % (str(crop), str(new_crop))) | |
changed = True | |
crop = new_crop | |
if changed: | |
return pad_crop(crop, contours, edges, border_contour, pad_px) | |
else: | |
return crop | |
def downscale_image(im, max_dim=2048): | |
"""Shrink im until its longest dimension is <= max_dim. | |
Returns new_image, scale (where scale <= 1). | |
""" | |
a, b = im.size | |
if max(a, b) <= max_dim: | |
return 1.0, im | |
scale = 1.0 * max_dim / max(a, b) | |
new_im = im.resize((int(a * scale), int(b * scale)), Image.ANTIALIAS) | |
return scale, new_im | |
def process_image(path, out_path): | |
orig_im = Image.open(path) | |
scale, im = downscale_image(orig_im) | |
edges = cv2.Canny(np.asarray(im), 100, 200) | |
# TODO: dilate image _before_ finding a border. This is crazy sensitive! | |
_, contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) | |
borders = find_border_components(contours, edges) | |
borders.sort(key=lambda i_x1_y1_x2_y2: (i_x1_y1_x2_y2[3] - i_x1_y1_x2_y2[1]) * (i_x1_y1_x2_y2[4] - i_x1_y1_x2_y2[2])) | |
border_contour = None | |
if len(borders): | |
border_contour = contours[borders[0][0]] | |
edges = remove_border(border_contour, edges) | |
edges = 255 * (edges > 0).astype(np.uint8) | |
# Remove ~1px borders using a rank filter. | |
maxed_rows = rank_filter(edges, -4, size=(1, 20)) | |
maxed_cols = rank_filter(edges, -4, size=(20, 1)) | |
debordered = np.minimum(np.minimum(edges, maxed_rows), maxed_cols) | |
edges = debordered | |
contours = find_components(edges) | |
if len(contours) == 0: | |
print('%s -> (no text!)' % path) | |
return | |
crop = find_optimal_components_subset(contours, edges) | |
crop = pad_crop(crop, contours, edges, border_contour) | |
crop = [int(x / scale) for x in crop] # upscale to the original image size. | |
#draw = ImageDraw.Draw(im) | |
#c_info = props_for_contours(contours, edges) | |
#for c in c_info: | |
# this_crop = c['x1'], c['y1'], c['x2'], c['y2'] | |
# draw.rectangle(this_crop, outline='blue') | |
#draw.rectangle(crop, outline='red') | |
#im.save(out_path) | |
#draw.text((50, 50), path, fill='red') | |
#orig_im.save(out_path) | |
#im.show() | |
text_im = orig_im.crop(crop) | |
text_im.save(out_path) | |
print('%s -> %s' % (path, out_path)) | |
if __name__ == '__main__': | |
if len(sys.argv) == 2 and '*' in sys.argv[1]: | |
files = glob.glob(sys.argv[1]) | |
random.shuffle(files) | |
else: | |
files = sys.argv[1:] | |
for path in files: | |
out_path = path.replace('.jpg', '.crop.png') | |
#out_path = path.replace('.png', '.crop.png') # .png as input | |
if os.path.exists(out_path): continue | |
try: | |
process_image(path, out_path) | |
except Exception as e: | |
print('%s %s' % (path, e)) |
same error here "not enough values to unpack"
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # fails with error "not enough values to unpack (expected 3, got 2)"
Change it to:
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
Thank you, Deepak!!
Hi Lui, Ive been implementing the code for some time now, I always get borders=[] after every run and the file in outpath is saved as the inpath file, without any contouring. Contours and Edges are both populated, when I run find_border_components(contours, edges) in the terminal, I get []. Im debugging as dont use Ubuntu and not sure how to run bash files.
permission denied is my error
@ srikanthsampathi
I hope this answer is still valuable to you. If you are using an Ubuntu system, you need to use the following commands in succession for the bash to work:
sudo chmod +x crop_morphology.py
./crop_morphology.py image_name.png
Same error as @faridelnasire
not enough values to unpack (expected 3, got 2)