Skip to content

Instantly share code, notes, and snippets.

@uzl
Created February 18, 2020 07:08
Show Gist options
  • Save uzl/b7c5360561b5bc3a01e02e8eea863534 to your computer and use it in GitHub Desktop.
Save uzl/b7c5360561b5bc3a01e02e8eea863534 to your computer and use it in GitHub Desktop.
Cropping important part
from pathlib import Path
from pprint import pprint
import cv2
import numpy as np
import matplotlib.pyplot as plt
import concurrent
from tqdm import tqdm
# Just for visualization. By default it is inactive
def display_img(img_list):
    """Show every image of ``img_list`` on a single matplotlib figure.

    The images are placed on a grid that is four columns wide. An image
    whose ``shape`` has exactly two entries is drawn with the gray
    colormap; any other image is passed to ``imshow`` as is.
    """
    figure = plt.figure(figsize=(12, 8))
    n_cols = 4
    n_rows = (len(img_list) // n_cols) + 1
    # subplot positions are 1-based
    for position, image in enumerate(img_list, start=1):
        axes = figure.add_subplot(n_rows, n_cols, position)
        if len(image.shape) != 2:
            axes.imshow(image)
        else:
            axes.imshow(image, cmap='gray')
    plt.show()
# Track the filename if there is any error during cropping
def write_error_log(file_name):
    """Append ``file_name`` as one line to ``cropping_error_log.txt``.

    The log file is opened in append mode relative to the current working
    directory, and ``file_name`` is converted with ``str`` before writing.
    """
    with open('cropping_error_log.txt', 'a') as log_file:
        log_file.writelines((str(file_name), '\n'))
# cropping text part
def crop_important_region(img_src_path, is_display=False):
    """Crop the largest detected region of an image and return it as 128x128.

    Steps: read the file as grayscale, smooth it with a bilateral filter,
    binarize it with an inverted adaptive threshold, dilate the result,
    detect contours, take the bounding box of the contour with the largest
    area, paste that crop onto a white square canvas whose side equals the
    longer side of the box, and resize the canvas to 128x128.

    Args:
        img_src_path: Location of the image file. It is converted with
            ``str`` before being handed to OpenCV, so path objects work too.
        is_display: When true, the intermediate images are shown through
            ``display_img``.

    Returns:
        The 128x128 ``uint8`` grayscale image.

    Raises:
        OSError: If OpenCV could not read the image.
        ValueError: If no contour was detected in the image.
    """
    img = cv2.imread(str(img_src_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising
        raise OSError(f'Could not read image: {img_src_path}')

    # bilateral filter cleans background noise while preserving edge sharpness
    smooth_img = cv2.bilateralFilter(img, 9, 75, 75)

    # inverse binary output is needed for contour detection
    binary_img = cv2.adaptiveThreshold(
        smooth_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2)

    # dilate to make the edges thicker
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    dilated_img = cv2.dilate(binary_img, kernel, iterations=11)

    # OpenCV 3.x returns (image, contours, hierarchy) and 4.x returns
    # (contours, hierarchy); index -2 is the contour list in both cases
    contours = cv2.findContours(
        dilated_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError(f'No contour found in image: {img_src_path}')

    # bounding box of the contour with the largest area
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # crop only that region
    cropped_image = img[y:y + h, x:x + w]

    # white square canvas with the cropped region placed around its center;
    # p and q translate box coordinates into canvas coordinates
    center_x, center_y = x + (w / 2), y + (h / 2)
    larg_side = max(w, h)
    square_img = np.full((larg_side, larg_side), 255, dtype=np.uint8)
    p = int(larg_side // 2 - center_x)
    q = int(larg_side // 2 - center_y)
    square_img[y + q:y + h + q, x + p:x + w + p] = cropped_image

    resized_128 = cv2.resize(square_img, (128, 128))

    if is_display:
        # the color copy with a green bounding box is only needed for display
        new_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        cv2.rectangle(new_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        display_img([img, smooth_img, binary_img, dilated_img,
                     new_img, cropped_image, resized_128])

    return resized_128
# ------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------
# collect path of all images
train_data_path = Path.cwd()/'dataset'/'bengali_ai_dataset'/'test'
iamge_path_list = []
for extension in ['*.png', '*.jpg']:
    iamge_path_list.extend(train_data_path.rglob(extension))
print(f'Found {len(iamge_path_list)} images')

# create mapping of filename (stem, without extension) to image path
filename_to_path_mapping = dict()
for path in iamge_path_list:
    filename = path.stem
    if filename in filename_to_path_mapping:
        # explicit raise instead of assert so the check survives `python -O`
        raise ValueError(
            f'Duplicate file name: {path} conflicts with '
            f'{filename_to_path_mapping[filename]}')
    filename_to_path_mapping[filename] = path

# each result is written to resized_128/<name of the source image's folder>/
output_dir = Path.cwd()/'dataset'/ 'resized_128'
for filename, path in tqdm(filename_to_path_mapping.items()):
    try:
        img_128 = crop_important_region(str(path), is_display=False)
        target_path = output_dir/ path.parent.name
        target_path.mkdir(parents=True, exist_ok=True)
        target_path = target_path/path.name
        # cv2.imwrite returns False on failure instead of raising
        if not cv2.imwrite(str(target_path), img_128):
            raise OSError(f'Could not write image: {target_path}')
    except Exception:
        # best effort: record the failing file and continue with the rest.
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        write_error_log(str(path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment