Last active
December 30, 2015 05:48
-
-
Save olsososo/7784794 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- | |
from __future__ import division | |
import sys | |
import math | |
import string | |
from PIL import Image | |
from collections import Counter | |
def q():
    """Stop the program immediately by raising SystemExit with no exit status."""
    raise SystemExit()
if __name__ == '__main__': | |
if len(sys.argv) < 1: | |
print "Usage: %s image.jpg [dir]" % sys.argv[0] | |
else: | |
w = 50 | |
h = 100 | |
im = sys.argv[1] | |
im = Image.open(im) | |
im.load() | |
width,height = im.size | |
im = im.resize((w, h), Image.BILINEAR).convert('L') | |
data = list(im.getdata()) | |
f = open('./data.txt','w') | |
border = 11000 | |
real_width = 80 | |
real_height = 80 | |
left_threshold = 240 | |
right_threshold = 240 | |
min_space_threshold = int(math.ceil(0.8*(real_width / width) * w)) | |
max_space_threshold = int(math.ceil(1.2*(real_width / width) * w)) | |
left_min_deviation = 2 | |
right_min_deviation = 2 | |
distance_min_deviation = 2 | |
starts = [] | |
stops = [] | |
amount = [] | |
rows = {} | |
coordinate = [] | |
original_rows = [] | |
text_rows = [] | |
for i in xrange(h): | |
row = data[i*w:i*w+w] | |
total = reduce(lambda x, y: x + y,row) | |
amount.append(total) | |
if total > border: | |
f.write(''.ljust(50,'-')+"\r\n") | |
f.write(str(i)+" "+repr(row)+" "+str(total)+"\r\n") | |
else: | |
f.write(str(i)+" "+repr(row)+" "+str(total)+"\r\n") | |
rows[i] = {} | |
for r in xrange(len(row)): | |
if i not in starts and row[r] >= left_threshold and r+1 < w and row[r+1] < left_threshold: | |
starts.append(i) | |
rows[i]['start'] = r+1 | |
elif i in starts and row[r] < right_threshold and r+1 < w and row[r+1] >= right_threshold: | |
rows[i]['stop'] = r | |
stops.append(i) | |
break | |
if i not in stops and r+1 == w: | |
del rows[i] | |
f.close() | |
start_list = [] | |
stop_list = [] | |
start_times = [] | |
stop_times = [] | |
for k,v in rows.items(): | |
start_list.append(v['start']) | |
stop_list.append(v['stop']) | |
for i in start_list: | |
start_times.append(start_list.count(i)) | |
m = max(start_times) | |
start_index = start_times.index(m) | |
for i in stop_list: | |
stop_times.append(stop_list.count(i)) | |
m = max(stop_times) | |
stop_index = stop_times.index(m) | |
start = start_list[start_index] | |
stop = stop_list[stop_index] | |
distance = stop - start | |
#filter | |
for k,v in rows.items(): | |
if math.fabs(v['start'] - start) > left_min_deviation and math.fabs(v['stop'] - stop) > right_min_deviation \ | |
or (math.fabs(v['stop'] - v['start'] - distance) > distance_min_deviation): | |
del rows[k] | |
width_proportion = width / w | |
height_proportion = height / h | |
#coordinate,pixel | |
keys = rows.keys() | |
for index,item in enumerate(keys): | |
if (item - 1) not in keys: | |
rect = [int(math.ceil(width_proportion*(start-1))),int(math.ceil(height_proportion*(item-1)))] | |
original_rect = [start-1,item-1] | |
rect_text = [int(math.ceil(width_proportion*(stop+1))),int(math.ceil(height_proportion*(item-1)))] | |
elif index == len(keys) - 1 or keys[index + 1] - item != 1: | |
rect.extend([int(math.ceil(width_proportion*(stop+1))) ,int(math.ceil(height_proportion*(item+1)))]) | |
original_rect.extend([stop+1,item+1]) | |
rect_text.extend([width,int(math.ceil(height_proportion*(item+1)))]) | |
coordinate.append(rect) | |
original_rows.append(original_rect) | |
text_rows.append(rect_text) | |
target_dir = './thumb/' | |
text_name = [] | |
p_text_name = [] | |
name = [] | |
name_list = list(string.lowercase) | |
for index,item in enumerate(coordinate): | |
text_name.append(name_list[index]+'_text.jpg') | |
p_text_name.append('p'+name_list[index]+'_text.jpg') | |
name.append(name_list[index]+'.jpg') | |
im = Image.open(sys.argv[1]) | |
for index,item in enumerate(text_rows): | |
box = tuple(item) | |
newIm = im.crop(box) | |
newIm.save(target_dir+text_name[index]) | |
#tailor | |
im = Image.open(sys.argv[1]) | |
for index,item in enumerate(coordinate): | |
box = tuple(item) | |
newIm = im.crop(box) | |
newIm.save(target_dir+name[index]) | |
t_w = 180 | |
t_h = 40 | |
t_left_threshold = 200 | |
t_stop_threshold = 200 | |
t_left_min_deviation = 2 | |
t_stop_min_deviation = 2 | |
t_min_length = 15 | |
t_error = 10 | |
for p in text_name: | |
t_start_list = [] | |
t_stop_list = [] | |
t_start_times = [] | |
t_stop_times = [] | |
t_rows = [] | |
t_im = Image.open(target_dir+p) | |
t_width,t_height = t_im.size | |
t_im = t_im.resize((t_w, t_h), Image.BILINEAR).convert('L') | |
t_data = list(t_im.getdata()) | |
for t in xrange(t_h): | |
t_row = t_data[t*t_w:t*t_w+t_w] | |
t_rows.append(t_row) | |
total = reduce(lambda x, y: x + y,t_row) | |
for r in xrange(len(t_row)): | |
if t_row[r] >= t_left_threshold and r + 1 <t_w and t_row[r+1] < t_left_threshold: | |
t_start_list.append(r) | |
break | |
for i in t_start_list: | |
t_start_times.append(t_start_list.count(i)) | |
t_m = max(t_start_times) | |
t_start_index = t_start_times.index(t_m) #left | |
t_left_top = None #top | |
t_left_bottom = None #bottom | |
t_width_proportion = t_width / t_w | |
t_height_proportion = t_height / t_h | |
t_stop = [] | |
for i in xrange(len(t_start_list)): | |
if t_left_top is None and math.fabs(t_start_list[i] - t_start_index) <= t_left_min_deviation and i+2 < t_h and \ | |
math.fabs(t_start_list[i+1] - t_start_index) <= t_left_min_deviation and \ | |
math.fabs(t_start_list[i+2] - t_start_index) <= t_left_min_deviation: | |
t_left_top = i | |
if t_left_top is not None and math.fabs(t_start_list[i] - t_start_index) <= t_left_min_deviation and i+2 < t_h and \ | |
math.fabs(t_start_list[i+1] - t_start_index) > t_left_min_deviation and \ | |
math.fabs(t_start_list[i+2] - t_start_index) > t_left_min_deviation: | |
t_left_bottom = i | |
break | |
for t_r in xrange(len(t_rows)): | |
if t_r >= t_left_top and t_r <= t_left_bottom: | |
for t_rk in xrange(len(t_rows[t_r])): | |
if t_rk > t_start_index and t_rk <= t_w - t_min_length: | |
temp = t_rows[t_r][t_rk:t_rk+t_min_length] | |
t_b = True | |
for rp in temp: | |
if rp < t_left_threshold: | |
t_b = False | |
break | |
if t_b: | |
t_stop.append(t_rk) | |
break | |
t_stop_avg = reduce(lambda x, y: x + y,t_stop) / len(t_stop) | |
for abc in t_stop: | |
if math.fabs(abc - t_stop_avg) > t_error: | |
del t_stop[t_stop.index(abc)] | |
t_stop_index = max(t_stop) | |
t_box = [int(math.ceil(t_start_index*t_width_proportion)),int(math.ceil((t_left_top-1)*t_height_proportion))] | |
t_box.extend([int(math.ceil(t_stop_index*t_width_proportion)),int(math.ceil((t_left_bottom+1)*t_height_proportion))]) | |
t_im = Image.open(target_dir+p) | |
t_box = tuple(t_box) | |
newIm = t_im.crop(t_box) | |
newIm.save(target_dir+'p'+p) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment