Last active
November 20, 2017 11:08
-
-
Save vsooda/dfdc993d79061e6ebd081ebd64b7a36c to your computer and use it in GitHub Desktop.
icdar2013 converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python2.7 | |
#coding=utf-8 | |
import os | |
import cv2 | |
import codecs | |
import ast | |
from lxml import etree | |
# do_crop crops the image for recognition
def draw_rects(img, rects, color):
    """Draw every rectangle of ``rects`` onto ``img`` in place.

    Each entry of ``rects`` is unpacked as ``(x1, y1, x2, y2)`` and handed to
    ``cv2.rectangle`` as two corner points, using ``color`` and thickness 3.
    """
    thickness = 3
    for rect in rects:
        x1, y1, x2, y2 = rect
        top_left = (x1, y1)
        bottom_right = (x2, y2)
        cv2.rectangle(img, top_left, bottom_right, color, thickness)
def do_convert(label_name, img_name):
    """Parse a ground-truth label file into rectangles and their texts.

    Every non-blank line holds four integer coordinates followed by the
    text, separated by single spaces, e.g. ``38, 43, 920, 215, "Tiredness"``.
    Commas attached to the coordinates and double quotes in the text are
    removed.

    Args:
        label_name: path of the label file to read.
        img_name: unused; kept so existing callers keep working.

    Returns:
        ``(rects, texts)`` where ``rects`` is a list of
        ``[left, top, right, bottom]`` integer lists and ``texts`` the
        matching list of strings.

    Raises:
        ValueError: if a non-blank line has fewer than five fields or a
            coordinate is not an integer.
    """
    with open(label_name, 'r') as f:
        lines = [line.strip() for line in f]

    rects = []
    texts = []
    for line in lines:
        # A trailing empty line is not an annotation; skip it.
        if not line:
            continue
        # Split at most four times so that text containing spaces stays in
        # one piece as the fifth field.
        words = line.split(' ', 4)
        if len(words) != 5:
            raise ValueError(
                'malformed label line in %s: %r' % (label_name, line))
        left, top, right, bottom = [
            int(word.replace(',', '')) for word in words[:4]
        ]
        text = words[4].replace('"', '')
        rects.append([left, top, right, bottom])
        texts.append(text)
    return rects, texts
def crop_and_save(anno_text, img, rects, texts):
    """Show the image region covered by each rectangle in a "text" window.

    The previous body used ``top``/``bottom``/``left``/``right`` without ever
    defining them, so any call raised ``NameError``; the coordinates are now
    taken from ``rects``.

    Args:
        anno_text: unused; kept for interface compatibility.
        img: image array sliced as ``img[top:bottom, left:right]``.
        rects: iterable of ``(left, top, right, bottom)`` rectangles.
        texts: unused; kept for interface compatibility.

    NOTE(review): despite the name, nothing is written to disk here, and the
    caller presumably has to run ``cv2.waitKey`` for the window to refresh.
    """
    for left, top, right, bottom in rects:
        text_region = img[top:bottom, left:right].copy()
        cv2.imshow("text", text_region)
def read_xml(xml_path):
    """Collect the labelled boxes stored in an annotation XML file.

    Every ``<object>`` element is inspected; objects whose ``<name>`` is just
    ``text`` (after stripping) are skipped. For the others the name, with all
    spaces removed, and the four ``<bndbox>`` values are printed and recorded.

    Returns:
        A list of ``[left, top, right, bottom, text]`` entries; the
        coordinates are the element texts as read, not converted to int.
    """
    print(xml_path)
    document = etree.parse(xml_path)
    collected = []
    for obj in document.xpath('//object'):
        label = obj.find('name').text
        if label.strip() == 'text':
            continue
        label = label.replace(' ', '')

        box = obj.find('bndbox')
        left = box.find('xmin').text
        right = box.find('xmax').text
        top = box.find('ymin').text
        bottom = box.find('ymax').text

        entry = [left, top, right, bottom, label]
        print('%s %s %s %s %s' % tuple(entry))
        collected.append(entry)
    return collected
def write_xml(gen_name, folder, base_name, rects, texts, width, height, depth):
    """Write an ``<annotation>`` XML file describing one image and its boxes.

    The document gets ``<folder>``, ``<filename>`` (``base_name`` plus
    ``.jpg``), a ``<size>`` element with width/height/depth, and one
    ``<object>`` per rectangle, each named ``text`` with a ``<bndbox>`` of
    xmin/ymin/xmax/ymax. ``texts`` is accepted but not used.

    The result is written to ``gen_name`` as pretty-printed UTF-8 with an
    XML declaration.
    """
    def add_leaf(parent, tag, value):
        # Create <tag>value</tag> and attach it as the last child of parent.
        leaf = etree.Element(tag)
        leaf.text = value
        parent.append(leaf)

    root = etree.XML('''<?xml version="1.0" encoding="utf-8"?><annotation></annotation>''')

    add_leaf(root, "folder", folder)
    add_leaf(root, "filename", base_name + ".jpg")

    size = etree.Element("size")
    for tag, value in (("width", width), ('height', height), ('depth', depth)):
        add_leaf(size, tag, str(value))
    root.append(size)

    for left, top, right, bottom in rects:
        obj = etree.Element("object")
        add_leaf(obj, 'name', 'text')

        bndbox = etree.Element('bndbox')
        corners = (('xmin', left), ('ymin', top), ('xmax', right), ('ymax', bottom))
        for tag, value in corners:
            add_leaf(bndbox, tag, str(value))
        obj.append(bndbox)

        root.append(obj)

    document = etree.ElementTree(root)
    document.write(gen_name, xml_declaration=True, encoding="utf-8", pretty_print=True)
def do_crop(img, rects, texts, base_name, word_path, word_label):
    """Save one cropped image per rectangle and append its label line.

    Crops are named ``<base_name>_<NN>.jpg`` (two-digit running index) and
    written to ``word_path`` joined by plain string concatenation. For each
    crop a ``"<file name> <text>"`` line is appended to ``word_label``,
    which is opened in append mode as UTF-8.
    """
    assert len(texts) == len(rects)
    with codecs.open(word_label, 'a', encoding='utf-8') as label_file:
        for idx, (left, top, right, bottom) in enumerate(rects):
            crop_name = "%s_%02d.jpg" % (base_name, idx)
            patch = img[top:bottom, left:right].copy()
            cv2.imwrite(word_path + crop_name, patch)
            label_file.write('%s %s\n' % (crop_name, texts[idx]))
def convert_folder(label_path, gen_path, img_path, dataset, word_path=None,
                   word_label=None, verbose=True):
    """Convert every label file under ``label_path`` into an XML annotation.

    For each label file named ``<prefix>_<base>.<ext>`` the function:

    * appends ``"<image path> <xml path>"`` to the list file
      ``<dataset>.txt`` (recreated on every call),
    * parses the label file with ``do_convert``,
    * reads ``<img_path><base>.jpg`` and writes ``<gen_path><base>.xml``
      with ``write_xml``,
    * optionally crops every box with ``do_crop``,
    * optionally displays the annotated image and waits for a key press.

    Args:
        label_path: directory walked recursively for label files.
        gen_path: output directory prefix for XML files (created if absent);
            joined by string concatenation, so it should end with a separator.
        img_path: directory prefix of the ``.jpg`` images, joined the same way.
        dataset: name used for the ``<dataset>.txt`` list file.
        word_path: if given, directory prefix receiving the cropped words.
        word_label: label file for the crops; removed up front if it exists.
        verbose: when true (the previous hard-coded behaviour) each annotated
            image is shown and the function blocks on ``cv2.waitKey``.

    Raises:
        IndexError: if a label file name contains no underscore.
        IOError: if an image cannot be read.
    """
    if not os.path.exists(gen_path):
        os.makedirs(gen_path)
    if word_path is not None:
        if not os.path.exists(word_path):
            os.makedirs(word_path)
        # do_crop appends to the label file, so start from a clean slate.
        if word_label is not None and os.path.exists(word_label):
            os.remove(word_label)

    folder = ""
    # The with-block closes the list file even when a conversion step raises.
    with open(dataset + '.txt', 'w') as list_file:
        for root, dirs, files in os.walk(label_path):
            for name in files:
                label_name = os.path.join(root, name)
                stem = os.path.splitext(os.path.basename(name))[0]
                # Drop the leading "<prefix>_" and keep everything after it,
                # so image stems that themselves contain several underscores
                # are no longer truncated.
                base_name = stem.split('_', 1)[1]
                gen_name = gen_path + base_name + ".xml"
                img_name = img_path + base_name + ".jpg"
                list_file.write(img_name + " " + gen_name + '\n')
                print('%s %s %s %s' % (label_name, base_name, gen_name, img_name))

                rects, texts = do_convert(label_name, img_name)
                img = cv2.imread(img_name)
                if img is None:
                    # cv2.imread reports failure by returning None.
                    raise IOError('cannot read image: %s' % img_name)
                width = img.shape[1]
                height = img.shape[0]
                depth = img.shape[2]
                write_xml(gen_name, folder, base_name, rects, texts,
                          width, height, depth)

                if word_path is not None:
                    do_crop(img, rects, texts, base_name, word_path, word_label)

                if verbose:
                    # Boxes are drawn only after cropping so that the crops
                    # stay free of the overlay.
                    draw_rects(img, rects, (255, 0, 0))
                    # Scale so that the longer side becomes 700 pixels.
                    ratio = max(width / 700.0, height / 700.0)
                    dst_size = (int(width / ratio), int(height / ratio))
                    resize_img = cv2.resize(img, dst_size)
                    cv2.imshow("anno", resize_img)
                    cv2.waitKey()
def convert_icdar13():
    """Run the dataset conversion for the 'train' split.

    Change the argument to 'val' to convert the validation split instead.
    """
    convert_dataset('train')
def parse_icdar17(label_name):
    """Parse a comma-separated quadrilateral label file into bounding boxes.

    Each non-blank line is ``x1,y1,x2,y2,x3,y3,x4,y4,difficult,text``; the
    text itself may contain commas. Lines whose ninth field is ``'1'`` are
    skipped, as are boxes with zero width or height. The four corner points
    are reduced to their axis-aligned bounding box, and double quotes and
    spaces are removed from the text.

    Args:
        label_name: path of the label file to read.

    Returns:
        A list of ``[left, top, right, bottom, text]`` entries with integer
        coordinates.

    Raises:
        ValueError: if a non-blank line has fewer than ten fields or a
            coordinate is not an integer.
    """
    results = []
    with open(label_name, 'r') as f:
        lines = [line.strip() for line in f]

    for line in lines:
        # A blank (e.g. trailing) line used to abort the whole file.
        if not line:
            continue
        annos = line.split(',')
        if len(annos) < 10:
            raise ValueError(
                'malformed label line in %s: %r' % (label_name, line))
        difficult = annos[8]
        if difficult == '1':
            continue

        coords = []
        for token in annos[:8]:
            # Byte strings (Python 2) are decoded first, as before.
            if isinstance(token, bytes):
                token = token.decode('utf-8')
            # A UTF-8 byte-order mark glued to the first number would make
            # int() fail, so drop it before converting.
            coords.append(int(token.replace(u'\ufeff', u'').strip()))

        xs = coords[0::2]
        ys = coords[1::2]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        if left == right or top == bottom:
            continue

        # Fields from index 9 onwards are text that was split on its commas.
        text = ','.join(annos[9:])
        # NOTE(review): as displayed, the '(' / ')' / ':' replacements map a
        # character to itself; presumably the first argument of each was a
        # full-width character in the original file. Confirm before editing.
        text = text.replace('"','').replace('(', '(').replace(')', ')').replace(':', ':').replace(' ', '')
        print('%s %s %s %s %s' % (left, top, right, bottom, text))
        results.append([left, top, right, bottom, text])
    return results
def convert_icdar17_labels(source_dirs, target_dirs):
    """Rewrite every label file under ``source_dirs`` in the simple box format.

    Each source file is parsed with ``parse_icdar17``; the result is written
    to ``<target_dirs>/gt_<source stem>.txt`` as UTF-8, one
    ``left top right bottom text`` line per box. Files that fail to parse
    are reported on stdout and skipped.

    Args:
        source_dirs: directory walked recursively for source label files.
        target_dirs: output directory, created if it does not exist.
    """
    if not os.path.exists(target_dirs):
        os.makedirs(target_dirs)
    for root, dirs, files in os.walk(source_dirs):
        for name in files:
            source_name = os.path.join(root, name)
            try:
                results = parse_icdar17(source_name)
            except Exception:
                # Skipping broken files is deliberate, but a bare ``except``
                # would also swallow KeyboardInterrupt/SystemExit and make a
                # long run impossible to abort.
                print('%s parse failed' % source_name)
                continue
            base_name = os.path.splitext(os.path.basename(source_name))[0]
            text_name = '%s/gt_%s.txt' % (target_dirs, base_name)
            with codecs.open(text_name, 'w', encoding='utf-8') as f:
                for obj in results:
                    assert len(obj) == 5
                    f.write('%s %s %s %s %s\n' % tuple(obj))
def convert_icdar17():
    """Convert the 'val' split: rewrite its labels, then run the conversion.

    Labels are read from ``<dataset>/gt/`` and rewritten into
    ``<dataset>/labels/`` before ``convert_dataset`` is invoked.
    """
    dataset = 'val'
    convert_icdar17_labels(dataset + '/gt/', dataset + '/labels/')
    convert_dataset(dataset)
def convert_dataset(dataset):
    """Run ``convert_folder`` on the standard sub-directories of ``dataset``.

    Labels are read from ``labels/`` and images from ``img/``; XML files go
    to ``gen/``, word crops to ``crop/`` and their labels to ``word.txt``.
    All paths are built by string concatenation below ``<dataset>/``.
    """
    base_path = dataset + '/'
    convert_folder(
        base_path + '/labels/',
        base_path + '/gen/',
        base_path + '/img/',
        dataset,
        word_path=base_path + '/crop/',
        word_label=base_path + '/word.txt',
    )
def convert_card_format(label_name):
    """Split a combined card annotation file into per-image label files.

    Every non-blank line of ``label_name`` is ``<base name> <dict literal>``,
    where the dict maps a text to ``[(left, top), (right, bottom)]``. For
    each line ``labels/gt_<base name>.txt`` is written as UTF-8 with one
    ``left top right bottom text`` row per dict entry.

    The pieces after the base name are concatenated without the separating
    spaces before being parsed, so spaces inside the dict literal (including
    spaces inside its string keys) are dropped.

    Args:
        label_name: path of the combined annotation file.
    """
    convert_text_path = 'labels/'
    if not os.path.exists(convert_text_path):
        os.makedirs(convert_text_path)

    with open(label_name, 'r') as source:
        lines = [line.strip() for line in source]

    for line in lines:
        # Blank lines carry no annotation and would make literal_eval fail.
        if not line:
            continue
        words = line.split(' ')
        base_name = words[0]
        text_name = convert_text_path + 'gt_%s.txt' % base_name
        anno_str = ''.join(words[1:])
        # literal_eval only accepts Python literals, so no code is executed.
        # Parse before opening the output so that a malformed line does not
        # leave an empty label file behind.
        anno_dict = ast.literal_eval(anno_str)
        with codecs.open(text_name, 'w', encoding='utf-8') as out:
            for key, value in anno_dict.items():
                assert len(value) == 2
                left_top = value[0]
                right_bottom = value[1]
                left = left_top[0]
                top = left_top[1]
                right = right_bottom[0]
                bottom = right_bottom[1]
                # Only byte strings need decoding; text keys are used as is.
                if isinstance(key, bytes):
                    key = key.decode('utf-8')
                key = key.strip()
                out.write('%d %d %d %d %s\n' % (left, top, right, bottom, key))
def convert_card():
    """Convert the card data set located in the current directory.

    ``orig.txt`` is first split into per-image label files, then the regular
    dataset conversion runs with ``./`` as the dataset root.
    """
    convert_card_format('orig.txt')
    convert_dataset('./')
def convert_card_xml():
    """Convert XML-annotated card data located in the current directory.

    The XML files under ``annotations`` are turned into label files under
    ``labels``, then the regular dataset conversion runs with ``./`` as the
    dataset root.
    """
    convert_xml('annotations', 'labels')
    convert_dataset('./')
def test_convert_xml():
    """Manual smoke check: parse one sample annotation and print its boxes."""
    read_xml('annotations/card_ocr334.xml')
def convert_xml(xml_dir, text_dir):
    """Turn every XML annotation under ``xml_dir`` into a text label file.

    Each XML file is read with ``read_xml`` and its entries are written to
    ``<text_dir>/gt_<xml stem>.txt`` as UTF-8, one
    ``left top right bottom text`` line per entry. ``text_dir`` is created
    when missing.
    """
    if not os.path.exists(text_dir):
        os.makedirs(text_dir)
    for folder, _subdirs, filenames in os.walk(xml_dir):
        for filename in filenames:
            xml_name = os.path.join(folder, filename)
            entries = read_xml(xml_name)
            stem, _ext = os.path.splitext(os.path.basename(xml_name))
            text_name = '%s/gt_%s.txt' % (text_dir, stem)
            with codecs.open(text_name, 'w', encoding='utf-8') as out:
                for entry in entries:
                    assert len(entry) == 5
                    row = (entry[0], entry[1], entry[2], entry[3], entry[4])
                    out.write('%s %s %s %s %s\n' % row)
def test_parse_icdar17():
    """Manual smoke check: parse one sample label file and print its boxes."""
    parse_icdar17('gt/image_0.txt')
if __name__ == '__main__':
    # Exactly one entry point is active at a time; uncomment another one to
    # run a different conversion.
    #convert_icdar13()
    #convert_card()
    #convert_xml()
    #convert_card_xml()
    #test_parse_icdar17()
    convert_icdar17()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment