Skip to content

Instantly share code, notes, and snippets.

@vsooda
Last active November 20, 2017 11:08
Show Gist options
  • Save vsooda/dfdc993d79061e6ebd081ebd64b7a36c to your computer and use it in GitHub Desktop.
Save vsooda/dfdc993d79061e6ebd081ebd64b7a36c to your computer and use it in GitHub Desktop.
icdar2013 converter
#! /usr/bin/env python2.7
#coding=utf-8
import os
import cv2
import codecs
import ast
from lxml import etree
# Crop the annotated text regions out of each image for recognition
def draw_rects(img, rects, color):
    """Outline every box of ``rects`` on ``img``, drawing in place.

    Each entry of ``rects`` must unpack into ``(x1, y1, x2, y2)``; the two
    corner points are handed to ``cv2.rectangle`` together with ``color``
    and a line thickness of 3.
    """
    thickness = 3
    for box in rects:
        left, top, right, bottom = box
        cv2.rectangle(img, (left, top), (right, bottom), color, thickness)
def do_convert(label_name, img_name):
    """Parse a word-level ground-truth file into boxes and transcriptions.

    Every non-blank line is expected to look like
    ``left top right bottom "text"``. The four numeric fields may carry
    commas (they are removed before conversion), and everything after the
    fourth field is taken as the transcription, so text containing spaces
    is kept intact. Double quotes are removed from the transcription.

    Args:
        label_name: path of the ground-truth text file. It is decoded as
            UTF-8 and a leading byte-order mark is ignored.
        img_name: not used by the parser; kept so existing callers that
            pass it keep working.

    Returns:
        ``(rects, texts)`` where ``rects`` is a list of
        ``[left, top, right, bottom]`` integer lists and ``texts`` the
        matching list of decoded (unicode) transcriptions.

    Raises:
        ValueError: if a non-blank line has fewer than five fields or a
            coordinate is not an integer.
    """
    rects = []
    texts = []
    # Decode once at the I/O boundary: the texts are later written through
    # UTF-8 codecs writers, which cannot take non-ASCII byte strings on
    # Python 2.
    with codecs.open(label_name, 'r', encoding='utf-8-sig') as label_file:
        for line_no, raw_line in enumerate(label_file, 1):
            line = raw_line.strip()
            if not line:
                continue
            # At most four splits, so the fifth field keeps its own spaces.
            fields = line.split(None, 4)
            if len(fields) != 5:
                raise ValueError(
                    '%s:%d: expected "left top right bottom text", got %r'
                    % (label_name, line_no, line))
            left, top, right, bottom = [
                int(field.replace(',', '')) for field in fields[:4]]
            rects.append([left, top, right, bottom])
            texts.append(fields[4].replace('"', ''))
    return rects, texts
def crop_and_save(anno_text, img, rects, texts):
    """Show every annotated region of ``img`` in the "text" window.

    The previous body read ``top``/``bottom``/``left``/``right`` without
    ever defining them, so any call raised NameError; the coordinates are
    now taken from ``rects``.

    Args:
        anno_text: unused; kept for interface compatibility.
        img: image array that supports ``img[top:bottom, left:right]``.
        rects: iterable of ``(left, top, right, bottom)`` boxes.
        texts: unused; kept for interface compatibility.

    NOTE(review): despite the name nothing is written to disk here, and
    no ``cv2.waitKey`` call follows the ``imshow`` -- confirm whether
    saving was intended.
    """
    for left, top, right, bottom in rects:
        text_region = img[top:bottom, left:right].copy()
        cv2.imshow("text", text_region)
def read_xml(xml_path):
    """Collect the labelled boxes stored in one annotation XML file.

    Every ``<object>`` element is inspected. Objects whose ``<name>`` is
    the generic label ``text`` are skipped; for the rest, spaces are
    removed from the name and the ``<bndbox>`` corner values are read as
    the strings found in the file (no integer conversion).

    The path and each kept entry are echoed to stdout.

    Returns:
        A list of ``[left, top, right, bottom, text]`` entries.
    """
    print(xml_path)
    document = etree.parse(xml_path)
    entries = []
    for obj in document.xpath('//object'):
        label = obj.find('name').text
        if label.strip() == 'text':
            # generic placeholder label: carries no transcription
            continue
        label = label.replace(' ', '')
        box = obj.find('bndbox')
        left, top, right, bottom = [
            box.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        print('%s %s %s %s %s' % (left, top, right, bottom, label))
        entries.append([left, top, right, bottom, label])
    return entries
def write_xml(gen_name, folder, base_name, rects, texts, width, height, depth):
    """Write an ``<annotation>`` XML file describing one image.

    The document holds ``<folder>``, ``<filename>`` (``base_name`` plus
    ``.jpg``), a ``<size>`` element with width/height/depth, and one
    ``<object>`` per box in ``rects``. Every object is named ``text`` and
    carries a ``<bndbox>`` with xmin/ymin/xmax/ymax. ``texts`` is accepted
    but not written.

    The tree is saved to ``gen_name`` as pretty-printed UTF-8 with an XML
    declaration.
    """
    def add_child(parent, tag, value=None):
        # Create <tag>, optionally set its text, and attach it to parent.
        node = etree.Element(tag)
        if value is not None:
            node.text = value
        parent.append(node)
        return node

    annotation = etree.XML('''<?xml version="1.0" encoding="utf-8"?><annotation></annotation>''')
    add_child(annotation, "folder", folder)
    add_child(annotation, "filename", base_name+".jpg")

    size_node = add_child(annotation, "size")
    for tag, value in (("width", width), ('height', height), ('depth', depth)):
        add_child(size_node, tag, str(value))

    for left, top, right, bottom in rects:
        object_node = add_child(annotation, "object")
        add_child(object_node, 'name', 'text')
        bbox_node = add_child(object_node, 'bndbox')
        corners = (('xmin', left), ('ymin', top), ('xmax', right), ('ymax', bottom))
        for tag, value in corners:
            add_child(bbox_node, tag, str(value))

    document = etree.ElementTree(annotation)
    document.write(gen_name, xml_declaration=True, encoding="utf-8", pretty_print=True)
def do_crop(img, rects, texts, base_name, word_path, word_label):
    """Save each box of ``img`` as its own image and log its transcription.

    Crop ``i`` is written to ``word_path + "<base_name>_<ii>.jpg"``
    (``word_path`` is used as a plain string prefix, so it has to end with
    a path separator) and a ``"<file name> <text>"`` line is appended to
    ``word_label``.

    Args:
        img: image array that supports ``img[top:bottom, left:right]``.
        rects: sequence of ``(left, top, right, bottom)`` boxes.
        texts: transcriptions, one per box.
        base_name: stem used for the crop file names.
        word_path: output prefix for the crop images.
        word_label: label file opened in append mode with UTF-8 encoding.

    Raises:
        ValueError: if ``rects`` and ``texts`` differ in length.
    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if len(texts) != len(rects):
        raise ValueError(
            'got %d texts for %d rects' % (len(texts), len(rects)))
    with codecs.open(word_label, 'a', encoding='utf-8') as label_file:
        for index, (rect, text) in enumerate(zip(rects, texts)):
            left, top, right, bottom = rect
            text_region = img[top:bottom, left:right].copy()
            save_name = "%s_%02d.jpg" % (base_name, index)
            cv2.imwrite(word_path + save_name, text_region)
            # On Python 2 a byte string is implicitly ASCII-decoded by the
            # codecs writer and fails on non-ASCII text; decode explicitly.
            # Assumes the bytes are UTF-8 -- TODO confirm against caller.
            if isinstance(text, bytes):
                text = text.decode('utf-8')
            label_file.write(u'%s %s\n' % (save_name, text))
def convert_folder(label_path, gen_path, img_path, dataset, word_path=None, word_label=None, verbose=True):
    """Convert every label file under ``label_path`` to an XML annotation.

    For each label file named ``<prefix>_<id>[_<extra>].<ext>`` the image
    ``img_path + <id>[_<extra>] + ".jpg"`` is loaded, the boxes are parsed
    with ``do_convert`` and an XML file is written to
    ``gen_path + <id>[_<extra>] + ".xml"`` via ``write_xml``. Each
    converted pair is listed as ``"<image> <xml>"`` in ``dataset + ".txt"``.

    ``gen_path``, ``img_path`` and ``word_path`` are used as plain string
    prefixes, so they have to end with a path separator.

    Args:
        label_path: directory tree that holds the label files.
        gen_path: output prefix for the XML files (created if missing).
        img_path: prefix where the ``.jpg`` images are looked up.
        dataset: stem of the list file ``dataset + ".txt"``.
        word_path: optional output prefix for per-word crops; when given,
            ``do_crop`` is run for every image.
        word_label: label file for the crops; required with ``word_path``.
            An existing file is removed first because ``do_crop`` appends.
        verbose: when true (the previous hard-coded behaviour) every image
            is shown with its boxes, scaled so the longer side is 700
            pixels, and the loop blocks in ``cv2.waitKey()``.

    Raises:
        ValueError: if ``word_path`` is given without ``word_label``.
    """
    if word_path is not None and word_label is None:
        # Fail early instead of reaching os.path.exists(None) below.
        raise ValueError('word_label is required when word_path is given')
    if not os.path.exists(gen_path):
        os.makedirs(gen_path)
    if word_path is not None:
        if not os.path.exists(word_path):
            os.makedirs(word_path)
        if os.path.exists(word_label):
            os.remove(word_label)
    folder = ""
    # ``with`` closes the list file even if a conversion raises.
    with open(dataset + '.txt', 'w') as list_file:
        for root, dirs, files in os.walk(label_path):
            for name in files:
                label_name = os.path.join(root, name)
                bases = os.path.splitext(os.path.basename(name))[0].split('_')
                if len(bases) < 2:
                    # No "<prefix>_<id>" pattern, so no image id to derive.
                    print('%s skipped: name has no "_" separated image id' % label_name)
                    continue
                base_name = bases[1]
                if len(bases) > 2:
                    base_name = base_name + '_' + bases[2]
                gen_name = gen_path + base_name + ".xml"
                img_name = img_path + base_name + ".jpg"
                print('%s %s %s %s' % (label_name, base_name, gen_name, img_name))
                rects, texts = do_convert(label_name, img_name)
                img = cv2.imread(img_name)
                if img is None:
                    # cv2.imread signals a missing/unreadable file with None.
                    print('%s skipped: cannot read image %s' % (label_name, img_name))
                    continue
                # Listed only after the image loaded, so the list file
                # never references a pair that was not converted.
                list_file.write(img_name + " " + gen_name + '\n')
                width = img.shape[1]
                height = img.shape[0]
                depth = img.shape[2]
                write_xml(gen_name, folder, base_name, rects, texts, width, height, depth)
                if word_path is not None:
                    # Crop before drawing so the crops stay free of outlines.
                    do_crop(img, rects, texts, base_name, word_path, word_label)
                if verbose:
                    draw_rects(img, rects, (255, 0, 0))
                    ratio = max(width / 700.0, height / 700.0)
                    dst_size = (int(width / ratio), int(height / ratio))
                    resize_img = cv2.resize(img, dst_size)
                    cv2.imshow("anno", resize_img)
                    cv2.waitKey()
def convert_icdar13():
    """Run ``convert_dataset`` on the ``train`` directory."""
    # pass 'val' instead to convert the validation split
    convert_dataset('train')
def parse_icdar17(label_name):
    """Parse a quadrilateral ground-truth file into axis-aligned boxes.

    Each non-blank line holds comma-separated fields: eight integer corner
    coordinates ``x1,y1,x2,y2,x3,y3,x4,y4``, a "difficult" flag, then the
    transcription. The transcription may itself contain commas; its pieces
    are joined back together. Lines flagged difficult (``1``) and boxes
    with zero width or height are dropped. Double quotes and spaces are
    removed from the transcription. Every kept entry is echoed to stdout.

    Args:
        label_name: path of the ground-truth file. It is decoded as UTF-8
            and a leading byte-order mark is ignored.

    Returns:
        A list of ``[left, top, right, bottom, text]`` entries, where the
        box is the bounding rectangle of the four corners and ``text`` is
        decoded (unicode) text.

    Raises:
        ValueError: if a non-blank line has fewer than ten fields or a
            coordinate is not an integer.
    """
    results = []
    # Decode once at the I/O boundary: the texts are later written through
    # a UTF-8 codecs writer, which cannot take non-ASCII byte strings on
    # Python 2. ``utf-8-sig`` also drops a BOM that would otherwise break
    # the first coordinate.
    with codecs.open(label_name, 'r', encoding='utf-8-sig') as label_file:
        lines = [line.strip() for line in label_file]
    for line in lines:
        if not line:
            continue
        annos = line.split(',')
        if len(annos) < 10:
            raise ValueError(
                '%s: expected at least 10 comma-separated fields, got %r'
                % (label_name, line))
        difficult = annos[8]
        if difficult == '1':
            continue
        coords = [int(value.strip()) for value in annos[:8]]
        xs = coords[0::2]
        ys = coords[1::2]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        if left == right or top == bottom:
            # degenerate box: nothing to crop
            continue
        # Fields from index 9 on are the transcription, split on its commas.
        text = ','.join(annos[9:])
        # NOTE(review): the '(' / ')' / ':' replacements are no-ops as
        # written; presumably they once mapped full-width punctuation to
        # ASCII -- confirm against the original file.
        text = text.replace('"','').replace('(', '(').replace(')', ')').replace(':', ':').replace(' ', '')
        summary = u'%d %d %d %d %s' % (left, top, right, bottom, text)
        if str is bytes:
            # Python 2: print bytes so a redirected stdout cannot raise
            # UnicodeEncodeError on non-ASCII text.
            summary = summary.encode('utf-8')
        print(summary)
        results.append([left, top, right, bottom, text])
    return results
def convert_icdar17_labels(source_dirs, target_dirs):
    """Rewrite every label file under ``source_dirs`` as a ``gt_*.txt`` file.

    Each source file is parsed with ``parse_icdar17`` and its entries are
    written to ``<target_dirs>/gt_<stem>.txt`` as space-separated
    ``left top right bottom text`` lines encoded in UTF-8. Files that fail
    to parse are reported on stdout and skipped.

    Args:
        source_dirs: directory tree holding the source label files.
        target_dirs: output directory (created if missing).
    """
    if not os.path.exists(target_dirs):
        os.makedirs(target_dirs)
    for root, dirs, files in os.walk(source_dirs):
        for name in files:
            source_name = os.path.join(root, name)
            try:
                results = parse_icdar17(source_name)
            except Exception as err:
                # Still best-effort, but no longer swallows
                # KeyboardInterrupt/SystemExit, and the reason is shown.
                print('%s parse failed (%s)' % (source_name, err))
                continue
            base_name = os.path.splitext(os.path.basename(source_name))[0]
            text_name = '%s/gt_%s.txt' % (target_dirs, base_name)
            with codecs.open(text_name, 'w', encoding='utf-8') as out_file:
                for obj in results:
                    # Unpacking enforces the five-field shape.
                    left, top, right, bottom, text = obj
                    # On Python 2 the codecs writer ASCII-decodes byte
                    # strings and fails on non-ASCII text; decode first.
                    # Assumes UTF-8 bytes -- TODO confirm against parser.
                    if isinstance(text, bytes):
                        text = text.decode('utf-8')
                    out_file.write(
                        u'%s %s %s %s %s\n' % (left, top, right, bottom, text))
def convert_icdar17():
    """Convert the ``val`` labels from ``gt/`` to ``labels/``, then run
    ``convert_dataset`` on the same directory."""
    dataset = 'val'
    convert_icdar17_labels(dataset + '/gt/', dataset + '/labels/')
    convert_dataset(dataset)
def convert_dataset(dataset):
    """Run ``convert_folder`` on the standard layout below ``dataset``.

    Labels are read from ``labels/`` and images from ``img/``; XML files
    go to ``gen/``, word crops to ``crop/`` and the crop labels to
    ``word.txt``, all under ``dataset``.
    """
    base_path = dataset + '/'
    # Sub-paths keep their own leading slash, exactly as callers see them
    # in the generated list file.
    convert_folder(
        base_path + '/labels/',
        base_path + '/gen/',
        base_path + '/img/',
        dataset,
        base_path + '/crop/',
        base_path + '/word.txt',
    )
def convert_card_format(label_name):
    """Split a one-line-per-image annotation file into ``gt_*.txt`` files.

    Each non-blank line of ``label_name`` starts with an image base name
    followed by a Python dict literal (which may be broken up by spaces)
    that maps a text to a pair of corner points
    ``[(left, top), (right, bottom)]``. For every image a file
    ``labels/gt_<base name>.txt`` is written with one
    ``left top right bottom text`` line per dict item, encoded in UTF-8.

    Args:
        label_name: path of the combined annotation file.

    Raises:
        ValueError: if a dict value does not hold exactly two points.
        SyntaxError: from ``ast.literal_eval`` when the annotation part of
            a line is not a valid literal.
    """
    convert_text_path = 'labels/'
    if not os.path.exists(convert_text_path):
        os.makedirs(convert_text_path)
    with open(label_name, 'r') as label_file:
        lines = [line.strip() for line in label_file]
    for line in lines:
        if not line:
            # A blank line has no annotation to evaluate.
            continue
        words = line.split(' ')
        base_name = words[0]
        # The literal was split on spaces; glue the pieces back together.
        # Parse before opening the output so a malformed line does not
        # leave a truncated label file behind.
        anno_dict = ast.literal_eval(''.join(words[1:]))
        text_name = convert_text_path + 'gt_%s.txt' % base_name
        with codecs.open(text_name, 'w', encoding='utf-8') as out_file:
            for key, value in anno_dict.items():
                left_top, right_bottom = value
                left = left_top[0]
                top = left_top[1]
                right = right_bottom[0]
                bottom = right_bottom[1]
                # Python 2 yields byte-string keys, Python 3 text keys.
                if isinstance(key, bytes):
                    key = key.decode('utf-8')
                key = key.strip()
                out_file.write(
                    u'%d %d %d %d %s\n' % (left, top, right, bottom, key))
def convert_card():
    """Split ``orig.txt`` into per-image label files, then run
    ``convert_dataset`` on the current directory."""
    convert_card_format('orig.txt')
    convert_dataset('./')
def convert_card_xml():
    """Turn the XML files in ``annotations`` into label files in
    ``labels``, then run ``convert_dataset`` on the current directory."""
    convert_xml('annotations', 'labels')
    convert_dataset('./')
def test_convert_xml():
    """Manual check: run ``read_xml`` on one sample annotation file."""
    read_xml('annotations/card_ocr334.xml')
def convert_xml(xml_dir, text_dir):
    """Write one ``gt_<stem>.txt`` label file per file found in ``xml_dir``.

    Every file below ``xml_dir`` is read with ``read_xml`` and its entries
    are saved to ``<text_dir>/gt_<stem>.txt`` as space-separated
    ``left top right bottom text`` lines encoded in UTF-8. ``text_dir`` is
    created when it does not exist yet.
    """
    if not os.path.exists(text_dir):
        os.makedirs(text_dir)
    for root, dirs, files in os.walk(xml_dir):
        for name in files:
            xml_name = os.path.join(root, name)
            entries = read_xml(xml_name)
            stem, _ext = os.path.splitext(os.path.basename(xml_name))
            text_name = '%s/gt_%s.txt' % (text_dir, stem)
            with codecs.open(text_name, 'w', encoding='utf-8') as out_file:
                for entry in entries:
                    assert len(entry) == 5
                    out_file.write('%s %s %s %s %s\n' % tuple(entry[:5]))
def test_parse_icdar17():
    """Manual check: run ``parse_icdar17`` on one sample label file."""
    parse_icdar17('gt/image_0.txt')
if __name__ == "__main__":
    # Alternative entry points, kept for reference; enable one at a time.
    # convert_icdar13()
    # convert_card()
    # convert_xml()
    # convert_card_xml()
    # test_parse_icdar17()
    convert_icdar17()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment