woolpeeker · March 14, 2023 15:16
diff --git a/coco2yolo.py b/coco2yolo.py

 import json

 from pathlib import Path
 import glob
 import os
 import shutil
 from tqdm import tqdm
 import numpy as np

 def coco91_to_coco80_class():
    """Return the lookup table from 91-index (paper) ids to 80-index classes.

    The result is a 91-element list.  Entry ``i`` holds the zero-based
    80-class index for paper category id ``i + 1``, or ``None`` when that id
    has no counterpart in the 80-class label set.
    """
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # Paper category ids (1-based) that have no entry in the 80-class set.
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91}
    table = []
    next_index = 0
    for category_id in range(1, 92):
        if category_id in unused_ids:
            table.append(None)
        else:
            table.append(next_index)
            next_index += 1
    return table

 def make_folders(path):
    """Create a fresh output tree containing ``labels`` and ``images`` folders.

    WARNING: an existing ``path`` is deleted recursively before it is
    recreated, so anything stored there is lost.

    Args:
        path: Output root, as a ``str`` or any ``os.PathLike`` object.

    Returns:
        The ``path`` argument, unchanged.
    """
    if os.path.exists(path):
        shutil.rmtree(path)  # start from an empty output folder
    # os.path.join accepts PathLike objects, unlike ``path + os.sep + ...``.
    for sub_dir in ('labels', 'images'):
        os.makedirs(os.path.join(path, sub_dir))  # also creates ``path`` itself
    return path


 def convert_coco_json(json_dir, out_root):
    """Convert COCO ``instances_*.json`` annotation files to YOLO label files.

    For every ``instances_<split>.json`` found in ``json_dir`` a folder
    ``<out_root>/labels/<split>/`` is created.  It receives one
    ``<image stem>.txt`` per image that has at least one usable annotation.
    Each line is ``class cx cy w h`` with the box centre and size divided by
    the image width and height.  Annotations flagged ``iscrowd`` and boxes
    with non-positive width or height are skipped.

    WARNING: ``out_root`` is wiped and recreated by ``make_folders``.

    Args:
        json_dir: Directory holding the annotation files; a trailing
            separator is optional.
        out_root: Output root directory; must not be ``None``.
    """
    assert out_root is not None
    make_folders(path=out_root)  # output directory
    # os.path.join keeps the pattern valid whether or not json_dir ends with
    # a separator; plain string concatenation silently matched nothing.
    jsons = glob.glob(os.path.join(json_dir, 'instances_*.json'))
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(jsons):
        json_file = Path(json_file)
        out_dir_name = json_file.stem.replace('instances_', '')
        label_dir = f'{out_root}/labels/{out_dir_name}/'
        os.mkdir(label_dir)
        with open(json_file) as f:
            data = json.load(f)

        # Key images by their raw id.  Formatting ids with '%g' keeps only six
        # significant digits, so ids >= 1e6 collided with each other.
        images = {img['id']: img for img in data['images']}

        # Collect the lines per label file so each file is opened only once.
        lines_by_file = {}
        for ann in tqdm(data['annotations'], desc='Annotations %s' % json_file.name):
            if ann['iscrowd']:
                continue

            img = images[ann['image_id']]
            h, w, file_name = img['height'], img['width'], img['file_name']

            # The box is read as [top left x, top left y, width, height]
            box = np.array(ann['bbox'], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y

            if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                line = '%g %.6f %.6f %.6f %.6f\n' % (coco80[ann['category_id'] - 1], *box)
                label_path = label_dir + Path(file_name).stem + '.txt'
                lines_by_file.setdefault(label_path, []).append(line)

        for label_path, lines in lines_by_file.items():
            with open(label_path, 'a') as file:
                file.writelines(lines)


 if __name__ == '__main__':
    # Annotation folder to read and output root to (re)create.
    JSON_DIR, OUT_DIR = '/media/HD1/datasets/coco2017/annotations/', './coco2017/'
    convert_coco_json(json_dir=JSON_DIR, out_root=OUT_DIR)
diff --git a/data2yolo.md b/data2yolo.md
diff --git a/kitti2yolo.py b/kitti2yolo.py
 """
 convert_txt0 use the original images
 convert_txt1 crop the original images
 """
 from pathlib import Path
 from PIL import Image
 import numpy as np
 import pandas as pd
 import shutil
 import random

 # Output class name -> KITTI object types merged into that class.
 # Key order matters: Convert_Original_Label uses a key's position as its class index.
 LABEL_CFG = dict(
 	person=['Pedestrian', 'Person_sitting'],
 	cyclist=['Cyclist'],
 	car=['Car', 'Van'],
 )

 # KITTI dataset root and the id list that gets converted.
 KITTI_ROOT = Path('/data/luojiapeng/datasets/kitti/')
 KITTI_TXT = KITTI_ROOT.joinpath('training/ImageSets/trainval.txt')

 # Root folder of the converted dataset.
 OUT_ROOT = Path('./kitti')

 class Convert_Original_Label:
 	"""Callable that maps a KITTI object type to its output class index."""

 	# Output class names; a name's position in this list is its class index.
 	_NAMES = list(LABEL_CFG)
 	# KITTI object type -> output class name.
 	_DICT = {
 		kitti_type: class_name
 		for class_name, kitti_types in LABEL_CFG.items()
 		for kitti_type in kitti_types
 	}

 	@classmethod
 	def __call__(cls, name):
 		"""Return the class index for ``name``, or ``None`` if it is not configured."""
 		class_name = cls._DICT.get(name)
 		return None if class_name is None else cls._NAMES.index(class_name)


 label_converter = Convert_Original_Label()

 def convert_txt0(src_txt, src_img, dst_txt, dst_img):
 	"""Convert one KITTI label file to YOLO format and copy its image unchanged.

 	Column 0 of each label row is the object type and columns 4-7 are read
 	as the box corners ``x0, y0, x1, y1`` in pixels.  Rows whose object type
 	``label_converter`` does not know are dropped.

 	Args:
 		src_txt: Path of the KITTI label file.
 		src_img: Path of the matching image.
 		dst_txt: Path of the YOLO label file to write.
 		dst_img: Path the image is copied to.

 	Returns:
 		``([dst_txt], [dst_img])`` as two one-element lists.
 	"""
 	# ``sep`` must be a keyword argument: pandas >= 2.0 rejects it positionally.
 	data = pd.read_csv(str(src_txt), sep=' ', header=None, index_col=None)
 	classes = data.loc[:, 0].values.tolist()
 	boxes = data.loc[:, 4:7].values.tolist()
 	# Only the size is needed, so release the image file right away.
 	with Image.open(str(src_img)) as img:
 		im_w, im_h = img.size
 	with open(dst_txt, 'w') as fp:
 		for c, box in zip(classes, boxes):
 			new_c = label_converter(c)
 			if new_c is None:
 				continue
 			x0, y0, x1, y1 = box
 			x0, x1 = x0 / im_w, x1 / im_w
 			y0, y1 = y0 / im_h, y1 / im_h
 			cx = (x0 + x1) / 2
 			cy = (y0 + y1) / 2
 			box_w = x1 - x0
 			box_h = y1 - y0
 			fp.write('%d %.6f %.6f %.6f %.6f\n' % (new_c, cx, cy, box_w, box_h))

 	# cp img_file
 	shutil.copyfile(str(src_img), str(dst_img))
 	return [dst_txt], [dst_img]

 def convert_txt1(src_txt, src_img, dst_txt, dst_img):
 	"""Convert one KITTI sample into square crops with YOLO labels.

 	A window of ``im_h`` x ``im_h`` pixels slides from the left image edge
 	in steps of ``im_h // 2``.  Every window that fits inside the image is
 	saved as ``<dst_img stem>_<k>.png`` with its labels in
 	``<dst_txt stem>_<k>.txt``.  Boxes are clipped to the window; boxes left
 	with no area and rows with an unknown object type are dropped.

 	Args:
 		src_txt: Path of the KITTI label file.
 		src_img: Path of the matching image.
 		dst_txt: ``pathlib.Path`` used as the template for label file names.
 		dst_img: ``pathlib.Path`` used as the template for image file names.

 	Returns:
 		``(label_paths, image_paths)`` of the crops written.  Both lists are
 		empty when no window fits (image narrower than it is tall).
 	"""
 	# ``sep`` must be a keyword argument: pandas >= 2.0 rejects it positionally.
 	data = pd.read_csv(str(src_txt), sep=' ', header=None, index_col=None)
 	classes = data.loc[:, 0].values.tolist()
 	boxes = data.loc[:, 4:7].values
 	out_txt_lst = []
 	out_img_lst = []
 	with Image.open(str(src_img)) as img:
 		im_w, im_h = img.size
 		# Never step by 0, otherwise the window would not advance.
 		step = max(im_h // 2, 1)
 		crop_x0, crop_x1 = 0, im_h
 		count = 0
 		# ``<=``: a window ending exactly on the right edge still fits.
 		while crop_x1 <= im_w:
 			crop_box = (crop_x0, 0, crop_x1, im_h)
 			c_img, c_boxes = crop_image_box(img, boxes, crop_box)
 			# Accept both (N, 4) and (N, 4, 1) box arrays.
 			c_boxes = np.asarray(c_boxes, dtype=np.float64).reshape(-1, 4)
 			dst_txt_i = dst_txt.with_name(f'{dst_txt.stem}_{count}.txt')
 			dst_img_i = dst_img.with_name(f'{dst_img.stem}_{count}.png')
 			c_img.save(str(dst_img_i))
 			count += 1
 			# One label file per crop, closed before the next one is opened.
 			with open(dst_txt_i, 'w') as fp:
 				for c, (x0, y0, x1, y1) in zip(classes, c_boxes):
 					new_c = label_converter(c)
 					# Compare with None: class index 0 is valid but falsy.
 					if new_c is None:
 						continue
 					# Clipping collapses boxes outside the window to zero size.
 					if x1 <= x0 or y1 <= y0:
 						continue
 					# The crop is im_h pixels wide and high.
 					x0, x1 = x0 / im_h, x1 / im_h
 					y0, y1 = y0 / im_h, y1 / im_h
 					cx = (x0 + x1) / 2
 					cy = (y0 + y1) / 2
 					box_w = x1 - x0
 					box_h = y1 - y0
 					fp.write('%d %.6f %.6f %.6f %.6f\n' % (new_c, cx, cy, box_w, box_h))
 			out_txt_lst.append(dst_txt_i)
 			out_img_lst.append(dst_img_i)
 			crop_x0 += step
 			crop_x1 += step
 	return out_txt_lst, out_img_lst

 def crop_image_box(image, boxes, crop_box):
 	"""Crop ``image`` and express ``boxes`` in the coordinate frame of the crop.

 	Args:
 		image: Object with a ``crop(box)`` method whose result exposes
 			``size`` as ``(width, height)``.
 		boxes: Array-like of shape (N, 4) holding ``x0, y0, x1, y1`` rows.
 		crop_box: Crop rectangle; elements 0 and 1 are its left and upper edge.

 	Returns:
 		``(cropped_image, new_boxes)``.  ``new_boxes`` is a float array of
 		shape (N, 4): the input shifted by the crop origin and clipped to the
 		crop size.  Boxes outside the crop end up with zero width or height.
 	"""
 	img = image.crop(crop_box)
 	crop_w, crop_h = img.size
 	left, upper = crop_box[0], crop_box[1]
 	boxes = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
 	# Work on whole rows.  Splitting into columns and stacking them again
 	# produced shape (N, 4, 1), so callers ended up formatting 1-element arrays.
 	shifted = boxes - np.array([left, upper, left, upper], dtype=np.float64)
 	limits = np.array([crop_w, crop_h, crop_w, crop_h], dtype=np.float64)
 	return img, np.clip(shifted, 0, limits)


 def func(id_lst, mode):
 	"""Convert the given KITTI samples and write the image list of one split.

 	Labels go to ``OUT_ROOT/labels/<mode>/`` and images to
 	``OUT_ROOT/images/<mode>/``.  The absolute path of every written image
 	is listed, one per line, in ``OUT_ROOT/<mode>.txt``.

 	Args:
 		id_lst: Iterable of sample ids (file stems below ``KITTI_ROOT``).
 		mode: Split name used for the folder and list-file names.
 	"""
 	OUT_ROOT.mkdir(parents=True, exist_ok=True)
 	# ``with`` closes the list file even if a conversion raises.
 	with open(OUT_ROOT / f'{mode}.txt', 'w') as fp:
 		for file_id in id_lst:
 			print(file_id)
 			txt_file = KITTI_ROOT / f'training/label_2/{file_id}.txt'
 			img_file = KITTI_ROOT / f'training/image_2/{file_id}.png'
 			out_txt_file = OUT_ROOT / f'labels/{mode}/{file_id}.txt'
 			out_img_file = OUT_ROOT / f'images/{mode}/{file_id}.png'
 			out_txt_file.parent.mkdir(parents=True, exist_ok=True)
 			out_img_file.parent.mkdir(parents=True, exist_ok=True)
 			_, out_imgs = convert_txt0(txt_file, img_file, out_txt_file, out_img_file)
 			for x in out_imgs:
 				fp.write(str(x.absolute())+'\n')


 if __name__ == '__main__':
 	# ndmin=1 keeps a one-line id file from collapsing into a bare string,
 	# which random.shuffle cannot handle.
 	file_id_lst = np.loadtxt(KITTI_TXT, dtype=str, ndmin=1).tolist()
 	random.shuffle(file_id_lst)  # unseeded: the split differs between runs
 	# 80 % of the ids go to the training split, the rest to validation.
 	train_num = int(len(file_id_lst) * 0.8)
 	train_id_lst = sorted(file_id_lst[:train_num])
 	val_id_lst = sorted(file_id_lst[train_num:])
 	func(train_id_lst, 'train')
 	func(val_id_lst, 'val')
diff --git a/wider2yolo.py b/wider2yolo.py
 from pathlib import Path
 import numpy as np
 from PIL import Image
 import shutil

 # Dataset root; the script reads WIDER_<MODE>/images and wider_face_split/ below it.
 WIDER_ROOT = Path('/media/HD1/Datasets/widerface')
 # Split to convert; selects the source folders and names the output folders.
 MODE = 'val'

 def read_txt(file_path):
    """Parse a WIDER FACE ``*_bbx_gt.txt`` annotation file.

    A line ending in ``.jpg`` starts a new image.  A line with exactly ten
    fields is a box whose first four fields are ``x y w h``.  Every other
    line (e.g. the box-count line of the format) is ignored.

    Args:
        file_path: Path of the annotation file.

    Returns:
        Dict mapping each image path without its ``.jpg`` suffix to a list of
        ``[x0, y0, x1, y1]`` integer boxes.  Boxes with non-positive width or
        height are dropped, so an image may map to an empty list.

    Raises:
        ValueError: If a box line appears before any image line.
    """
    data = {}
    file_id = None
    with open(file_path) as fh:  # closed deterministically, unlike a bare open()
        for line in fh:
            line = line.rstrip()
            if not line:
                continue
            if line.endswith('.jpg'):
                file_id = line[:-4]
                data[file_id] = []
                continue
            fields = line.split()
            if len(fields) != 10:
                continue
            if file_id is None:
                raise ValueError('box line found before any image line: %r' % line)
            x0, y0, w, h = (int(v) for v in fields[:4])
            # Zero-size rows (such as the all-zero row of an image without
            # faces) are not valid boxes and must not become labels.
            if w <= 0 or h <= 0:
                continue
            data[file_id].append([x0, y0, x0 + w, y0 + h])
    return data


 if __name__ == '__main__':
    # Convert the MODE split: write one label file per image, copy the image
    # and list the absolute paths of all copied images in ./wider/<MODE>.txt.
    src_img_dir = WIDER_ROOT / f'WIDER_{MODE}/images'
    txt_file = WIDER_ROOT / f'wider_face_split/wider_face_{MODE}_bbx_gt.txt'
    dst_img_dir = Path(f'./wider/images/{MODE}')
    dst_txt_dir = Path(f'./wider/labels/{MODE}')
    img_list_file = Path(f'./wider/{MODE}.txt')
    # exist_ok=False: fail instead of writing into the output of an earlier run.
    dst_img_dir.mkdir(parents=True, exist_ok=False)
    dst_txt_dir.mkdir(parents=True, exist_ok=False)

    labels = read_txt(txt_file)
    with open(img_list_file, 'w') as img_list_fp:
        for file_id, boxes in labels.items():
            if len(boxes) == 0:
                continue
            src_img = src_img_dir / (file_id + '.jpg')
            # Keep only the second path component of the id, which flattens
            # '<folder>/<name>' ids into a single output folder.
            dst_id = file_id.split('/')[1]
            dst_img = dst_img_dir / (dst_id + '.jpg')
            dst_txt = dst_txt_dir / (dst_id + '.txt')

            # Only the size is needed, so release the image file right away.
            with Image.open(str(src_img)) as img:
                im_w, im_h = img.size

            with open(dst_txt, 'w') as fp:
                for x0, y0, x1, y1 in boxes:
                    cx = (x0 + x1) / 2
                    cy = (y0 + y1) / 2
                    w = x1 - x0
                    h = y1 - y0
                    cx /= im_w
                    cy /= im_h
                    w /= im_w
                    h /= im_h
                    fp.write('0 %.6f %.6f %.6f %.6f\n' % (cx, cy, w, h))

            shutil.copyfile(str(src_img), str(dst_img))
            img_list_fp.write(str(dst_img.absolute())+'\n')

	import json

	from pathlib import Path
	import glob
	import os
	import shutil
	from tqdm import tqdm
	import numpy as np

	def coco91_to_coco80_class():
	    """Return the lookup table from 91-index (paper) ids to 80-index classes.

	    The result is a 91-element list.  Entry ``i`` holds the zero-based
	    80-class index for paper category id ``i + 1``, or ``None`` when that id
	    has no counterpart in the 80-class label set.
	    """
	    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
	    x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
	         None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
	         51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
	         None, 73, 74, 75, 76, 77, 78, 79, None]
	    return x

	def make_folders(path):
	    """Create a fresh output tree containing ``labels`` and ``images`` folders.

	    WARNING: an existing ``path`` is deleted recursively before it is
	    recreated, so anything stored there is lost.

	    Args:
	        path: Output root, as a ``str`` or any ``os.PathLike`` object.

	    Returns:
	        The ``path`` argument, unchanged.
	    """
	    if os.path.exists(path):
	        shutil.rmtree(path)  # start from an empty output folder
	    # os.path.join accepts PathLike objects, unlike ``path + os.sep + ...``.
	    for sub_dir in ('labels', 'images'):
	        os.makedirs(os.path.join(path, sub_dir))  # also creates ``path`` itself
	    return path


	def convert_coco_json(json_dir, out_root):
	    """Convert COCO ``instances_*.json`` annotation files to YOLO label files.

	    For every ``instances_<split>.json`` found in ``json_dir`` a folder
	    ``<out_root>/labels/<split>/`` is created.  It receives one
	    ``<image stem>.txt`` per image that has at least one usable annotation.
	    Each line is ``class cx cy w h`` with the box centre and size divided by
	    the image width and height.  Annotations flagged ``iscrowd`` and boxes
	    with non-positive width or height are skipped.

	    WARNING: ``out_root`` is wiped and recreated by ``make_folders``.

	    Args:
	        json_dir: Directory holding the annotation files; a trailing
	            separator is optional.
	        out_root: Output root directory; must not be ``None``.
	    """
	    assert out_root is not None
	    make_folders(path=out_root)  # output directory
	    # os.path.join keeps the pattern valid whether or not json_dir ends with
	    # a separator; plain string concatenation silently matched nothing.
	    jsons = glob.glob(os.path.join(json_dir, 'instances_*.json'))
	    coco80 = coco91_to_coco80_class()

	    for json_file in sorted(jsons):
	        json_file = Path(json_file)
	        out_dir_name = json_file.stem.replace('instances_', '')
	        label_dir = f'{out_root}/labels/{out_dir_name}/'
	        os.mkdir(label_dir)
	        with open(json_file) as f:
	            data = json.load(f)

	        # Key images by their raw id.  Formatting ids with '%g' keeps only six
	        # significant digits, so ids >= 1e6 collided with each other.
	        images = {img['id']: img for img in data['images']}

	        # Collect the lines per label file so each file is opened only once.
	        lines_by_file = {}
	        for ann in tqdm(data['annotations'], desc='Annotations %s' % json_file.name):
	            if ann['iscrowd']:
	                continue

	            img = images[ann['image_id']]
	            h, w, file_name = img['height'], img['width'], img['file_name']

	            # The box is read as [top left x, top left y, width, height]
	            box = np.array(ann['bbox'], dtype=np.float64)
	            box[:2] += box[2:] / 2  # xy top-left corner to center
	            box[[0, 2]] /= w  # normalize x
	            box[[1, 3]] /= h  # normalize y

	            if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
	                line = '%g %.6f %.6f %.6f %.6f\n' % (coco80[ann['category_id'] - 1], *box)
	                label_path = label_dir + Path(file_name).stem + '.txt'
	                lines_by_file.setdefault(label_path, []).append(line)

	        for label_path, lines in lines_by_file.items():
	            with open(label_path, 'a') as file:
	                file.writelines(lines)


	if __name__ == '__main__':
	    # Annotation folder to read and output root to (re)create.
	    JSON_DIR = '/media/HD1/datasets/coco2017/annotations/'
	    OUT_DIR = './coco2017/'

	    convert_coco_json(JSON_DIR, OUT_DIR)
	"""
	convert_txt0 use the original images
	convert_txt1 crop the original images
	"""
	from pathlib import Path
	from PIL import Image
	import numpy as np
	import pandas as pd
	import shutil
	import random

	# Output class name -> KITTI object types merged into that class.
	# Key order matters: Convert_Original_Label uses a key's position as its class index.
	LABEL_CFG = dict(
	    person=['Pedestrian', 'Person_sitting'],
	    cyclist=['Cyclist'],
	    car=['Car', 'Van'],
	)

	# KITTI dataset root and the id list that gets converted.
	KITTI_ROOT = Path('/data/luojiapeng/datasets/kitti/')
	KITTI_TXT = KITTI_ROOT.joinpath('training/ImageSets/trainval.txt')

	# Root folder of the converted dataset.
	OUT_ROOT = Path('./kitti')

	class Convert_Original_Label:
	    """Callable that maps a KITTI object type to its output class index."""

	    # Output class names; a name's position in this list is its class index.
	    _NAMES = list(LABEL_CFG.keys())
	    # KITTI object type -> output class name.
	    _DICT = {v: k for k, v_lst in LABEL_CFG.items() for v in v_lst}

	    @classmethod
	    def __call__(cls, name):
	        """Return the class index for ``name``, or ``None`` if it is not configured."""
	        if name not in cls._DICT:
	            return None
	        return cls._NAMES.index(cls._DICT[name])


	label_converter = Convert_Original_Label()

	def convert_txt0(src_txt, src_img, dst_txt, dst_img):
	    """Convert one KITTI label file to YOLO format and copy its image unchanged.

	    Column 0 of each label row is the object type and columns 4-7 are read
	    as the box corners ``x0, y0, x1, y1`` in pixels.  Rows whose object type
	    ``label_converter`` does not know are dropped.

	    Args:
	        src_txt: Path of the KITTI label file.
	        src_img: Path of the matching image.
	        dst_txt: Path of the YOLO label file to write.
	        dst_img: Path the image is copied to.

	    Returns:
	        ``([dst_txt], [dst_img])`` as two one-element lists.
	    """
	    # ``sep`` must be a keyword argument: pandas >= 2.0 rejects it positionally.
	    data = pd.read_csv(str(src_txt), sep=' ', header=None, index_col=None)
	    classes = data.loc[:, 0].values.tolist()
	    boxes = data.loc[:, 4:7].values.tolist()
	    # Only the size is needed, so release the image file right away.
	    with Image.open(str(src_img)) as img:
	        im_w, im_h = img.size
	    with open(dst_txt, 'w') as fp:
	        for c, box in zip(classes, boxes):
	            new_c = label_converter(c)
	            if new_c is None:
	                continue
	            x0, y0, x1, y1 = box
	            x0, x1 = x0 / im_w, x1 / im_w
	            y0, y1 = y0 / im_h, y1 / im_h
	            cx = (x0 + x1) / 2
	            cy = (y0 + y1) / 2
	            box_w = x1 - x0
	            box_h = y1 - y0
	            fp.write('%d %.6f %.6f %.6f %.6f\n' % (new_c, cx, cy, box_w, box_h))

	    # cp img_file
	    shutil.copyfile(str(src_img), str(dst_img))
	    return [dst_txt], [dst_img]

	def convert_txt1(src_txt, src_img, dst_txt, dst_img):
	    """Convert one KITTI sample into square crops with YOLO labels.

	    A window of ``im_h`` x ``im_h`` pixels slides from the left image edge
	    in steps of ``im_h // 2``.  Every window that fits inside the image is
	    saved as ``<dst_img stem>_<k>.png`` with its labels in
	    ``<dst_txt stem>_<k>.txt``.  Boxes are clipped to the window; boxes left
	    with no area and rows with an unknown object type are dropped.

	    Args:
	        src_txt: Path of the KITTI label file.
	        src_img: Path of the matching image.
	        dst_txt: ``pathlib.Path`` used as the template for label file names.
	        dst_img: ``pathlib.Path`` used as the template for image file names.

	    Returns:
	        ``(label_paths, image_paths)`` of the crops written.  Both lists are
	        empty when no window fits (image narrower than it is tall).
	    """
	    # ``sep`` must be a keyword argument: pandas >= 2.0 rejects it positionally.
	    data = pd.read_csv(str(src_txt), sep=' ', header=None, index_col=None)
	    classes = data.loc[:, 0].values.tolist()
	    boxes = data.loc[:, 4:7].values
	    out_txt_lst = []
	    out_img_lst = []
	    with Image.open(str(src_img)) as img:
	        im_w, im_h = img.size
	        # Never step by 0, otherwise the window would not advance.
	        step = max(im_h // 2, 1)
	        crop_x0, crop_x1 = 0, im_h
	        count = 0
	        # ``<=``: a window ending exactly on the right edge still fits.
	        while crop_x1 <= im_w:
	            crop_box = (crop_x0, 0, crop_x1, im_h)
	            c_img, c_boxes = crop_image_box(img, boxes, crop_box)
	            # Accept both (N, 4) and (N, 4, 1) box arrays.
	            c_boxes = np.asarray(c_boxes, dtype=np.float64).reshape(-1, 4)
	            dst_txt_i = dst_txt.with_name(f'{dst_txt.stem}_{count}.txt')
	            dst_img_i = dst_img.with_name(f'{dst_img.stem}_{count}.png')
	            c_img.save(str(dst_img_i))
	            count += 1
	            # One label file per crop, closed before the next one is opened.
	            with open(dst_txt_i, 'w') as fp:
	                for c, (x0, y0, x1, y1) in zip(classes, c_boxes):
	                    new_c = label_converter(c)
	                    # Compare with None: class index 0 is valid but falsy.
	                    if new_c is None:
	                        continue
	                    # Clipping collapses boxes outside the window to zero size.
	                    if x1 <= x0 or y1 <= y0:
	                        continue
	                    # The crop is im_h pixels wide and high.
	                    x0, x1 = x0 / im_h, x1 / im_h
	                    y0, y1 = y0 / im_h, y1 / im_h
	                    cx = (x0 + x1) / 2
	                    cy = (y0 + y1) / 2
	                    box_w = x1 - x0
	                    box_h = y1 - y0
	                    fp.write('%d %.6f %.6f %.6f %.6f\n' % (new_c, cx, cy, box_w, box_h))
	            out_txt_lst.append(dst_txt_i)
	            out_img_lst.append(dst_img_i)
	            crop_x0 += step
	            crop_x1 += step
	    return out_txt_lst, out_img_lst

	def crop_image_box(image, boxes, crop_box):
	    """Crop ``image`` and express ``boxes`` in the coordinate frame of the crop.

	    Args:
	        image: Object with a ``crop(box)`` method whose result exposes
	            ``size`` as ``(width, height)``.
	        boxes: Array-like of shape (N, 4) holding ``x0, y0, x1, y1`` rows.
	        crop_box: Crop rectangle; elements 0 and 1 are its left and upper edge.

	    Returns:
	        ``(cropped_image, new_boxes)``.  ``new_boxes`` is a float array of
	        shape (N, 4): the input shifted by the crop origin and clipped to the
	        crop size.  Boxes outside the crop end up with zero width or height.
	    """
	    img = image.crop(crop_box)
	    crop_w, crop_h = img.size
	    left, upper = crop_box[0], crop_box[1]
	    boxes = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
	    # Work on whole rows.  Splitting into columns and stacking them again
	    # produced shape (N, 4, 1), so callers ended up formatting 1-element arrays.
	    shifted = boxes - np.array([left, upper, left, upper], dtype=np.float64)
	    limits = np.array([crop_w, crop_h, crop_w, crop_h], dtype=np.float64)
	    return img, np.clip(shifted, 0, limits)


	def func(id_lst, mode):
	    """Convert the given KITTI samples and write the image list of one split.

	    Labels go to ``OUT_ROOT/labels/<mode>/`` and images to
	    ``OUT_ROOT/images/<mode>/``.  The absolute path of every written image
	    is listed, one per line, in ``OUT_ROOT/<mode>.txt``.

	    Args:
	        id_lst: Iterable of sample ids (file stems below ``KITTI_ROOT``).
	        mode: Split name used for the folder and list-file names.
	    """
	    OUT_ROOT.mkdir(parents=True, exist_ok=True)
	    # ``with`` closes the list file even if a conversion raises.
	    with open(OUT_ROOT / f'{mode}.txt', 'w') as fp:
	        for file_id in id_lst:
	            print(file_id)
	            txt_file = KITTI_ROOT / f'training/label_2/{file_id}.txt'
	            img_file = KITTI_ROOT / f'training/image_2/{file_id}.png'
	            out_txt_file = OUT_ROOT / f'labels/{mode}/{file_id}.txt'
	            out_img_file = OUT_ROOT / f'images/{mode}/{file_id}.png'
	            out_txt_file.parent.mkdir(parents=True, exist_ok=True)
	            out_img_file.parent.mkdir(parents=True, exist_ok=True)
	            _, out_imgs = convert_txt0(txt_file, img_file, out_txt_file, out_img_file)
	            for x in out_imgs:
	                fp.write(str(x.absolute())+'\n')


	if __name__ == '__main__':
	    # ndmin=1 keeps a one-line id file from collapsing into a bare string,
	    # which random.shuffle cannot handle.
	    file_id_lst = np.loadtxt(KITTI_TXT, dtype=str, ndmin=1).tolist()
	    random.shuffle(file_id_lst)  # unseeded: the split differs between runs
	    # 80 % of the ids go to the training split, the rest to validation.
	    train_num = int(len(file_id_lst) * 0.8)
	    train_id_lst = sorted(file_id_lst[:train_num])
	    val_id_lst = sorted(file_id_lst[train_num:])
	    func(train_id_lst, 'train')
	    func(val_id_lst, 'val')