Created
April 25, 2018 08:05
-
-
Save zengyu714/f2d23161e4e530dbc4c39d0c398c890e to your computer and use it in GitHub Desktop.
light-head-rcnn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from pathlib import Path | |
from tqdm import tqdm | |
from itertools import groupby | |
# Directory that holds the training images referenced by the annotations.
train_data_root = Path('/data1/kimmyzeng/dataset/Detect_COCO/train/JPEGIMAGES')
# JSON annotation file that gets converted to odgt.
train_json_file = Path('../data/Detect_COCO/train_instances.json')

# Output files: the complete converted set, and its train / val split.
trainall_odgt = Path('../data/Detect_COCO/odformat/vehicle_trainall.odgt')
trainout_odgt = Path('../data/Detect_COCO/odformat/vehicle_train.odgt')
trainval_odgt = Path('../data/Detect_COCO/odformat/vehicle_val.odgt')

# Make sure the output directory exists.  `parents=True` also creates missing
# intermediate directories, and `exist_ok=True` replaces the separate
# exists() check (which could race with another process creating the dir).
trainall_odgt.parent.mkdir(parents=True, exist_ok=True)
def to_index(image_id, first_image_id=2018042400001):
    """
    Translate an image id into an offset relative to the first image id.

    Args:
        image_id: <int>, say, 2018042400001
        first_image_id: <int>, the id that maps to index 0. Defaults to
            2018042400001, the value that used to be hard-coded here.
    Returns:
        List index, i.e., 2018042400001 - 2018042400001 = 0

    NOTE(review): using the result as a list index presumably relies on the
    ids being consecutive and stored in ascending order -- confirm against
    the annotation file.
    """
    return image_id - first_image_id
def convert_to_odgt():
    """
    Convert the annotations in `train_json_file` to odgt and write them to
    `trainall_odgt`, one JSON record per line and one record per image.

    json_file: categories
        [{'id': 1, 'name': 'car', 'supercategory': 'none'},
         {'id': 2, 'name': 'pedestrian', 'supercategory': 'none'},
         {'id': 3, 'name': 'others', 'supercategory': 'none'},
         {'id': 4, 'name': 'bus', 'supercategory': 'none'},
         {'id': 5, 'name': 'van', 'supercategory': 'none'}]

    Images whose file does not exist under `train_data_root` are skipped;
    the number of skipped images is printed at the end.

    :return: None
    """

    def image_id_of(anno):
        return anno['image_id']

    with train_json_file.open() as fi:
        json_file = json.load(fi)

    images = json_file['images']
    # Resolve category names by id rather than by list position, so the
    # result does not depend on the order of the `categories` list.
    tag_names = {tag['id']: tag['name'] for tag in json_file['categories']}
    # groupby only merges adjacent items, so sort by the grouping key first.
    annos = sorted(json_file['annotations'], key=image_id_of)

    img_not_found_count = 0
    with trainall_odgt.open(mode='w') as fo:
        for image_id, objects in tqdm(groupby(annos, key=image_id_of)):
            image_info = images[to_index(image_id)]

            # Check the image first: no point building boxes for a record
            # that is going to be dropped.
            img_path = train_data_root / image_info['file_name']
            if not img_path.exists():
                img_not_found_count += 1
                continue

            gtboxes = [
                {
                    'box': obj['bbox'],
                    'occ': 0,
                    'tag': tag_names[obj['category_id']],
                    'extra': {'ignore': 0},
                }
                for obj in objects
            ]

            # A fresh dict per image keeps records independent of each other.
            od_line = {
                'gtboxes': gtboxes,
                'fpath': str(img_path),
                # Plain string: a trailing comma here would turn the value
                # into a 1-tuple and serialize it as ["COCO"].
                'dbName': 'COCO',
                'dbInfo': {"vID": "vehicle_train", "frameID": -1},
                'width': image_info['width'],
                'height': image_info['height'],
                'ID': image_info['file_name'],
            }
            fo.write(f'{json.dumps(od_line)}\n')

    print(f'Can\'t find {img_not_found_count} images')
def split_train_val(train_ratio=0.9):
    """
    Split the records of `trainall_odgt` into a train file and a val file.

    The first int(total * train_ratio) lines are written to `trainout_odgt`
    and the remaining lines to `trainval_odgt`, keeping the input order.
    Lines are joined with a newline; no newline follows the last record.

    Args:
        train_ratio: <float>, fraction of the lines that goes to train.
    """
    with trainall_odgt.open() as src:
        records = [line.strip() for line in src]

    total = len(records)
    train_num = int(total * train_ratio)
    val_num = total - train_num
    print(f'Total nums of data: {total}\tTrain: {train_num}\tVal: {val_num}')

    # Leading slice -> train file, trailing slice -> val file.
    destinations = (
        (trainout_odgt, records[:train_num]),
        (trainval_odgt, records[train_num:]),
    )
    for out_path, part in destinations:
        with out_path.open(mode='w+') as out:
            out.write('\n'.join(part))
def _main():
    """Convert the annotations to odgt, then split the result into train / val."""
    convert_to_odgt()
    split_train_val()


if __name__ == '__main__':
    _main()
""" | |
# odgt example | |
{"gtboxes": [{"box": [250, 168, 70, 65], "occ": 0, "tag": "person", "extra": {"ignore": 0}}, | |
{"box": [435, 294, 13, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [447, 293, 12, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [460, 291, 13, 9], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [407, 287, 12, 10], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [618, 289, 11, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [512, 294, 21, 5], "occ": 0, "tag": "car", "extra": {"ignore": 0}}, | |
{"box": [285, 370, 12, 19], "occ": 0, "tag": "person", "extra": {"ignore": 0}}, | |
{"box": [61, 43, 46, 79], "occ": 0, "tag": "kite", "extra": {"ignore": 0}}, | |
{"box": [238, 158, 61, 55], "occ": 0, "tag": "surfboard", "extra": {"ignore": 0}}], | |
"fpath" : "/val2014/COCO_val2014_000000532481.jpg", | |
"dbName" : "COCO", | |
"dbInfo" : {"vID": "COCO_trainval2014_womini", "frameID": -1}, | |
"width" : 640, | |
"height" : 426, | |
"ID" : "COCO_val2014_000000532481.jpg"} | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment