Last active
January 19, 2024 00:58
-
-
Save e96031413/bd6b3bbf12f6f2d3f1663592b5536b13 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
https://github.com/d246810g2000/YOLOX/blob/main/datasets/train_val_data_split_coco.py | |
""" | |
import os | |
import cv2 | |
import json | |
import random | |
import shutil | |
import xml.etree.ElementTree as ET | |
from tqdm import tqdm | |
# Dataset split to convert; it selects the image folder and names the output file.
split = 'val'

# Dataset root, the image folder for the chosen split, and the folder that
# receives the generated JSON file.
output_dir = '/home/Yanwei_Liu/New_Datasets/PASCALRAW/'
data_path = f'/home/Yanwei_Liu/New_Datasets/PASCALRAW/images_crop_new/{split}'
jsons_path = os.path.join(output_dir, 'annotations/')

# Category names; a class's 1-based position in this list is its category id.
class_names = [
    'person',
    'bicycle',
    'car',
]
def get_annotations(xml_path, class_names):
    """Read the object bounding boxes from a Pascal VOC-style XML file.

    Args:
        xml_path: Path (or file object) of the XML annotation to parse.
        class_names: Ordered list of class names; an object's category id is
            its 1-based position in this list.

    Returns:
        A list of ``(x, y, width, height, category_id)`` integer tuples, one
        per ``<object>`` element, where ``(x, y)`` is the ``xmin``/``ymin``
        corner and the size is ``xmax - xmin`` by ``ymax - ymin``.

    Raises:
        ValueError: If an object's name is not in ``class_names``.
    """
    def _coord(bndbox, tag):
        # Integer strings are converted exactly; a decimal string such as
        # "12.0" makes int() raise, so fall back to truncating the float.
        text = bndbox.find(tag).text
        try:
            return int(text)
        except ValueError:
            return int(float(text))

    root = ET.parse(xml_path).getroot()
    annotations = []
    for obj in root.findall('object'):
        # Tolerate whitespace around the class name inside the XML element.
        name = (obj.find('name').text or '').strip()
        if name not in class_names:
            raise ValueError(
                f'unknown class {name!r} in {xml_path}; expected one of {class_names}')
        category_id = class_names.index(name) + 1

        bndbox = obj.find('bndbox')
        xmin = _coord(bndbox, 'xmin')
        ymin = _coord(bndbox, 'ymin')
        xmax = _coord(bndbox, 'xmax')
        ymax = _coord(bndbox, 'ymax')
        annotations.append((xmin, ymin, xmax - xmin, ymax - ymin, category_id))
    return annotations
# Create the annotations folder itself, not just its parent: testing only
# output_dir would skip creation whenever the dataset root already exists,
# and the final open() would then fail on the missing folder.
os.makedirs(jsons_path, exist_ok=True)

# Header sections of the output JSON (info / categories / licenses).
data = {}
data['info'] = {
    'description': '',
    'url': '',
    'version': '1.0',
    'year': 2024,
    'contributor': 'Yanwei',
    'date_created': ''}
# Category ids are 1-based and follow the order of class_names.
data['categories'] = [{'supercategory': 'deterioration', 'id': i+1, 'name': cls} for i, cls in enumerate(class_names)]
data['licenses'] = []

annotation_data = data.copy()

# Image files of the split, sorted so that ids are assigned deterministically.
image_exts = ('jpg', 'png', 'tiff')
imgs_list = sorted(
    name for name in os.listdir(data_path)
    if os.path.splitext(name)[1][1:] in image_exts
)

# XML files live in a parallel tree where 'images_crop_new' is replaced by
# 'annotations'; the folder is the same for every image, so compute it once.
xml_dir = data_path.replace('images_crop_new', 'annotations')

img_id = 1
ann_id = 1
all_imgs = []
all_anns = []
for img in tqdm(imgs_list):
    img_path = os.path.join(data_path, img)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError(f'Could not read image: {img_path}')
    h, w = image.shape[:2]

    # Strip only the final extension so names containing extra dots
    # (e.g. "a.b.jpg") still map to "a.b.xml".
    xml_path = os.path.join(xml_dir, os.path.splitext(img)[0] + '.xml')

    all_imgs.append({'id': img_id,
                     'width': w,
                     'height': h,
                     'license': 1,
                     'file_name': img})

    annotations = get_annotations(xml_path, class_names)
    for ann in annotations:
        # Each ann is (x, y, width, height, category_id).
        all_anns.append({'id': ann_id,
                         'image_id': img_id,
                         'category_id': ann[-1],
                         'segmentation': [],
                         'bbox': [ann[0], ann[1], ann[2], ann[3]],
                         'area': ann[2]*ann[3],
                         'iscrowd': 0})
        ann_id += 1
    img_id += 1

annotation_data['images'] = all_imgs
annotation_data['annotations'] = all_anns

with open(os.path.join(jsons_path, f'instances_{split}2017.json'), 'w') as json_file:
    json.dump(annotation_data, json_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment