Skip to content

Instantly share code, notes, and snippets.

Last active January 19, 2024 00:58
Show Gist options
  • Save e96031413/bd6b3bbf12f6f2d3f1663592b5536b13 to your computer and use it in GitHub Desktop.
Save e96031413/bd6b3bbf12f6f2d3f1663592b5536b13 to your computer and use it in GitHub Desktop.
import os
import cv2
import json
import random
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm
# Dataset split to convert; it selects the image sub-folder below.
split = 'val'

# Class order matters: a class's position in this list, plus one, is its
# category id.
class_names = [
    'person',
    'bicycle',
    'car',
]

# Output root, the folder holding this split's images, and the folder for
# the annotation JSON.
output_dir = '/home/Yanwei_Liu/New_Datasets/PASCALRAW/'
data_path = f'/home/Yanwei_Liu/New_Datasets/PASCALRAW/images_crop_new/{split}'
jsons_path = os.path.join(output_dir, 'annotations/')
def get_annotations(xml_path, class_names):
    """Read the object boxes of one VOC-style XML file as COCO-style tuples.

    Args:
        xml_path: Path (or open file object) of the XML annotation; it is
            handed straight to ``ET.parse``.
        class_names: Ordered sequence of class names. A class's 1-based
            position in this sequence becomes its category id.

    Returns:
        A list with one ``(x, y, width, height, category_id)`` tuple of ints
        per ``<object>`` element, where ``x, y`` are ``xmin, ymin`` and the
        size is ``xmax - xmin`` by ``ymax - ymin``.

    Raises:
        ValueError: If an object's class name is not in ``class_names`` or a
            coordinate is not numeric.
    """

    def _to_int(text):
        # Integer strings take the exact path. Coordinates written as floats
        # ("48.0") are also accepted and truncated toward zero.
        try:
            return int(text)
        except ValueError:
            return int(float(text))

    root = ET.parse(xml_path).getroot()
    annotations = []
    for obj_node in root.findall('object'):
        cls = obj_node.find('name').text
        try:
            category_id = class_names.index(cls) + 1
        except ValueError:
            # Same exception type as the bare .index() failure, but it now
            # names the offending file and class.
            raise ValueError(
                f'Unknown class {cls!r} in {xml_path}; '
                f'expected one of {list(class_names)}'
            ) from None

        box = obj_node.find('bndbox')
        xmin, ymin, xmax, ymax = (
            _to_int(box.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        # Stored as corner + size rather than two corners.
        annotations.append((xmin, ymin, xmax - xmin, ymax - ymin, category_id))
    return annotations
# Convert every image/XML pair of the chosen split into one COCO-style
# detection annotation file.

# The JSON is written into jsons_path, so that is the directory that has to
# exist before the dump at the end.
os.makedirs(jsons_path, exist_ok=True)

# Static header sections of the annotation file.
data = {}
data['info'] = {
    'description': '',
    'url': '',
    'version': '1.0',
    'year': 2024,
    'contributor': 'Yanwei',
    'date_created': ''}
data['categories'] = [{'supercategory': 'deterioration', 'id': i+1, 'name': cls} for i, cls in enumerate(class_names)]
data['licenses'] = []
annotation_data = data.copy()

# Match on the real extension, case-insensitively, so "a.JPG" is picked up.
image_exts = {'.jpg', '.png', '.tiff'}
imgs_list = sorted(
    file for file in os.listdir(data_path)
    if os.path.splitext(file)[1].lower() in image_exts
)

img_id = 1
ann_id = 1
all_imgs = []
all_anns = []
for img in tqdm(imgs_list):
    img_path = os.path.join(data_path, img)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None, not by
        # raising; fail with the path instead of an opaque AttributeError.
        raise OSError(f'Could not read image: {img_path}')
    h, w = image.shape[:2]

    # Every image gets an entry, so annotations can refer to it by image_id.
    all_imgs.append({'id': img_id,
                     'file_name': img,
                     'width': w,
                     'height': h})

    # The XML lives in the parallel "annotations" tree under the same stem;
    # splitext keeps stems that themselves contain dots intact.
    xml_path = os.path.join(data_path.replace('images_crop_new', 'annotations'),
                            os.path.splitext(img)[0] + '.xml')
    annotations = get_annotations(xml_path, class_names)
    for ann in annotations:
        # ann is (x, y, width, height, category_id)
        all_anns.append({'id': ann_id,
                         'image_id': img_id,
                         'category_id': ann[-1],
                         'segmentation': [],
                         'bbox': [ann[0], ann[1], ann[2], ann[3]],
                         'area': ann[2]*ann[3],
                         'iscrowd': 0})
        ann_id += 1  # annotation ids must be unique across the whole file
    img_id += 1  # one id per image

annotation_data['images'] = all_imgs
annotation_data['annotations'] = all_anns
with open(os.path.join(jsons_path, f'instances_{split}2017.json'), 'w') as json_file:
    json.dump(annotation_data, json_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment