Created
May 25, 2023 11:21
-
-
Save diramazioni/69c4340e6663bb3e5c7193b860fce04e to your computer and use it in GitHub Desktop.
The DIANNE annotation tool outputs XML annotations; this script converts the dataset of the ROSE challenge to COCO format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import xml.etree.ElementTree as ET | |
import json | |
import cv2 | |
from PIL import Image | |
def rose2COCO(coco_annotations, root_dir, name_ds, name_cls):
    '''
    Append the images and polygon annotations of one dataset/class pair to a
    COCO dictionary.

    For each year in 2019-2021 the XML files found in
    ``{root_dir}/{year}/ref/{name_ds}/{name_cls}`` are parsed and paired with
    the image of the same base name in
    ``{root_dir}/{year}/src/{name_ds}/{name_cls}``. Every image that is used is
    symlinked into ``{root_dir}/COCO/{name_cls}/data``.

    coco_annotations = COCO dict; must already hold the 'categories', 'images'
                       and 'annotations' lists. It is modified in place.
    root_dir = base directory of the dataset
    name_ds = name of the dataset
    name_cls = name of the class

    Returns the same ``coco_annotations`` object that was passed in.

    Annotation files whose image is missing or unreadable are reported with
    ``print`` and skipped; the same goes for clippings without a usable
    polygon or class, and for year directories that do not exist.
    '''
    # Define the directory paths
    image_dir = 'src'
    annotation_dir = 'ref'
    years = ["2019", "2020", "2021"]
    cls_dir = os.path.realpath(f'{root_dir}/COCO/{name_cls}')
    img_data = os.path.realpath(f'{cls_dir}/data')
    # make the dir for the output
    os.makedirs(img_data, exist_ok=True)

    # Loop over the directories
    for year in years:
        year_dir = os.path.join(root_dir, year)
        image_set_dir = os.path.join(year_dir, image_dir, name_ds, name_cls)
        annotation_set_dir = os.path.join(year_dir, annotation_dir, name_ds, name_cls)
        if not os.path.isdir(annotation_set_dir):
            print(f"annotation directory not found {annotation_set_dir}")
            continue

        for xml_ann in sorted(os.listdir(annotation_set_dir)):
            stem, ext = os.path.splitext(xml_ann)
            if ext != '.xml':
                continue

            xml_path = os.path.join(annotation_set_dir, xml_ann)
            clippings = _read_clippings(xml_path, coco_annotations['categories'])

            # Locate the image (converting PNG to JPG when needed)
            resolved = _resolve_image(image_set_dir, stem)
            if resolved is None:
                continue
            file_name, file_path = resolved

            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals a decode failure by returning None
                print(f"image not readable {file_path}")
                continue
            height, width = img.shape[:2]

            # Ids are 1-based and follow the current length of the lists
            image_id = len(coco_annotations['images']) + 1
            coco_annotations['images'].append({
                'id': image_id,
                'file_name': file_name,
                'height': height,
                'width': width,
            })
            # NOTE(review): images with the same base name in different
            # years/datasets would share one link in the data dir - confirm
            # that names are unique across the whole dataset.
            _link_image(file_path, f"{img_data}/{file_name}")

            # Extract the annotation information
            for segmentation, category_id in clippings:
                xs = segmentation[::2]
                ys = segmentation[1::2]
                xmin, ymin = min(xs), min(ys)
                coco_annotations['annotations'].append({
                    'id': len(coco_annotations['annotations']) + 1,
                    'image_id': image_id,
                    'category_id': category_id,
                    'segmentation': [segmentation],
                    'bbox': [xmin, ymin, max(xs) - xmin, max(ys) - ymin],
                    # area of the object polygon, not of the whole image
                    'area': _polygon_area(segmentation),
                    'iscrowd': 0,
                })
    return coco_annotations


def _polygon_area(flat_points):
    '''Return the shoelace-formula area of a flat [x0, y0, x1, y1, ...] polygon.'''
    xs = flat_points[::2]
    ys = flat_points[1::2]
    twice_area = sum(
        x0 * y1 - x1 * y0
        for x0, y0, x1, y1 in zip(xs, ys, xs[1:] + xs[:1], ys[1:] + ys[:1])
    )
    return abs(twice_area) / 2


def _category_id(label, categories):
    '''Map a clipping class label to a category id, or None when nothing matches.'''
    if not label:
        return None
    if label == "unknown":
        # unknown are classified as weed, we avoid to distinguish between
        # known weeds (2 is the id given to "weed" in the script's category table)
        return 2
    for cat in categories:
        # substring match kept from the original; the first hit wins so that
        # each clipping yields exactly one category id
        if label in cat['name']:
            return cat['id']
    return None


def _read_clippings(xml_path, categories):
    '''Parse one annotation file into a list of (flat polygon, category id) pairs.'''
    root = ET.parse(xml_path).getroot()
    clippings = []
    for clipping in root.findall('.//clipping'):
        points = clipping.find('points')
        segmentation = []
        if points is not None:
            for point in points.findall('point'):
                segmentation.append(int(point.attrib['x']))
                segmentation.append(int(point.attrib['y']))
        if len(segmentation) < 6:
            # a polygon needs at least three points
            print(f"skipping clipping with fewer than 3 points in {xml_path}")
            continue

        label = (clipping.findtext('class') or '').strip()
        category_id = _category_id(label, categories)
        if category_id is None:
            print(f"skipping clipping with unsupported class {label!r} in {xml_path}")
            continue
        # polygon and category travel together so they can never get out of step
        clippings.append((segmentation, category_id))
    return clippings


def _resolve_image(image_set_dir, stem):
    '''Return (file_name, file_path) of the JPG for *stem*, converting a PNG if required.

    Returns None (after printing a message) when neither a JPG nor a PNG exists.
    '''
    jpg_name = stem + '.jpg'
    jpg_path = os.path.realpath(os.path.join(image_set_dir, jpg_name))
    if os.path.exists(jpg_path):
        return jpg_name, jpg_path

    png_path = os.path.realpath(os.path.join(image_set_dir, stem + '.png'))
    if not os.path.exists(png_path):
        print(f"image not found {png_path}")
        return None

    with Image.open(png_path) as img_png:
        # JPEG cannot store alpha or palette data, so normalise to RGB first
        img_png.convert('RGB').save(jpg_path)
    # report the JPG name so the entry matches the file that is linked and
    # stays the same on later runs, when the JPG already exists
    return jpg_name, jpg_path


def _link_image(file_path, target):
    '''Create the symlink *target* -> *file_path* unless a valid entry already exists.'''
    if os.path.islink(target) and not os.path.exists(target):
        # dangling link left by an earlier run: replace it instead of failing
        os.remove(target)
    if not os.path.lexists(target):
        os.symlink(file_path, target)
if __name__ == "__main__":
    # Skeleton of the COCO document; the two categories get ids 1 (crop) and 2 (weed).
    coco_annotations = {
        'licenses': [],
        'info': {},
        "categories": [
            {"id": cat_id, "name": cat_name, "supercategory": ""}
            for cat_id, cat_name in enumerate(("crop", "weed"), start=1)
        ],
        'images': [],
        'annotations': [],
    }

    root_dir = './dataset/rose'
    ds_names = ['weedelec', 'bipbip', 'pead', 'roseau']
    cls_names = ['bean', 'maize']

    # One labels.json is produced per class, gathering all datasets.
    for name_cls in cls_names:
        print(name_cls)
        # start each class from empty image / annotation lists
        coco_annotations.update(images=[], annotations=[])
        label_files = []  # not read anywhere in this script

        for name_ds in ds_names:
            print(" " + name_ds)
            coco_annotations = rose2COCO(coco_annotations, root_dir, name_ds, name_cls)

        # number of images collected for this class
        print("\n " + str(len(coco_annotations['images'])))

        # Save the COCO annotations to a JSON file
        cls_dir = os.path.realpath(f'{root_dir}/COCO/{name_cls}')
        label_file = os.path.realpath(f'{cls_dir}/labels.json')
        with open(label_file, 'w') as out:
            json.dump(coco_annotations, out, indent=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment