Last active
July 22, 2022 09:25
-
-
Save AlexeyGy/5e9c5a177db31569c20c76ad4dc39284 to your computer and use it in GitHub Desktop.
Convert MS COCO Annotation to Pascal VOC format for python3 and with tqdm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import baker | |
import json | |
import os | |
from cytoolz import merge, join, groupby | |
from cytoolz.compatibility import iteritems | |
from cytoolz.curried import update_in | |
from itertools import starmap | |
from collections import deque | |
from lxml import etree, objectify | |
from scipy.io import savemat | |
from scipy.ndimage import imread | |
from pathlib import Path | |
from tqdm import tqdm | |
def keyjoin(leftkey, leftseq, rightkey, rightseq): | |
return starmap(merge, join(leftkey, leftseq, rightkey, rightseq)) | |
def root(folder, filename, width, height): | |
E = objectify.ElementMaker(annotate=False) | |
return E.annotation( | |
E.folder(folder), | |
E.filename(filename), | |
E.source( | |
E.database('MS COCO 2014'), | |
E.annotation('MS COCO 2014'), | |
E.image('Flickr'), | |
), | |
E.size( | |
E.width(width), | |
E.height(height), | |
E.depth(3), | |
), | |
E.segmented(0) | |
) | |
def instance_to_xml(anno): | |
E = objectify.ElementMaker(annotate=False) | |
xmin, ymin, width, height = anno['bbox'] | |
return E.object( | |
E.name(anno['category_id']), | |
E.bndbox( | |
E.xmin(xmin), | |
E.ymin(ymin), | |
E.xmax(xmin+width), | |
E.ymax(ymin+height), | |
), | |
) | |
@baker.command | |
def write_categories(coco_annotation, dst): | |
with open(os.path.abspath(coco_annotation)) as file: | |
content = json.load(file) | |
categories = tuple( d['name'] for d in content['categories']) | |
savemat(os.path.abspath(dst), {'categories': categories}) | |
def get_instances(coco_annotation): | |
coco_annotation = os.path.abspath(coco_annotation) | |
with open(coco_annotation) as file: | |
content = json.load(file) | |
categories = {d['id']: d['name'] for d in content['categories']} | |
return categories, tuple(keyjoin('id', content['images'], 'image_id', content['annotations'])) | |
def rename(name, year=2014): | |
out_name = os.path.splitext(name)[0] | |
# out_name = out_name.split('_')[-1] | |
# out_name = '{}_{}'.format(year, out_name) | |
return out_name | |
@baker.command | |
def create_imageset(annotations, dst): | |
annotations = os.path.abspath(annotations) | |
dst = os.path.abspath(dst) | |
val_txt = os.path.join(dst,'val.txt') | |
train_txt = os.path.join(dst, 'train.txt') | |
for val in annotations.listdir('*val*'): | |
val_txt.write_text('{}\n'.format(os.path.splitext(val.basename())[0]), append=True) | |
for train in annotations.listdir('*train*'): | |
train_txt.write_text('{}\n'.format(os.path.splitext(train.basename())[0]), append=True) | |
@baker.command | |
def create_annotations(dbPath, subset, dst='annotations_voc'): | |
""" converts annotations from coco to voc pascal. | |
parameters: | |
dbPath: folder which contains the annotations subfolder which contains the annotations file in .json format. | |
Note: the corresponding images should be in the train2014 or val2014 subfolder. | |
subset: which of the .json files should be opened e.g. train for the "instances_train2014.json" file | |
dst: destination folder for the annotations. Will be created if it doesn't exist e.g. "annotations_voc" | |
""" | |
if not os.path.exists(dst): | |
os.makedirs(dst) | |
annotations_path = os.path.join(os.path.abspath(dbPath),'annotations','instances_'+str(subset)+'2014.json') | |
images_Path = os.path.join(os.path.abspath(dbPath),str(subset)+'2014') | |
print("reading data...") | |
categories , instances= get_instances(annotations_path) | |
print("finished reading data") | |
dst = os.path.abspath(dst) | |
for i, instance in tqdm(enumerate(instances),desc="rewriting categories"): | |
instances[i]['category_id'] = categories[instance['category_id']] | |
for name, group in tqdm(iteritems(groupby('file_name', instances)), total=len(groupby('file_name', instances)), desc="processing annotations"): | |
img = imread(os.path.abspath(os.path.join(images_Path,name))) | |
if img.ndim == 3: | |
out_name = rename(name) | |
annotation = root('VOC2014', '{}.jpg'.format(out_name), group[0]['height'], group[0]['width']) | |
for instance in group: | |
annotation.append(instance_to_xml(instance)) | |
etree.ElementTree(annotation).write(os.path.join(dst, '{}.xml'.format(out_name))) | |
#print( out_name) | |
#else: | |
#print (instance['file_name']) | |
if __name__ == '__main__': | |
baker.run() |
Ah, thanks! This is useful.
Do you have any license or copyright snippet?
Hey Samuel, I would use the same license as the creator, the MIT license.
https://github.com/experiencor/keras-yolo2/blob/master/LICENSE
@AlexeyGy can you write command file to use this script, thanks you so much
Check out the baker docs here.
An example invocation could be:
coco2pascal.py create_annotations /home/somePathContainingAnnotationsFile train
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
update: now creates the output dir automatically