Created
January 6, 2017 03:11
-
-
Save jon-barker/3833adf1b32eaeaa3e35137fe9693c80 to your computer and use it in GitHub Desktop.
Script to convert MS COCO annotations file to Kitti bounding box label files Edit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""coco2kitti.py: Converts MS COCO annotation files to | |
Kitti format bounding box label files | |
__author__ = "Jon Barker" | |
""" | |
import os | |
from pycocotools.coco import COCO | |
def coco2kitti(catNms, annFile):
    """Convert an MS COCO annotation file into per-image KITTI label files.

    For every image that has at least one matching annotation, a text file
    named after the image (extension replaced by ``.txt``) is written into
    ``./labels/``. That directory must already exist.

    Each annotation becomes one line with 15 space-separated fields in the
    KITTI ground-truth layout:

        type truncated occluded alpha left top right bottom
        height width length x y z rotation_y

    Only ``type`` and the four bounding-box fields carry real data; every
    other field is written as ``0``.

    Args:
        catNms: List of COCO category names to keep. Passed straight to
            ``COCO.getCatIds``; the ``__main__`` block passes an empty list
            to request every category.
        annFile: Path to the COCO instances annotation JSON file.
    """
    # initialize COCO api for instance annotations
    coco = COCO(annFile)

    # Create an index mapping category id -> category name
    cat_idx = {c['id']: c['name'] for c in coco.loadCats(coco.getCatIds())}

    # The category filter does not depend on the image, so resolve it once
    # instead of once per image.
    catIds = coco.getCatIds(catNms=catNms)

    for img in coco.imgs:
        # Get all annotation IDs for the image
        annIds = coco.getAnnIds(imgIds=[img], catIds=catIds)

        # Only images with annotations get a label file
        if not annIds:
            continue

        # Strip only the final extension so names containing extra dots
        # (e.g. "scene.01.jpg") are not truncated at the first dot.
        img_fname = coco.imgs[img]['file_name']
        label_stem = os.path.splitext(img_fname)[0]

        with open('./labels/' + label_stem + '.txt', 'w') as label_file:
            for a in coco.loadAnns(annIds):
                # Convert COCO bbox (x, y, width, height) to KITTI
                # (left, top, right, bottom)
                left, top, width, height = a['bbox']
                kitti_bbox = [left, top, left + width, top + height]

                # Note: all whitespace will be removed from class names
                catname = cat_idx[a['category_id']].replace(" ", "")

                # KITTI expects three values (truncated, occluded, alpha)
                # between the class name and the bbox, then seven values
                # (dimensions x3, location x3, rotation_y) after it.
                fields = ([catname]
                          + ['0'] * 3
                          + [str(b) for b in kitti_bbox]
                          + ['0'] * 7)
                label_file.write(' '.join(fields) + '\n')
if __name__ == '__main__':
    # The relative paths below assume this script sits in the annotations
    # directory of the dataset
    dataDir = '..'
    dataType = 'train2014'
    annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)

    # Optional class filter: when this list is non-empty, label files are
    # produced only for images containing the listed classes, and only
    # those classes appear in the label files.
    # EXAMPLE:
    # catNms = ['person', 'dog', 'skateboard']
    catNms = []

    # Convert only into a freshly created labels folder; if one is already
    # present, do nothing so existing label files are not overwritten.
    if not os.path.isdir('./labels'):
        os.mkdir('./labels')
        coco2kitti(catNms, annFile)
    else:
        print('Labels folder already exists - exiting to prevent badness')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey there,
I've been trying to train DetectNet on the MS COCO dataset and therefore used this script to convert the annotations. I have not seen any progress in learning and found that this script only inserts two '0's after the class label on line 42.
However, the KITTI format specification is given at https://github.com/NVIDIA/DIGITS/blob/master/digits/extensions/data/objectDetection/README.md
as
Values Name Description
1 type Describes the type of object: 'Car', 'Van', 'Truck',
'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
'Misc' or 'DontCare'
1 truncated Float from 0 (non-truncated) to 1 (truncated), where
truncated refers to the object leaving image boundaries
1 occluded Integer (0,1,2,3) indicating occlusion state:
0 = fully visible, 1 = partly occluded
2 = largely occluded, 3 = unknown
1 alpha Observation angle of object, ranging [-pi..pi]
4 bbox 2D bounding box of object in the image (0-based index):
contains left, top, right, bottom pixel coordinates
3 dimensions 3D object dimensions: height, width, length (in meters)
3 location 3D object location x,y,z in camera coordinates (in meters)
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
1 score Only for results: Float, indicating confidence in
detection, needed for p/r curves, higher is better.
Here there are three values specified after the class label.
Am I missing something, or is this a bug?
cheers