Skip to content

Instantly share code, notes, and snippets.

@cuuupid
Last active February 17, 2020 20:17
Show Gist options
  • Save cuuupid/2766f3ae09d1f325eced190dc5bba10d to your computer and use it in GitHub Desktop.
Save cuuupid/2766f3ae09d1f325eced190dc5bba10d to your computer and use it in GitHub Desktop.
Labelbox to YOLOnet
import sys, os
import json
import glob
from PIL import Image
USAGE = "python labelbox_label.py /path/to/training/image/directory/ /path/to/labelbox/exported/xy/data.json"

# File extensions picked up from the image directory.
IMAGE_EXTENSIONS = ('jpg', 'jpeg', 'png')

# Validate the command line explicitly: `assert` is stripped under `python -O`,
# which would let a bad invocation fall through to an obscure error later.
args = sys.argv[1:]
if len(args) < 2:
    sys.exit(f'Insufficient arguments: {USAGE}')
if len(args) > 2:
    sys.exit(f'Incorrect usage: {USAGE}')
IMAGE_DIR = args[0]
JSON_FILE = args[1]

# The export is expected to be a JSON list with one entry per labelled image.
with open(JSON_FILE, encoding='utf-8') as f:
    data = json.load(f)
print(f'Found {len(data)} datapoints.')

# Sorted so that train.txt comes out in a stable order.
images = sorted(
    path
    for extension in IMAGE_EXTENSIONS
    for path in glob.glob(f'{IMAGE_DIR}/*.{extension}')
)
print(f'Found {len(images)} images.')

# Collect every label name used anywhere in the export. Entries whose "Label"
# is not a mapping are ignored here.
# NOTE(review): presumably skipped images export a non-dict "Label" - confirm.
class_names = {
    str(key)
    for d in data
    if isinstance(d.get("Label"), dict)
    for key in d["Label"]
}
# "START" is the offset marker read by the per-image loop, not an object
# class. discard() tolerates exports that do not contain it, where remove()
# raised ValueError.
class_names.discard('START')
# Sorted so a class keeps the same index from one run to the next; plain set
# iteration order varies between interpreter runs for strings.
classes = sorted(class_names)
print(f'Found {len(classes)} classes.')
def convert_bbox(bbox, width, height, offset_x=0, offset_y=0):
    """Convert a list of corner points into a normalised centre/size box.

    Args:
        bbox: Non-empty sequence of mappings with numeric "x" and "y" keys
            describing the outline of one annotated shape.
        width: Image width used to normalise the horizontal values.
        height: Image height used to normalise the vertical values.
        offset_x: Horizontal reference; subtracted from the centre x when
            positive, ignored otherwise.
        offset_y: Vertical reference; the returned centre y is
            ``offset_y - centre_y`` clamped at zero.

    Returns:
        Tuple ``(x, y, w, h)``: the adjusted centre and the extent of the
        axis-aligned bounding box, each divided by the image width/height.

    Raises:
        ValueError: If ``bbox`` contains no coordinates.
    """
    # Extract each axis once instead of rebuilding the lists for min and max.
    xs = [coord["x"] for coord in bbox]
    ys = [coord["y"] for coord in bbox]
    if not xs:
        raise ValueError('bbox must contain at least one coordinate')

    left, low = min(xs), min(ys)
    w = max(xs) - left
    h = max(ys) - low

    # Move from the minimum corner to the centre of the box.
    x = left + w / 2
    y = low + h / 2

    x -= max(offset_x, 0)
    # NOTE(review): unlike x, y is measured *from* offset_y and clamped at 0,
    # so with the default offset_y=0 the result is always 0. This looks like a
    # flip for exports whose y axis runs the opposite way - confirm against
    # the export format before changing it.
    y = max(offset_y - y, 0)

    return x / width, y / height, w / width, h / height
# Write one label file per image, next to the image, with the same base name
# and a .txt extension. Each line is "<class index> <x> <y> <w> <h>".

# Resolve class indices through a dict instead of scanning `classes` per box.
class_ids = {name: index for index, name in enumerate(classes)}

for image_path in images:
    # Keep only the file name; the directory part is always IMAGE_DIR.
    image_name = os.path.basename(image_path)

    # Get the width and height of the original image, closing the file
    # handle as soon as the size has been read.
    with Image.open(f'{IMAGE_DIR}/{image_name}') as img:
        width, height = img.size

    # Next we look up the image in the JSON data: the part of the file name
    # before the first space must occur inside the entry's "Labeled Data".
    lookup_key = image_name.split(' ')[0]
    annotation = next(
        (d for d in data if lookup_key in d["Labeled Data"]),
        None,
    )
    if annotation is None:
        print(f'{image_name}: no matching entry in the export, skipping',
              file=sys.stderr)
        continue

    labels = annotation["Label"]
    # The START marker supplies the reference offset passed to convert_bbox;
    # without it the boxes cannot be converted, so skip the image before
    # creating an empty label file.
    if not isinstance(labels, dict) or not labels.get("START"):
        print(f'{image_name}: no START marker in its labels, skipping',
              file=sys.stderr)
        continue
    startbox = labels["START"][0]
    offset_x, offset_y = startbox["x"], startbox["y"]

    written = 0
    label_path = f'{IMAGE_DIR}/{os.path.splitext(image_name)[0]}.txt'
    with open(label_path, 'w') as label:
        for class_name, shapes in labels.items():
            if class_name == "START":
                continue
            # Emit every shape drawn for this class, not only the first one.
            for shape in shapes:
                x, y, w, h = convert_bbox(shape, width, height, offset_x, offset_y)
                label.write(f'{class_ids[class_name]} {x} {y} {w} {h}\n')
                written += 1
    print(f'{image_name}: {written} annotations')
# train.txt: one image path per line, taken from `images` unchanged.
with open(f'{IMAGE_DIR}/train.txt', 'w') as image_paths:
    image_paths.writelines(f'{path}\n' for path in images)

# custom.names: one class name per line, in class-index order.
with open(f'{IMAGE_DIR}/custom.names', 'w') as names:
    names.writelines(f'{classname}\n' for classname in classes)

# custom.cfg: key=value pairs pointing at the two files written above.
train_list = f'{IMAGE_DIR}/train.txt'
config_text = (
    f'classes={len(classes)}\n'
    f'train={train_list}\n'
    f'valid={train_list}\n'  # TODO: add support for validation set
    f'names={IMAGE_DIR}/custom.names\n'
    'backup=backup'  # TODO: add custom checkpointing directory
)
with open(f'{IMAGE_DIR}/custom.cfg', 'w') as config:
    config.write(config_text)

# Remind the user of the manual edits still needed in the network config.
print("Change the classes at the bottom of and in the middle of the architecture configuration you're using to match the number of classes you have.")
print("In the same file also change filters of the first conv. layer, the value should be num/3*(classes+5)")
@getcontrol
Copy link

python labelbox_label.py images/ export.json

where images/ contains my images that were annotated and export.json is the Labelbox JSON export for those labelled images.

Get this error :

Found 5 datapoints.
Found 0 images.
Traceback (most recent call last):
  File "labelbox_label.py", line 24, in <module>
    classes.remove('START')
ValueError: list.remove(x): x not in list

@getcontrol
Copy link

Fixed one issue: I had .png files in images/ instead of .jpg.

Still an error in output


Found 5 datapoints.
Found 5 images.
Traceback (most recent call last):
  File "labelbox_label.py", line 24, in <module>
    classes.remove('START')
ValueError: list.remove(x): x not in list

@cuuupid
Copy link
Author

cuuupid commented Dec 3, 2019

I unfortunately wrote this script a very long time ago so Labelbox may have changed their output in the meantime. If there is no longer a start token in the output you can just remove this line :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment