Skip to content

Instantly share code, notes, and snippets.

@wesslen
Last active April 6, 2023 13:45
Show Gist options
  • Select an option

  • Save wesslen/f345e7155f2ce7800e52bdda66a2c9b2 to your computer and use it in GitHub Desktop.

Select an option

Save wesslen/f345e7155f2ce7800e52bdda66a2c9b2 to your computer and use it in GitHub Desktop.
Convert Prodigy JSONL bounding-box annotations to COCO format
import json
from typing import List
import srsly
import typer
# Typer application object; CLI commands are registered on it via @app.command().
app = typer.Typer()
def _polygon_area(xs, ys):
    """Return the absolute area enclosed by a polygon (shoelace formula).

    ``xs`` and ``ys`` are parallel lists of vertex coordinates. Fewer than
    three vertices yields 0.
    """
    # Index i - 1 wraps to the last vertex for i == 0, closing the polygon.
    twice_area = sum(
        xs[i - 1] * ys[i] - xs[i] * ys[i - 1] for i in range(len(xs))
    )
    return abs(twice_area) / 2


def _span_geometry(span):
    """Return the COCO geometry fields for one Prodigy span.

    Rect spans produce ``bbox``, ``area`` and a four-corner ``segmentation``.
    Polygon spans produce a flat ``segmentation`` plus a ``bbox`` and ``area``
    derived from the vertices. Any other span type yields no geometry fields.
    """
    shape = span["type"]
    if shape == "rect":
        x, y = span["x"], span["y"]
        w, h = span["width"], span["height"]
        return {
            "bbox": [x, y, w, h],
            "area": w * h,
            "segmentation": [[x, y, x, y + h, x + w, y + h, x + w, y]],
        }
    if shape == "polygon":
        points = span["points"]
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        # COCO polygons are flat [x1, y1, x2, y2, ...] lists, not [x, y] pairs.
        geometry = {
            "segmentation": [[coord for pair in zip(xs, ys) for coord in pair]]
        }
        if xs:
            left, top = min(xs), min(ys)
            geometry["bbox"] = [left, top, max(xs) - left, max(ys) - top]
            geometry["area"] = _polygon_area(xs, ys)
        return geometry
    return {}


def convert_to_coco(input_file: str, output_file: str):
    """Convert a Prodigy image-annotation JSONL file to a COCO JSON file.

    Each input record becomes one COCO image (ids start at 1, in file order).
    Each span becomes one annotation. Categories are created on first sight
    of a label and numbered from 1 in order of appearance.

    Args:
        input_file: Path to the JSONL file; every record must provide
            "image", "width" and "height". Records without "spans" are kept
            as images with no annotations.
        output_file: Path the COCO dictionary is written to as JSON.

    Raises:
        KeyError: If a record or span lacks a required key.
    """
    coco_dict = {
        "info": {},
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": [],
    }
    # Maps label name -> COCO category id.
    category_ids = {}

    # Iterate the reader directly; there is no need to hold the whole file.
    for image_id, image_data in enumerate(srsly.read_jsonl(input_file), start=1):
        coco_dict["images"].append({
            "id": image_id,
            "width": image_data["width"],
            "height": image_data["height"],
            "file_name": image_data["image"],
        })

        # A record may carry no spans at all (missing key or null value).
        for span in image_data.get("spans") or []:
            label = span["label"]
            if label not in category_ids:
                category_ids[label] = len(category_ids) + 1
                coco_dict["categories"].append({
                    "id": category_ids[label],
                    "name": label,
                    "supercategory": "",
                })

            annotation = {
                "id": len(coco_dict["annotations"]) + 1,
                "image_id": image_id,
                "category_id": category_ids[label],
                "iscrowd": 0,
            }
            annotation.update(_span_geometry(span))
            coco_dict["annotations"].append(annotation)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(coco_dict, f)
@app.command()
def main(input_file: str, output_file: str):
    """
    Convert a Prodigy Bounding Box JSONL file to COCO format.
    Args:
    input_file: The path to the input Prodigy Bounding Box JSONL file.
    output_file: The path to the output COCO JSON file.
    """
    # NOTE: Typer uses the docstring above as the command's --help text, so
    # its wording is user-facing output.
    # Thin CLI wrapper: hand both paths straight to the converter.
    convert_to_coco(input_file=input_file, output_file=output_file)
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
    app()
{"image":"https://images.unsplash.com/photo-1434993568367-36f24aa04d2f?w=400","width":400,"height":267,"spans":[{"label":"SKATEBOARD","color":"yellow","x":47.5,"y":171.4,"width":109.1,"height":67.4,"points":[[47.5,171.4],[47.5,238.8],[156.6,238.8],[156.6,171.4]],"center":[102.05,205.1],"type":"rect"},{"label":"PERSON","color":"cyan","points":[[256,39.5],[237,78.5],[269,116.5],[286,67.5]],"type":"polygon"}]}
{"image":"https://images.unsplash.com/photo-1434993568367-36f24aa04d2f?w=500","width":500,"height":267,"spans":[{"label":"SKATEBOARD","color":"yellow","x":47.5,"y":171.4,"width":109.1,"height":67.4,"points":[[47.5,171.4],[47.5,238.8],[156.6,238.8],[156.6,171.4]],"center":[102.05,205.1],"type":"rect"},{"label":"PERSON","color":"cyan","points":[[256,39.5],[237,78.5],[269,116.5],[286,67.5]],"type":"polygon"}]}
{"info": {}, "licenses": [], "categories": [{"id": 1, "name": "SKATEBOARD", "supercategory": ""}, {"id": 2, "name": "PERSON", "supercategory": ""}], "images": [{"id": 1, "width": 400, "height": 267, "file_name": "https://images.unsplash.com/photo-1434993568367-36f24aa04d2f?w=400"}, {"id": 2, "width": 500, "height": 267, "file_name": "https://images.unsplash.com/photo-1434993568367-36f24aa04d2f?w=500"}], "annotations": [{"id": 1, "image_id": 1, "category_id": 1, "iscrowd": 0, "bbox": [47.5, 171.4, 109.1, 67.4], "area": 7353.34, "segmentation": [[47.5, 171.4, 47.5, 238.8, 156.6, 238.8, 156.6, 171.4]]}, {"id": 2, "image_id": 1, "category_id": 2, "iscrowd": 0, "segmentation": [[[256, 39.5], [237, 78.5], [269, 116.5], [286, 67.5]]]}, {"id": 3, "image_id": 2, "category_id": 1, "iscrowd": 0, "bbox": [47.5, 171.4, 109.1, 67.4], "area": 7353.34, "segmentation": [[47.5, 171.4, 47.5, 238.8, 156.6, 238.8, 156.6, 171.4]]}, {"id": 4, "image_id": 2, "category_id": 2, "iscrowd": 0, "segmentation": [[[256, 39.5], [237, 78.5], [269, 116.5], [286, 67.5]]]}]}
@wesslen
Copy link
Copy Markdown
Author

wesslen commented Apr 4, 2023

To run: python jsonl-to-coco.py prodigy-boundingbox-input.jsonl prodigy-coco.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment