Last active
July 19, 2024 01:51
-
-
Save travishsu/6efa5c9fb92ece37b4748036026342f6 to your computer and use it in GitHub Desktop.
Convert COCO format segmentation annotation to LabelMe format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import shutil
import subprocess
import warnings

import numpy as np
import pandas as pd
from skimage.measure import find_contours
class CocoDatasetHandler:
    """Convert COCO-style segmentation annotations into LabelMe records.

    The constructor loads a COCO annotation JSON file, joins every
    annotation with its image and category rows, and pre-computes the
    polygon list of each annotation.  ``coco2labelme`` then builds one
    LabelMe-style dict per image in ``self.labelme`` and ``save_labelme``
    writes those dicts to disk.
    """

    def __init__(self, jsonpath, imgpath):
        """Load the annotation file and prepare the joined annotation table.

        Args:
            jsonpath: Path of the COCO annotation JSON file.  It must contain
                the ``images``, ``annotations`` and ``categories`` lists.
            imgpath: Directory the image files are copied from by
                ``save_labelme``.
        """
        with open(jsonpath, 'r', encoding='utf-8') as jsonfile:
            ann = json.load(jsonfile)

        images = pd.DataFrame.from_dict(ann['images'])
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        # Attach the image columns (file_name, height, ...) and the category
        # columns (name, ...) to every annotation row.
        annotations = annotations.merge(
            images.set_index('id'), left_on='image_id', right_index=True)
        annotations = annotations.merge(
            categories, left_on='category_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))

        self.annotations = annotations
        self.labelme = {}
        self.imgpath = imgpath
        # Second view of the image table, keyed by file name, used to look up
        # the image size while building the LabelMe records.
        self.images = images.set_index('file_name')

    def coco2shape(self, row):
        """Return the polygons of one annotation row.

        The decoder is chosen from the segmentation payload itself: a dict
        (``counts``/``size``) is decoded as RLE, anything else is read as a
        list of flat polygons.  Dispatching on the payload instead of on
        ``iscrowd`` means a row whose ``iscrowd`` is neither 0 nor 1 no
        longer fails with an unbound local variable.

        Args:
            row: Annotation row providing a ``segmentation`` entry.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer pairs.
        """
        if isinstance(row['segmentation'], dict):
            return self.rle2shape(row)
        return self.polygon2shape(row)

    def rle2shape(self, row):
        """Trace the outlines of an RLE-encoded mask as polygons.

        Args:
            row: Annotation row whose ``segmentation`` is a dict holding the
                uncompressed run lengths in ``counts`` and the mask size in
                ``size``.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer pairs.
        """
        segmentation = row['segmentation']
        mask = self._rle_decode(segmentation['counts'], segmentation['size'])

        # Surround the mask with a one-pixel background border so that
        # regions touching the image edge still yield closed contours.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)

        # Contour points are indexed (row, col) in the padded frame: swap
        # them to (x, y) and subtract the border offset.  int() truncates
        # toward zero, so the half-pixel overshoot at the border (-0.5)
        # lands back on 0.
        return [
            [[int(x - 1), int(y - 1)] for y, x in contour]
            for contour in contours
        ]

    def _rle_decode(self, rle, shape):
        """Decode uncompressed run lengths into a 2-D boolean mask.

        Runs alternate between background and foreground, starting with
        background, and fill the mask column by column.

        Args:
            rle: Sequence of integer run lengths.
            shape: Mask size; ``shape[0]`` rows by ``shape[1]`` columns.

        Returns:
            Boolean array of shape ``(shape[0], shape[1])``.

        Raises:
            TypeError: If ``rle`` is a compressed (string) encoding.
            ValueError: If a run falls outside the mask.
        """
        if isinstance(rle, (str, bytes)):
            raise TypeError("compressed RLE counts are not supported")

        # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the
        # equivalent dtype.
        flat = np.zeros(shape[0] * shape[1], dtype=bool)
        start = 0
        for idx, run in enumerate(rle):
            end = start + run
            if run != 0:
                if not 0 <= start < end <= flat.shape[0]:
                    raise ValueError(
                        "shape: {} s {} e {} r {}".format(shape, start, end, run))
                # Odd-numbered runs are foreground.
                if idx % 2 == 1:
                    flat[start:end] = True
            # Carry the offset forward instead of re-summing all earlier runs.
            start = end

        # Reshape and transpose: the runs are laid out column by column.
        return flat.reshape([shape[1], shape[0]]).T

    def polygon2shape(self, row):
        """Convert flat polygon coordinate lists into point lists.

        Args:
            row: Annotation row whose ``segmentation`` is a list of flat
                ``[x0, y0, x1, y1, ...]`` lists.

        Returns:
            Nested list shaped (n_polygons, n_points, 2) with integer
            coordinates.  A trailing unpaired value is ignored.
        """
        return [
            [[int(x), int(y)] for x, y in zip(points[0::2], points[1::2])]
            for points in row['segmentation']
        ]

    def coco2labelme(self):
        """Build one LabelMe record per image and store it in ``self.labelme``.

        Records are keyed by image file name.  A file name that already has
        a record is left untouched.
        """
        fill_color = [255, 0, 0, 128]
        line_color = [0, 255, 0, 128]
        shape_template = {
            'line_color': None,
            'fill_color': None,
            'shape_type': "polygon",
        }

        for filename, df in self.annotations.groupby('file_name'):
            if filename in self.labelme:
                continue

            image_row = self.images.loc[filename]
            record = {
                'imageData': None,
                'fillColor': fill_color,
                'lineColor': line_color,
                'imagePath': filename,
                'imageHeight': int(image_row.height),
                'imageWidth': int(image_row.width),
                'shapes': [],
            }
            # Every polygon of one annotation shares a group_id so that a
            # multi-part instance can be regrouped later.
            for inst_idx, (_, row) in enumerate(df.iterrows()):
                for polygon in row['shapes']:
                    shape = dict(shape_template)
                    shape.update({
                        'label': row['name'],
                        'group_id': inst_idx,
                        'points': polygon,
                    })
                    record['shapes'].append(shape)
            self.labelme[filename] = record

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        """Write the LabelMe records, and optionally the images, to ``dirpath``.

        Args:
            file_names: Image file names (keys of ``self.labelme``) to export.
            dirpath: Output directory; it is created and must not exist yet.
            save_json_only: When true, only the JSON files are written and
                the images are not copied.

        Raises:
            ValueError: If ``dirpath`` already exists.
        """
        try:
            os.makedirs(dirpath)
        except FileExistsError:
            raise ValueError(f"{dirpath} has existed") from None

        for file in file_names:
            stem = os.path.basename(os.path.splitext(file)[0])
            json_path = os.path.join(dirpath, stem + '.json')
            with open(json_path, 'w', encoding='utf-8') as jsonfile:
                json.dump(self.labelme[file], jsonfile, ensure_ascii=True, indent=2)
            if save_json_only:
                continue
            # Copy with the standard library rather than shelling out to
            # ``cp`` so this also works where that command is unavailable.
            # A failed copy stays non-fatal, as it was with the external
            # command, but is reported.
            source = os.path.join(self.imgpath, file)
            try:
                shutil.copy(source, dirpath)
            except OSError as err:
                warnings.warn(f"could not copy {source} to {dirpath}: {err}")
if __name__ == "__main__":
    # Example run: convert the train2014 instance annotations and write the
    # LabelMe files into a new directory.  Guarded so that importing this
    # module does not start the conversion.
    ds = CocoDatasetHandler('cocodataset/annotations/instances_train2014.json', 'cocodataset/train2014/')
    ds.coco2labelme()
    ds.save_labelme(ds.labelme.keys(), 'cocodataset/labelme/train2014')
不知道發這邊好不好
想請問作者有考慮新增png to json嗎?
也就是mask.png 轉成 coco.json (png2coco)
我原本參考這位作者的程式碼執行,他的資料集確實可以跑
但用我的資料去跑,產生的 COCO 裡面 segmentation 的點座標有些是負值,導致無法開啟
https://github.com/chrise96/image-to-coco-json-converter
這是我的資料集檔案連結
https://drive.google.com/drive/folders/1butmjjGTgMIEr6bq1nQ3ejjm0M7oN_YR?usp=share_link
謝謝你
你好,我最後查出原因,只有把照片改成JPG即可
但有個極為困難的點,如果圖形是甜甜圈那種形狀,中間需要挖空
似乎coco json無法表示
用實際例子說明,假設有個正方形的農田,正中間有個農舍房子
因此農田polygon要正方形減去正中間農舍
但coco json的polygon的點,是輪廓組成,因此無法形成
不曉得你這邊有無辦法解決重疊的地方把它去除
Hi @stphtan94117,
我想 `png2coco` 可能不適合放在這個 `coco2labelme` 底下。
那位作者用的是多個 polygon 放在同一個 segmentation 且設定 `iscrowd` 為 0,
但如果有沒辦法用 polygon 表示的 instance,我想你可以考慮將同一個 instance 的 mask 轉成 RLE format,且讓 `iscrowd` 設為 1。
References
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dear sir,
Thank you so much!
I got it. I made some modifications to get those multiple masks treated as one single instance under one group_id, as needed, and verified it!
Thank you so much for your time.