-
-
Save travishsu/6efa5c9fb92ece37b4748036026342f6 to your computer and use it in GitHub Desktop.
import json
import os
import shutil
import subprocess
import warnings

import numpy as np
import pandas as pd
from skimage.measure import find_contours
class CocoDatasetHandler:
    """Convert COCO instance annotations into per-image labelme records.

    The constructor loads a COCO annotation JSON, joins every annotation
    with its image and category rows, and converts each segmentation into a
    list of integer polygons.  ``coco2labelme`` groups those polygons by
    image file and ``save_labelme`` writes one labelme JSON per image.

    Attributes:
        annotations: One row per COCO annotation, merged with the columns of
            its image and category, plus a ``shapes`` column of polygons.
        images: The ``images`` table of the JSON, indexed by ``file_name``.
        labelme: Mapping ``file_name -> labelme record``; filled by
            ``coco2labelme``.
        imgpath: Directory that holds the image files.
    """

    def __init__(self, jsonpath, imgpath):
        """Load ``jsonpath`` and pre-compute the polygons of each annotation.

        Args:
            jsonpath: Path of the COCO annotation JSON file.
            imgpath: Directory containing the images the JSON refers to.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(jsonpath, 'r', encoding='utf-8') as jsonfile:
            ann = json.load(jsonfile)

        raw_images = pd.DataFrame.from_dict(ann['images'])
        images = raw_images.set_index('id')
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        # Attach image columns (file_name, height, ...) and category columns
        # (name, ...) to every annotation row.
        annotations = annotations.merge(
            images, left_on='image_id', right_index=True)
        annotations = annotations.merge(
            categories, left_on='category_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))

        self.annotations = annotations
        self.labelme = {}
        self.imgpath = imgpath
        self.images = raw_images.set_index('file_name')

    def coco2shape(self, row):
        """Return the polygons of one annotation row.

        The converter is chosen from the format of ``row['segmentation']``
        itself (a dict is RLE, a list is polygons) rather than from
        ``iscrowd``, so rows whose ``iscrowd`` flag does not match the
        segmentation format are still converted.

        Args:
            row: Mapping-like annotation row with a ``segmentation`` entry.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer pairs.

        Raises:
            ValueError: If the segmentation is neither a dict nor a list.
        """
        segmentation = row['segmentation']
        if isinstance(segmentation, dict):
            return self.rle2shape(row)
        if isinstance(segmentation, (list, tuple)):
            return self.polygon2shape(row)
        raise ValueError(
            "unsupported segmentation type: {}".format(
                type(segmentation).__name__))

    def rle2shape(self, row):
        """Trace the outlines of an RLE-encoded mask as polygons.

        Args:
            row: Mapping-like annotation row whose ``segmentation`` is a dict
                with ``counts`` (run lengths) and ``size`` entries.

        Returns:
            A list of polygons, each a list of ``[x, y]`` integer pairs.
        """
        segmentation = row['segmentation']
        mask = self._rle_decode(segmentation['counts'], segmentation['size'])

        # Surround the mask with one pixel of background so that regions
        # touching the image border still produce closed contours.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)

        # Contour points are (row, col) in padded coordinates: undo the
        # one-pixel shift, clamp at the image origin and emit [x, y].
        return [
            [
                [int(max(point[1] - 1, 0)), int(max(point[0] - 1, 0))]
                for point in contour
            ]
            for contour in contours
        ]

    def _rle_decode(self, rle, shape):
        """Decode uncompressed run lengths into a boolean mask.

        Runs alternate between background and foreground, starting with
        background, and fill a flat buffer that is then reshaped to
        ``(shape[1], shape[0])`` and transposed.

        Args:
            rle: Sequence of non-negative integer run lengths.
            shape: Two-item sequence; the result has shape
                ``(shape[0], shape[1])``.

        Returns:
            Boolean ``numpy`` array of shape ``(shape[0], shape[1])``.

        Raises:
            ValueError: If ``rle`` is a compressed (string) encoding, holds a
                negative run, or covers more pixels than the mask has.
        """
        if isinstance(rle, (str, bytes)):
            raise ValueError(
                "compressed RLE counts are not supported; "
                "expected a list of run lengths")

        total = shape[0] * shape[1]
        counts = np.asarray(rle, dtype=np.int64)
        if counts.ndim != 1 or (counts < 0).any() or counts.sum() > total:
            raise ValueError(
                "invalid RLE for shape {}: {} runs covering {} pixels".format(
                    shape, counts.size, int(counts.sum())))

        # Odd-indexed runs are foreground; expand all runs in one pass
        # instead of re-summing the prefix for every run.
        run_is_foreground = np.arange(counts.size) % 2 == 1
        decoded = np.repeat(run_is_foreground, counts)

        flat = np.zeros(total, dtype=bool)
        flat[:decoded.size] = decoded
        # Reshape and transpose
        return flat.reshape([shape[1], shape[0]]).T

    def polygon2shape(self, row):
        """Convert flat COCO polygons into lists of integer points.

        Args:
            row: Mapping-like annotation row whose ``segmentation`` is a list
                of flat ``[x0, y0, x1, y1, ...]`` coordinate lists.

        Returns:
            Nested list shaped ``(n_polygons, n_points, 2)``; a trailing
            unpaired coordinate is ignored.
        """
        return [
            [[int(x), int(y)] for x, y in zip(points[0::2], points[1::2])]
            for points in row['segmentation']
        ]

    def coco2labelme(self):
        """Build one labelme record per image and store it in ``self.labelme``.

        Every polygon becomes its own shape; polygons that belong to the same
        annotation share a ``group_id``.  Records already present in
        ``self.labelme`` are left untouched.
        """
        fill_color = [255, 0, 0, 128]
        line_color = [0, 255, 0, 128]

        for filename, group in self.annotations.groupby('file_name'):
            if filename in self.labelme:
                continue
            image = self.images.loc[filename]
            shapes = [
                {
                    'line_color': None,
                    'fill_color': None,
                    'shape_type': "polygon",
                    'label': row['name'],
                    'group_id': inst_idx,
                    'points': polygon,
                }
                for inst_idx, (_, row) in enumerate(group.iterrows())
                for polygon in row['shapes']
            ]
            self.labelme[filename] = {
                'imageData': None,
                'fillColor': fill_color,
                'lineColor': line_color,
                'imagePath': filename,
                'imageHeight': int(image['height']),
                'imageWidth': int(image['width']),
                'shapes': shapes,
            }

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        """Write the labelme JSON (and optionally the image) of each file.

        Args:
            file_names: Iterable of keys of ``self.labelme`` to export.
            dirpath: Output directory; it is created and must not exist yet.
            save_json_only: When true, the images are not copied.

        Raises:
            ValueError: If ``dirpath`` already exists.
            KeyError: If a file name has no record in ``self.labelme``.
        """
        if os.path.exists(dirpath):
            raise ValueError(f"{dirpath} has existed")
        os.makedirs(dirpath)

        for file_name in file_names:
            stem = os.path.basename(os.path.splitext(file_name)[0])
            json_path = os.path.join(dirpath, stem + '.json')
            with open(json_path, 'w') as jsonfile:
                json.dump(
                    self.labelme[file_name], jsonfile,
                    ensure_ascii=True, indent=2)

            if save_json_only:
                continue
            # Copy in-process (portable, no `cp` subprocess per image).  A
            # failed copy is reported but does not abort the export.
            try:
                shutil.copy(os.path.join(self.imgpath, file_name), dirpath)
            except OSError as err:
                warnings.warn(
                    "could not copy image {!r}: {}".format(file_name, err))
# Convert the COCO train2014 instance annotations into labelme files: one
# JSON per image, written next to a copy of the image itself.
ds = CocoDatasetHandler(
    jsonpath='cocodataset/annotations/instances_train2014.json',
    imgpath='cocodataset/train2014/',
)
ds.coco2labelme()
ds.save_labelme(
    file_names=ds.labelme.keys(),
    dirpath='cocodataset/labelme/train2014',
)
@travishsu Thank you, sir, for your reply.
I have attached the files here https://drive.google.com/drive/folders/1cvPGxPGLCb-6fbDGVEHPDVX2bxPEvx3m?usp=sharing
Thank you!
This script might call polygon2shape(L68) since "iscrowd" is 0.
Most of my images are crowded, so should I set it to 1 in order to get the exact number of masks?
Hi @manaswakchaure,
I found there are multiple lists nested in the value of segmentation, so there will be multiple converted masks for a single instance, and the converted masks will have the same `group_id`.
Besides, using `iscrowd=0` is correct if the value of segmentation is in polygon format instead of RLE format.
Dear sir,
Thank you so much!
I got it. I made some modifications to get those multiple masks as one single instance under one group_id, as needed, and verified it!
Thank you so much for your time.
不知道發這邊好不好
想請問作者有考慮新增png to json嗎?
也就是mask.png 轉成 coco.json (png2coco)
我原本參考這位作者的程式碼執行,他的資料集確實可以跑
但用我的資料去跑,產生的 COCO 裡面 segmentation 的點座標有些是負值,導致檔案無法開啟
https://github.com/chrise96/image-to-coco-json-converter
這是我的資料集檔案連結
https://drive.google.com/drive/folders/1butmjjGTgMIEr6bq1nQ3ejjm0M7oN_YR?usp=share_link
謝謝你
你好,我最後查出原因,只有把照片改成JPG即可
但有個極為困難的點,如果圖形是甜甜圈那種形狀,中間需要挖空
似乎coco json無法表示
用實際例子說明,假設有個正方形的農田,正中間有個農舍房子
因此農田polygon要正方形減去正中間農舍
但coco json的polygon的點,是輪廓組成,因此無法形成
不曉得你這邊有無辦法解決重疊的地方把它去除
Hi @stphtan94117,
我想 `png2coco` 可能不適合放在這個 `coco2labelme` 底下。
那位作者用的是多個 polygon 放在同一個 segmentation 且設定 `iscrowd` 為 0,
但如果有沒辦法用 polygon 表示的 instance,我想你可以考慮將同一個 instance 的 mask 轉成 RLE format,且讓 `iscrowd` 設為 1。
References
Hi @manaswakchaure,
Could you provide the entire value of the key "segmentation" in this example?
This script might call `polygon2shape` (L68) since "iscrowd" is 0.