import os
import json
import subprocess

import numpy as np
import pandas as pd
from skimage.measure import find_contours


class CocoDatasetHandler:
    def __init__(self, jsonpath, imgpath):
        with open(jsonpath, 'r') as jsonfile:
            ann = json.load(jsonfile)

        images = pd.DataFrame.from_dict(ann['images']).set_index('id')
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        # Join image and category metadata onto each annotation.
        annotations = annotations.merge(images, left_on='image_id', right_index=True)
        annotations = annotations.merge(categories, left_on='category_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))
        self.annotations = annotations

        self.labelme = {}
        self.imgpath = imgpath
        self.images = pd.DataFrame.from_dict(ann['images']).set_index('file_name')

    def coco2shape(self, row):
        if row.iscrowd == 1:
            shapes = self.rle2shape(row)
        elif row.iscrowd == 0:
            shapes = self.polygon2shape(row)
        return shapes

    def rle2shape(self, row):
        rle, shape = row['segmentation']['counts'], row['segmentation']['size']
        mask = self._rle_decode(rle, shape)
        # Pad the mask so contours touching the image border stay closed,
        # then shift the contour points back by one pixel.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        points = find_contours(padded_mask, 0.5)
        shapes = [
            [[int(point[1] - 1), int(point[0] - 1)] for point in polygon]
            for polygon in points
        ]
        return shapes

    def _rle_decode(self, rle, shape):
        # Uncompressed COCO RLE: counts alternate between runs of 0s and 1s,
        # starting with 0s, over the column-major flattened mask.
        mask = np.zeros([shape[0] * shape[1]], dtype=bool)
        for idx, r in enumerate(rle):
            s = 0 if idx < 1 else sum(rle[:idx])
            e = s + r
            if e == s:
                continue
            assert 0 <= s < mask.shape[0]
            assert 1 <= e <= mask.shape[0], "shape: {} s {} e {} r {}".format(shape, s, e, r)
            if idx % 2 == 1:
                mask[s:e] = 1
        # Reshape and transpose back to (height, width).
        mask = mask.reshape([shape[1], shape[0]]).T
        return mask

    def polygon2shape(self, row):
        # shapes: (n_polygons, n_points, 2)
        shapes = [
            [[int(points[2 * i]), int(points[2 * i + 1])] for i in range(len(points) // 2)]
            for points in row.segmentation
        ]
        return shapes

    def coco2labelme(self):
        fillColor = [255, 0, 0, 128]
        lineColor = [0, 255, 0, 128]
        groups = self.annotations.groupby('file_name')
        for file_idx, (filename, df) in enumerate(groups):
            record = {
                'imageData': None,
                'fillColor': fillColor,
                'lineColor': lineColor,
                'imagePath': filename,
                'imageHeight': int(self.images.loc[filename].height),
                'imageWidth': int(self.images.loc[filename].width),
            }
            record['shapes'] = []
            instance = {
                'line_color': None,
                'fill_color': None,
                'shape_type': "polygon",
            }
            # One COCO annotation may contain several polygons; they all share
            # the same group_id so they can be treated as one instance.
            for inst_idx, (_, row) in enumerate(df.iterrows()):
                for polygon in row.shapes:
                    copy_instance = instance.copy()
                    copy_instance.update({
                        'label': row['name'],
                        'group_id': inst_idx,
                        'points': polygon,
                    })
                    record['shapes'].append(copy_instance)
            if filename not in self.labelme.keys():
                self.labelme[filename] = record

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        else:
            raise ValueError(f"{dirpath} already exists")
        for file in file_names:
            filename = os.path.basename(os.path.splitext(file)[0])
            with open(os.path.join(dirpath, filename + '.json'), 'w') as jsonfile:
                json.dump(self.labelme[file], jsonfile, ensure_ascii=True, indent=2)
            if not save_json_only:
                subprocess.call(['cp', os.path.join(self.imgpath, file), dirpath])


ds = CocoDatasetHandler('cocodataset/annotations/instances_train2014.json', 'cocodataset/train2014/')
ds.coco2labelme()
ds.save_labelme(ds.labelme.keys(), 'cocodataset/labelme/train2014')
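Since save_labelme refuses to write into an existing directory, a quick way to try the conversion on just a few images is to pass a subset of filenames and a fresh output path. A minimal sketch; the subset size and directory name below are arbitrary:

subset = list(ds.labelme.keys())[:10]  # first ten converted images
ds.save_labelme(subset, 'cocodataset/labelme/train2014_subset')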
Hi @yc0619, thanks for the attention. It seems this happens when we're dealing with crowd instances, because we can use the polygon points from the COCO annotation directly if an instance is not a crowd instance.
In this script we use skimage.measure.find_contours to find polygon points from the mask. Unfortunately, I'm not familiar enough with the algorithm in this function to adjust the density.
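If the contours are too dense, one option (not part of the original script) is to simplify each contour with skimage.measure.approximate_polygon before turning it into labelme points. A rough sketch; the helper name and the 1-pixel tolerance are just illustrative:

from skimage.measure import approximate_polygon, find_contours

def contours_to_shapes(mask, tolerance=1.0):
    # Higher tolerance -> fewer points per polygon.
    shapes = []
    for contour in find_contours(mask, 0.5):
        simplified = approximate_polygon(contour, tolerance=tolerance)
        shapes.append([[int(p[1]), int(p[0])] for p in simplified])
    return shapes

Swapping something like this in for the find_contours call in rle2shape should thin out the points.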
Hi @travishsu, thanks for the reply! I will take a look at it. 👍
It runs! labelme==4.5.7, python==3.6.5.
I am trying to convert COCO JSON format to LabelMe JSON format using this code, but I am getting the error: [Errno 2] No such file or directory: 'DataFile/lab/train/img/01F443WGRGW23MTN1ZD6KNFNSF.json'
At first, I manually created a folder "DataFile/lab/train/" inside the same "DataFile" folder where my COCO JSON and image files exist.
"DataFile" contains:
1. a single image folder 'img' where all the images exist (no splitting into train, test and validation images)
2. a 'train.json', 'test.json' and 'val.json'
3. the 'lab/train/' folder I created to save the LabelMe JSON files.
I would appreciate help fixing this problem.
Hi @fsultana44,
I guess the JSON file DataFile/train.json has items whose file_name contains a two-level path, such as img/01F443WGRGW23MTN1ZD6KNFNSF.jpg.
If so, the quickest way to resolve this is to modify this line
filename = os.path.splitext(file)[0]  # filename = 'img/01F443WGRGW23MTN1ZD6KNFNSF'
into
filename = os.path.basename(os.path.splitext(file)[0])  # filename = '01F443WGRGW23MTN1ZD6KNFNSF'
Thanks for reporting this issue!
Hi @travishsu,
Thank you for your quick reply!
Yes, my DataFile/train.json has items with a two-level file path in file_name. After changing the line of code you mentioned, it worked, but the problem is that it only produced one JSON file from the train.json file and then showed another error. Here is a snapshot of the error and the result files.
I am very new to Python programming, so I am very sorry for asking this question again.
I am facing the same issue
My problem has been solved
This problem was solved after I restarted my computer and ran the code again.
Hey @TravisHu,
The COCO dataset converted perfectly into LabelMe format, but I could not view it in labelme. Any idea?
I get this error every time I try to load an image with the labelme JSON files.
Hello @TuarAnup, sorry for the late reply.
I guess you were using save_json_only=True but didn't copy the images into the same directory as the output JSONs.
This script sets imageData to None, so a copy of each image needs to be placed next to its JSON.
So if you were using save_json_only=True, try copying the images into the same directory as the output JSONs, or set save_json_only=False.
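Alternatively, if copying images around is inconvenient, one could embed the image into the JSON itself, since labelme accepts a base64-encoded imageData field. A minimal sketch, not part of the original script; the file name is hypothetical:

import base64
import json
import os

img_file = 'COCO_train2014_000000000009.jpg'  # hypothetical file name
record = ds.labelme[img_file]
with open(os.path.join(ds.imgpath, img_file), 'rb') as f:
    record['imageData'] = base64.b64encode(f.read()).decode('ascii')
with open('embedded.json', 'w') as jsonfile:
    json.dump(record, jsonfile, indent=2)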
Thx
Hello sir,
Thank you so much for your work!
I had this problem: the segmentation of one mask was converted into multiple segmentations.
For example, part of my COCO JSON file:
{
  "id": 2,
  "image_id": 1,
  "segmentation": [
    [...],
    [...],
    [...]
  ],
  "iscrowd": 0,
  "bbox": [762.0, 496.0, 10.0, 10.0],
  "area": 83,
  "category_id": 1394339
}
The segmentation above has 3 masks of the same object.
When we convert it to labelme, each of those masks is considered a separate segmentation.
Why is that?
Hi @manaswakchaure,
Could you provide the entire value of the "segmentation" key in this example?
This script might call polygon2shape for it, since "iscrowd" is 0.
@travishsu Thank you, sir, for your reply.
I have attached the files here https://drive.google.com/drive/folders/1cvPGxPGLCb-6fbDGVEHPDVX2bxPEvx3m?usp=sharing
Thank you!
This script might call polygon2shape since "iscrowd" is 0.
Most of my images are crowded, so should I set it to 1 in order to get the exact number of masks?
Hi @manaswakchaure,
I found there are multiple lists nested in the value of segmentation, so there will be multiple converted masks for a single instance, and the converted masks will have the same group_id.
Besides, using iscrowd=0 is correct if the value of segmentation is in polygon format instead of RLE format.
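As a quick check, one can count how many shapes share each group_id in a converted record; group_ids that appear more than once came from annotations whose segmentation contained several polygon lists. A small sketch, not from the original script; the file name is hypothetical:

from collections import Counter

record = ds.labelme['example.jpg']  # hypothetical file name
counts = Counter(shape['group_id'] for shape in record['shapes'])
# group_ids with more than one shape correspond to multi-polygon annotations
print({gid: n for gid, n in counts.items() if n > 1})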
Dear sir,
Thank you so much!
I got it. I made some modifications to get those multiple masks as one single instance under one group_id, as needed, and verified it!
Thank you so much for your time.
I'm not sure whether this is the right place to ask, but have you considered adding a png-to-json converter, i.e. converting mask.png to coco.json (png2coco)?
I originally ran the code from this author, and it does work on his dataset:
https://github.com/chrise96/image-to-coco-json-converter
But when I run it on my own data, some of the segmentation point coordinates in the generated COCO file are negative, so the result cannot be opened.
Here is the link to my dataset files:
https://drive.google.com/drive/folders/1butmjjGTgMIEr6bq1nQ3ejjm0M7oN_YR?usp=share_link
Thank you.
Hi, I finally found the cause: I only had to convert the images to JPG.
But there is one very difficult case: if a shape is like a donut, with a hole in the middle, COCO JSON does not seem to be able to represent it.
As a concrete example, suppose there is a square farm field with a farmhouse right in the middle, so the field polygon should be the square minus the farmhouse at its center.
But a COCO polygon is a list of contour points, so this cannot be expressed.
Do you have any way to remove the overlapping part?
Hi @stphtan94117,
I think png2coco probably doesn't belong under this coco2labelme gist.
The author you referenced puts multiple polygons into the same segmentation and sets iscrowd to 0.
But for instances that cannot be represented with polygons, I think you could consider converting the instance's mask to RLE format and setting iscrowd to 1.
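For reference, here is a minimal sketch (an illustration, not part of the gist) of converting a binary instance mask into the uncompressed RLE that _rle_decode above expects: counts alternate between runs of 0s and 1s, starting with 0s, over the column-major flattened mask.

import numpy as np

def mask_to_uncompressed_rle(binary_mask):
    # Flatten in column-major (Fortran) order, as COCO RLE does.
    pixels = np.asarray(binary_mask, dtype=np.uint8).flatten(order='F')
    change_points = np.flatnonzero(np.diff(pixels)) + 1
    boundaries = np.concatenate(([0], change_points, [pixels.size]))
    counts = np.diff(boundaries).tolist()
    if pixels[0] == 1:
        counts = [0] + counts  # the first count must describe a run of 0s
    return {'counts': counts, 'size': list(binary_mask.shape)}

An annotation built this way would use the returned dict as "segmentation" together with "iscrowd": 1. Note that pycocotools produces compressed RLE (a byte string), which this script's decoder does not handle.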
Hi,
Thanks for sharing; it also runs for me with labelme 4.5.7. But I have a question: which parameter can I adjust to reduce the density of these points? Mine had too many. Thanks again! :) 👍