-
-
Save goodhamgupta/7ca514458d24af980669b8b1c8bcdafd to your computer and use it in GitHub Desktop.
# Script to convert yolo annotations to voc format | |
# Sample format | |
# <annotation> | |
# <folder>_image_fashion</folder> | |
# <filename>brooke-cagle-39574.jpg</filename> | |
# <size> | |
# <width>1200</width> | |
# <height>800</height> | |
# <depth>3</depth> | |
# </size> | |
# <segmented>0</segmented> | |
# <object> | |
# <name>head</name> | |
# <pose>Unspecified</pose> | |
# <truncated>0</truncated> | |
# <difficult>0</difficult> | |
# <bndbox> | |
# <xmin>549</xmin> | |
# <ymin>251</ymin> | |
# <xmax>625</xmax> | |
# <ymax>335</ymax> | |
# </bndbox> | |
# </object> | |
# <annotation> | |
import os | |
import xml.etree.cElementTree as ET | |
from PIL import Image | |
ANNOTATIONS_DIR_PREFIX = "annotations" | |
DESTINATION_DIR = "converted_labels" | |
CLASS_MAPPING = { | |
'0': 'name' | |
# Add your remaining classes here. | |
} | |
def create_root(file_prefix, width, height): | |
root = ET.Element("annotations") | |
ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix) | |
ET.SubElement(root, "folder").text = "images" | |
size = ET.SubElement(root, "size") | |
ET.SubElement(size, "width").text = str(width) | |
ET.SubElement(size, "height").text = str(height) | |
ET.SubElement(size, "depth").text = "3" | |
return root | |
def create_object_annotation(root, voc_labels): | |
for voc_label in voc_labels: | |
obj = ET.SubElement(root, "object") | |
ET.SubElement(obj, "name").text = voc_label[0] | |
ET.SubElement(obj, "pose").text = "Unspecified" | |
ET.SubElement(obj, "truncated").text = str(0) | |
ET.SubElement(obj, "difficult").text = str(0) | |
bbox = ET.SubElement(obj, "bndbox") | |
ET.SubElement(bbox, "xmin").text = str(voc_label[1]) | |
ET.SubElement(bbox, "ymin").text = str(voc_label[2]) | |
ET.SubElement(bbox, "xmax").text = str(voc_label[3]) | |
ET.SubElement(bbox, "ymax").text = str(voc_label[4]) | |
return root | |
def create_file(file_prefix, width, height, voc_labels): | |
root = create_root(file_prefix, width, height) | |
root = create_object_annotation(root, voc_labels) | |
tree = ET.ElementTree(root) | |
tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix)) | |
def read_file(file_path): | |
file_prefix = file_path.split(".txt")[0] | |
image_file_name = "{}.jpg".format(file_prefix) | |
img = Image.open("{}/{}".format("images", image_file_name)) | |
w, h = img.size | |
with open(file_path, 'r') as file: | |
lines = file.readlines() | |
voc_labels = [] | |
for line in lines: | |
voc = [] | |
line = line.strip() | |
data = line.split() | |
voc.append(CLASS_MAPPING.get(data[0])) | |
bbox_width = float(data[3]) * w | |
bbox_height = float(data[4]) * h | |
center_x = float(data[1]) * w | |
center_y = float(data[2]) * h | |
voc.append(center_x - (bbox_width / 2)) | |
voc.append(center_y - (bbox_height / 2)) | |
voc.append(center_x + (bbox_width / 2)) | |
voc.append(center_y + (bbox_height / 2)) | |
voc_labels.append(voc) | |
create_file(file_prefix, w, h, voc_labels) | |
print("Processing complete for file: {}".format(file_path)) | |
def start(dir_name): | |
if not os.path.exists(DESTINATION_DIR): | |
os.makedirs(DESTINATION_DIR) | |
for filename in os.listdir(ANNOTATIONS_DIR_PREFIX): | |
if filename.endswith('txt'): | |
read_file(filename) | |
else: | |
print("Skipping file: {}".format(filename)) | |
if __name__ == "__main__": | |
start() |
Hi people! Pay attention to the image extension. In my case, there was an extension .jpg (lower case), and I had to replace .JPEG so that the script worked fine.
A useful tip:
- Make sure that you have all your images in a folder name images in the same directory where you execute this script
I would like to suggest one edit on line #78
Change
with open(file_path, 'r') as file:
To
with open(os.path.join(ANNOTATIONS_DIR_PREFIX,file_path), 'r') as file:
Otherwise script generates an error, as it tries to read the filename (label .txt file) without complete path.
@khalidw
thank you very much. Helped a lot.
@devika-28
Added a "if" in line 85. So the "List Index Out Of Range Error" is fixed
if len(data) == 0: continue;
A useful tip: this is the script to change the file names from jpg to .JPEG
import os, sys
dir =("C:\Users\ashwa\Downloads\Downloads\visiolab-food-detection\train\labels")
for i in os.listdir(dir):
files = os.path.join(dir,i)
split= os.path.splitext(files)
if split[1]=='.jpg':
os.rename(files,split[0]+'.JPEG')
ANNOTATIONS_DIR_PREFIX = "./Database1"
and change the Database1 name in the program with the respective folder name
Hey guys, I edited the code a little so you can apply it to your data. Jut set directories and class names as you wish:
# Script to convert yolo annotations to voc format
import os
import xml.etree.cElementTree as ET
from PIL import Image
from math import floor
###########################
#set directories and class names
###########################
ANNOTATIONS_DIR_PREFIX = "D:/DeepData/DeepHemato/Data/Train/yoloAnots"
IMAGE_DIR_PREFIX = "D:/DeepData/DeepHemato/Data/Train/images"
imgExt = "png"
imgChnls = 3 #RGB:3 ; Grayscale:1
DESTINATION_DIR = "D:/DeepData/DeepHemato/Data/Train/vocAnots"
CLASS_MAPPING = {
'0': 'Neutrophil',
'1': 'Eosinophil',
'2': 'Basophil',
'3': 'Lymphocyte',
'4': 'Monocyte',
'5': 'Denature'
}
###########################
def create_root(file_prefix, width, height):
root = ET.Element("annotations")
ET.SubElement(root, "filename").text = "{}.{}".format(file_prefix,imgExt)
ET.SubElement(root, "folder").text = "{}/{}.{}".format(IMAGE_DIR_PREFIX,file_prefix,imgExt)
size = ET.SubElement(root, "size")
ET.SubElement(size, "width").text = str(width)
ET.SubElement(size, "height").text = str(height)
ET.SubElement(size, "depth").text = str(imgChnls)
return root
def create_object_annotation(root, voc_labels):
for voc_label in voc_labels:
obj = ET.SubElement(root, "object")
ET.SubElement(obj, "name").text = voc_label[0]
ET.SubElement(obj, "pose").text = "Unspecified"
ET.SubElement(obj, "truncated").text = str(0)
ET.SubElement(obj, "difficult").text = str(0)
bbox = ET.SubElement(obj, "bndbox")
ET.SubElement(bbox, "xmin").text = str(voc_label[1])
ET.SubElement(bbox, "ymin").text = str(voc_label[2])
ET.SubElement(bbox, "xmax").text = str(voc_label[3])
ET.SubElement(bbox, "ymax").text = str(voc_label[4])
return root
def create_file(file_prefix, width, height, voc_labels):
root = create_root(file_prefix, width, height)
root = create_object_annotation(root, voc_labels)
tree = ET.ElementTree(root)
tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix))
def read_file(file_path):
file_prefix = file_path.split(".txt")[0]
image_file_name = "{}.{}".format(file_prefix,imgExt)
img = Image.open("{}/{}".format(IMAGE_DIR_PREFIX, image_file_name))
print(img)
w, h = img.size
prueba = "{}/{}".format(ANNOTATIONS_DIR_PREFIX, file_path)
print(prueba)
with open(prueba) as file:
lines = file.readlines()
voc_labels = []
for line in lines:
voc = []
line = line.strip()
data = line.split()
voc.append(CLASS_MAPPING.get(data[0]))
bbox_width = float(data[3]) * w
bbox_height = float(data[4]) * h
center_x = float(data[1]) * w
center_y = float(data[2]) * h
voc.append(floor(center_x - (bbox_width / 2)))
voc.append(floor(center_y - (bbox_height / 2)))
voc.append(floor(center_x + (bbox_width / 2)))
voc.append(floor(center_y + (bbox_height / 2)))
voc_labels.append(voc)
create_file(file_prefix, w, h, voc_labels)
print("Processing complete for file: {}".format(file_path))
def start():
if not os.path.exists(DESTINATION_DIR):
os.makedirs(DESTINATION_DIR)
for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
if filename.endswith('txt'):
try:
PathFileName = "{}/{}".format(ANNOTATIONS_DIR_PREFIX, filename)
if os.stat(PathFileName).st_size > 0:
print("Si")
read_file(filename)
except:
print("No")
else:
print("Skipping file: {}".format(filename))
if __name__ == "__main__":
start()
for VOC format, do we not need a xml file for a image that has no object at all? cause from the code I assume that it passed cases where txt is empty which means there is no object in the picture.
Yes Even if the class/object you want is not present in the image. You need XML which has the annotation for no object. Regards, Usman Khalid Mian, 20100061.
…
Thank you! But the code in this repo does not generate xml for non-object image. where it ignored YOLO empty txt file, which represents the non-object images. Is there any way to fix this issue?
You need to create annotation for that. Don't create bounding box. Just annotate it as no object. And it will save that as an XML
…
Thank you! I guess I did not phrase my question well, I have already created a yolo label for images without object, and the label file is an empty file, but the code here will omit cases when the txt file is empty, so what should I save in label.txt for yolo label when there is no object so that this code will work and transfer to an empty xml file?
You need to create annotation for that. Don't create bounding box. Just annotate it as no object. And it will save that as an XML
…Thank you! I guess I did not phrase my question well, I have already created a yolo label for images without object, and the label file is an empty file, but the code here will omit cases when the txt file is empty, so what should I save in label.txt for yolo label when there is no object so that this code will work and transfer to an empty xml file?
A useful tip:
- Make sure that you have all your images in a folder name images in the same directory where you execute this script
I would like to suggest one edit on line #78
Change
with open(file_path, 'r') as file:
To
with open(os.path.join(ANNOTATIONS_DIR_PREFIX,file_path), 'r') as file:
Otherwise script generates an error, as it tries to read the filename (label .txt file) without complete path.
Absolutely right. Congratulations. Code is working with this plugin. Also, it is mandatory to use a round in voc.append calculation. XML does not accept comma numbers. labeling map is not available with this reason. If;
voc.append (round (center_x - (bbox_width / 2)))
voc.append (round (center_y - (bbox_height / 2)))
voc.append (round (center_x + (bbox_width / 2)))
voc.append (round (center_y + (bbox_height / 2)))
If you use the form, your data will be easily tagged.
full working of code. thanks @goodhamgupta
import os
import xml.etree.cElementTree as ET
from PIL import Image
ANNOTATIONS_DIR_PREFIX = "data"
DESTINATION_DIR = "converted_labels"
CLASS_MAPPING = {
'0': 'name'
# Add your remaining classes here.
}
def create_root(file_prefix, width, height):
root = ET.Element("annotation")
ET.SubElement(root, "folder").text = "converted_labels"
ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix)
ET.SubElement(root, "path").text = "D:\PycharmProjects\convert\converted_labels\{}.jpg".format(file_prefix)
source = ET.SubElement(root, "source")
ET.SubElement(source, "database").text = "Unknown"
size = ET.SubElement(root, "size")
ET.SubElement(size, "width").text = str(width)
ET.SubElement(size, "height").text = str(height)
ET.SubElement(size, "depth").text = "3"
ET.SubElement(root, "segmented").text = "0"
return root
def create_object_annotation(root, voc_labels):
for voc_label in voc_labels:
obj = ET.SubElement(root, "object")
ET.SubElement(obj, "name").text=str(voc_label[0])
ET.SubElement(obj, "pose").text = "Unspecified"
ET.SubElement(obj, "truncated").text = str(0)
ET.SubElement(obj, "difficult").text = str(0)
bbox = ET.SubElement(obj, "bndbox")
ET.SubElement(bbox, "xmin").text = str(voc_label[1])
ET.SubElement(bbox, "ymin").text = str(voc_label[2])
ET.SubElement(bbox, "xmax").text = str(voc_label[3])
ET.SubElement(bbox, "ymax").text = str(voc_label[4])
return root
def create_file(file_prefix, width, height, voc_labels):
root = create_root(file_prefix, width, height)
root = create_object_annotation(root, voc_labels)
tree = ET.ElementTree(root)
tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix))
def read_file(file_path):
file_prefix = file_path.split(".txt")[0]
image_file_name = "{}.jpg".format(file_prefix)
img = Image.open("{}/{}".format("data", image_file_name))
w, h = img.size
with open(os.path.join(ANNOTATIONS_DIR_PREFIX,file_path), 'r') as file:
lines = file.readlines()
voc_labels = []
for line in lines:
voc = []
line = line.strip()
data = line.split()
CLASS_MAPPING.get(data[0])
a = int(data[0])
if a == 0:
voc.append("al")
else:
voc.append("sat")
bbox_width = float(data[3]) * w
bbox_height = float(data[4]) * h
center_x = float(data[1]) * w
center_y = float(data[2]) * h
voc.append(round(center_x - (bbox_width / 2)))
voc.append(round(center_y - (bbox_height / 2)))
voc.append(round(center_x + (bbox_width / 2)))
voc.append(round(center_y + (bbox_height / 2)))
voc_labels.append(voc)
create_file(file_prefix, w, h, voc_labels)
print("Processing complete for file: {}".format(file_path))
def start():
if not os.path.exists(DESTINATION_DIR):
os.makedirs(DESTINATION_DIR)
for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
if filename.endswith('txt'):
read_file(filename)
else:
print("Skipping file: {}".format(filename))
if __name__ == "__main__":
start()
Thanks @goodhamgupta @mjahanifar
hello. Just change the Annotations_dir_prefix and DESTINATION_DIR files and your code will run perfectly given that YOU HAVE THE ANNOTATIONS AND IMAGES IN THE SAME FOLDER.
`import os
import xml.etree.cElementTree as ET
from PIL import Image
ANNOTATIONS_DIR_PREFIX = "C:/Users/Murtaza Kazmi/Folder/"
DESTINATION_DIR = "C:/Users/Murtaza Kazmi/Corrected_Annotations_to_XML/"
CLASS_MAPPING = {
'0': '0',
'1':'1',
'2': '2',
'3': '3',
'4': '4',
'5': '5',
'6':'6',
'7':'7',
'8':'8',
'9':'9',
# Add your remaining classes here.
}
def create_root(file_prefix, width, height):
root = ET.Element("annotations")
ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix)
ET.SubElement(root, "folder").text = "images"
size = ET.SubElement(root, "size")
ET.SubElement(size, "width").text = str(width)
ET.SubElement(size, "height").text = str(height)
ET.SubElement(size, "depth").text = "3"
return root
def create_object_annotation(root, voc_labels):
for voc_label in voc_labels:
obj = ET.SubElement(root, "object")
ET.SubElement(obj, "name").text = voc_label[0]
ET.SubElement(obj, "pose").text = "Unspecified"
ET.SubElement(obj, "truncated").text = str(0)
ET.SubElement(obj, "difficult").text = str(0)
bbox = ET.SubElement(obj, "bndbox")
ET.SubElement(bbox, "xmin").text = str(voc_label[1])
ET.SubElement(bbox, "ymin").text = str(voc_label[2])
ET.SubElement(bbox, "xmax").text = str(voc_label[3])
ET.SubElement(bbox, "ymax").text = str(voc_label[4])
return root
def create_file(file_prefix, width, height, voc_labels):
root = create_root(file_prefix, width, height)
root = create_object_annotation(root, voc_labels)
tree = ET.ElementTree(root)
tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix))
def read_file(file_path):
file_prefix = file_path.split(".txt")[0]
image_file_name = "{}.jpg".format(file_prefix)
img = Image.open(image_file_name)
w, h = img.size
print(file_path, '101')
with open(ANNOTATIONS_DIR_PREFIX + file_path, 'r') as file:
lines = file.readlines()
voc_labels = []
for line in lines:
voc = []
line = line.strip()
data = line.split()
voc.append(CLASS_MAPPING.get(data[0]))
bbox_width = float(data[3]) * w
bbox_height = float(data[4]) * h
center_x = float(data[1]) * w
center_y = float(data[2]) * h
voc.append(center_x - (bbox_width / 2))
voc.append(center_y - (bbox_height / 2))
voc.append(center_x + (bbox_width / 2))
voc.append(center_y + (bbox_height / 2))
voc_labels.append(voc)
create_file(file_prefix, w, h, voc_labels)
print("Processing complete for file: {}".format(file_path))
def start():
if not os.path.exists(DESTINATION_DIR):
os.makedirs(DESTINATION_DIR)
for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
if filename.endswith('txt'):
read_file(filename)
else:
print("Skipping file: {}".format(filename))
if name == "main":
start()
`
this code works
import os
from xml.dom import minidom
import xml.etree.cElementTree as ET
from PIL import Image
ANNOTATIONS_DIR_PREFIX = "path to your yolo annotations"
DESTINATION_DIR = "output path"
CLASS_MAPPING = {
'1': 'Char'
}
def formatter(elem):
"""Return a pretty-printed XML string for the Element.
"""
rough_string = ET.tostring(elem, 'utf-8')
reparsed = minidom.parseString(rough_string)
return reparsed.toprettyxml(indent=" ")
def create_root(file_prefix, width, height):
root = ET.Element("annotation")
ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix)
size = ET.SubElement(root, "size")
ET.SubElement(size, "width").text = str(width)
ET.SubElement(size, "height").text = str(height)
ET.SubElement(size, "depth").text = "3"
return root
def create_object_annotation(root, voc_labels):
for voc_label in voc_labels:
obj = ET.SubElement(root, "object")
ET.SubElement(obj, "name").text = voc_label[0]
bbox = ET.SubElement(obj, "bndbox")
ET.SubElement(bbox, "xmin").text = str(voc_label[1])
ET.SubElement(bbox, "ymin").text = str(voc_label[2])
ET.SubElement(bbox, "xmax").text = str(voc_label[3])
ET.SubElement(bbox, "ymax").text = str(voc_label[4])
return root
def create_file(file_prefix, width, height, voc_labels):
root = create_root(file_prefix, width, height)
root = create_object_annotation(root, voc_labels)
with open("{}/{}.xml".format(DESTINATION_DIR, file_prefix), "w") as f:
f.write(formatter(root))
f.close()
def read_file(file_path):
file_prefix = file_path.split(".txt")[0]
image_file_name = "{}.jpg".format(file_prefix)
img = Image.open("{}/{}".format("sites", image_file_name))
w, h = img.size
with open("labels/"+file_path, 'r') as file:
lines = file.readlines()
voc_labels = []
for line in lines:
voc = []
line = line.strip()
data = line.split()
voc.append(CLASS_MAPPING.get(data[0]))
bbox_width = float(data[3]) * w
bbox_height = float(data[4]) * h
center_x = float(data[1]) * w
center_y = float(data[2]) * h
voc.append(round(center_x - (bbox_width / 2)))
voc.append(round(center_y - (bbox_height / 2)))
voc.append(round(center_x + (bbox_width / 2)))
voc.append(round(center_y + (bbox_height / 2)))
voc_labels.append(voc)
create_file(file_prefix, w, h, voc_labels)
def start():
if not os.path.exists(DESTINATION_DIR):
os.makedirs(DESTINATION_DIR)
for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
if filename.endswith('txt'):
read_file(filename)
else:
print("Skipping file: {}".format(filename))
if __name__ == "__main__":
start()
It is working. Thanks @AdonaiVera.