Created
February 15, 2022 17:43
-
-
Save josepsmartinez/ec965b61fd4aba862a70bee97b2460a0 to your computer and use it in GitHub Desktop.
Converts CVAT annotation samples from *checkbox tags* to *object tag attributes*
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert CVAT annotation samples from checkbox tags to object tag attributes.

For every <image> element of the source XML, the first <box> found is rebuilt
with one <attribute name=LABEL> child per entry of LABELS. The attribute text
is "true" when the image has a direct-child <tag label=LABEL> and "false"
otherwise. Every original <box> of the image is then removed and the rebuilt
box is appended to the image.
"""
import bs4

source_path = "annotations_source.xml"
output_xml_path = "annotations_out.xml"
LABELS = [
    "no-category", "valid", "non-centered", "occluded", "spoof",
    "rotated", "blurred"]


def _build_attribute_box(box_attrs, active_labels):
    """Return a new <box> tag with *box_attrs* and one <attribute> per label.

    Args:
        box_attrs: mapping of XML attributes copied onto the new box.
        active_labels: labels whose <attribute> text must be "true"; every
            other entry of LABELS gets "false".
    """
    new_box = bs4.Tag(name="box", attrs=dict(box_attrs))
    for label in LABELS:
        label_tag = bs4.Tag(name="attribute", attrs={"name": label})
        label_tag.string = "true" if label in active_labels else "false"
        # Tag.append() (rather than mutating .contents) lets Beautiful Soup
        # maintain the parent/sibling links it relies on when navigating
        # and serialising the tree.
        new_box.append(label_tag)
    return new_box


print("Loading XML")
with open(source_path, encoding="utf-8") as fp:
    # NOTE(review): "lxml" selects lxml's HTML parser; "xml" is the XML one.
    # Kept as-is because switching parsers changes the serialised output --
    # confirm which one the downstream consumer expects.
    xml_obj = bs4.BeautifulSoup(fp.read(), features="lxml")

print("Editing annotations")
# Positions (in document order) of <image> elements that had no <box>.
invalid_annotations = set()
for index, image in enumerate(xml_obj.select("image")):
    source_box = image.find("box")
    if source_box is None:
        # Nothing to rebuild: record the image and leave it untouched
        # instead of failing on `None.attrs`.
        invalid_annotations.add(index)
        continue

    # Snapshot the attributes before the original boxes are destroyed.
    box_attrs = dict(source_box.attrs)

    # tag -> attribute reformat: only tags that are direct children count.
    active_labels = {
        tag["label"] for tag in image.find_all("tag", recursive=False)}
    new_box = _build_attribute_box(box_attrs, active_labels)

    # Erase every existing box (to avoid duplicates), then insert the new one.
    for box in image.find_all("box"):
        box.decompose()
    image.append(new_box)

if invalid_annotations:
    print(f"Skipped {len(invalid_annotations)} image(s) without a box")

print("Outputting fixed annotations")
# Serialise before opening the output so a failure here cannot leave a
# truncated file behind.
xml_text = str(xml_obj)
# NOTE(review): as written both replacements are no-ops. Presumably the
# search strings were originally the escaped entities for "<" and ">" and
# were decoded somewhere in transit -- confirm against the original script.
xml_text = xml_text.replace("<", "<").replace(">", ">")
with open(output_xml_path, "w", encoding="utf-8") as fp:
    fp.write(xml_text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment