Skip to content

Instantly share code, notes, and snippets.

@josepsmartinez
Created February 15, 2022 17:43
Show Gist options
  • Save josepsmartinez/ec965b61fd4aba862a70bee97b2460a0 to your computer and use it in GitHub Desktop.
Save josepsmartinez/ec965b61fd4aba862a70bee97b2460a0 to your computer and use it in GitHub Desktop.
Converts CVAT annotation samples from *checkbox tags* to *object tag attributes*
import bs4
# Converts CVAT annotation samples from image-level checkbox tags (<tag label=...>)
# to boolean <attribute> children of each image's <box>.
source_path = "annotations_source.xml"
output_xml_path = "annotations_out.xml"
# Every label listed here becomes one <attribute name=LABEL> child of the
# rewritten <box>, holding "true" or "false".
LABELS = [
    "no-category", "valid", "non-centered", "occluded", "spoof",
    "rotated", "blurred"]


def _tags_to_box_attributes(image):
    """Replace all boxes of *image* with a single box carrying tag attributes.

    The new box copies the XML attributes of the first <box> found under
    *image* and gets one <attribute name=LABEL> child per entry of LABELS,
    whose text is "true" when *image* has a direct-child <tag> with that
    label and "false" otherwise. Every pre-existing <box> under *image* is
    then removed and the new box is appended to *image*.

    Returns True when the image was rewritten, False when it has no <box>
    (in which case the image is left untouched).
    """
    source_box = image.find("box")
    if source_box is None:
        # Nothing to copy the geometry from; let the caller report it.
        return False

    # Build the replacement box from scratch so that none of the old box's
    # children are carried over.
    new_box = bs4.Tag(name="box")
    for key, value in source_box.attrs.items():
        new_box[key] = value

    # Only tags that are direct children of the image are considered.
    present_labels = {
        tag["label"] for tag in image.find_all("tag", recursive=False)}

    for label in LABELS:
        label_tag = bs4.Tag(name="attribute", attrs={"name": label})
        label_tag.string = "true" if label in present_labels else "false"
        # Tag.append() (rather than mutating .contents directly) keeps the
        # parent / next_element links consistent, which serialisation and
        # navigation rely on.
        new_box.append(label_tag)

    # Erase the old boxes (to avoid duplicates) before inserting the new one.
    for box in image.find_all("box"):
        box.decompose()
    image.append(new_box)
    return True


print("Loading XML")
# Explicit encoding so the result does not depend on the platform default.
# NOTE(review): assumes the CVAT export is UTF-8 -- confirm.
with open(source_path, encoding="utf-8") as fp:
    # NOTE(review): "lxml" selects the HTML parser; Beautiful Soup documents
    # "xml" / "lxml-xml" for XML input. Kept as-is to preserve the output
    # format -- confirm whether switching is desired.
    xml_obj = bs4.BeautifulSoup(fp.read(), features="lxml")

print("Editing annotations")
# Identifiers of images that could not be converted because they have no box.
invalid_annotations = set()
for image in xml_obj.select("image"):
    if not _tags_to_box_attributes(image):
        invalid_annotations.add(
            image.get("name") or image.get("id") or "<unnamed>")
if invalid_annotations:
    print("Skipped images without a box: {}".format(
        sorted(map(str, invalid_annotations))))

print("Outputting fixed annotations")
with open(output_xml_path, "w", encoding="utf-8") as fp:
    xml_text = str(xml_obj)
    # Undo the &lt; / &gt; entity escaping present in the serialised text.
    # NOTE(review): this also un-escapes entities that were legitimately
    # escaped in the source document -- confirm this is intended.
    xml_text = xml_text.replace("&lt;", "<").replace("&gt;", ">")
    fp.write(xml_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment