Last active
August 2, 2022 17:38
-
-
Save iaverypadberg/8df5726f087c132c686d15f18c38b1a3 to your computer and use it in GitHub Desktop.
Find wrong labels in dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import sys
import xml.etree.ElementTree as ET

# Sort through a dataset's xml files and print out file names that contain labels which are invalid
valid_labels = ["frodo","baggins"]


def find_invalid_labels(root, allowed):
    """Return the labels under *root* that are not in *allowed*.

    Only ``<object>`` elements that are direct children of *root* are
    inspected, and only the first ``<name>`` child of each one is read.

    Args:
        root: Parsed XML element whose ``<object>`` children are checked.
        allowed: Container of accepted label strings.

    Returns:
        A list with one entry per offending ``<object>``, in document order.
        Each entry is the text of its ``<name>`` element, or ``None`` when
        the ``<name>`` element is missing or has no text.
    """
    invalid = []
    for obj in root.findall('object'):
        name_node = obj.find('name')
        # An <object> without a <name> child cannot carry a valid label;
        # report it instead of crashing on ``None.text``.
        label = name_node.text if name_node is not None else None
        if label not in allowed:
            invalid.append(label)
    return invalid


# Loop through all xml files
files = glob.glob('/home/isaac/Desktop/work_area/working_on/*.xml')
for xml_path in files:
    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError as err:
        # One malformed file should not abort the scan of the whole dataset.
        # Keep stdout clean so it contains only files with invalid labels.
        print(xml_path, "could not be parsed:", err, file=sys.stderr)
        continue
    # Print each offending file once, however many bad labels it contains.
    if find_invalid_labels(root, valid_labels):
        print(xml_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment