Skip to content

Instantly share code, notes, and snippets.

@ixtli
Last active August 29, 2015 14:07
Show Gist options
  • Save ixtli/0ea0c7d5f85c408a3955 to your computer and use it in GitHub Desktop.
Save ixtli/0ea0c7d5f85c408a3955 to your computer and use it in GitHub Desktop.
Convert XML to JSON using Python 2.7.x
#!/usr/bin/python
import xml.etree.ElementTree as ET;
import json, os, fnmatch, time;
def coerceStringToType(val):
    """Convert a string into the most specific scalar it spells out.

    Returns None for an empty (falsy) input, an int for integral numbers
    (including float spellings such as "3.0" or "1e3"), a float for other
    finite numbers, True/False for case-insensitive "true"/"false", and the
    original value unchanged for everything else.

    Non-finite spellings ("nan", "inf", "1e999", ...) are returned as the
    original string rather than as a number.
    """
    if not val:
        return None

    try:
        asFloat = float(val)
        # int() raises ValueError for NaN and OverflowError for infinities,
        # so both land in the non-numeric branch below.
        asInt = int(asFloat)
    except (ValueError, TypeError, OverflowError):
        lowered = val.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
        return val

    if asFloat != asInt:
        return asFloat

    # Prefer parsing the text directly: going through float would round
    # integers that need more than 53 bits of precision.
    try:
        return int(val)
    except (ValueError, TypeError):
        # Integral value written in float notation, e.g. "3.0" or "1e3".
        return asInt
def coerceArrayOfStrings(parsed):
    """Coerce each string in *parsed* and return the resulting value(s).

    Every element is stripped and passed through coerceStringToType. As soon
    as any element turns out to be plain text, all elements (earlier and
    later ones alike) are kept as strings so the result is homogeneous.

    Returns the single coerced value when *parsed* has exactly one element,
    otherwise a list of the coerced values.

    Exits the process with status 1, after printing the offending list, when
    an element is empty (the "...,foo,,bar,..." case).
    """
    # Imported here because the module-level imports do not include sys.
    import sys

    # Covers Python 2 `unicode` as well as `str`; on Python 3 both entries
    # are the same type.
    textTypes = (str, type(u""))

    coerced = []
    sawText = False
    for piece in parsed:
        value = coerceStringToType(piece.strip())

        # This is the case where the string is "...,foo,,bar,..."
        if value is None:
            print("Bad string cooersion: %s" % (parsed,))
            sys.exit(1)

        isText = isinstance(value, textTypes)

        # First text element seen: retroactively stringify what we have.
        if isText and not sawText:
            sawText = True
            coerced = [str(item) for item in coerced]

        if sawText and not isText:
            value = str(value)
        coerced.append(value)

    if len(coerced) == 1:
        return coerced[0]
    return coerced
def coerceUnknownValue(val):
    """Coerce a raw attribute or text value into JSON-friendly data.

    Non-string inputs are returned untouched. Strings are stripped; an empty
    result yields None, anything else is split on commas and handed to
    coerceArrayOfStrings, which returns a scalar or a list.
    """
    # Don't think non-strings can happen, but just in case. The check also
    # accepts Python 2 `unicode` objects (on Python 3 both entries are `str`);
    # ElementTree on Python 2 hands back `unicode` for non-ASCII text, which
    # a plain `str` check would let through unprocessed.
    if not isinstance(val, (str, type(u""))):
        return val

    stripped = val.strip()

    # No empty strings
    if not stripped:
        return None

    return coerceArrayOfStrings(stripped.split(','))
def coerceKeyName(name):
    """Return the JSON key for an XML tag or attribute name.

    Any capitalisation of "id" becomes "id"; every other name only has its
    first character lower-cased ("FooBar" -> "fooBar"). An empty name stays
    empty.
    """
    if name.lower() == "id":
        return "id"
    # Slicing keeps this safe for the empty string, so no extra guard needed.
    return name[:1].lower() + name[1:]
def parseNode(root):
    """Recursively convert an ElementTree element into JSON-friendly data.

    The result is built from three sources, in this order:
      * child elements, keyed by coerceKeyName(tag); tags that occur more
        than once become a list of converted children,
      * attributes, keyed by coerceKeyName(name) and coerced with
        coerceUnknownValue (an attribute overwrites a child with the same
        key),
      * non-blank element text, stored under "value" -- or returned on its
        own when the element has no children and no attributes.

    Returns None for an element with no children, attributes or text.
    """
    # Group the children by output key first. Each group is a real list of
    # elements; calling list() on an element instead would expand it into
    # that element's own children and lose the element itself.
    grouped = {}
    for child in root:
        grouped.setdefault(coerceKeyName(child.tag), []).append(child)

    out = {}
    for key, children in grouped.items():
        converted = [parseNode(child) for child in children]
        # A tag that appears once stays a scalar/dict, not a 1-element list.
        out[key] = converted if len(converted) > 1 else converted[0]

    for attr, raw in root.attrib.items():
        out[coerceKeyName(attr)] = coerceUnknownValue(raw)

    if root.text is not None and root.text.strip():
        val = coerceUnknownValue(root.text)
        if not out:
            # Text-only element: collapse to the bare value.
            return val
        out["value"] = val

    if not out:
        return None
    return out
def parseFile(fileName):
    """Parse the XML file at *fileName* and return parseNode's result for
    its root element."""
    return parseNode(ET.parse(fileName).getroot())
if __name__ == "__main__":
    # Walk the current directory tree and write a .json file next to every
    # .xml file found, then report how many files were converted.
    # time.time() is used for wall-clock timing; time.clock() no longer
    # exists on Python 3.8+.
    start = time.time()
    count = 0
    for dirPath, _dirNames, fileNames in os.walk('.'):
        for fileName in fileNames:
            if not fnmatch.fnmatch(fileName, '*.xml'):
                continue
            xmlPath = os.path.join(dirPath, fileName)
            # splitext drops only the final extension, so "a.b.xml" becomes
            # "a.b.json" rather than "a.json".
            jsonPath = os.path.splitext(xmlPath)[0] + '.json'
            converted = parseFile(xmlPath)
            # The with-block closes the output even if json.dump raises.
            with open(jsonPath, 'w') as handle:
                json.dump(converted, handle, indent=2, sort_keys=True)
            count += 1
    elapsed = time.time() - start
    print("Processed %s files in %s seconds." % (count, elapsed))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment