Created
October 18, 2019 13:56
-
-
Save nicoknoll/be250a37ee0b0001e928b68fc039e9e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert(name, remove_ns=True):
    """Convert a CamelCase / mixedCase name to snake_case.

    Args:
        name: The name to convert, e.g. an XML tag such as
            ``"{http://example.com/ns}OrderRequest"``.
        remove_ns: When true, a leading ``{http...}`` namespace prefix is
            removed before the conversion.

    Returns:
        The lower-cased, underscore-separated name.
    """
    if remove_ns:
        # Strip the namespace *before* inserting underscores. Doing it
        # afterwards turned "}Name" into "}_Name" and left a stray leading
        # underscore ("_name") once the namespace was removed.
        name = re.sub(r'^\{http.*\}', '', name)
    # First pass: split before a capitalised word ("OrderRequest").
    partial = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Second pass: split a lower-case letter/digit from a following capital
    # ("payloadID" -> "payload_ID").
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', partial).lower()
def convert_keys(root):
    """Recursively rename every dict key in *root* via ``convert``.

    Dicts are rebuilt with converted keys, lists are rebuilt element by
    element, and any other value is returned unchanged.
    """
    if isinstance(root, dict):
        return {
            convert(key): convert_keys(value)
            for key, value in root.items()
        }
    if isinstance(root, list):
        return [convert_keys(entry) for entry in root]
    # Scalars (and any other non-container value) pass through untouched.
    return root
def _flatten_attributes(lookup, attributes): | |
if attributes is None: | |
return lookup | |
attributes_list = [('@' + k, v) for k, v in attributes.items()] | |
if not isinstance(lookup, dict): | |
return dict(attributes_list + [('$t', lookup)]) | |
return dict(list(lookup.items()) + attributes_list) | |
def _xml_element_to_json(xml_element, attributes):
    """Convert a single objectify element into plain Python data.

    Typed leaf elements are converted to a matching Python value; any other
    element is converted through its children. In both cases the result is
    combined with *attributes* by ``_flatten_attributes``.
    """
    # Checked in this order; the first matching type wins.
    leaf_converters = (
        (objectify.BoolElement, bool),
        (objectify.IntElement, int),
        # Floats are returned as strings to keep decimals as they are.
        # NOTE(review): str() of a FloatElement may normalise the text
        # (e.g. "1.50" -> "1.5") - confirm against lxml if the exact source
        # text matters.
        (objectify.FloatElement, str),
        (objectify.StringElement, lambda element: str(element).strip()),
    )
    for element_type, to_python in leaf_converters:
        if isinstance(xml_element, element_type):
            return _flatten_attributes(to_python(xml_element), attributes)

    # Not a recognised leaf type: recurse into the child elements.
    children = xml_element.getchildren()
    return _flatten_attributes(_xml_to_json(children), attributes)
def _xml_to_json(xml_object):
    """Convert an objectify element, or a list of child elements, to data.

    Args:
        xml_object: An ``objectify.ObjectifiedElement`` or a list of
            elements (as returned by ``getchildren()``).

    Returns:
        For an element, whatever ``_xml_element_to_json`` produces. For a
        list of two or more elements that all share one tag, a list of the
        converted elements. For any other list, a dict keyed by tag, where
        a tag that occurs more than once maps to a list of its converted
        occurrences.

    Raises:
        TypeError: If *xml_object* is neither an objectify element nor a
            list.
    """
    attributes = None
    if hasattr(xml_object, "attrib") and not xml_object.attrib == {}:
        attributes = xml_object.attrib

    if isinstance(xml_object, objectify.ObjectifiedElement):
        return _xml_element_to_json(xml_object, attributes)

    if isinstance(xml_object, list):
        if len(xml_object) > 1 and all(
            xml_object[0].tag == item.tag for item in xml_object
        ):
            return [_xml_to_json(item) for item in xml_object]

        result = {}
        # Tags already promoted to a list of occurrences. Tracking this
        # explicitly (instead of checking isinstance(value, list)) keeps a
        # first occurrence whose converted value is itself a list from
        # being mistaken for the occurrence list and extended in place.
        repeated = set()
        for item in xml_object:
            tag = item.tag
            value = _xml_to_json(item)
            if tag not in result:
                result[tag] = value
            elif tag in repeated:
                result[tag].append(value)
            else:
                result[tag] = [result[tag], value]
                repeated.add(tag)
        return result

    # The original returned the exception object instead of raising it,
    # which silently placed an Exception instance into the output.
    raise TypeError("Not a valid lxml object")
def xml_to_json(xml):
    """Parse *xml* with ``objectify.fromstring`` and convert the tree.

    Returns the plain Python structure built by ``_xml_to_json`` (not a
    serialised JSON string).
    """
    root = objectify.fromstring(xml)
    return _xml_to_json(root)
def cleanup_phone_number(parts):
    """Join the components of a telephone number into one string.

    Args:
        parts: A mapping that may contain ``'country_code'`` (either a
            nested mapping with its own ``'country_code'`` entry or a plain
            value), ``'area_or_city_code'``, ``'number'`` and
            ``'extension'``. A plain string is returned unchanged, in the
            same way ``cleanup_street`` treats strings.

    Returns:
        The present components separated by single spaces, in the order
        country code, area/city code, number, extension.
    """
    if isinstance(parts, str):
        return parts

    country = parts.get('country_code', '')
    if isinstance(country, dict):
        country = country.get('country_code', '')

    components = (
        country,
        parts.get('area_or_city_code', ''),
        parts.get('number', ''),
        parts.get('extension', ''),
    )
    # Convert before filtering so a numeric 0 survives as "0", and treat
    # None like a missing component instead of rendering it as "None".
    texts = (
        '' if component is None else str(component).strip()
        for component in components
    )
    # Dropping empty components avoids doubled spaces inside the result.
    return ' '.join(text for text in texts if text)
def cleanup_street(parts):
    """Collapse the lines of a street address into one string.

    Args:
        parts: Either a single value (string or number), which is returned
            unchanged, or an iterable of address lines.

    Returns:
        The non-empty lines joined with ``', '``, or *parts* itself when it
        is a single scalar value.
    """
    # A single street value has nothing to join; numbers are included
    # because they are not iterable and would otherwise raise.
    if isinstance(parts, (str, int, float)):
        return parts
    # str() guards against lines that are not strings (e.g. a numeric
    # line), which str.join would reject with a TypeError.
    return ', '.join(str(p) for p in parts if p).strip()
# Maps a (snake_case) key to the function that post-processes the value
# stored under that key; consulted by _cleanup for every dict entry.
CLEANUPS = {
    'telephone_number': cleanup_phone_number,
    'street': cleanup_street,
}
def _cleanup(root):
    """Recursively apply the ``CLEANUPS`` handlers to a parsed structure.

    For each dict entry whose key is registered in ``CLEANUPS``, the value
    is first passed through the registered function; the (possibly
    replaced) value is then cleaned recursively. Lists are processed
    element by element and every other value is returned unchanged.
    """
    if isinstance(root, list):
        return [_cleanup(entry) for entry in root]
    if not isinstance(root, dict):
        return root

    cleaned = {}
    for key, value in root.items():
        if key in CLEANUPS:
            value = CLEANUPS[key](value)
        cleaned[key] = _cleanup(value)
    return cleaned
def cleanup_cxml(parsed):
    """Normalise a parsed structure.

    Keys are converted with ``convert_keys`` first, then the
    ``CLEANUPS`` handlers are applied through ``_cleanup``.
    """
    return _cleanup(convert_keys(parsed))
def cxml_to_json(xml, cleanup=True):
    """Convert an XML document to plain Python data.

    Args:
        xml: The XML input handed to ``xml_to_json``.
        cleanup: When true (the default), the parsed result is additionally
            passed through ``cleanup_cxml``.

    Returns:
        The parsed structure, cleaned up unless *cleanup* is false.
    """
    parsed = xml_to_json(xml)
    if not cleanup:
        return parsed
    return cleanup_cxml(parsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment