Skip to content

Instantly share code, notes, and snippets.

@aaronj1335
Last active December 19, 2015 15:39
Show Gist options
  • Select an option

  • Save aaronj1335/5978057 to your computer and use it in GitHub Desktop.

Select an option

Save aaronj1335/5978057 to your computer and use it in GitHub Desktop.
remove elements that aren't ours
from xml.etree.ElementTree import Element, fromstring, tostring
from xml.dom import minidom
# Short identifiers of the entries to keep. prune() compares these against
# the tail of each element's ID attribute.
# NOTE(review): the three groups below look like crises, people and
# organizations respectively -- confirm against the data set.
OUR_IDS = [
    'BMBOMB', 'FUKUSH', 'SYCLWR', 'HURSAN', 'COLSSD',
    'PIRATE', 'EXXVAL', 'BFACTO', 'HAIEAR', 'AIDSHI',

    'BOMBER', 'ABDALA', 'JOSHAZ', 'RENPRE', 'SYRIDE',
    'BROBMA', 'POPEFR',

    'THIOKL', 'GENELE', 'MARORG', 'NATOAA', 'TEPCOM',
    'REDCRS', 'WHORGN', 'NASAUS', 'CATHLC',
]

# Entries that do not have an identifier in OUR_IDS yet ("missing"):
#   - CEO of TEPCO Masataka Shimizu
#   - American mechanical engineer Roger Boisjoly
#   - Russian President Vladimir Putin
#   - Joint United Nations Programme on HIV/AIDS (UNAIDS)
def prune(xml):
    """Return pretty-printed XML containing only the entries listed in OUR_IDS.

    The document in *xml* is parsed and each direct child of its root is
    examined:

    * A child is kept only when the part of its ``ID`` attribute after the
      last underscore is in ``OUR_IDS``.
    * Inside a kept child, the ``Crises``, ``People`` and ``Organizations``
      sub-elements have every link removed whose ``ID`` (minus its first
      four characters) is not in ``OUR_IDS``. A sub-element left with no
      links is removed as well.
    * Elements or links that carry no ``ID`` attribute cannot be matched and
      are therefore treated as "not ours" and dropped.

    The kept entries are collected under a new ``WorldCrises`` root, whatever
    the input root was called.

    :param xml: XML document as a string.
    :returns: the pruned document, pretty-printed by minidom with blank
        lines stripped.

    Malformed XML is not handled here; the parser's error propagates.
    """
    new_tree = Element('WorldCrises')

    for el in fromstring(xml):
        el_id = el.get('ID')
        # No ID means no way to match against OUR_IDS: skip instead of
        # crashing on None.
        if el_id is None or el_id.split('_')[-1] not in OUR_IDS:
            continue

        # Serialise and re-parse to obtain a detached copy, so the pruning
        # below never touches the tree parsed from the input.
        new_el = fromstring(tostring(el))

        emptied = []
        for child in new_el:
            if child.tag not in ('Crises', 'People', 'Organizations'):
                continue

            # Collect first, remove afterwards: removing while iterating
            # over `child` would skip siblings.
            foreign = []
            for link in child:
                link_id = link.get('ID')
                if link_id is None or link_id[4:] not in OUR_IDS:
                    foreign.append(link)
            for link in foreign:
                child.remove(link)

            # len() counts child elements; an empty link container is noise.
            if not len(child):
                emptied.append(child)

        for child in emptied:
            new_el.remove(child)

        new_tree.append(new_el)

    pretty = minidom.parseString(tostring(new_tree)).toprettyxml()
    # toprettyxml() emits whitespace-only lines around the existing text
    # nodes; drop them.
    return '\n'.join(line for line in pretty.split('\n') if line.strip())
@Cracktuar
Copy link
Copy Markdown

Hooray code I don't have to write! Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment