Skip to content

Instantly share code, notes, and snippets.

@mgronhol
Last active December 23, 2015 12:19
Show Gist options
  • Select an option

  • Save mgronhol/6634315 to your computer and use it in GitHub Desktop.

Select an option

Save mgronhol/6634315 to your computer and use it in GitHub Desktop.
How to pick outer nodes by inner node content
#!/usr/bin/env python
import xml.dom.minidom as dom
import xml
import sys
def read_xml_file(fn):
    """Parse an XML document and return its root element.

    Args:
        fn: Filename (or open file object) handed straight to
            ``xml.dom.minidom.parse``.

    Returns:
        The ``documentElement`` of the parsed document.
    """
    return dom.parse(fn).documentElement
def pick_outer_tag_by_inner_content(nodes, outer_tag, inner_tag, predicate):
    """Select outer elements whose inner elements contain matching text.

    Every ``outer_tag`` element found under ``nodes`` is inspected; it is
    selected when any text node that is a direct child of one of its
    ``inner_tag`` descendants satisfies ``predicate``.

    Args:
        nodes: DOM node to search (anything with ``getElementsByTagName``).
        outer_tag: Tag name of the elements to return.
        inner_tag: Tag name of the descendants whose text is tested.
        predicate: Callable taking the raw text (``node.data``) of a text
            node and returning a truthy value on a match. It is called
            only until the first match within a given outer element.

    Returns:
        A list of matching outer elements in document order. Each element
        appears at most once, even when several of its inner elements match.
    """
    out = []
    for outer_node in nodes.getElementsByTagName(outer_tag):
        # any() stops at the first hit, so an outer node with several
        # matching inner nodes is not appended more than once.
        matched = any(
            predicate(child.data)
            for inner_node in outer_node.getElementsByTagName(inner_tag)
            for child in inner_node.childNodes
            if child.nodeType == child.TEXT_NODE
        )
        if matched:
            out.append(outer_node)
    return out
def main():
    """Print the <item> elements of an XML file that have a wanted category.

    Reads the file named by the first command-line argument, keeps every
    <item> whose <category> text is one of the wanted values
    (case-insensitive, surrounding whitespace ignored), and prints them
    wrapped in a new <results> document.
    """
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("usage: %s FILE.xml" % sys.argv[0])

    doc = read_xml_file(sys.argv[1])
    wanted_stuff = frozenset(["ulkomaat", "kulttuuri"])
    nodes = pick_outer_tag_by_inner_content(
        doc, "item", "category",
        # The predicate receives the text of a node, not the node itself.
        lambda text: text.strip().lower() in wanted_stuff)

    result = dom.getDOMImplementation().createDocument(None, "results", None)
    root = result.documentElement
    for node in nodes:
        # appendChild moves the node out of the source document.
        root.appendChild(node)

    # Call form of print works on both Python 2 and Python 3.
    print(result.toprettyxml())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment