Last active
December 23, 2015 12:19
-
-
Save mgronhol/6634315 to your computer and use it in GitHub Desktop.
How to pick outer nodes by inner node content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import xml.dom.minidom as dom | |
| import xml | |
| import sys | |
def read_xml_file( fn ):
    """Parse the XML source *fn* and return the document's root element.

    *fn* is handed straight to ``xml.dom.minidom.parse``, so it may be
    anything that function accepts (a file name or an open file object).
    """
    document = dom.parse( fn )
    return document.documentElement
def pick_outer_tag_by_inner_content( nodes, outer_tag, inner_tag, predicate ):
    """Select the *outer_tag* elements whose *inner_tag* text satisfies *predicate*.

    Parameters:
        nodes: a DOM node supporting ``getElementsByTagName`` (for example
            the document element) under which to search.
        outer_tag: tag name of the elements to return.
        inner_tag: tag name of the elements, looked up below each outer
            element, whose text content is tested.
        predicate: callable taking the raw ``data`` string of a text node
            and returning a truthy value when that text is wanted.

    Returns:
        A list of the matching outer elements in the order
        ``getElementsByTagName`` yields them. Each outer element appears at
        most once, even when several of its inner text nodes satisfy the
        predicate (previously it was appended once per matching text node).

    Only nodes whose ``nodeType`` is ``TEXT_NODE`` are tested; the text is
    passed to *predicate* exactly as stored, without stripping whitespace.
    """
    out = []
    for outer_node in nodes.getElementsByTagName( outer_tag ):
        # any() stops at the first hit, so one outer node can never be
        # collected more than once.
        matched = any(
            predicate( child.data )
            for inner_node in outer_node.getElementsByTagName( inner_tag )
            for child in inner_node.childNodes
            if child.nodeType == child.TEXT_NODE
        )
        if matched:
            out.append( outer_node )
    return out
# Script body: read the XML file named on the command line, keep the <item>
# elements that have a <category> whose lower-cased text is one of
# wanted_stuff, and print them wrapped in a new <results> document.
if len( sys.argv ) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit( "usage: %s FILE.xml" % sys.argv[0] )

doc = read_xml_file( sys.argv[1] )
wanted_stuff = ["ulkomaat", "kulttuuri"]
# The predicate receives the text of a category node, not a DOM node.
nodes = pick_outer_tag_by_inner_content( doc, "item", "category", lambda text: text.lower() in wanted_stuff )
result = dom.getDOMImplementation().createDocument(None, "results", None )
root = result.documentElement
for node in nodes:
    # NOTE(review): appendChild re-parents the node, so each picked item is
    # moved out of the parsed input document into the results document.
    root.appendChild( node )
# Single-argument parenthesised print works on both Python 2 and Python 3.
print( result.toprettyxml() )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment