Created
April 8, 2016 13:42
-
-
Save 0xquad/7c4002ec86ec4ef8c7bbb8f2134a48d7 to your computer and use it in GitHub Desktop.
XPath selection tool in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
from lxml.etree import parse, fromstring, tostring, _Element | |
def print_result(result):
    """Print an XPath evaluation result to standard output.

    Lists are walked recursively so that every item ends up on its own
    line. Elements are printed as serialized markup; any other value is
    printed through ``str()``.

    Args:
        result: The value returned by an ``xpath()`` call, or one item
            taken from such a value.
    """
    if isinstance(result, list):
        for item in result:
            # Recurse to print the individual elements
            print_result(item)
    elif isinstance(result, _Element):
        # with_tail=False keeps the text that follows the element's closing
        # tag out of the output: the tail belongs to the parent's content,
        # not to the selected element.
        value = tostring(result, with_tail=False).strip()
        if isinstance(value, bytes):
            value = value.decode()
        print(value)
    else:
        print(str(result))
if __name__ == '__main__':
    # Command line: <script> XPATH FILE
    #   sys.argv[1] is the XPath expression, sys.argv[2] the XML file path.
    if len(sys.argv) != 3:
        sys.exit('usage: {} XPATH FILE'.format(sys.argv[0]))

    # Read the raw bytes instead of decoded text so the parser can honour
    # the encoding named in the document's own XML declaration; decoding
    # with the locale encoding and re-encoding would corrupt documents
    # declared in any other encoding.
    with open(sys.argv[2], 'rb') as fp:
        xml = fp.read()

    # Remove namespaces from the document by renaming the namespace
    # declarations and the xsi: prefix, so that plain (unprefixed) XPath
    # expressions can be used against it.
    xml = xml.replace(b'xmlns=', b'no-xmlns=')
    xml = xml.replace(b'xmlns:', b'no-xmlns-')
    xml = xml.replace(b'xsi:', b'no-xsi-')

    doc = fromstring(xml)
    result = doc.xpath(sys.argv[1])

    # Empty node lists and empty strings stay silent, but numeric and
    # boolean results are real answers even when falsy (e.g. a count of
    # 0.0 or a boolean False), so they are always printed.
    if result or isinstance(result, (bool, int, float)):
        print_result(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment