Last active
June 17, 2022 17:44
-
-
Save yoki/fbff44f79e7f93b8d9c8b0bc11fd3d75 to your computer and use it in GitHub Desktop.
Python XML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# xml document | |
# edit.py | |
# encode.py | |
# extract.py | |
# namespace.py | |
# search.py |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_99
>>> from xml.etree.ElementTree import parse, Element
>>> doc = parse('pred.xml')
>>> root = doc.getroot()
>>> # Remove a few elements
>>> root.remove(root.find('sri'))
>>> # Insert a new element after <nm>...</nm>
>>> # (Element.getchildren() was removed in Python 3.9; use list(root))
>>> list(root).index(root.find('nm'))
1
>>> e = Element('spam')
>>> e.text = 'This is a test'
>>> root.insert(2, e)
>>> # Write back to a file
>>> doc.write('newpred.xml', xml_declaration=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_solution_37
>>> s = 'Elements are written as "<tag>text</tag>".'
>>> import html
>>> print(html.escape(s))
Elements are written as &quot;&lt;tag&gt;text&lt;/tag&gt;&quot;.
>>> s = 'Spicy Jalapeño'
>>> s.encode('ascii', errors='xmlcharrefreplace')
b'Spicy Jalape&#241;o'
>>> s = 'Spicy &quot;Jalape&#241;o&quot;.'
>>> # HTMLParser().unescape() was removed in Python 3.9; use html.unescape()
>>> html.unescape(s)
'Spicy "Jalapeño".'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_96
from urllib.request import urlopen
from xml.etree.ElementTree import parse

# Download the RSS feed and parse it. The context manager closes the HTTP
# response as soon as the document has been parsed.
with urlopen('http://planet.python.org/rss20.xml') as u:
    doc = parse(u)

# Extract and output tags of interest
for item in doc.iterfind('channel/item'):
    # findtext() returns None when the child element is missing.
    title = item.findtext('title')
    date = item.findtext('pubDate')
    link = item.findtext('link')

    print(title)
    print(date)
    print(link)
    print()
#--------------- Extract tag, text, attributes
>>> e = doc.find('channel/title')
>>> e
<Element 'title' at 0x10135b310>
>>> e.tag
'title'
>>> e.text
'Planet Python'
>>> e.get('some_attribute')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://chimera.labs.oreilly.com/books/1230000000393/ch06.html#_solution_100 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#---------XPATH--------------------
# https://docs.python.org/3.5/library/xml.etree.elementtree.html#supported-xpath-syntax
import xml.etree.ElementTree as ET

# NOTE(review): `countrydata` is not defined in this snippet; it presumably
# holds the XML text of the country sample from the page linked above --
# confirm and define it before running.
root = ET.fromstring(countrydata)

# Each findall() call below returns a list of matching elements; the results
# are not stored, so run these interactively or assign them to inspect.

# Top-level elements
root.findall(".")

# All 'neighbor' grand-children of 'country' children of the top-level
# elements
root.findall("./country/neighbor")

# Nodes with name='Singapore' that have a 'year' child
root.findall(".//year/..[@name='Singapore']")

# 'year' nodes that are children of nodes with name='Singapore'
root.findall(".//*[@name='Singapore']/year")

# All 'neighbor' nodes that are the second child of their parent
root.findall(".//neighbor[2]")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment