Created
June 15, 2017 22:19
-
-
Save vijayanandrp/1e3cfb8dfaadce65fdef02ff34ecb0ef to your computer and use it in GitHub Desktop.
Learn XML parsing in Python with simple examples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# XML parsing walkthrough using xml.etree.ElementTree.
#
# The script expects a file named 'data.xml' in the current working directory.
# Judging by the lookups below, its root holds <country name="..."> elements,
# each with <rank>, <year> and <neighbor> children (presumably the country
# sample from the ElementTree documentation -- confirm against your data.xml).
#
# Every print() call receives one pre-formatted string, so the script runs and
# prints the same text under both Python 2 and Python 3.

# 1. How to read an XML file using Python?
import xml.etree.ElementTree as element_tree

# Parse the whole XML file into a tree before reading anything from it.
tree = element_tree.parse('data.xml')

# The root element is the entry point for every traversal below.
root = tree.getroot()

# root.tag is the name of the top-level element.
print("Root of the XML is  %s" % (root.tag,))

# root.attrib is a dict of the root's attributes (empty if it has none).
print("Root attrib is  %s" % (root.attrib,))

# 2. How to print the country names?
for child in root:
    # .get() yields None instead of raising KeyError when 'name' is absent.
    print("Country Name =  %s" % (child.attrib.get('name'),))
print('~' * 127)

# 3. How to print only the countries and their neighbors?
for child in root:
    print("%s  is  %s" % (child.tag, child.attrib.get('name')))
    for neighbor in child.findall('neighbor'):
        print("%s  is  %s" % (neighbor.tag, neighbor.attrib))
print('*' * 127)

# 4. How to print all the content of the XML file?
for child in root:
    # Uses .get() like the other sections so a child without a 'name'
    # attribute is reported as None rather than aborting the walkthrough.
    print("%s  is  %s" % (child.tag, child.attrib.get('name')))
    for grand_child in child:
        print("%s %s %s" % (grand_child.tag, grand_child.text, grand_child.attrib))
print("=" * 127)

# 5. How to print only the neighbors, wherever they sit in the tree?
for neighbor in root.iter('neighbor'):
    print("%s %s" % (neighbor.tag, neighbor.attrib))
print("-" * 127)

# 6. How to print the rank and year of each country?
for country in root.iter('country'):
    # findtext() returns None when the child element is missing, whereas
    # find(...).text would raise AttributeError on the None result.
    rank = country.findtext('rank')
    year = country.findtext('year')
    name = country.attrib.get('name')
    print("%s rank is  %s  on  %s" % (name, rank, year))
print('~' * 127)

# 7. How to set an attribute on the root?
root.set('location', 'world')
print("Now the Root attrib is  %s" % (root.attrib,))

# 8. How to write the tree to output.xml?
tree.write('output.xml', xml_declaration=True, encoding='utf-8')

# 9. How to modify the rank or year of a particular country?
for country in root:
    if country.attrib.get('name') == 'Panama':
        for grand_child in country:
            if grand_child.tag in ('year', 'rank'):
                # Element text is a string: convert, increment, convert back.
                grand_child.text = str(int(grand_child.text) + 1)
                grand_child.set('Updated', 'yes')

# Dump the modified tree to stdout, then overwrite output.xml with it.
element_tree.dump(tree)
tree.write('output.xml', xml_declaration=True, encoding="us-ascii")

# 10. How to remove a country (or any child) from the XML?
# findall() returns a list, so removing from root while looping is safe.
for country in root.findall('country'):
    rank_text = country.findtext('rank')
    if rank_text is None:
        # No <rank> child, so there is nothing to compare; keep the country.
        continue
    if int(rank_text) > 50:
        print("Removing the Country  %s" % (country.attrib.get('name'),))
        root.remove(country)

# Dump the tree again to show the result of the removal.
element_tree.dump(tree)
print('=' * 127)

# Signature for reference:
# xml.etree.ElementTree.tostring(element, encoding="us-ascii", method="xml")
# 11. How to convert the XML to a string?
# tostring() takes an Element (the root), not the ElementTree wrapper.
xml_content = element_tree.tostring(root, method="xml", encoding="us-ascii")
# With an explicit byte encoding Python 3 returns bytes; decode for display so
# the XML text is printed rather than a b'...' literal.
print(xml_content.decode("us-ascii"))
print('=' * 127)

# 12. How to parse XML content from a string?
# fromstring() returns the root Element directly, not an ElementTree.
string_root = element_tree.fromstring(xml_content)
print("New string tree root is  %s" % (string_root.tag,))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment