Last active
July 31, 2024 10:27
-
-
Save onlyforbopi/5bb7f45905defe5fe7ff6255766b2045 to your computer and use it in GitHub Desktop.
#XML #xml #py #python #xmlhandling #xmlparsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
else: | |
print(dir(ET)) | |
def pplist(list_in):
    """Pretty-print ``list_in`` to stdout with a 4-space nesting indent."""
    from pprint import PrettyPrinter

    PrettyPrinter(indent=4).pprint(list_in)
def MapXmlLvl2LoL(file_in):
    """Map the first two levels of an XML file into a list.

    Every child of the root contributes one entry, in document order:
    the Element itself when it has no children, otherwise a list holding
    its direct children.
    """
    top_level = ET.parse(file_in).getroot()
    return [list(node) if len(node) else node for node in top_level]
def MapXmlLvl2(file_in):
    """Print the root, first-level and second-level elements of an XML file.

    Each element is printed through ``str()``, i.e. as its default
    ``<Element 'tag' at 0x...>`` representation.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root))
    for elem in root:
        print("First Level element: " + str(elem))
        # No emptiness check is needed: iterating a childless element simply
        # does nothing. (The former bare `next` expression was a no-op.)
        for subelem in elem:
            print("Second Level element: " + str(subelem))
def MapXmlLvl2attrib(file_in):
    """Print tag and attribute dict for the first two levels of an XML file.

    Output lines have the form ``<label>: <tag> : <attrib dict>`` for the
    root, each child of the root, and each grandchild.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root.tag) + " : " + str(root.attrib))
    for elem in root:
        print("First Level element: " + str(elem.tag) + " : " + str(elem.attrib))
        # Childless elements yield no iterations, so no explicit empty check
        # is required (the former bare `next` expression did nothing).
        for subelem in elem:
            print("Second Level element: " + str(subelem.tag) + " : " + str(subelem.attrib))
def MapXmlLvl2tag(file_in):
    """Print the tag names of the first two levels of an XML file.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root.tag))
    for elem in root:
        print("First Level element: " + str(elem.tag))
        # Iterating a childless element is already a no-op; the former bare
        # `next` expression in an empty-check branch had no effect.
        for subelem in elem:
            print("Second Level element: " + str(subelem.tag))
def MapXmlLvl2full(file_in):
    """Print tag, attributes and text for the first two levels of an XML file.

    Lines are tab-indented by depth and have the form
    ``<label>: <tag> : <attrib dict> : <text>``.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root.tag) + " : " + str(root.attrib) + " : " + str(root.text))
    for elem in root:
        print("\tFirst Level element: " + str(elem.tag) + " : " + str(elem.attrib) + " : " + str(elem.text))
        # Childless elements yield no iterations; the former bare `next`
        # expression in an empty-check branch was a no-op.
        for subelem in elem:
            print("\t\tSecond Level element: " + str(subelem.tag) + " : " + str(subelem.attrib) + " : " + str(subelem.text))
        # NOTE(review): the original nesting of this separator was lost; it is
        # assumed to run once per first-level element - confirm.
        print("\n")
def ExtractData2LevelIn(file_in, position):
    """Print the text of the child at ``position`` inside every record.

    Intended for doubly nested XML::

        <parent>
            <child>
                <subchild>

    For each child of the root this prints ``i is: <index>`` followed by
    the text of that child's sub-element at index ``position``. Deeper
    nesting needs one more index per level (e.g. ``root[0][1][2]``).

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
        position: Index of the sub-element to read in each record.

    Raises:
        IndexError: If a record has no sub-element at ``position``.
    """
    root = ET.parse(file_in).getroot()
    # enumerate() replaces the manual range(len(root)) index loop.
    for i, record in enumerate(root):
        print("i is: " + str(i))
        print(record[position].text)
def ExtractDataSpecRecText(file_in, record_number):
    """Print the text of every sub-element of one record.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
        record_number: Zero-based index of the record (a child of the
            root). May be an int or an int-like string such as ``"1"``.

    Raises:
        IndexError: If the root has no child at ``record_number``.
        ValueError: If ``record_number`` cannot be converted to int.
    """
    root = ET.parse(file_in).getroot()
    # Convert once and reuse: previously only the len() call converted the
    # index, so a string record number failed with TypeError when indexing.
    record = root[int(record_number)]
    for field in record:
        print(field.text)
def ExtractDataSpecRecAttr(file_in, record_number):
    """Print the attribute dict of every sub-element of one record.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
        record_number: Zero-based index of the record (a child of the
            root). May be an int or an int-like string such as ``"2"``.

    Raises:
        IndexError: If the root has no child at ``record_number``.
        ValueError: If ``record_number`` cannot be converted to int.
    """
    root = ET.parse(file_in).getroot()
    # Convert once and reuse: previously only the len() call converted the
    # index, so a string record number failed with TypeError when indexing.
    record = root[int(record_number)]
    for field in record:
        print(field.attrib)
def ExtractDataSpecRecNode(file_in, record_number):
    """Print every sub-element (as an Element object) of one record.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
        record_number: Zero-based index of the record (a child of the
            root). May be an int or an int-like string such as ``"3"``.

    Raises:
        IndexError: If the root has no child at ``record_number``.
        ValueError: If ``record_number`` cannot be converted to int.
    """
    root = ET.parse(file_in).getroot()
    # Convert once and reuse: previously only the len() call converted the
    # index, so a string record number failed with TypeError when indexing.
    record = root[int(record_number)]
    for field in record:
        print(field)
# Path of the XML document to inspect (first command-line argument).
file_in = sys.argv[1]

# Parse once here for the inline examples; each helper below parses the
# file again on its own.
tree = ET.parse(file_in)
root = tree.getroot()

# Run every two-level mapping helper against the same file.
MapXmlLvl2(file_in)
MapXmlLvl2attrib(file_in)
MapXmlLvl2tag(file_in)
pplist(MapXmlLvl2LoL(file_in))
MapXmlLvl2full(file_in)

# Element.attrib exposes all attribute/value pairs of a node as a dict.
print("All item attributes")
for record in root:
    print("Handling element: ", str(record))
    print("Handling element attribute: ", str(record.attrib))
    for field in record:
        print("Handling subelement: " + str(field))
        print("Printing subelement attributes" + str(field.attrib))
        # field.attrib['name'] would only work if every node carries a
        # 'name' attribute, so it is not printed here.

# Same walk, this time also showing the text content of each sub-element.
print("\n\nAll item attributes - Data accessing")
for record in root:
    print("Handling element: ", str(record))
    for field in record:
        print("Handling subelement: " + str(field))
        print("Printing subelement attributes: " + str(field.attrib))
        print("Printing subelement value: " + str(field.text))

# Elements support positional indexing: root[x][y] is the y-th child of the
# x-th child of the root.
first_record = root[0]
for idx in range(3):
    print(first_record[idx])
for idx in range(3):
    print(first_record[idx].text)
for idx in range(3):
    print(first_record[idx].attrib)
print(root[1][0].text)
print(root[2][0].text)

# Exercise the extraction helpers with fixed positions / record numbers.
ExtractData2LevelIn(file_in, 0)
ExtractData2LevelIn(file_in, 4)
ExtractDataSpecRecText(file_in, 1)
ExtractDataSpecRecAttr(file_in, 2)
ExtractDataSpecRecNode(file_in, 3)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
def countxmlETsim(file_in):
    '''
    Name: countxmlETsim
    Description: Counts tags under the root tag.
                 Counts xml "records"
    Function: countxmlETsim(file_in)
    Input: <file_in> path (or file object) of the XML document
    Output: <integer> number of direct children of the root,
            or False when the file cannot be read or parsed
    Usage: print(countxmlETsim("items.xml"))
    Notes: Compare the result with "is False" to tell a failure apart
           from an empty root, since 0 == False.
    '''
    import xml.etree.ElementTree as ET

    # Parsing is the step that can fail, so it has to sit inside the try;
    # previously only getroot() was guarded and the False path was dead.
    try:
        root = ET.parse(file_in).getroot()
    except (ET.ParseError, OSError):
        return False
    return len(root)
# Path of the XML document to count (first command-line argument).
file_in = sys.argv[1]

tree = ET.parse(file_in)
root = tree.getroot()

# Number of records, first computed inline ...
record_count = len(root)
print(record_count)
# ... then through the helper, which parses the file again.
print(countxmlETsim(file_in))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ######################################################### | |
# This is an example of how to create an xml file by | |
# declaring all elements one by one. | |
# | |
# Specific attributes are declared independently. | |
import os | |
import sys | |
import xml.etree.ElementTree as ET | |
# Destination path of the generated document (first command-line argument).
file_in = sys.argv[1]

# Create the structure: <data><items><item/><item/></items></data>
data = ET.Element('data')
items = ET.SubElement(data, 'items')
item1 = ET.SubElement(items, 'item')
item2 = ET.SubElement(items, 'item')
# A nested element would be added the same way,
# e.g. ET.SubElement(item1, 'gear').

# Assign the 'name' attribute of each item.
item1.set('name', 'item1')
item2.set('name', 'item2')

# Assign the text content of each item.
item1.text = 'item1abc'
item2.text = 'item2abc'

# Serialize and write the result. ET.tostring() returns bytes by default,
# hence the decode. The context manager closes the file, which the earlier
# bare open() never did.
mydata = ET.tostring(data)
with open(file_in, 'w') as myfile:
    myfile.write(mydata.decode('utf8'))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
# A different way of parsing XML is by using "events" | |
# The parser generates "start" events for opening tags and "end" | |
# events for closing tags. | |
# Data can be extracted from the document during the parsing phase | |
# by iterating over the event stream, that way the document does not | |
# need to be maintained in memory. | |
# iterparse() returns an iterable that produces tuples | |
# containing the name of the event and the node triggering the event. | |
# Events can be: | |
# "start" : new tag has been encountered. | |
# "end" : Closing angle tag has been processed. All of the children were already processed. | |
# "start-ns": Start a namespace declaration | |
# "end-ns" : End a namespace declaration
from xml.etree.ElementTree import iterparse | |
# Dump every parse event of podcasts.opml, one line per event, with a dotted
# prefix whose length reflects the nesting depth.
depth = 0
prefix_width = 8
prefix_dots = '.' * prefix_width
line_template = '{prefix:<0.{prefix_len}}{event:<8}{suffix:<{suffix_len}} {tag:<12} {node_id}'

for (event, node) in iterparse('podcasts.opml', ['start', 'end', 'start-ns', 'end-ns']):
    # An "end" event belongs to the level of its matching "start".
    if event == 'end':
        depth -= 1

    prefix_len = depth * 2

    # Namespace events carry no Element: "start-ns" yields a (prefix, uri)
    # tuple and "end-ns" yields None, so fall back to str() for the label.
    tag = node.tag if hasattr(node, 'tag') else str(node)

    print(line_template.format(
        prefix=prefix_dots,
        prefix_len=prefix_len,
        suffix='',
        # Clamp at zero: beyond depth 4 the difference goes negative, and a
        # negative width is not a valid format specification.
        suffix_len=max(prefix_width - prefix_len, 0),
        tag=tag,
        node_id=id(node),
        event=event,
    ))

    if event == 'start':
        depth += 1
# This can be used in a "line by line" fashion ie | |
# when converting XML input to some other format. | |
# ie XML TO CSV | |
import csv | |
from xml.etree.ElementTree import iterparse | |
import sys | |
# Stream podcasts.opml to CSV on stdout, one row per podcast entry.
writer = csv.writer(sys.stdout, quoting=csv.QUOTE_NONNUMERIC)
group_name = ''

for (event, node) in iterparse('podcasts.opml', events=['start']):
    # Only <outline> nodes are relevant; everything else is passed over.
    if node.tag == 'outline':
        attrs = node.attrib
        if attrs.get('xmlUrl'):
            # An outline with a feed URL is a podcast entry: emit it together
            # with the most recently seen group name.
            row = (
                group_name,
                attrs['text'],
                attrs['xmlUrl'],
                attrs.get('htmlUrl', ''),
            )
            writer.writerow(row)
        else:
            # An outline without a feed URL names the current group.
            group_name = attrs['text']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
file_in = sys.argv[1] | |
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
tree = ET.parse(file_in)
root = tree.getroot()

# find(match, namespaces=None) returns the first matching direct child,
# or None when there is no match.
for elem in root:
    first_item = elem.find('item')
    # Compare against None explicitly: chaining .get() onto find() raised
    # AttributeError for elements without an <item> child, and a childless
    # Element is falsy so a plain truth test would be wrong.
    if first_item is not None:
        print(first_item.get('name'))

# findall(match, namespaces=None) returns every matching direct child.
for elem in root:
    for subelem in elem.findall('item'):
        # If the attribute names are unknown, take the whole dict.
        print(subelem.attrib)
        # If the attribute name is known, read it directly.
        print(subelem.get('name'))
        # The element's text content is read through .text.
        print(subelem.text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
from xml.etree import ElementTree | |
with open('podcasts.opml', 'rt') as f:
    tree = ElementTree.parse(f)

# Printing the tree shows the ElementTree object itself, not its content.
print(tree)

# Iterate over all nodes with .iter()
for node in tree.iter():
    print(node.tag, node.attrib)

# Iterate over 'outline' nodes only: passing a tag limits iter() to nodes
# with that tag.
for node in tree.iter('outline'):
    name = node.attrib.get('text')
    url = node.attrib.get('xmlUrl')
    if name and url:
        print(" %s :: %s " % (name, url))
    else:
        print(name)

# Use findall() to look for nodes with more descriptive search
# characteristics.
for node in tree.findall('.//outline'):
    url = node.attrib.get('xmlUrl')
    if url:
        print(url)

# Limit it even further: only outline nodes nested inside another outline.
for node in tree.findall('.//outline/outline'):
    url = node.attrib.get('xmlUrl')
    print(url)

# Parsing specific node attributes
with open('data.xml', 'rt') as f:
    tree = ElementTree.parse(f)

# Keep a specific node in a variable. The path is written relative to the
# root ('./...'); a leading '/' on ElementTree.find() triggers a
# FutureWarning.
node = tree.find('./with_attributes')
print(node.tag)

# Parsing attribute key/value pairs
for name, value in sorted(node.attrib.items()):
    print(' %-4s = "%s"' % (name, value))

# Parsing text and tail text
for path in ['./child', './child_with_tail']:
    node = tree.find(path)
    print(node.tag)
    print('Child node text: ', node.text)
    print('and tail text : ', node.tail)

# XML entity references embedded in the document are converted to the
# characters they stand for before values are returned.
node = tree.find('entity_expansion')
print(node.tag)
print(' in attribute:', node.attrib['attribute'])
print(' in text :', node.text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
else: | |
pass | |
file_in = sys.argv[1] | |
def pplist(list_in):
    """Pretty-print ``list_in`` to stdout with a 4-space nesting indent."""
    from pprint import PrettyPrinter

    PrettyPrinter(indent=4).pprint(list_in)
def MapXmlLvl3(file_in):
    """Print the root and the first three levels of elements of an XML file.

    Each element is printed through ``str()``, i.e. as its default
    ``<Element 'tag' at 0x...>`` representation.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root))
    for elem in root:
        print("First Level element: " + str(elem))
        # Childless elements yield no iterations, so the former empty checks
        # (whose bare `next` expression was a no-op) are not needed.
        for subelem in elem:
            print("Second Level element: " + str(subelem))
            for thirdlevel in subelem:
                print("Third level element: " + str(thirdlevel))
def MapXmlLvl3attrib(file_in):
    """Print elements and attribute dicts for the first three levels.

    Output lines have the form ``<label>: <Element repr> : <attrib dict>``
    for the root and each element down to the third level below it.

    Args:
        file_in: Path (or file object) handed to ``ET.parse``.
    """
    root = ET.parse(file_in).getroot()
    print("Root element: " + str(root) + " : " + str(root.attrib))
    for elem in root:
        print("First Level element: " + str(elem) + " : " + str(elem.attrib))
        # Childless elements yield no iterations, so the former empty checks
        # (whose bare `next` expression was a no-op) are not needed.
        for subelem in elem:
            print("Second Level element: " + str(subelem) + " : " + str(subelem.attrib))
            for thirdlevel in subelem:
                print("Third level element: " + str(thirdlevel) + " : " + str(thirdlevel.attrib))
# Run both three-level mappers, in order, on the file given on the command line.
for mapper in (MapXmlLvl3, MapXmlLvl3attrib):
    mapper(file_in)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
file_in = sys.argv[1] | |
from xml.dom import minidom | |
def prettify(elem):
    """Return a pretty-printed XML string for the Element.

    The element is serialized as UTF-8, re-parsed with minidom and rendered
    by ``toprettyxml`` using the indent string below.
    """
    from xml.dom.minidom import parseString
    from xml.etree.ElementTree import tostring

    return parseString(tostring(elem, 'utf-8')).toprettyxml(indent=" ")
def indent(elem, level=0):
    """Indent an element tree in place by rewriting blank text/tail values.

    Only ``text`` and ``tail`` values that are empty or whitespace-only are
    replaced; each nesting level adds one repetition of the indent string to
    a leading ``os.linesep``.

    Args:
        elem: Element whose subtree is re-indented.
        level: Nesting depth of ``elem`` (0 for the root).
    """
    pad = os.linesep + level * " "

    def blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only a nested leaf gets its tail set.
        if level and blank(elem.tail):
            elem.tail = pad
        return

    if blank(elem.text):
        elem.text = pad + " "
    if blank(elem.tail):
        elem.tail = pad
    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child
    # The last child's tail precedes the parent's closing tag, so it is set
    # to the parent's own indentation.
    if blank(last_child.tail):
        last_child.tail = pad
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
tree = ET.parse(file_in)
root = tree.getroot()

# --- Modifying existing elements ---------------------------------------

# Replace the text of every <item>.
for item in root.iter('item'):
    item.text = 'new text'

# Overwrite an existing attribute.
for item in root.iter('item'):
    item.set('name', 'newitem')

# Add a further attribute.
for item in root.iter('item'):
    item.set('name2', 'newitem2')

# Change text only where an attribute has a given value. Every name was
# set to 'newitem' just above, so nothing matches when run in sequence.
for item in root.iter('item'):
    if item.attrib['name'] == 'item2':
        item.text = 'new text'

# Drop all attributes where the text has a given value; assigning a new
# mapping to .attrib replaces the whole attribute set.
for item in root.iter('item'):
    if item.text == 'new text':
        item.attrib = {}

# tree.write() serializes the modified tree to a file.
tree.write('newitems3.xml')

# --- Creating sub-elements: makeelement() and SubElement() --------------

# Add an element to the root: declare the attribute dict, construct the
# element, then append it.
attrib = {}
element = root.makeelement('seconditems', attrib)
root.append(element)

# Add an element below the second child of the root.
attrib = {'name2': 'secondname2'}
# makeelement() only constructs; this element is never attached to the tree.
subelement = root[0][1].makeelement('seconditem', attrib)
# SubElement() constructs and appends in one step.
ET.SubElement(root[1], 'seconditem', attrib)
root[1][0].text = 'seconditemabc'
indent(root)

# Write a new file that includes the added elements.
tree.write('newitems_v1.xml')

# --- Pretty-printing ----------------------------------------------------

# Through the helper function ...
print(prettify(root))
# ... and the same thing spelled out inline.
rough_xml = ET.tostring(tree.getroot(), 'utf-8')
print(minidom.parseString(rough_xml).toprettyxml(indent=" "))

# --- Deleting: an attribute, one sub-element, all sub-elements ----------

# Remove the 'name' attribute from the first grandchild, root[0][0].
tree = ET.parse(file_in)
root = tree.getroot()
root[0][0].attrib.pop('name', None)
tree.write('newitems_v2_del.xml')

# Remove one sub-element.
tree = ET.parse(file_in)
root = tree.getroot()
root[0].remove(root[0][0])
tree.write('newitems_v2_sub.xml')
# Same tree, written with an XML declaration.
tree.write("newitems_v2_sub2.xml", encoding="utf-8", xml_declaration=True)

# Clear the first child of the root entirely.
tree = ET.parse(file_in)
root = tree.getroot()
root[0].clear()
tree.write('newitems_v2_clr.xml')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def indent(elem, level=0):
    """Indent an element tree in place by rewriting blank text/tail values.

    Only ``text`` and ``tail`` values that are empty or whitespace-only are
    replaced; each nesting level adds one repetition of the indent string to
    a leading ``os.linesep``.

    Args:
        elem: Element whose subtree is re-indented.
        level: Nesting depth of ``elem`` (0 for the root).
    """
    # Imported locally because this file section has no `import os`, so
    # os.linesep would otherwise raise NameError.
    import os

    pad = os.linesep + level * " "

    def blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only a nested leaf gets its tail set.
        if level and blank(elem.tail):
            elem.tail = pad
        return

    if blank(elem.text):
        elem.text = pad + " "
    if blank(elem.tail):
        elem.tail = pad
    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child
    # The last child's tail precedes the parent's closing tag, so it is set
    # to the parent's own indentation.
    if blank(last_child.tail):
        last_child.tail = pad
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
else: | |
print(dir(ET)) | |
# Path of the XML document to inspect (first command-line argument).
file_in = sys.argv[1]

# Parse the XML file and get its root element.
tree = ET.parse(file_in)
root = tree.getroot()

# Iterate over all items and print their attribute pairs.
print("All item attributes")
for elem in root:
    print("Handling element: ", str(elem))
    for subelem in elem:
        print("Handling subelement: " + str(subelem))
        print("Printing subelement attributes" + str(subelem.attrib))
        # .get() prints None for nodes without a 'name' attribute; indexing
        # attrib['name'] raised KeyError on such nodes.
        print(subelem.attrib.get('name'))

# Iterate over all items and print attribute pairs and text.
print("\n\nAll item attributes - Data accessing")
for elem in root:
    print("Handling element: ", str(elem))
    for subelem in elem:
        print("Handling subelement: " + str(subelem))
        print("Printing subelement attributes: " + str(subelem.attrib))
        print("Printing subelement value: " + str(subelem.text))
        print(subelem.attrib.get('name'))

# Elements support positional indexing: root[x][y] is the y-th child of the
# x-th child of the root.
print(root[0][0].text)
print(root[0][1].text)
print(root[0][2].text)
print(root[0][0].attrib)
print(root[0][1].attrib)
print(root[0][2].attrib)
############################################# | |
############################################# | |
import os | |
import sys | |
# Collection of short, independent ElementTree snippets, ported to Python 3:
# print() calls, no Element.getchildren(), no cElementTree (both were
# removed in Python 3.9). Each snippet repeats its import so it can be
# copied on its own.

# Insert a sub-element at a given position
from xml.etree import ElementTree
root = ElementTree.parse("sample.xml").getroot()
c = ElementTree.Element("c")
c.text = "3"
root.insert(1, c)
ElementTree.dump(root)

# Remove a sub-element
root = ElementTree.parse("sample.xml").getroot()
b = list(root)[1]
root.remove(b)
ElementTree.dump(root)

# Get the text
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
root = tree.getroot()
print(root.find("a").text)

# Get the attribute
from xml.etree import ElementTree
tree = ElementTree.parse("fruits.xml")
item = tree.getroot()[0]
print(item.get("color"))

# Get the root XML element of an 'ElementTree'
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
print(tree.getroot())

# Get all child elements of an XML element
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
root = tree.getroot()
children = list(root)
for child in children:
    ElementTree.dump(child)

# Get the tag name of an XML element
from xml.etree import ElementTree
element = ElementTree.Element("a")
print(element.tag)

# Get a descendant of an XML element using indices.
from xml.etree import ElementTree
tree = ElementTree.parse("fruits.xml")
root = tree.getroot()
print(root[0][1].text)

# Get the key/value pairs of the attrib dictionary (XML)
from xml.etree import ElementTree
tree = ElementTree.parse("fruits.xml")
item = tree.getroot()[0]
print(item.keys())
print(item.items())

# Retrieve the tag of an XML element with a namespace
from xml.etree import ElementTree
root = ElementTree.parse("namespaces.xml").getroot()
ElementTree.dump(root)
print(root.tag)

# Add element as last child of another XML element
from xml.etree import ElementTree
root = ElementTree.parse("sample.xml").getroot()
c = ElementTree.Element("c")
c.text = "3"
root.append(c)
ElementTree.dump(root)

# Load XML from string
from xml.etree import ElementTree
root = ElementTree.XML("<root><a>1</a></root>")
ElementTree.dump(root)

# Load XML from a string with fromstring()
from xml.etree import ElementTree
root = ElementTree.fromstring("<root><a>1</a></root>")
ElementTree.dump(root)

# Load XML file into ElementTree
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
ElementTree.dump(tree)

# Load XML file into an ElementTree more efficiently.
# This snippet used xml.etree.cElementTree, which no longer exists;
# ElementTree picks up the C accelerator automatically when available.
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
ElementTree.dump(tree)

# Clear an XML element
from xml.etree import ElementTree
root = ElementTree.parse("fruits.xml").getroot()
root.clear()
ElementTree.dump(root)

# Find all direct children of an XML element that match a tag
from xml.etree import ElementTree
root = ElementTree.parse("fruits.xml").getroot()
for item in root.findall("item"):
    ElementTree.dump(item)

# Find the first subelement of an XML element that matches a path
from xml.etree import ElementTree
root = ElementTree.parse("fruits.xml").getroot()
name = root.find("item/name")
ElementTree.dump(name)

# Find all elements below an XML element that match a tag.
# iter() walks the whole subtree, so it is not limited to direct children.
from xml.etree import ElementTree
root = ElementTree.parse("fruits.xml").getroot()
for name in root.iter("name"):
    ElementTree.dump(name)

# Find all descendants of an XML element that match a tag
from xml.etree import ElementTree
root = ElementTree.parse("fruits.xml").getroot()
for name in root.iter("name"):
    ElementTree.dump(name)

# Set attributes of an XML element
from xml.etree import ElementTree
item = ElementTree.Element("item")
item.set("color", "red")
item.set("flavor", "sweet")
ElementTree.dump(item)

# Check whether an object is an xml element
from xml.etree import ElementTree
tree = ElementTree.parse("sample.xml")
root = tree.getroot()
a = root[0]
print(ElementTree.iselement(root))
print(ElementTree.iselement(a))
print(ElementTree.iselement(tree))
print(ElementTree.iselement(1))

# Generate a string representation of an XML element
# (tostring() returns bytes unless encoding="unicode" is passed).
from xml.etree import ElementTree
a = ElementTree.Element("a")
a.text = "1"
print(ElementTree.tostring(a))

# Efficient ways to construct XML elements.
# This snippet also used cElementTree; plain ElementTree replaces it.
from xml.etree import ElementTree
a = ElementTree.Element("a")
a.text = "1"
ElementTree.dump(a)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
file_in = sys.argv[1] | |
from xml.dom import minidom | |
def prettify(elem):
    """Return a pretty-printed XML string for the Element.

    The element is serialized as UTF-8, re-parsed with minidom and rendered
    by ``toprettyxml`` using the indent string below.
    """
    from xml.dom.minidom import parseString
    from xml.etree.ElementTree import tostring

    return parseString(tostring(elem, 'utf-8')).toprettyxml(indent=" ")
def indent(elem, level=0):
    """Indent an element tree in place by rewriting blank text/tail values.

    Only ``text`` and ``tail`` values that are empty or whitespace-only are
    replaced; each nesting level adds one repetition of the indent string to
    a leading ``os.linesep``.

    Args:
        elem: Element whose subtree is re-indented.
        level: Nesting depth of ``elem`` (0 for the root).
    """
    pad = os.linesep + level * " "

    def blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    if not len(elem):
        # Leaf: only a nested leaf gets its tail set.
        if level and blank(elem.tail):
            elem.tail = pad
        return

    if blank(elem.text):
        elem.text = pad + " "
    if blank(elem.tail):
        elem.tail = pad
    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child
    # The last child's tail precedes the parent's closing tag, so it is set
    # to the parent's own indentation.
    if blank(last_child.tail):
        last_child.tail = pad
try: | |
import xml.etree.ElementTree as ET | |
except Exception as e: | |
print(str(e)) | |
tree = ET.parse(file_in)
root = tree.getroot()

# --- Modifying existing elements ---------------------------------------

# Replace the text of every <item>.
for item in root.iter('item'):
    item.text = 'new text'

# Overwrite an existing attribute.
for item in root.iter('item'):
    item.set('name', 'newitem')

# Add a further attribute.
for item in root.iter('item'):
    item.set('name2', 'newitem2')

# Change text only where an attribute has a given value. Every name was
# set to 'newitem' just above, so nothing matches when run in sequence.
for item in root.iter('item'):
    if item.attrib['name'] == 'item2':
        item.text = 'new text'

# Drop all attributes where the text has a given value; assigning a new
# mapping to .attrib replaces the whole attribute set.
for item in root.iter('item'):
    if item.text == 'new text':
        item.attrib = {}

# tree.write() serializes the modified tree to a file.
tree.write('newitems3.xml')

# --- Creating sub-elements: makeelement() and SubElement() --------------

# Add an element to the root: declare the attribute dict, construct the
# element, then append it.
attrib = {}
element = root.makeelement('seconditems', attrib)
root.append(element)

# Add an element below the second child of the root.
attrib = {'name2': 'secondname2'}
# makeelement() only constructs; this element is never attached to the tree.
subelement = root[0][1].makeelement('seconditem', attrib)
# SubElement() constructs and appends in one step.
ET.SubElement(root[1], 'seconditem', attrib)
root[1][0].text = 'seconditemabc'
indent(root)

# Write a new file that includes the added elements.
tree.write('newitems_v1.xml')

# --- Pretty-printing ----------------------------------------------------

# Through the helper function ...
print(prettify(root))
# ... and the same thing spelled out inline.
rough_xml = ET.tostring(tree.getroot(), 'utf-8')
print(minidom.parseString(rough_xml).toprettyxml(indent=" "))

# --- Deleting: an attribute, one sub-element, all sub-elements ----------

# Remove the 'name' attribute from the first grandchild, root[0][0].
tree = ET.parse(file_in)
root = tree.getroot()
root[0][0].attrib.pop('name', None)
tree.write('newitems_v2_del.xml')

# Remove one sub-element.
tree = ET.parse(file_in)
root = tree.getroot()
root[0].remove(root[0][0])
tree.write('newitems_v2_sub.xml')
# Same tree, written with an XML declaration.
tree.write("newitems_v2_sub2.xml", encoding="utf-8", xml_declaration=True)

# Clear the first child of the root entirely.
tree = ET.parse(file_in)
root = tree.getroot()
root[0].clear()
tree.write('newitems_v2_clr.xml')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment