Skip to content

Instantly share code, notes, and snippets.

View walkerdb's full-sized avatar

Walker Boyle walkerdb

View GitHub Profile
# grab a single extent. Remember xpath returns a list, so we have to specify an individual element
>>> extent = tree.xpath("//extent")[0]
# now we have an "element" object, in this case an extent tag
# we can access all sorts of stuff from here.
# to get the text contained in the tag:
>>> extent.text
'3 linear feet and 1 outsize box'
>>> extent.text
'3 linear feet and 1 outsize box'
# we don't use "outsize", so we'll make it "oversize" instead
>>> extent.text = extent.text.replace(" outsize ", " oversize ")
>>> extent.text
'3 linear feet and 1 oversize box'
# create a text representation of the element
# etree.tostring() serializes the whole element, including its tags and subtags.
# By default it returns bytes; passing encoding="unicode" makes it return a str,
# so the str-based .replace() below works (on Python 3, calling .replace() on
# bytes with str arguments raises a TypeError).
text = etree.tostring(element_with_complex_text, encoding="unicode")
# do the manipulation
text = text.replace("Prince", "Artist formerly known as Prince")
# transform that text back into an lxml element
new_element = etree.fromstring(text)
>>> extent.tag
'extent'
>>> extent.tag = "physfacet"
>>> extent.tag
'physfacet'
>>> parent = extent.getparent()
>>> print(etree.tostring(parent)) # printing out the parent just for comparison purposes
'''
<physdesc altrender="whole">
<extent encodinganalog="300">3 linear feet and 1 oversize box</extent>
</physdesc>
'''
>>> parent.remove(extent)
>>> print(etree.tostring(parent))
# make a new, empty tag
>>> new_tag = etree.Element("extent")
# add some text
>>> new_tag.text = "25 embarrassing photos"
# add an attribute if you want any
>>> new_tag.attrib["encodinganalog"] = "300"
# insert the new tag into the master ead tree
# import the part of the library you'll need
>>> from lxml.builder import E
# the basic format for using E is:
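# E.[name of tag]([tag text], [any child elements that go inside the tag], [attribute name]=[attribute value])
# (attributes are keyword arguments, so Python requires them to come after the text and any child elements)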
# A single-tag example:
>>> new_element = E.extent("25 photographs", encodinganalog="300")
# printing to see the results
# say we have a c01 element that looks like this:
'''
<c01>
[...skipping <did> tag for brevity]
<c02>
...
<note>This collection is haunted</note>
</c02>
</c01>
'''
>>> container = tree.xpath("//container")[0]
>>> container.attrib
{'type': 'box', 'label': 'Box'}
# accessing attribute values:
>>> container.attrib.get("type", "")
'box'
>>> container.attrib.get("location", "")
''  # the "location" attribute does not exist, so .get() returns the default we passed in: an empty string
# changing a current value or creating a new attribute
>>> container.attrib["type"] = "folder"