Skip to content

Instantly share code, notes, and snippets.

@kschlottmann
Last active April 12, 2021 12:59
Show Gist options
  • Select an option

  • Save kschlottmann/f2d691eeeff679a7b77529687fc39673 to your computer and use it in GitHub Desktop.

Select an option

Save kschlottmann/f2d691eeeff679a7b77529687fc39673 to your computer and use it in GitHub Desktop.
EAD: dsc to csv //edited to move columns around, add physdesc
Download EAD from ArchivesSpace
Download ead2csv.xsl
Transform the EAD using the XSL
*In oXygen, use the XSLT debugger mode.
*At a Linux/Cygwin command line, transform using the following command (be sure to download the saxon jar to the relevant directory)
java -cp saxon9he.jar net.sf.saxon.Transform -o:findingAid.tsv -s:{sourceEAD}.xml -xsl:ead2csv.xsl
Analyze output, make sure nothing is missing. Modify XSL if needed.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ead="urn:isbn:1-931666-22-9" xmlns:xlink="http://www.w3.org/1999/xlink">
<!-- Drop whitespace-only text nodes from every source element in the EAD namespace. -->
<xsl:strip-space elements="ead:*" />
<!-- The result is plain text (the delimited table), encoded as UTF-8. -->
<xsl:output method="text" indent="no" encoding="utf-8" standalone="yes"/>
<!-- Field separator written after every column value, including the last one in a row. -->
<xsl:variable name="delimiter" select="'|'" />
<!-- Double-quote character; escape_values uses it to wrap and to double embedded quotes. -->
<xsl:variable name="quote" select="'&quot;'" />
<!-- Line feed written at the end of the header row and of every data row. -->
<xsl:variable name="eol" select="'&#10;'" />
<!-- Depth number passed to the components found directly under dsc. -->
<xsl:variable name="startlevel" select="1" />
<!-- Lower-cased part of the collection-level unitid before its first '.'. NOTE(review): not referenced anywhere else in the visible stylesheet. -->
<xsl:variable name="collprefix" select="lower-case(substring-before(ead:ead/ead:archdesc/ead:did/ead:unitid, '.'))" />
<!-- Part of the collection-level unitid after its first '.'. NOTE(review): not referenced anywhere else in the visible stylesheet. -->
<xsl:variable name="collnum" select="substring-after(ead:ead/ead:archdesc/ead:did/ead:unitid, '.')" />
<!--
  Entry point: writes the header row, then processes the dsc so that every component
  contributes one data row.
  Each header name is followed by the delimiter, exactly like the values in the data rows,
  so the header and the rows always contain the same number of fields. Previously the
  'Publish?' header had no delimiter after it and was fused with 'DATE', which shifted
  every later header one column to the left of its data.
-->
<xsl:template match="/">
  <!-- Column names, in the order in which the ead:c template writes its values. -->
  <xsl:for-each select="('REFID',
                         'TITLE',
                         'LEVEL TYPE',
                         'LEVEL',
                         'Publish?',
                         'DATE',
                         'DATE BEGIN',
                         'DATE END',
                         'BULK DATE BEGIN',
                         'BULK DATE END',
                         'BOX',
                         'FOLDER',
                         'DF',
                         'OVSIZE',
                         'INSTANCE TYPE',
                         'GENERAL NOTE',
                         'RESTRICTIONS',
                         'SCOPE',
                         'PHYSDESC',
                         'EXPECTEDFILENAME')">
    <xsl:value-of select="." /><xsl:value-of select="$delimiter" />
  </xsl:for-each>
  <xsl:value-of select="$eol" />
  <!-- Data rows: one per component, produced by the ead:dsc and ead:c templates. -->
  <xsl:apply-templates select="ead:ead/ead:archdesc/ead:dsc"/>
</xsl:template>
<!--
  Document element: passes processing on to its archdesc children.
  NOTE(review): the root template selects the dsc directly, so this rule does not appear
  to be reached during a normal run.
-->
<xsl:template match="ead:ead">
  <xsl:apply-templates select="child::ead:archdesc"/>
</xsl:template>
<!--
  Archival description: passes processing on to its dsc children.
  NOTE(review): the root template selects the dsc directly, so this rule does not appear
  to be reached during a normal run.
-->
<xsl:template match="ead:archdesc">
  <xsl:apply-templates select="child::ead:dsc"/>
</xsl:template>
<!--
  Description of subordinate components: processes each top-level component and tells it
  that it sits at the starting depth ($startlevel).
-->
<xsl:template match="ead:dsc">
  <xsl:apply-templates select="child::ead:c">
    <xsl:with-param name="level" select="$startlevel"/>
  </xsl:apply-templates>
</xsl:template>
<!--
  Writes one delimited row for the current component, then recurses into its child
  components so that rows come out in document order.
  Parameter level: depth number of this component; each child is processed with level + 1.
  Values are written in the same order as the column names in the header row, and every
  value (including the last) is followed by the delimiter.
-->
<xsl:template match="ead:c">
  <xsl:param name="level" />
  <xsl:variable name="next_level" select="$level + 1" />

  <!--
    A did may hold several unitdate elements (for example an inclusive and a bulk date).
    normalize-space(), tokenize() and the 'eq' operator all raise a type error when handed
    more than one item, so exactly one node is chosen for each purpose:
      main_unitdate = first unitdate that is not typed 'bulk', otherwise the first unitdate;
      bulk_unitdate = first unitdate typed 'bulk', if any.
  -->
  <xsl:variable name="main_unitdate"
                select="(ead:did/ead:unitdate[not(@type = 'bulk')], ead:did/ead:unitdate)[1]" />
  <xsl:variable name="bulk_unitdate" select="ead:did/ead:unitdate[@type = 'bulk'][1]" />

  <xsl:variable name="title">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="ead:did/ead:unittitle"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="date">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="normalize-space($main_unitdate)" />
    </xsl:call-template>
  </xsl:variable>

  <!-- @normal is split on '/': the first token is the begin date, the second (if present) the end date. -->
  <xsl:variable name="date_begin" select="tokenize($main_unitdate/@normal, '/')[1]" />
  <xsl:variable name="date_end" select="tokenize($main_unitdate/@normal, '/')[2]" />
  <!-- Same split for the bulk date; these columns were previously always left empty. -->
  <xsl:variable name="bulk_date_begin" select="tokenize($bulk_unitdate/@normal, '/')[1]" />
  <xsl:variable name="bulk_date_end" select="tokenize($bulk_unitdate/@normal, '/')[2]" />

  <!-- Container values, selected by the container's type attribute. -->
  <xsl:variable name="box">
    <xsl:value-of select="ead:did/ead:container[@type='box']" />
  </xsl:variable>
  <xsl:variable name="folder">
    <xsl:value-of select="ead:did/ead:container[@type='folder']" />
  </xsl:variable>
  <xsl:variable name="digital_file">
    <xsl:value-of select="ead:did/ead:container[@type='Digital_file']" />
  </xsl:variable>
  <xsl:variable name="oversize">
    <xsl:value-of select="ead:did/ead:container[@type='Oversize']" />
  </xsl:variable>
  <!-- The instance type is read from the label attribute of the first container. -->
  <xsl:variable name="instance_type">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="ead:did/ead:container[1]/@label" />
    </xsl:call-template>
  </xsl:variable>

  <xsl:variable name="general_note">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="ead:odd/ead:p" />
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="scope_content">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="ead:scopecontent/ead:p" />
    </xsl:call-template>
  </xsl:variable>
  <!--
    physdesc is free text like the notes, so it is escaped as well; a delimiter or quote
    inside it would otherwise split the row. The nodes are joined into one string first
    (single space, as xsl:value-of would do) so that escape_values receives a single item.
  -->
  <xsl:variable name="physdesc">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="string-join(ead:did/ead:physdesc, ' ')" />
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="restrict_note">
    <xsl:call-template name="escape_values">
      <xsl:with-param name="value" select="ead:accessrestrict/ead:p" />
    </xsl:call-template>
  </xsl:variable>
  <!-- Expected filename can be used to match to existing digital files for automatic DAO processing; left empty here. -->
  <xsl:variable name="expected_filename">
  </xsl:variable>

  <!-- The row itself. -->
  <xsl:value-of select="@id" /><xsl:value-of select="$delimiter" /><!--REFID -->
  <xsl:value-of select="normalize-space($title)" /><xsl:value-of select="$delimiter" /><!--TITLE -->
  <xsl:value-of select="@level" /><xsl:value-of select="$delimiter" /><!--LEVEL TYPE -->
  <xsl:value-of select="$level" /><xsl:value-of select="$delimiter" /><!--LEVEL -->
  <xsl:text>TRUE</xsl:text><xsl:value-of select="$delimiter" /><!--PUBLISH -->
  <xsl:value-of select="normalize-space($date)" /><xsl:value-of select="$delimiter" /><!--DATE -->
  <xsl:value-of select="$date_begin" /><xsl:value-of select="$delimiter" /><!--DATE BEGIN -->
  <xsl:value-of select="$date_end" /><xsl:value-of select="$delimiter" /><!--DATE END -->
  <xsl:value-of select="$bulk_date_begin" /><xsl:value-of select="$delimiter" /><!--BULK DATE BEGIN -->
  <xsl:value-of select="$bulk_date_end" /><xsl:value-of select="$delimiter" /><!--BULK DATE END -->
  <xsl:value-of select="$box" /><xsl:value-of select="$delimiter" /><!--BOX VAL -->
  <xsl:value-of select="$folder" /><xsl:value-of select="$delimiter" /><!--FOLDER VAL -->
  <xsl:value-of select="$digital_file" /><xsl:value-of select="$delimiter" /><!-- DIGITAL FILE VAL -->
  <xsl:value-of select="$oversize" /><xsl:value-of select="$delimiter" /><!--OVSIZE VAL -->
  <xsl:value-of select="$instance_type" /><xsl:value-of select="$delimiter" /><!--INSTANCE TYPE -->
  <xsl:value-of select="normalize-space($general_note)" /><xsl:value-of select="$delimiter" /><!--GENERAL NOTE -->
  <xsl:value-of select="normalize-space($restrict_note)" /><xsl:value-of select="$delimiter" /><!--RESTRICTIONS NOTE -->
  <xsl:value-of select="normalize-space($scope_content)" /><xsl:value-of select="$delimiter" /><!--SCOPE NOTE -->
  <xsl:value-of select="normalize-space($physdesc)" /><xsl:value-of select="$delimiter" /><!--PHYSDESC NOTE -->
  <xsl:value-of select="$expected_filename" /><xsl:value-of select="$delimiter" /><!--EXPECTED FILENAME -->
  <xsl:value-of select="$eol" />

  <!-- Child components follow their parent, one level deeper. -->
  <xsl:apply-templates select="ead:c">
    <xsl:with-param name="level" select="$next_level" />
  </xsl:apply-templates>
</xsl:template>
<!--
  Returns the given value as a single field that is safe to place between delimiters.
  Parameter value: a string, a node, or a sequence of nodes (for example several ead:p
  paragraphs of one note).
  Result:
    - if the text contains the quote character, every quote is doubled and the whole
      text is wrapped in quotes;
    - otherwise, if it contains the delimiter, the text is wrapped in quotes;
    - otherwise the text is returned unchanged.
  The value is flattened to one string first, with items joined by a single space (the
  separator xsl:value-of uses by default). Without this step contains() raises a type
  error as soon as the caller selects more than one node.
-->
<xsl:template name="escape_values">
  <xsl:param name="value" />
  <xsl:variable name="text"
                select="string-join(for $item in $value return string($item), ' ')" />
  <xsl:choose>
    <xsl:when test="contains($text, $quote)">
      <xsl:variable name="escapedquote" select="replace($text, $quote, concat($quote, $quote))" />
      <xsl:value-of select="concat($quote, $escapedquote, $quote)" />
    </xsl:when>
    <xsl:when test="contains($text, $delimiter)">
      <xsl:value-of select="concat($quote, $text, $quote)" />
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$text" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  Pass-through rules: for each of these elements, processing simply continues with the
  element's children. The alternatives of a union pattern are treated as separate rules,
  so each keeps the priority it had as a stand-alone template.
-->
<xsl:template match="ead:did/ead:unittitle
                     | ead:title
                     | ead:did/ead:physdesc
                     | ead:extent
                     | ead:p
                     | ead:emph">
  <xsl:apply-templates select="child::node()"/>
</xsl:template>

<!-- odd: only its paragraph children are processed. -->
<xsl:template match="ead:odd">
  <xsl:apply-templates select="child::ead:p"/>
</xsl:template>
</xsl:stylesheet>
Update 2018-12-19
Added physdesc and scopecontent
Update 2021-04-12
Added normalize-space for title and date
For Claremont:
Ran unitdate across CUL EAD. Took output, removed empty namespaces (xmlns=""), pretty printed, ran ead2csv.xsl.
EAD cleanup notes (tested on Crane)
This stylesheet: https://github.com/mdpeters/EAD_stylesheets/blob/master/ead_to_csv.xsl
Switched to | as delimiter
Lowercased box and folder
<xsl:variable name="box">
<xsl:value-of select="ead:did/ead:container[@type='box']" />
</xsl:variable>
<xsl:variable name="folder">
<xsl:value-of select="ead:did/ead:container[@type='folder']" />
</xsl:variable>
4077437 - Charles Richard Crane Papers
Pretty printed in oXygen for line break reasons
Run unitdate.xsl to move the unitdates in parallel
Removed the xmlns="urn:isbn:1-931666-22-9" and xmlns="" declarations; copied the <ead> element from the original file for namespace reasons
Run ead2csv.xsl, paste into Excel, split data
Problem with ead2csv.xsl - the unitdate.xsl isn't copying c-file inside c-file
-> fixed with call to apply templates inside c
Resulting output csv:
Problem 1: Using <unitdate> as a separator
Problem 2: Excluding the day from a unitdate
<unittitle>To Martin Ryerson 5 <unitdate type="inclusive">February 1870</unitdate> Chicago</unittitle>
Saved as CRC.xls - will try to clean this up
11048740 - ran fine
<?xml version="1.0"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ead="urn:isbn:1-931666-22-9" xmlns:xlink="http://www.w3.org/1999/xlink">
<!--
-->
<!-- PLEASE SEE THE MODIFICATION TIPS FILE FOR TIPS ON HOW TO ALTER THIS DOCUMENT -->
<!-- Serialize the result as indented XML. NOTE(review): exclude-result-prefixes governs literal result elements nested inside the element that carries it; xsl:output contains none, so the attribute presumably has no effect here. Confirm against the XSLT 2.0 standard-attribute rules. -->
<xsl:output method="xml" indent="yes" exclude-result-prefixes="ead"/>
<!-- Identity template: copies every node and attribute that no more specific template handles. -->
<xsl:template match="@* | node()">
  <xsl:copy>
    <xsl:apply-templates select="@* | node()"/>
  </xsl:copy>
</xsl:template>
<!-- The following template takes the unitdate out of unit title-->
<!--
<xsl:template match="ead/archdesc/did/unittitle">
<xsl:element name="unittitle">
<!-\-<xsl:copy-of select="ead/archdesc/did/unittitle[not(unitdate)]"/>
<xsl:copy-of select="@*"/>-\->
</xsl:element>
</xsl:template>
--><!--
<xsl:template match="ead/archdesc/did">
<xsl:element name="unitdate">
<xsl:value-of select="ead/archdesc/did/unittitle/unitdate"/>
</xsl:element>
<xsl:apply-templates/>
</xsl:template>
-->
<!--
  Rebuilds each file-level component so that the unitdate nested inside unittitle becomes
  a sibling of unittitle. Only the first two containers, the title text, the date text,
  the physdesc extent text and scopecontent are carried over; this list is maintained by
  hand. Nested components are processed in turn.

  The rebuilt elements are created in the EAD namespace (default namespace declared on
  the literal c element). Creating them in no namespace, as before, mixed un-namespaced
  elements with the namespaced nodes copied from the source, which serialized as stray
  xmlns="" and xmlns="urn:isbn:1-931666-22-9" declarations and produced c elements that
  a stylesheet matching ead:c would not find.

  exclude-result-prefixes on the template keeps the stylesheet's own prefixes off the
  literal elements. It has no influence on copied nodes, so xsl:copy-of uses
  copy-namespaces="no" instead to leave unused namespace declarations behind.
-->
<xsl:template match="ead:c[@level='file']" exclude-result-prefixes="#all">
  <c level="file" xmlns="urn:isbn:1-931666-22-9">
    <did>
      <!-- First and second container, in document order. -->
      <xsl:copy-of select="ead:did/ead:container[position() le 2]" copy-namespaces="no"/>
      <!-- Text directly inside unittitle, i.e. without the nested unitdate. -->
      <unittitle>
        <xsl:value-of select="ead:did/ead:unittitle/text()"/>
      </unittitle>
      <!-- Text of the unitdate that was nested in unittitle. -->
      <unitdate type="inclusive">
        <xsl:value-of select="ead:did/ead:unittitle/ead:unitdate/text()"/>
      </unitdate>
      <physdesc>
        <extent>
          <xsl:value-of select="ead:did/ead:physdesc/ead:extent/text()"/>
        </extent>
      </physdesc>
    </did>
    <xsl:copy-of select="ead:scopecontent" copy-namespaces="no"/>
    <!-- Components nested inside this one are handled by this template or by the identity template. -->
    <xsl:apply-templates select="ead:c"/>
  </c>
</xsl:template>
<!-- This works, but no attributes, obviously.
<xsl:template match="ead/archdesc/did/unittitle">
<unittitle>
<xsl:value-of select="self::node()"/>
</unittitle>
<unitdate>
<xsl:value-of select="child::unitdate"/>
</unitdate>
</xsl:template> -->
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment