Last active
April 12, 2021 12:59
-
-
Save kschlottmann/f2d691eeeff679a7b77529687fc39673 to your computer and use it in GitHub Desktop.
EAD: dsc to csv //edited to move columns around, add physdesc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Download EAD from ArchivesSpace | |
| Download ead2csv.xsl | |
| Transform the EAD using the XSL | |
| *In oXygen, use the XSLT debugger mode. | |
| *At a Linux/Cygwin command line, transform using the following command (be sure to download the saxon jar to the relevant directory) | |
| java -cp saxon9he.jar net.sf.saxon.Transform -o:findingAid.tsv -s:{sourceEAD}.xml -xsl:ead2csv.xsl | |
| Analyze output, make sure nothing is missing. Modify XSL if needed. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="UTF-8"?> | |
| <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ead="urn:isbn:1-931666-22-9" xmlns:xlink="http://www.w3.org/1999/xlink"> | |
| <!-- Drop whitespace-only text nodes from every EAD element before processing. --> | |
| <xsl:strip-space elements="ead:*" /> | |
| <!-- Plain-text output: the templates below write one delimited row per ead:c component. --> | |
| <xsl:output method="text" indent="no" encoding="utf-8" standalone="yes"/> | |
| <!-- Column separator written after every field. --> | |
| <xsl:variable name="delimiter" select="'|'" /> | |
| <!-- Quote character used by escape_values. It must be written as an entity: a literal double quote would end the select attribute and make the stylesheet ill-formed. --> | |
| <xsl:variable name="quote" select="'&quot;'" /> | |
| <!-- Row terminator (line feed). It must be a character reference: a literal line break inside an attribute value is normalized to a space by the XML parser. --> | |
| <xsl:variable name="eol" select="'&#10;'" /> | |
| <!-- Nesting depth assigned to the top-level components of ead:dsc. --> | |
| <xsl:variable name="startlevel" select="1" /> | |
| <!-- The collection unitid split at its first '.': lower-cased text before it, and the text after it. Neither variable is referenced elsewhere in this stylesheet as shown. --> | |
| <xsl:variable name="collprefix" select="lower-case(substring-before(ead:ead/ead:archdesc/ead:did/ead:unitid, '.'))" /> | |
| <xsl:variable name="collnum" select="substring-after(ead:ead/ead:archdesc/ead:did/ead:unitid, '.')" /> | |
| <!-- Root rule: writes the header row, then hands ead:dsc to the component templates. --> | |
| <xsl:template match="/"> | |
| <!-- Output csv headers. Every header is followed by the delimiter so the header row has the same number of fields as the rows written by the ead:c template. --> | |
| <xsl:value-of select="'REFID'" /><xsl:value-of select="$delimiter" /><!--REFID --> | |
| <xsl:value-of select="'TITLE'" /><xsl:value-of select="$delimiter" /><!--TITLE --> | |
| <xsl:value-of select="'LEVEL TYPE'" /><xsl:value-of select="$delimiter" /><!--LEVEL TYPE --> | |
| <xsl:value-of select="'LEVEL'" /><xsl:value-of select="$delimiter" /><!--LEVEL --> | |
| <!-- The delimiter was missing here, which fused this header with DATE and shifted every later header one column to the left of its data. --> | |
| <xsl:text>Publish?</xsl:text><xsl:value-of select="$delimiter" /><!--PUBLISH --> | |
| <xsl:value-of select="'DATE'" /><xsl:value-of select="$delimiter" /><!--DATE --> | |
| <xsl:value-of select="'DATE BEGIN'" /><xsl:value-of select="$delimiter" /><!--DATE BEGIN --> | |
| <xsl:value-of select="'DATE END'" /><xsl:value-of select="$delimiter" /><!--DATE END --> | |
| <xsl:value-of select="'BULK DATE BEGIN'" /><xsl:value-of select="$delimiter" /><!--BULK DATE BEGIN --> | |
| <xsl:value-of select="'BULK DATE END'" /><xsl:value-of select="$delimiter" /><!--BULK DATE END --> | |
| <xsl:value-of select="'BOX'" /><xsl:value-of select="$delimiter" /><!--BOX VAL --> | |
| <xsl:value-of select="'FOLDER'" /><xsl:value-of select="$delimiter" /><!--FOLDER VAL --> | |
| <xsl:value-of select="'DF'" /><xsl:value-of select="$delimiter" /><!-- DIGITAL FILE VAL --> | |
| <xsl:value-of select="'OVSIZE'" /><xsl:value-of select="$delimiter" /><!--OVSIZE VAL --> | |
| <xsl:value-of select="'INSTANCE TYPE'" /><xsl:value-of select="$delimiter" /><!--INSTANCE TYPE --> | |
| <xsl:value-of select="'GENERAL NOTE'" /><xsl:value-of select="$delimiter" /><!--GENERAL NOTE --> | |
| <xsl:value-of select="'RESTRICTIONS'" /><xsl:value-of select="$delimiter" /><!--RESTRICTIONS NOTE --> | |
| <xsl:value-of select="'SCOPE'" /><xsl:value-of select="$delimiter" /><!--SCOPE NOTE --> | |
| <xsl:value-of select="'PHYSDESC'" /><xsl:value-of select="$delimiter" /><!--PHYSDESC NOTE --> | |
| <xsl:value-of select="'EXPECTEDFILENAME'" /><xsl:value-of select="$delimiter" /><!--EXPECTED FILENAME --> | |
| <xsl:value-of select="$eol" /> | |
| <!-- Data rows: go straight to the component list of the finding aid. --> | |
| <xsl:apply-templates select="ead:ead/ead:archdesc/ead:dsc"/> | |
| </xsl:template> | |
| <!-- Steps from the EAD root element to its archdesc child. The root rule selects ead:dsc directly, so no apply-templates call in this stylesheet as shown routes a node here. --> | |
| <xsl:template match="ead:ead"> | |
| <xsl:apply-templates select="child::ead:archdesc" /> | |
| </xsl:template> | |
| <!-- Steps from archdesc to its dsc child (the component list). Like the ead:ead rule, it is bypassed by the root rule's direct selection of ead:dsc. --> | |
| <xsl:template match="ead:archdesc"> | |
| <xsl:apply-templates select="child::ead:dsc" /> | |
| </xsl:template> | |
| <!-- Starts the component walk: every top-level ead:c of the dsc is processed with level set to $startlevel. --> | |
| <xsl:template match="ead:dsc"> | |
| <xsl:apply-templates select="child::ead:c"> | |
| <xsl:with-param select="$startlevel" name="level" /> | |
| </xsl:apply-templates> | |
| </xsl:template> | |
| <!-- Writes one delimited row for the current component, then recurses into its nested ead:c children. | |
| Param level: nesting depth of this component; nested components receive level + 1. | |
| Multi-valued inputs (several unitdates, several note paragraphs) are reduced to a single node or string before they reach normalize-space(), tokenize() or escape_values, all of which raise a type error on a sequence of more than one item. --> | |
| <xsl:template match="ead:c"> | |
| <xsl:param name="level" /> | |
| <xsl:variable name="next_level" select="$level + 1" /> | |
| <!-- The unitdate that feeds DATE / DATE BEGIN / DATE END: the first one not typed as bulk, otherwise the first unitdate of any type, so a component with a single unitdate is reported exactly as before. --> | |
| <xsl:variable name="main_date" select="(ead:did/ead:unitdate[not(@type = 'bulk')], ead:did/ead:unitdate)[1]" /> | |
| <!-- The unitdate that feeds the bulk-date columns, if the component has one. --> | |
| <xsl:variable name="bulk_date" select="ead:did/ead:unitdate[@type = 'bulk'][1]" /> | |
| <xsl:variable name="title"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="string-join(ead:did/ead:unittitle, ' ')"/> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <xsl:variable name="date"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="normalize-space($main_date)" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <!-- @normal is split at '/': the first part is the begin value, the second the end value. A missing attribute yields an empty field. --> | |
| <xsl:variable name="date_begin" select="tokenize($main_date/@normal, '/')[1]" /> | |
| <xsl:variable name="date_end" select="tokenize($main_date/@normal, '/')[2]" /> | |
| <!-- These two variables used to be empty stubs, so the BULK DATE columns were never filled. They now mirror the begin/end split above. NOTE(review): confirm this is the intended content for the bulk columns. --> | |
| <xsl:variable name="bulk_date_begin" select="tokenize($bulk_date/@normal, '/')[1]" /> | |
| <xsl:variable name="bulk_date_end" select="tokenize($bulk_date/@normal, '/')[2]" /> | |
| <!-- Container values, selected by their @type; the type strings are matched case-sensitively. --> | |
| <xsl:variable name="box"> | |
| <xsl:value-of select="ead:did/ead:container[@type='box']" /> | |
| </xsl:variable> | |
| <xsl:variable name="folder"> | |
| <xsl:value-of select="ead:did/ead:container[@type='folder']" /> | |
| </xsl:variable> | |
| <xsl:variable name="digital_file"> | |
| <xsl:value-of select="ead:did/ead:container[@type='Digital_file']" /> | |
| </xsl:variable> | |
| <xsl:variable name="oversize"> | |
| <xsl:value-of select="ead:did/ead:container[@type='Oversize']" /> | |
| </xsl:variable> | |
| <!-- Instance type is read from the @label of the first container. --> | |
| <xsl:variable name="instance_type"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="ead:did/ead:container[1]/@label" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <!-- Notes may hold several paragraphs; they are joined with a space into one field. --> | |
| <xsl:variable name="general_note"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="string-join(ead:odd/ead:p, ' ')" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <xsl:variable name="scope_content"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="string-join(ead:scopecontent/ead:p, ' ')" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <!-- physdesc is free text, so it is escaped like the other text fields. --> | |
| <xsl:variable name="physdesc"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="string-join(ead:did/ead:physdesc, ' ')" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <xsl:variable name="restrict_note"> | |
| <xsl:call-template name="escape_values"> | |
| <xsl:with-param name="value" select="string-join(ead:accessrestrict/ead:p, ' ')" /> | |
| </xsl:call-template> | |
| </xsl:variable> | |
| <!--Expected filename can be used to match to existing digital files for automatic DAO processing. It is left empty here; the column is still written so the row keeps the same field count as the header. --> | |
| <xsl:variable name="expected_filename" /> | |
| <xsl:value-of select="@id" /><xsl:value-of select="$delimiter" /><!--REFID --> | |
| <xsl:value-of select="normalize-space($title)" /><xsl:value-of select="$delimiter" /><!--TITLE --> | |
| <xsl:value-of select="@level" /><xsl:value-of select="$delimiter" /><!--LEVEL TYPE --> | |
| <xsl:value-of select="$level" /><xsl:value-of select="$delimiter" /><!--LEVEL --> | |
| <xsl:text>TRUE</xsl:text><xsl:value-of select="$delimiter" /><!--PUBLISH --> | |
| <xsl:value-of select="normalize-space($date)" /><xsl:value-of select="$delimiter" /><!--DATE --> | |
| <xsl:value-of select="$date_begin" /><xsl:value-of select="$delimiter" /><!--DATE BEGIN --> | |
| <xsl:value-of select="$date_end" /><xsl:value-of select="$delimiter" /><!--DATE END --> | |
| <xsl:value-of select="$bulk_date_begin" /><xsl:value-of select="$delimiter" /><!--BULK DATE BEGIN --> | |
| <xsl:value-of select="$bulk_date_end" /><xsl:value-of select="$delimiter" /><!--BULK DATE END --> | |
| <xsl:value-of select="$box" /><xsl:value-of select="$delimiter" /><!--BOX VAL --> | |
| <xsl:value-of select="$folder" /><xsl:value-of select="$delimiter" /><!--FOLDER VAL --> | |
| <xsl:value-of select="$digital_file" /><xsl:value-of select="$delimiter" /><!-- DIGITAL FILE VAL --> | |
| <xsl:value-of select="$oversize" /><xsl:value-of select="$delimiter" /><!--OVSIZE VAL --> | |
| <xsl:value-of select="$instance_type" /><xsl:value-of select="$delimiter" /><!--INSTANCE TYPE --> | |
| <xsl:value-of select="normalize-space($general_note)" /><xsl:value-of select="$delimiter" /><!--GENERAL NOTE --> | |
| <xsl:value-of select="normalize-space($restrict_note)" /><xsl:value-of select="$delimiter" /><!--RESTRICTIONS NOTE --> | |
| <xsl:value-of select="normalize-space($scope_content)" /><xsl:value-of select="$delimiter" /><!--SCOPE NOTE --> | |
| <xsl:value-of select="normalize-space($physdesc)" /><xsl:value-of select="$delimiter" /><!--PHYSDESC NOTE --> | |
| <xsl:value-of select="$expected_filename" /><xsl:value-of select="$delimiter" /><!--EXPECTED FILENAME --> | |
| <xsl:value-of select="$eol" /> | |
| <!-- Nested components follow their parent row, one level deeper. --> | |
| <xsl:apply-templates select="ead:c"> | |
| <xsl:with-param name="level" select="$next_level" /> | |
| </xsl:apply-templates> | |
| </xsl:template> | |
| <!-- escape_values: returns the text of $value made safe for a single output field. | |
| Param value: a string, a node, or a sequence of nodes; several items are joined with a space. | |
| Output: if the text contains the quote character, every quote is doubled and the whole text is wrapped in quotes; if it only contains the delimiter, it is wrapped in quotes; otherwise it is returned unchanged. --> | |
| <xsl:template name="escape_values"> | |
| <xsl:param name="value" /> | |
| <!-- Flatten first: contains() and replace() accept at most one item and raise a type error when a caller passes several nodes (for example every ead:p of a note). --> | |
| <xsl:variable name="text" select="string-join(for $item in $value return string($item), ' ')" /> | |
| <xsl:choose> | |
| <xsl:when test="contains($text, $quote)"> | |
| <xsl:variable name="escapedquote" select="replace($text, $quote, concat($quote, $quote))" /> | |
| <xsl:value-of select="concat($quote, $escapedquote, $quote)" /> | |
| </xsl:when> | |
| <xsl:when test="contains($text, $delimiter)"> | |
| <xsl:value-of select="concat($quote, $text, $quote)" /> | |
| </xsl:when> | |
| <xsl:otherwise> | |
| <xsl:value-of select="$text" /> | |
| </xsl:otherwise> | |
| </xsl:choose> | |
| </xsl:template> | |
| <!-- Pass-through rules: each listed element contributes nothing of its own and simply processes its children. No apply-templates call in this stylesheet as shown selects these elements; the row fields are read with value-of and escape_values instead. --> | |
| <xsl:template match="ead:did/ead:unittitle | ead:title | ead:did/ead:physdesc | ead:extent | ead:p | ead:emph"> | |
| <xsl:apply-templates select="node()" /> | |
| </xsl:template> | |
| <!-- ead:odd is narrower: only its paragraph children are processed. --> | |
| <xsl:template match="ead:odd"> | |
| <xsl:apply-templates select="child::ead:p" /> | |
| </xsl:template> | |
| </xsl:stylesheet> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Update 2018-12-19 | |
| Added physdesc and scopecontent | |
| Update 2021-04-12 | |
| Added normalize-space for title and date | |
| For Claremont: | |
| Ran unitdate across CUL EAD. Took output, removed empty namespaces (xmlns=""), pretty printed, ran ead2csv.xsl. | |
| EAD cleanup notes (tested on Crane) | |
| This stylesheet: https://github.com/mdpeters/EAD_stylesheets/blob/master/ead_to_csv.xsl | |
| Switched to | as delimiter | |
| Lowercased box and folder | |
| <xsl:variable name="box"> | |
| <xsl:value-of select="ead:did/ead:container[@type='box']" /> | |
| </xsl:variable> | |
| <xsl:variable name="folder"> | |
| <xsl:value-of select="ead:did/ead:container[@type='folder']" /> | |
| </xsl:variable> | |
| 4077437 - Charles Richard Crane Papers | |
| Pretty printed in oXygen for line break reasons | |
| Run unitdate.xsl to move the unitdates in parallel | |
| Remove xmlns="urn:isbn:1-931666-22-9" and xmlns="" from the output; copied the <ead> element from the original file for namespace reasons | |
| Run ead2csv.xsl, paste into Excel, split data | |
| Problem with ead2csv.xsl - the unitdate.xsl isn't copying c-file inside c-file | |
| -> fixed with call to apply templates inside c | |
| Resulting output csv: | |
| Problem 1: Using <unitdate> as a separator | |
| Problem 2: Excluding the day from a unitdate | |
| <unittitle>To Martin Ryerson 5 <unitdate type="inclusive">February 1870</unitdate> Chicago</unittitle> | |
| Saved as CRC.xls - will try to clean this up | |
| 11048740 - ran fine |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0"?> | |
| <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ead="urn:isbn:1-931666-22-9" xmlns:xlink="http://www.w3.org/1999/xlink"> | |
| <!-- | |
| --> | |
| <!-- PLEASE SEE THE MODIFICATION TIPS FILE FOR TIPS ON HOW TO ALTER THIS DOCUMENT --> | |
| <!-- Serialize the result as indented XML. exclude-result-prefixes is not an attribute of xsl:output (it belongs on xsl:stylesheet, or as xsl:exclude-result-prefixes on a literal result element), so it has been removed from this declaration. --> | |
| <xsl:output method="xml" indent="yes"/> | |
| <!-- Identity template: copies any node or attribute not claimed by a more specific rule, attributes first and then child nodes. --> | |
| <xsl:template match="@* | node()"> | |
| <xsl:copy> | |
| <xsl:apply-templates select="@*"/> | |
| <xsl:apply-templates select="node()"/> | |
| </xsl:copy> | |
| </xsl:template> | |
| <!-- The following template takes the unitdate out of unit title--> | |
| <!-- | |
| <xsl:template match="ead/archdesc/did/unittitle"> | |
| <xsl:element name="unittitle"> | |
| <!-\-<xsl:copy-of select="ead/archdesc/did/unittitle[not(unitdate)]"/> | |
| <xsl:copy-of select="@*"/>-\-> | |
| </xsl:element> | |
| </xsl:template> | |
| --><!-- | |
| <xsl:template match="ead/archdesc/did"> | |
| <xsl:element name="unitdate"> | |
| <xsl:value-of select="ead/archdesc/did/unittitle/unitdate"/> | |
| </xsl:element> | |
| <xsl:apply-templates/> | |
| </xsl:template> | |
| --> | |
| <!-- Rebuilds every file-level component by hand. It keeps only the first two containers, the title text, the unitdate nested inside the title, the physdesc extent and the scopecontent; anything else in the component is dropped (this list is maintained manually). | |
| The rebuilt elements are created in the EAD namespace, like the nodes copied by the identity template, so the result carries no xmlns="" overrides and can still be matched as ead:c by a later stylesheet. | |
| exclude-result-prefixes is not allowed on xsl:template or xsl:copy-of; the stylesheet's own prefixes are kept off the output with xsl:exclude-result-prefixes on the literal result element, and copy-namespaces="no" keeps unused source namespaces off the copied containers. --> | |
| <xsl:template match="ead:c[@level='file']"> | |
| <c xmlns="urn:isbn:1-931666-22-9" level="file" xsl:exclude-result-prefixes="#all"> | |
| <did> | |
| <xsl:copy-of select="ead:did/ead:container[1]" copy-namespaces="no"/> | |
| <xsl:copy-of select="ead:did/ead:container[2]" copy-namespaces="no"/> | |
| <!-- Title: only the text directly inside unittitle, i.e. without the nested unitdate. --> | |
| <unittitle> | |
| <xsl:value-of select="ead:did/ead:unittitle/text()"/> | |
| </unittitle> | |
| <!-- The date that was embedded in the title becomes a sibling unitdate; its type is always written as inclusive. --> | |
| <unitdate type="inclusive"> | |
| <xsl:value-of select="ead:did/ead:unittitle/ead:unitdate/text()"/> | |
| </unitdate> | |
| <physdesc> | |
| <extent> | |
| <xsl:value-of select="ead:did/ead:physdesc/ead:extent/text()"/> | |
| </extent> | |
| </physdesc> | |
| </did> | |
| <xsl:copy-of select="ead:scopecontent"/> | |
| <!-- Nested components are processed in turn, so a file inside a file is rebuilt as well. --> | |
| <xsl:apply-templates select="ead:c"/> | |
| </c> | |
| </xsl:template> | |
| <!-- This works, but no attributes, obviously. | |
| <xsl:template match="ead/archdesc/did/unittitle"> | |
| <unittitle> | |
| <xsl:value-of select="self::node()"/> | |
| </unittitle> | |
| <unitdate> | |
| <xsl:value-of select="child::unitdate"/> | |
| </unitdate> | |
| </xsl:template> --> | |
| </xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment