<?xml version="1.0" encoding="UTF-8"?>
<!--
  Parse Transkribus PAGE-XML annotations.
  Source: GitHub gist dmj/3946d5ce0364e28ed8dae4f25445bf7b (created April 29, 2021).
-->
<xsl:transform version="3.0" exclude-result-prefixes="#all" | |
xpath-default-namespace="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" | |
xmlns="http://www.tei-c.org/ns/1.0" | |
xmlns:fn="https://code.dehmel.digital/ns" | |
xmlns:map="http://www.w3.org/2005/xpath-functions/map" | |
xmlns:xs="http://www.w3.org/2001/XMLSchema" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> | |
<!--
  fn:properties-to-fs
  Renders a property map as a TEI feature structure (<fs>).

  Parameters:
    properties  (required) map whose entries become <f name="KEY"> children.
    id          optional; when non-empty it is written as @xml:id on the <fs>.
    type        optional; when non-empty it is written as @type on the <fs>.

  Value mapping per entry:
    one or more maps            => one nested <fs> per map (recursive call,
                                   without id/type)
    xs:float/double/decimal     => <numeric value="..."/>
    xs:boolean                  => <binary value="..."/>
    anything else               => <string>...</string>

  The order of the <f> children follows map:keys() and is therefore
  implementation-dependent.
-->
<xsl:template name="fn:properties-to-fs" as="element(Q{http://www.tei-c.org/ns/1.0}fs)?">
    <xsl:param name="properties" as="map(*)" required="yes"/>
    <xsl:param name="id" as="xs:string?"/>
    <xsl:param name="type" as="xs:string?"/>
    <fs>
        <!-- Optional attributes: skipped for an absent or empty string. -->
        <xsl:if test="boolean($id)">
            <xsl:attribute name="xml:id" select="$id"/>
        </xsl:if>
        <xsl:if test="boolean($type)">
            <xsl:attribute name="type" select="$type"/>
        </xsl:if>
        <xsl:for-each select="map:keys($properties)">
            <xsl:variable name="key" select="."/>
            <xsl:variable name="entry" select="$properties($key)"/>
            <f name="{$key}">
                <!-- The four cases are mutually exclusive, so their order is not significant. -->
                <xsl:choose>
                    <xsl:when test="$entry instance of xs:boolean">
                        <binary value="{$entry}"/>
                    </xsl:when>
                    <xsl:when test="$entry instance of xs:decimal or $entry instance of xs:double or $entry instance of xs:float">
                        <numeric value="{$entry}"/>
                    </xsl:when>
                    <xsl:when test="$entry instance of map(*)+">
                        <!-- Each map in the sequence becomes its own nested feature structure. -->
                        <xsl:for-each select="$entry">
                            <xsl:call-template name="fn:properties-to-fs">
                                <xsl:with-param name="properties" as="map(*)" select="."/>
                            </xsl:call-template>
                        </xsl:for-each>
                    </xsl:when>
                    <xsl:otherwise>
                        <string><xsl:value-of select="$entry" separator=" "/></string>
                    </xsl:otherwise>
                </xsl:choose>
            </f>
        </xsl:for-each>
    </fs>
</xsl:template>
<!--
  fn:properties-to-map
  Parses a property string made of groups of the form

      selector {key:value; key:value;}

  into a map keyed by selector. (Presumably this is the PAGE "custom"
  attribute written by Transkribus; confirm against the callers.)

  Parameter:
    properties  the raw property string.

  Returns:
    A map from selector name to one or more property maps. A selector that
    occurs several times in the input (for example two textStyle groups)
    yields a sequence of maps in input order, which is why the value type
    carries the "+" occurrence indicator.

  Value typing inside a property map:
    "true"/"false" in any letter case => xs:boolean
    castable as xs:decimal            => xs:double (via number())
    anything else                     => xs:string

  Notes:
    * Selectors are matched by [a-zA-Z]+ only; groups whose name contains
      other characters are not matched and are skipped.
    * A property without a colon is reported through xsl:message and dropped.
-->
<xsl:function name="fn:properties-to-map" as="map(xs:string, map(xs:string, item())+)">
    <xsl:param name="properties" as="xs:string"/>
    <xsl:variable name="parsed-properties" as="map(xs:string, map(xs:string, item()))*">
        <!-- The regex attribute is an attribute value template, hence the doubled braces. -->
        <xsl:analyze-string select="$properties" regex="([a-zA-Z]+) \{{([^\}}]*)\}}">
            <xsl:matching-substring>
                <xsl:variable name="selector" as="xs:string" select="regex-group(1)"/>
                <xsl:variable name="props" as="xs:string*" select="tokenize(regex-group(2), ';') ! normalize-space()"/>
                <xsl:map-entry key="$selector">
                    <xsl:map>
                        <!-- Skip the empty token left behind by a trailing ';'. -->
                        <xsl:for-each select="$props[normalize-space()]">
                            <xsl:choose>
                                <xsl:when test="contains(., ':')">
                                    <!-- Trim both sides of the colon so that "key: value" parses like "key:value". -->
                                    <xsl:variable name="value" as="xs:string" select="normalize-space(substring-after(., ':'))"/>
                                    <xsl:map-entry key="normalize-space(substring-before(., ':'))">
                                        <xsl:choose>
                                            <xsl:when test="lower-case($value) = ('true', 'false')">
                                                <!-- xs:boolean only accepts lowercase literals, so cast the lowercased form. -->
                                                <xsl:sequence select="xs:boolean(lower-case($value))"/>
                                            </xsl:when>
                                            <xsl:when test="$value castable as xs:decimal">
                                                <xsl:sequence select="number($value)"/>
                                            </xsl:when>
                                            <xsl:otherwise>
                                                <!-- xsl:sequence keeps the value an xs:string; xsl:value-of would store a text node. -->
                                                <xsl:sequence select="$value"/>
                                            </xsl:otherwise>
                                        </xsl:choose>
                                    </xsl:map-entry>
                                </xsl:when>
                                <xsl:otherwise>
                                    <xsl:message expand-text="yes">Invalid property: {.}</xsl:message>
                                </xsl:otherwise>
                            </xsl:choose>
                        </xsl:for-each>
                    </xsl:map>
                </xsl:map-entry>
            </xsl:matching-substring>
        </xsl:analyze-string>
    </xsl:variable>
    <!-- 'combine' concatenates the maps of repeated selectors instead of raising an error or dropping one. -->
    <xsl:sequence select="map:merge($parsed-properties, map{'duplicates': 'combine'})"/>
</xsl:function>
</xsl:transform> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment