Created
June 27, 2012 08:32
-
-
Save grtjn/3002466 to your computer and use it in GitHub Desktop.
Convert a single line of a Turtle .nx or .nq file to XML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:local="local" xmlns:xdmp="http://marklogic.com/xdmp"> | |
<!-- A single double-quote character, held in a variable so XPath expressions written inside double-quoted attributes can refer to it. -->
<xsl:variable name="quot" as="xs:string">"</xsl:variable>
<!--
  Regular expression for a quoted literal at the start of a line.
  Group 1: the still-escaped literal body, made of one or more of: a run of characters other than backslash and double quote,
           a backslash followed by one of backslash, double quote, n, r, t, a \u escape with 4 hex digits, or a \U escape with 8 hex digits.
  Group 2: the last repeated body item (a by-product of the repetition).
  Group 3: everything after the closing quote up to the end of the line.
-->
<xsl:variable name="encoded-string-pattern" as="xs:string">^"(([^\\"]+|\\[\\"nrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]|\\U[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])+)"(.*)</xsl:variable>
<xsl:template match="/">
  <!--
    Tokenizes the string value of the input document with local:tokenize and writes one <t> element:
      <s>  first token
      <p>  second token
      <o>  third token, with an xml:lang attribute when the fourth token starts with '@'
      <c>  whichever of tokens four and five is neither '.' nor an '@' token (written only when present)

    Data source: http://dbpedia.org/Downloads37
    Example input:
    <http://dbpedia.org/resource/Alabama> <http://dbpedia.org/ontology/PopulatedPlace/areaTotal> "135765.0"^^<http://dbpedia.org/datatype/squareKilometre> <http://en.wikipedia.org/wiki/Alabama#absolute-line=33> .
  -->
  <xsl:variable name="parts" select="local:tokenize(.)"/>
  <!-- Tokens four and five: the only places a language tag or a context can appear. -->
  <xsl:variable name="tail" select="subsequence($parts, 4, 2)"/>
  <xsl:variable name="language" select="for $t in $parts[4][starts-with(., '@')] return substring-after($t, '@')"/>
  <xsl:variable name="context" select="$tail[not(. = '.' or starts-with(., '@'))]"/>
  <t>
    <s><xsl:value-of select="$parts[1]"/></s>
    <p><xsl:value-of select="$parts[2]"/></p>
    <o>
      <xsl:if test="not(empty($language))">
        <xsl:attribute name="xml:lang" select="$language"/>
      </xsl:if>
      <xsl:value-of select="$parts[3]"/>
    </o>
    <xsl:if test="not(empty($context))">
      <c><xsl:value-of select="$context"/></c>
    </xsl:if>
  </t>
</xsl:template>
<!--
  local:tokenize($str) splits one statement line into its terms and returns them as strings.

  Parameter:
    $str  the (remaining) text of the line.
  Returns, in order of appearance:
    * for a term in angle brackets: the text between the brackets;
    * for a quoted literal: the decoded literal text (via local:decode-string); a following
      '^^' datatype is skipped and not returned, a following '@' language tag is returned as
      its own token including the '@';
    * for anything else (for example a blank node label): the text up to the next whitespace.
  The terminating '.' is not returned.

  Leading and trailing whitespace is removed on every call, so any run of spaces or tabs
  separates terms and a trailing newline does not produce an empty token.
-->
<xsl:function name="local:tokenize" as="xs:string*">
  <xsl:param name="str" as="xs:string"/>
  <!-- Only the ends are trimmed; whitespace inside a quoted literal is data and must survive. -->
  <xsl:variable name="line" select="replace($str, '^\s+|\s+$', '')"/>
  <xsl:choose>
    <!-- End of statement: nothing left, or only the final dot. -->
    <xsl:when test="$line = ('', '.')"/>
    <xsl:when test="starts-with($line, '&lt;')">
      <xsl:sequence select="substring-after(substring-before($line, '&gt;'), '&lt;')"/>
      <xsl:sequence select="local:tokenize(substring-after($line, '&gt;'))"/>
    </xsl:when>
    <xsl:when test="starts-with($line, $quot)">
      <!-- The pattern requires a non-empty body, so the empty literal (two adjacent quotes) is handled here. -->
      <xsl:variable name="empty-literal" select="starts-with($line, concat($quot, $quot))"/>
      <xsl:sequence select="if ($empty-literal) then '' else local:decode-string(replace($line, $encoded-string-pattern, '$1'))"/>
      <xsl:variable name="remainder" select="if ($empty-literal) then substring($line, 3) else replace($line, $encoded-string-pattern, '$3')"/>
      <xsl:choose>
        <xsl:when test="starts-with($remainder, '^^')">
          <!-- Datatype annotation: skip past its closing bracket without emitting a token. -->
          <xsl:sequence select="local:tokenize(substring-after($remainder, '&gt;'))"/>
        </xsl:when>
        <xsl:when test="starts-with($remainder, '@')">
          <xsl:variable name="lang-token" select="tokenize($remainder, '\s+')[1]"/>
          <xsl:sequence select="$lang-token"/>
          <xsl:sequence select="local:tokenize(substring($remainder, string-length($lang-token) + 1))"/>
        </xsl:when>
        <xsl:when test="matches($remainder, '^\s')">
          <xsl:sequence select="local:tokenize($remainder)"/>
        </xsl:when>
        <xsl:otherwise>##Should not be reached!##</xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:otherwise>
      <!-- $line is trimmed and non-empty here, so the first whitespace-delimited piece is non-empty and the recursion always advances. -->
      <xsl:variable name="head" select="tokenize($line, '\s+')[1]"/>
      <xsl:sequence select="$head"/>
      <xsl:sequence select="local:tokenize(substring($line, string-length($head) + 1))"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
<!--
  local:decode-string($str) resolves the backslash escapes of a quoted literal body.

  Parameter:
    $str  the literal body without its surrounding quotes, escapes still in place.
  Returns exactly one string (the empty string for empty input) in which:
    * backslash + backslash becomes one backslash;
    * backslash + double quote becomes a double quote;
    * backslash + t becomes a tab character;
    * every run of backslash-n and backslash-r escapes becomes one space;
    * \uXXXX and \UXXXXXXXX become the character with that hexadecimal code point
      (converted with the MarkLogic extension function xdmp:hex-to-integer).
  Any other text is copied unchanged.

  All escapes are resolved in one left-to-right pass, so an escaped backslash is consumed
  before the character after it is examined; an escaped backslash followed by the letter n
  yields a backslash and an n, not a space.
-->
<xsl:function name="local:decode-string" as="xs:string">
  <xsl:param name="str" as="xs:string"/>
  <!-- Collected in a temporary tree so the pieces merge into a single string value. -->
  <xsl:variable name="decoded">
    <!--
      Groups: 1 = simple escaped character (backslash, double quote or t)
              2 = last item of a run of n / r escapes
              3 = four hex digits of a \u escape
              4 = eight hex digits of a \U escape
    -->
    <xsl:analyze-string select="$str" regex="\\([\\&quot;t])|(\\[nr])+|\\u([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])|\\U([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])">
      <xsl:matching-substring>
        <xsl:choose>
          <xsl:when test="regex-group(1) = 't'">
            <!-- Character reference: a literal tab in an attribute value would be normalized to a space. -->
            <xsl:text>&#9;</xsl:text>
          </xsl:when>
          <xsl:when test="regex-group(1) != ''">
            <xsl:value-of select="regex-group(1)"/>
          </xsl:when>
          <xsl:when test="regex-group(2) != ''">
            <xsl:text> </xsl:text>
          </xsl:when>
          <xsl:otherwise>
            <!-- Exactly one of groups 3 and 4 is non-empty here, so concatenating them yields the hex digits. -->
            <xsl:value-of select="codepoints-to-string(xdmp:hex-to-integer(concat(regex-group(3), regex-group(4))))"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
        <xsl:value-of select="."/>
      </xsl:non-matching-substring>
    </xsl:analyze-string>
  </xsl:variable>
  <xsl:sequence select="string($decoded)"/>
</xsl:function>
</xsl:stylesheet> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment