Skip to content

Instantly share code, notes, and snippets.

@tingletech
Last active August 29, 2015 14:01
Show Gist options
  • Save tingletech/4521a4eb11b8b14f24f0 to your computer and use it in GitHub Desktop.
Save tingletech/4521a4eb11b8b14f24f0 to your computer and use it in GitHub Desktop.
require 'date'
require 'nokogiri'
# Converts an Atom feed document into a UCI ingest record.
#
# Command-line arguments, in order:
#   1. path of the Atom XML file to read
#   2. path of the UCI XML file to write
#   3. identifier stored in the record's `id` attribute
#
# Raises RuntimeError ('unknown pubType') when no <category> term identifies
# the entry as a journal article, book or chapter.
in_file, out_file, ark = ARGV

# The block form closes the input file even if parsing raises.
input = File.open(in_file, "r") { |io| Nokogiri::XML(io) }

# Evil? Sure, but makes for less code and this doc doesn't need them.
input.remove_namespaces!
feed = input.root

# Sadly we can't use matches() below because Nokogiri only supports XPath 1.0.
# at_xpath is used instead of xpath because xpath returns a NodeSet, and a
# NodeSet is truthy in Ruby even when it is empty; at_xpath returns nil when
# nothing matches, so each branch is only taken for a real match.
pub_type =
  if feed.at_xpath("category[contains(@term, '/journal-article')]")
    'article'
  elsif feed.at_xpath("category[contains(@term, '/book')]")
    'monograph'
  elsif feed.at_xpath("category[contains(@term, '/chapter')]")
    'chapter'
  else
    raise('unknown pubType')
  end

out = Nokogiri::XML::Builder.new do |xml|
  # Create the main UCI record element, with its many attributes
  xml.record(id: ark,
             dateStamp: DateTime.now.iso8601,
             pubType: pub_type,
             peerReview: 'yes',
             pubStatus: 'externalPub',
             state: 'published',
             stateDate: feed.at('updated').text) do
    # Translate author metadata
    feed.xpath("//field[@name='authors']/people").each do |people|
      xml.authors do
        people.xpath("person").each do |person|
          xml.author do
            # Prefer the full first name; fall back to initials.
            given = person.at("first-name") || person.at("initials")
            family = person.at("last-name")
            suffix = person.at("suffix")
            email = person.at("email")

            xml.fname(given.text) if given
            xml.lname(family.text) if family
            # Suffix and email get their own elements rather than <lname>.
            xml.suffix(suffix.text) if suffix
            xml.email(email.text) if email
          end
        end
      end
    end
    # Children normally inherit their parent's namespace; do it last to only affect parent.
    xml.parent.namespace = xml.parent.add_namespace("uci", "http://www.cdlib.org/ucingest")
  end
end

File.open(out_file, "w") { |io| out.doc.write_xml_to(io) }
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Turns an Atom feed into a single uci:record element.
  The record id is supplied by the caller through the "ark" parameter.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:pubs="http://www.symplectic.co.uk/publications/atom-api"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xsl atom pubs xs">

  <xsl:output method="xml" indent="yes" encoding="UTF-8"/>

  <!-- Identifier written to the record's id attribute -->
  <xsl:param name="ark"/>

  <!-- Entry point: one uci:record per atom:feed document element -->
  <xsl:template match="/atom:feed">
    <uci:record xmlns:uci="http://www.cdlib.org/ucingest"
                id="{$ark}"
                dateStamp="{current-dateTime()}"
                peerReview="yes"
                pubStatus="internalPub" state="published"
                stateDate="{atom:updated/text()}">

      <!-- Work out the publication type from the category terms; the empty
           string marks "nothing matched" -->
      <xsl:variable name="pubType" select="
          if (atom:category[matches(@term, '.*/journal-article$')]) then 'paper'
          else if (atom:category[matches(@term, '.*/book$')]) then 'monograph'
          else if (atom:category[matches(@term, '.*/chapter$')]) then 'chapter'
          else ''"/>
      <xsl:if test="$pubType = ''">
        <xsl:message terminate="yes">Unrecognized pubtype</xsl:message>
      </xsl:if>
      <xsl:attribute name="type" select="$pubType"/>

      <!-- Let the remaining templates handle the rest of the feed -->
      <xsl:apply-templates/>
    </uci:record>
  </xsl:template>

  <!-- Suppress the built-in rule that copies text nodes to the output -->
  <xsl:template match="text()"/>

  <!-- The authors field becomes an authors element, one author per person -->
  <xsl:template match="pubs:field[@name='authors']">
    <authors>
      <xsl:apply-templates select="pubs:people/pubs:person" mode="author"/>
    </authors>
  </xsl:template>

  <!-- One author. The private mode means this rule only fires when it is
       invoked from the authors field above. -->
  <xsl:template match="pubs:person" mode="author">
    <author>
      <!-- First name wins; initials are the fallback -->
      <xsl:choose>
        <xsl:when test="pubs:first-name">
          <fname><xsl:value-of select="pubs:first-name"/></fname>
        </xsl:when>
        <xsl:when test="pubs:initials">
          <fname><xsl:value-of select="pubs:initials"/></fname>
        </xsl:when>
      </xsl:choose>
      <xsl:if test="pubs:last-name">
        <lname><xsl:value-of select="pubs:last-name"/></lname>
      </xsl:if>
      <xsl:if test="pubs:suffix">
        <suffix><xsl:value-of select="pubs:suffix"/></suffix>
      </xsl:if>
      <xsl:if test="pubs:email">
        <email><xsl:value-of select="pubs:email"/></email>
      </xsl:if>
    </author>
  </xsl:template>

</xsl:stylesheet>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Turns an Atom feed into a single uci:record element, using one template per
  level of the author metadata (field, person, name part).
  The record id is supplied by the caller through the "ark" parameter.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:pubs="http://www.symplectic.co.uk/publications/atom-api"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xsl atom pubs xs">

  <xsl:output method="xml" indent="yes" encoding="UTF-8"/>

  <!-- Identifier written to the record's id attribute -->
  <xsl:param name="ark"/>

  <!-- Root Template -->
  <xsl:template match="/atom:feed">
    <!-- Create the main UCI record element, with its many attributes -->
    <uci:record xmlns:uci="http://www.cdlib.org/ucingest"
                id="{$ark}"
                dateStamp="{current-dateTime()}"
                peerReview="yes"
                pubStatus="internalPub" state="published"
                stateDate="{atom:updated/text()}">

      <!-- Publication type. The test is a predicate on each category rather
           than ends-with(atom:category/@term, ...), because ends-with accepts
           at most one item and raises a type error when the feed carries more
           than one category. The empty string marks "nothing matched". -->
      <xsl:variable name="type" select="
          if (atom:category[ends-with(@term, '/journal-article')]) then 'paper'
          else if (atom:category[ends-with(@term, '/book')]) then 'monograph'
          else if (atom:category[ends-with(@term, '/chapter')]) then 'chapter'
          else ''"/>
      <xsl:if test="$type=''">
        <xsl:message terminate="yes">Unrecognized pubtype</xsl:message>
      </xsl:if>
      <xsl:attribute name="type" select="$type"/>

      <!-- Everything else -->
      <xsl:apply-templates/>
    </uci:record>
  </xsl:template>

  <!-- Disable XSLT default of copying all text -->
  <xsl:template match="text()"/>

  <!-- Translate author metadata -->
  <xsl:template match="pubs:field[@name='authors']">
    <authors>
      <xsl:apply-templates select="pubs:people/pubs:person" mode="author"/>
    </authors>
  </xsl:template>

  <!-- One author. The "author" mode keeps this rule (and the name-part rule
       below) from firing when the built-in rules walk over pubs:person
       elements that are not inside the authors field. -->
  <xsl:template match="pubs:person" mode="author">
    <author>
      <!-- First name wins; initials are the fallback -->
      <xsl:choose>
        <xsl:when test="pubs:first-name">
          <xsl:apply-templates select="pubs:first-name" mode="author"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:apply-templates select="pubs:initials" mode="author"/>
        </xsl:otherwise>
      </xsl:choose>
      <xsl:apply-templates select="pubs:last-name,pubs:suffix,pubs:email" mode="author"/>
    </author>
  </xsl:template>

  <!-- One name part. first-name and initials both map to fname, last-name maps
       to lname, and suffix and email keep their own local names. -->
  <xsl:template match="pubs:first-name|pubs:initials|pubs:last-name|pubs:suffix|pubs:email"
                mode="author">
    <xsl:variable name="name" select="
        if (local-name()='first-name' or local-name()='initials') then 'fname'
        else if (local-name()='last-name') then 'lname'
        else local-name()"/>
    <!-- The name attribute is an attribute value template, so the variable
         must be wrapped in braces to be evaluated -->
    <xsl:element name="{$name}">
      <xsl:value-of select="."/>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment