Last active
February 10, 2022 10:17
-
-
Save peterstadler/81d17e9354a445acbac517c91fdb6c4e to your computer and use it in GitHub Desktop.
XQuery module for transforming CMIF files to KML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1" encoding "UTF-8"; | |
(:~ | |
: XQuery module for extracting place information in KML from a CMIF file | |
: | |
: CMIF is the Correspondence Metadata Interchange format, | |
: see http://correspsearch.net/index.xql?id=participate_cmi-format | |
: The exported KML flavor is based on the europeana connect KML specification, | |
: see https://www.dropbox.com/s/httk62myn2tsi4p/M3.3.2_eConnect_KML_Specification_v1.0_UGOE.pdf | |
:) | |
module namespace cmif2kml="http://github.com/peterstadler/cmif2kml"; | |
declare namespace tei="http://www.tei-c.org/ns/1.0"; | |
declare namespace kml="http://www.opengis.net/kml/2.2"; | |
declare namespace httpclient="http://exist-db.org/xquery/httpclient"; | |
declare namespace wgs84_pos="http://www.w3.org/2003/01/geo/wgs84_pos#"; | |
declare namespace map="http://www.w3.org/2005/xpath-functions/map"; | |
(:import module namespace functx="http://www.functx.com" at "http://www.xqueryfunctions.com/xq/functx-1.0-nodoc-2007-01.xq";:) | |
(:~
 : Main entry point for the module: turns CMIF correspondence descriptions into a KML document.
 :
 : Only tei:placeName elements that carry a @ref attribute produce a kml:Placemark.
 : The ID handed to the callback is whatever follows '.org/' in the @ref value.
 :
 : @param $correspDescs the tei:correspDesc elements to extract the place information from
 : @param $get-coordinates a callback function that receives a geonames ID and returns the coordinates
 :      as xs:string (e.g. '8.68417,50.11552'). If the empty sequence is passed, a default callback is
 :      built: the geonames RDF documents for all referenced IDs are fetched once up front and the
 :      callback reads longitude and latitude from them
 : @return a kml:kml element whose kml:Document holds one kml:Placemark per tei:placeName with a @ref
 :)
declare function cmif2kml:kml($correspDescs as element(tei:correspDesc)*, $get-coordinates as function(*)?) as element(kml:kml)? {
    let $placeNames := $correspDescs/tei:correspAction/tei:placeName
    let $lookup :=
        if(exists($get-coordinates)) then $get-coordinates
        else
            (: no callback supplied: prefetch the RDF documents and bind them into the default lookup :)
            let $ids :=
                for $ref in distinct-values($placeNames/@ref)
                return substring-after($ref, '.org/')
            return cmif2kml:get-coordinates(?, cmif2kml:grab-geonames-rdf($ids))
    return
        <kml xmlns="http://www.opengis.net/kml/2.2">
            <Document>{
                for $place in $placeNames[@ref]
                return cmif2kml:placemark($place, $lookup)
            }</Document>
        </kml>
};
(:~
 : Helper function for creating a single kml:Placemark entry.
 :
 : The placemark name is the whitespace-normalized place name followed by the @type of the
 : parent tei:correspAction in parentheses; the address is the normalized place name alone.
 : Date information is taken from the first tei:date child of the parent tei:correspAction.
 : If the callback yields no coordinates (empty or whitespace-only string), the kml:Point is
 : left out instead of emitting an empty kml:coordinates element.
 :
 : @param $place a tei:placeName element
 : @param $get-coordinates a callback function that receives a geonames ID and returns the coordinates as xs:string (e.g. '8.68417,50.11552')
 : @return a kml:Placemark element
 :)
declare %private function cmif2kml:placemark($place as element(tei:placeName), $get-coordinates as function(xs:string) as xs:string) as element(kml:Placemark)? {
    (: the ID is everything after '.org/' in the @ref value :)
    let $geoNamesID := substring-after($place/@ref, '.org/')
    let $coordinates := $get-coordinates($geoNamesID)
    let $label := normalize-space($place)
    return
        <Placemark xmlns="http://www.opengis.net/kml/2.2">
            <name>{$label || ' (' || $place/parent::tei:correspAction/@type || ')'}</name>
            <address>{$label}</address>
            {cmif2kml:placemark-date($place/parent::tei:correspAction/tei:date[1])}
            {
                (: a Point without coordinates is useless to KML consumers, so skip it :)
                if(normalize-space($coordinates) ne '') then
                    <Point><coordinates>{$coordinates}</coordinates></Point>
                else ()
            }
        </Placemark>
};
(:~
 : Helper function for converting tei:date information into kml:TimeStamp or kml:TimeSpan.
 :
 : Precedence of the attributes:
 :  1. tei:date/@when becomes kml:TimeStamp/kml:when
 :  2. tei:date/@from together with tei:date/@to becomes a kml:TimeSpan
 :  3. tei:date/@notBefore together with tei:date/@notAfter becomes a kml:TimeSpan
 : If none of these attributes is present, or only one half of a pair (or a mix of halves from
 : both pairs) is given, nothing is returned and the date information is discarded.
 :
 : NB: There is currently no check for the datatype, since the CMI format itself is rather strict about these.
 :
 : @param $date a tei:date element with normalized date information provided by attributes @when, @notBefore, @notAfter, @from, and/or @to
 : @return a kml:TimeStamp or kml:TimeSpan element when sufficient information is provided by the tei:date attributes, the empty sequence otherwise
 :)
declare %private function cmif2kml:placemark-date($date as element(tei:date)?) as element()? {
    (: the begin/end attribute pair for a time span, or empty when no complete pair exists :)
    let $span :=
        if(exists($date/@from) and exists($date/@to)) then ($date/@from, $date/@to)
        else if(exists($date/@notBefore) and exists($date/@notAfter)) then ($date/@notBefore, $date/@notAfter)
        else ()
    return
        if(exists($date/@when)) then
            <TimeStamp xmlns="http://www.opengis.net/kml/2.2"><when>{data($date/@when)}</when></TimeStamp>
        else if(exists($span)) then
            <TimeSpan xmlns="http://www.opengis.net/kml/2.2"><begin>{data($span[1])}</begin><end>{data($span[2])}</end></TimeSpan>
        else ()
};
(:~
 : Helper function for creating a map object with the geonames RDF documents for every geonames ID.
 :
 : Each distinct, non-blank ID becomes a key; the value is the document retrieved from
 : 'http://www.geonames.org/{ID}/about.rdf'. If retrieval raises any error, the ID is mapped to the
 : empty sequence instead. Blank IDs are skipped, so no request is made for them and they do not
 : appear as keys.
 :
 : @param $geonamesIDs the geonames IDs to fetch; duplicates and blank strings are ignored
 : @return a map from geonames ID to the fetched RDF document (or the empty sequence on failure)
 :)
declare %private function cmif2kml:grab-geonames-rdf($geonamesIDs as xs:string*) as map(*) {
    map:merge(
        for $id in distinct-values($geonamesIDs)[normalize-space(.) ne '']
        return
            (: best effort: any retrieval error leaves the ID mapped to the empty sequence :)
            map { $id : try { doc('http://www.geonames.org/' || $id || '/about.rdf') } catch * { () } }
    )
};
(:~
 : Helper function for grabbing longitude and latitude from a geonames RDF document.
 :
 : Looks up the entry for the given ID in the map and reads the first wgs84_pos:long and the first
 : wgs84_pos:lat found anywhere below it. The empty string is returned when the ID is not in
 : the map, when its entry is empty, or when either of the two values is missing, so callers never
 : receive a partial coordinate.
 :
 : @param $geonamesID the geonames ID to look up
 : @param $geonamesRDFs a map from geonames ID to the corresponding RDF document
 : @return the coordinates as 'longitude,latitude' (e.g. '8.68417,50.11552'), or '' if unavailable
 :)
declare %private function cmif2kml:get-coordinates($geonamesID as xs:string?, $geonamesRDFs as map(*)) as xs:string? {
    let $geonamesRDF := $geonamesRDFs?($geonamesID)
    let $long := normalize-space(($geonamesRDF//wgs84_pos:long)[1])
    let $lat := normalize-space(($geonamesRDF//wgs84_pos:lat)[1])
    return
        (: KML expects longitude first, then latitude; both must be present :)
        if($long ne '' and $lat ne '') then $long || ',' || $lat
        else ''
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A sample call: