Last active
February 10, 2022 10:25
-
-
Save peterstadler/72c0a81378affa03d095edd1423eeeca to your computer and use it in GitHub Desktop.
XQuery module for extracting network information (in GraphML format) from a CMIF file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1" encoding "UTF-8"; | |
(:~ | |
: XQuery module for extracting network information (in GraphML format) from a CMIF file | |
: | |
: CMIF is the Correspondence Metadata Interchange format, | |
: see https://correspsearch.net/en/documentation.html | |
: GraphML is an XML-based file format for graphs, | |
: see http://graphml.graphdrawing.org | |
: | |
:) | |
module namespace cmif2graphml="http://github.com/peterstadler/cmif2graphml"; | |
declare namespace tei="http://www.tei-c.org/ns/1.0"; | |
declare namespace graphml="http://graphml.graphdrawing.org/xmlns"; | |
(:~
 : Main entry point.
 : Wraps the graph built from the given TEI correspDesc elements in a
 : GraphML root element.
 :
 : @param $correspDescs the TEI correspDesc elements to turn into a network
 : @return a graphml element (GraphML namespace) containing one graph child
 :)
declare function cmif2graphml:graphml($correspDescs as element(tei:correspDesc)*) as element(graphml:graphml) {
    let $graph := cmif2graphml:graph($correspDescs)
    return
        <graphml xmlns="http://graphml.graphdrawing.org/xmlns">{ $graph }</graphml>
};
(:~
 : Create one GraphML node per distinct agent ID.
 : Agents are collected from all correspAction children of the given
 : correspDesc elements and grouped by the ID computed by cmif2graphml:id().
 : The node description is the whitespace-normalized string value of the
 : first agent in each group.
 :
 : @param $correspDescs the TEI correspDesc elements to read agents from
 : @return a sequence of node elements (GraphML namespace), one per ID
 :)
declare function cmif2graphml:nodes($correspDescs as element(tei:correspDesc)*) as element(graphml:node)* {
    for $member in ($correspDescs/tei:correspAction => cmif2graphml:agents())
    let $key := cmif2graphml:id($member)
    group by $key
    (: after grouping, $member holds every agent sharing this key :)
    let $label := normalize-space(head($member))
    return
        <node xmlns="http://graphml.graphdrawing.org/xmlns" id="{ $key }">
            <desc>{ $label }</desc>
        </node>
};
(:~
 : Create the GraphML edges for the given correspDesc elements.
 : For every correspDesc, one edge is emitted for each combination of an
 : agent in a correspAction of type 'sent' and an agent in a correspAction
 : of type 'received'. The edge description carries the string value of the
 : correspDesc's ref attribute (empty when the attribute is absent).
 :
 : @param $correspDescs the TEI correspDesc elements to read letters from
 : @return a sequence of edge elements (GraphML namespace)
 :)
declare function cmif2graphml:edges($correspDescs as element(tei:correspDesc)*) as element(graphml:edge)* {
    for $letter in $correspDescs
    let $senders := cmif2graphml:agents($letter/tei:correspAction[@type = 'sent'])
    let $addressees := cmif2graphml:agents($letter/tei:correspAction[@type = 'received'])
    for $from in $senders, $to in $addressees
    let $source := cmif2graphml:id($from)
    let $target := cmif2graphml:id($to)
    let $letterRef := string($letter/@ref)
    return
        <edge xmlns="http://graphml.graphdrawing.org/xmlns" source="{ $source }" target="{ $target }">
            <desc>{ $letterRef }</desc>
        </edge>
};
(:~
 : Build the GraphML graph element for the given correspDesc elements.
 : The graph is declared as directed and contains all nodes followed by
 : all edges.
 :
 : @param $correspDescs the TEI correspDesc elements to turn into a graph
 : @return a graph element (GraphML namespace) with edgedefault="directed"
 :)
declare function cmif2graphml:graph($correspDescs as element(tei:correspDesc)*) as element(graphml:graph) {
    let $nodes := cmif2graphml:nodes($correspDescs)
    let $edges := cmif2graphml:edges($correspDescs)
    return
        <graph xmlns="http://graphml.graphdrawing.org/xmlns" edgedefault="directed">{
            $nodes,
            $edges
        }</graph>
};
(:~
 : Generate a unique ID (for agents).
 : If a non-blank ref attribute is present, its whitespace-normalized value
 : is used; otherwise an ID is generated with generate-id().
 : A ref attribute that is empty or contains only whitespace is treated as
 : absent, so that such agents do not all share one blank ID.
 : NB, generated IDs will differ for identical names (string values).
 :
 : @param $item the agent element (may be the empty sequence)
 : @return the normalized ref value, or a generated ID
 :)
declare %private function cmif2graphml:id($item as element()?) as xs:string {
    (: normalize-space() yields '' for a missing, empty or blank attribute :)
    let $ref := normalize-space($item/@ref)
    return
        if ($ref ne '') then $ref
        else generate-id($item)
};
(:~
 : Collect the agent elements of the given correspAction elements.
 : Agents are the persName, orgName and name children (TEI namespace);
 : all other children are ignored. The result is in document order.
 :
 : @param $correspActions the TEI correspAction elements to inspect
 : @return the matching child elements
 :)
declare %private function cmif2graphml:agents($correspActions as element(tei:correspAction)*) as element()* {
    $correspActions/(tei:persName | tei:orgName | tei:name)
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A sample call: