Skip to content

Instantly share code, notes, and snippets.

@peterstadler
Last active February 10, 2022 10:25
Show Gist options
  • Save peterstadler/72c0a81378affa03d095edd1423eeeca to your computer and use it in GitHub Desktop.
Save peterstadler/72c0a81378affa03d095edd1423eeeca to your computer and use it in GitHub Desktop.
XQuery module for extracting network information (in GraphML format) from a CMIF file
xquery version "3.1" encoding "UTF-8";
(:~
: XQuery module for extracting network information (in GraphML format) from a CMIF file
:
: CMIF is the Correspondence Metadata Interchange Format,
: see https://correspsearch.net/en/documentation.html
: GraphML is an XML-based file format for graphs,
: see http://graphml.graphdrawing.org
:
:)
module namespace cmif2graphml="http://github.com/peterstadler/cmif2graphml";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace graphml="http://graphml.graphdrawing.org/xmlns";
(:~
 : Main entry point.
 : Wraps the graph built from the given TEI correspDesc elements in a
 : GraphML root element.
 :
 : @param $correspDescs the TEI correspDesc elements to process (may be empty)
 : @return a graphml element (GraphML namespace) containing a single graph
 :)
declare function cmif2graphml:graphml($correspDescs as element(tei:correspDesc)*) as element(graphml:graphml) {
    let $graph := cmif2graphml:graph($correspDescs)
    return
        <graphml xmlns="http://graphml.graphdrawing.org/xmlns">{ $graph }</graphml>
};
(:~
 : Create one GraphML node per distinct agent ID.
 : Agents are collected from all correspAction children of the given
 : correspDesc elements and grouped by the ID computed by cmif2graphml:id().
 :
 : @param $correspDescs the TEI correspDesc elements to process (may be empty)
 : @return one node element per ID; its desc child holds the
 :         whitespace-normalized string value of the first agent in the group
 :)
declare function cmif2graphml:nodes($correspDescs as element(tei:correspDesc)*) as element(graphml:node)* {
    let $allAgents := cmif2graphml:agents($correspDescs/tei:correspAction)
    for $member in $allAgents
    let $key := cmif2graphml:id($member)
    group by $key
    (: after grouping, $member holds every agent sharing this key :)
    let $label := normalize-space($member[1])
    return
        <node xmlns="http://graphml.graphdrawing.org/xmlns" id="{ $key }">
            <desc>{ $label }</desc>
        </node>
};
(:~
 : Create the GraphML edges.
 : For every correspDesc, one edge is emitted for each combination of an
 : agent of a 'sent' correspAction and an agent of a 'received' correspAction.
 :
 : @param $correspDescs the TEI correspDesc elements to process (may be empty)
 : @return edge elements whose source/target attributes carry the agent IDs
 :         and whose desc child holds the string value of correspDesc/@ref
 :)
declare function cmif2graphml:edges($correspDescs as element(tei:correspDesc)*) as element(graphml:edge)* {
    for $letter in $correspDescs
    let $letterRef := string($letter/@ref)
    let $senders := cmif2graphml:agents($letter/tei:correspAction[@type = 'sent'])
    let $addressees := cmif2graphml:agents($letter/tei:correspAction[@type = 'received'])
    for $from in $senders, $to in $addressees
    return
        <edge xmlns="http://graphml.graphdrawing.org/xmlns" source="{ cmif2graphml:id($from) }" target="{ cmif2graphml:id($to) }">
            <desc>{ $letterRef }</desc>
        </edge>
};
(:~
 : Create the GraphML graph element: all nodes followed by all edges.
 : The graph is marked as directed via edgedefault="directed".
 :
 : @param $correspDescs the TEI correspDesc elements to process (may be empty)
 : @return a graph element (GraphML namespace)
 :)
declare function cmif2graphml:graph($correspDescs as element(tei:correspDesc)*) as element(graphml:graph) {
    let $vertices := cmif2graphml:nodes($correspDescs)
    let $links := cmif2graphml:edges($correspDescs)
    return
        <graph xmlns="http://graphml.graphdrawing.org/xmlns" edgedefault="directed">{
            ($vertices, $links)
        }</graph>
};
(:~
 : Generate a unique ID (for agents).
 : If a non-blank ref attribute is present, its string value is used as is;
 : otherwise (no ref, or an empty/whitespace-only ref) an ID is generated
 : with generate-id().
 : NB, generated IDs are per element, so this will result in different IDs
 : for identical names (string values).
 :
 : @param $item the agent element, or the empty sequence
 : @return the ID; the zero-length string if $item is the empty sequence
 :)
declare %private function cmif2graphml:id($item as element()?) as xs:string {
    let $ref := string($item/@ref)
    return
        (: A blank @ref must not become the ID: every agent carrying one
           would otherwise collapse into a single node with an empty ID. :)
        if (normalize-space($ref)) then $ref
        else generate-id($item)
};
(:~
 : Select the agent elements of the given correspAction elements,
 : i.e. their persName, orgName and name children (TEI namespace).
 :
 : @param $correspActions the TEI correspAction elements to inspect (may be empty)
 : @return the matching child elements in document order
 :)
declare %private function cmif2graphml:agents($correspActions as element(tei:correspAction)*) as element()* {
    $correspActions/(tei:persName | tei:orgName | tei:name)
};
@peterstadler
Copy link
Author

peterstadler commented Feb 8, 2022

A sample call:

xquery version "3.1" encoding "UTF-8";

(:
 : Sample call: load the correspDesc elements of a CMIF file and turn them
 : into a complete GraphML document.
 : The version declaration matches the imported module ("3.1").
 :)
declare namespace tei="http://www.tei-c.org/ns/1.0";
import module namespace cmif2graphml="http://github.com/peterstadler/cmif2graphml" at "https://gist.githubusercontent.com/peterstadler/72c0a81378affa03d095edd1423eeeca/raw/cmif2graphml.xqm";

let $correspDescs := doc('https://weber-gesamtausgabe.de/correspDesc.xml')//tei:correspDesc
return
    (: use the module's main entry point so the result has the graphml root
       element; cmif2graphml:graph() alone returns only the inner graph :)
    cmif2graphml:graphml($correspDescs)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment