Created
May 20, 2016 12:48
-
-
Save welblaud/bde1014ef1462d012b0708cad653168c to your computer and use it in GitHub Desktop.
A module for searching and replacing text nodes in in-memory documents (for eXist-db)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0"; | |
module namespace cust-utils = 'http://46.28.111.241:8081/exist/db/apps/karolinum-x/modules/cust-utils'; | |
(:~
 : Replace Spaces Soft – replaces the whitespace character that follows a
 : Czech single-letter word (s, z, k, v, a, i, o, u, in either case) with a
 : non-breaking space (U+00A0), so the word is never left alone at the end
 : of a line.
 :
 : The work is done on an in-memory copy; nothing is stored in the database.
 : Runs of consecutive single-letter words, e.g. "V hlavě a v těle.", are
 : handled as well (see the note on the double pass below).
 :
 : @param $nodes the nodes to process (typically a document or element tree)
 : @return for a document node, its processed children; for an element, a
 :         copy with the same name and attributes and processed content;
 :         for a text node, a new text node with the spaces replaced.
 :         Every other node kind (comments, processing instructions, ...)
 :         is dropped.
 :)
declare function cust-utils:replace-spaces-soft($nodes as node()*) as item()* {
    (: The non-breaking space is spelled as the character reference &#160;
     : so it stays visible in editors and survives copy and paste. It needs
     : its own alternative in the left context because \s in XPath regular
     : expressions covers only space, tab, CR and LF. :)
    let $match := '(^|\s|&#160;)([szkvaiouSZKVAIOU])[\s]'
    let $replacement := '$1$2&#160;'
    for $node in $nodes
    return
        typeswitch ($node)
            (: descend into the document node and return its processed children :)
            case document-node() return
                cust-utils:replace-spaces-soft($node/node())
            (: rebuild every element with its attributes and processed content :)
            case element() return
                element { node-name($node) } {
                    $node/@*,
                    cust-utils:replace-spaces-soft($node/node())
                }
            (: Text node: do the replacement.
             : Two passes are needed because a match consumes the space that
             : the next single-letter word would need as its left context;
             : on the second pass that word is preceded by the non-breaking
             : space inserted by the first pass and is matched too.
             : The result is wrapped in text { } because bare strings that end
             : up adjacent in element content (e.g. after a comment between
             : them was dropped) would be joined with an extra space. :)
            case text() return
                text {
                    replace(
                        replace($node, $match, $replacement),
                        $match, $replacement
                    )
                }
            (: drop comment, processing-instruction and any other nodes :)
            default return
                ()
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment