Last active
December 31, 2015 19:29
-
-
Save CliffordAnderson/8034262 to your computer and use it in GitHub Desktop.
Produces a word frequency list for each speaker in Much Ado About Nothing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0";

(:~
 : Produces a word-frequency list for each speaker in Much Ado About Nothing.
 :
 : Reads the TEI document at db/shakespeare/Ado.xml. For every distinct
 : speaker label (text of tei:sp/tei:speaker/tei:w) it gathers the text of
 : all tei:w descendants of the tei:ab children of the matching speeches,
 : and returns one <word/> element per distinct word form with:
 :   speaker  - the speaker label
 :   type     - the word form
 :   count    - occurrences of that form in the speaker's speeches
 :   weighted - count divided by the speaker's total number of words
 : Within each speaker the elements are ordered by descending count.
 :)

declare namespace tei="http://www.tei-c.org/ns/1.0";

let $doc := fn:doc("db/shakespeare/Ado.xml")
(: NOTE(review): speaker labels are matched per tei:w token. If a label is
   encoded as several tei:w elements, each token is treated as its own
   speaker and matches every speech whose label contains that token.
   Confirm against the encoding of Ado.xml. :)
for $character in fn:distinct-values($doc//tei:sp/tei:speaker/tei:w/text())
(: All word tokens spoken under this label, in speech order. :)
let $bag :=
  for $speech in $doc//tei:sp[tei:speaker/tei:w/text() = $character]
  return $speech/tei:ab//tei:w/text()
(: Loop-invariant denominator for the relative frequency; computed once
   per speaker instead of once per distinct word. :)
let $total := fn:count($bag)
return
  (: Group the tokens by their string value so that each word form is
     counted in a single pass rather than by rescanning the whole bag.
     After "group by", $token holds every token of the current group. :)
  for $token in $bag
  let $word := fn:string($token)
  group by $word
  let $count := fn:count($token)
  order by $count descending
  return <word speaker="{$character}" type="{$word}" count="{$count}" weighted="{$count div $total}"/>
Author
CliffordAnderson
commented
Dec 19, 2013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment