Last active
August 29, 2015 14:02
-
-
Save kleem/e0eeb4f3b7c5b84a82a9 to your computer and use it in GitHub Desktop.
Clavius - Latin text annotation visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
json_data = [[{ | |
"case": "n", | |
"form": "botvitus", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "botvitus", | |
"token": "Botvitus" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"form": "nericius", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "nericius", | |
"token": "Nericius" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "d", | |
"form": "christophoro", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "christophorus", | |
"token": "Christophoro" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "d", | |
"form": "clavio", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "clavius", | |
"token": "Clavio" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "d", | |
"form": "s", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "suus", | |
"token": "S" | |
}, | |
{ | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
}, | |
{ | |
"token": "\n" | |
}], | |
[{ | |
"case": "d", | |
"form": "occupationibus", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "occupatio", | |
"token": "Occupationibus" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "d", | |
"form": "tuis", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "tuus", | |
"token": "tuis" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "v", | |
"degree": "s", | |
"form": "doctissime", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "doctus", | |
"token": "Doctissime" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "v", | |
"form": "clavi", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "clavius", | |
"token": "Clavi" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"mood": "p", | |
"form": "parcens", | |
"gender": "m", | |
"pos": "t", | |
"number": "s", | |
"lemma": "parco", | |
"token": "parcens", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "tertia", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "tertius", | |
"token": "tertia" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "mea", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "meus", | |
"token": "mea" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "ad", | |
"token": "ad", | |
"pos": "r", | |
"form": "ad" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "te", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "tu", | |
"token": "te" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "epistola", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "epistula", | |
"token": "epistola" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "praesertim", | |
"token": "praesertim", | |
"pos": "d", | |
"form": "praesertim" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "vero", | |
"token": "vero", | |
"pos": "d", | |
"form": "vero" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"case": "b", | |
"form": "secunda", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "secundus", | |
"token": "secunda" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "illius", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "ille", | |
"token": "illius" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "parte", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "pars", | |
"token": "parte" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"form": "quae", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "qui", | |
"token": "quae" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "erat", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "erat", | |
"tense": "i", | |
"voice": "a" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "de", | |
"token": "de", | |
"pos": "r", | |
"form": "de" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "puncto", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "punctum", | |
"token": "puncto" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "dinostrati", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "dinostratus", | |
"token": "Dino\nstrati" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "ultimo", | |
"gender": "n", | |
"pos": "a", | |
"number": "s", | |
"lemma": "ultimus", | |
"token": "ultimo" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "abs", | |
"token": "abs", | |
"pos": "r", | |
"form": "abs" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "te", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "tu", | |
"token": "te" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"mood": "p", | |
"form": "invento", | |
"gender": "n", | |
"pos": "t", | |
"number": "s", | |
"lemma": "invenio", | |
"token": "invento", | |
"tense": "r", | |
"voice": "p" | |
}, | |
{ | |
"lemma": ";", | |
"token": ";", | |
"pos": "u", | |
"form": ";" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"degree": "c", | |
"form": "brevior", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "brevis", | |
"token": "brevior" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "fortasse", | |
"token": "fortasse", | |
"pos": "d", | |
"form": "fortasse" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "fui", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "fui", | |
"tense": "r", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"lemma": "quam", | |
"token": "quam", | |
"pos": "d", | |
"form": "quam" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "oportuit", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "oportet", | |
"token": "oportuit", | |
"tense": "r", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "ideo", | |
"token": "ideoque", | |
"pos": "d", | |
"form": "ideoque" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "hac", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "hic", | |
"token": "hac" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "scriptione", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "scriptio", | |
"token": "scriptione" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "rem", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "res", | |
"token": "rem" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "totam", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "totus", | |
"token": "totam" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"case": "b", | |
"form": "paullo", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "paulus", | |
"token": "paullo" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"degree": "c", | |
"lemma": "uber", | |
"token": "uberius", | |
"pos": "d", | |
"form": "uberius" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "si", | |
"token": "si", | |
"pos": "c", | |
"form": "si" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "potero", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "possum", | |
"token": "potero", | |
"tense": "f", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "explicabo", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "explico", | |
"token": "explicabo", | |
"tense": "f", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
}, | |
{ | |
"token": " " | |
}], | |
[{ | |
"case": "b", | |
"form": "principio", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "principium", | |
"token": "Principio" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"lemma": "igitur", | |
"token": "igitur", | |
"pos": "c", | |
"form": "igitur" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "conchoidis", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "conchoidis", | |
"token": "conchoidis" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"form": "accidens", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "accidens", | |
"token": "accidens" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"degree": "s", | |
"form": "certissimum", | |
"gender": "n", | |
"pos": "a", | |
"number": "s", | |
"lemma": "certus", | |
"token": "certissimum" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "est", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "est", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"lemma": "nimirum", | |
"token": "nimirum", | |
"pos": "d", | |
"form": "nimirum" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "si", | |
"token": "si", | |
"pos": "c", | |
"form": "si" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "per", | |
"token": "per", | |
"pos": "r", | |
"form": "per" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "lineam", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineam" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "dinostrati", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "dinostratus", | |
"token": "Dinostrati" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "s", | |
"form": "transeat", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "transeo", | |
"token": "transeat", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "eam", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "is", | |
"token": "eam" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"lemma": "per", | |
"token": "per", | |
"pos": "r", | |
"form": "per" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "lineas", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "linea", | |
"token": "lineas" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "rectas", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "rectus", | |
"token": "rectas" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "n", | |
"form": "dividere", | |
"pos": "v", | |
"lemma": "divido", | |
"token": "dividere", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "partes", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "pars", | |
"token": "partes" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "supra", | |
"token": "supra", | |
"pos": "d", | |
"form": "supra" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "infraque", | |
"token": "infraque", | |
"pos": "d", | |
"form": "infraque" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"case": "a", | |
"form": "aequales", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "aequalis", | |
"token": "aequales" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "propter", | |
"token": "propter", | |
"pos": "r", | |
"form": "propter" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "motum", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "motus", | |
"token": "motus" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "a", | |
"token": "a", | |
"pos": "r", | |
"form": "a" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "summo", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "summum", | |
"token": "summo" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "velut", | |
"token": "velut", | |
"pos": "d", | |
"form": "velut" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "polo", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "polus", | |
"token": "polo" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"case": "a", | |
"form": "basim", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "basis", | |
"token": "basim" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "g", | |
"mood": "p", | |
"form": "permeantis", | |
"gender": "f", | |
"pos": "t", | |
"number": "s", | |
"lemma": "permeo", | |
"token": "permeantis", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ";", | |
"token": ";", | |
"pos": "u", | |
"form": ";" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"form": "qui", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "qui", | |
"token": "qui" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "quidem", | |
"token": "quidem", | |
"pos": "d", | |
"form": "quidem" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "n", | |
"form": "motus", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "motus", | |
"token": "motus" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "a", | |
"form": "familiam", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "familia", | |
"token": "familiam" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, | |
{ | |
"token": "\n" | |
}, | |
{ | |
"case": "b", | |
"form": "hoc", | |
"gender": "n", | |
"pos": "p", | |
"number": "s", | |
"lemma": "hic", | |
"token": "hoc" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "negozio", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "negotium", | |
"token": "negozio" | |
}, | |
{ | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "vel", | |
"token": "vel", | |
"pos": "c", | |
"form": "vel" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"case": "b", | |
"form": "primis", | |
"gender": "n", | |
"pos": "a", | |
"number": "p", | |
"lemma": "primus", | |
"token": "primis" | |
}, | |
{ | |
"token": " " | |
}, | |
{ | |
"mood": "i", | |
"form": "ducit", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "duco", | |
"token": "ducit", | |
"tense": "p", | |
"voice": "a" | |
}, | |
{ | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
}]]; | |
### convert to VAnn format ###

# Full names for the one-letter part-of-speech codes found in json_data.
# Codes that are not listed here (e.g. 'u') leave token.pos undefined.
POS_NAMES =
    n: 'noun'
    v: 'verb'
    t: 'participle'
    a: 'adjective'
    d: 'adverb'
    c: 'conjunction'
    r: 'preposition'
    p: 'pronoun'
    m: 'numeral'
    i: 'interjection'
    e: 'exclamation'

# Flatten the list of sentences into a single list of terms, putting a
# 'sentence' splitter between consecutive sentences and 'begin'/'end'
# splitters around the whole text.
# The list is built from an explicit seed so that an empty json_data yields
# just the two outer splitters (an unseeded reduce would return undefined
# there and the loop below would fail on it).
flat_terms = [{splitter: 'begin'}]
for sent, i in json_data
    flat_terms.push {splitter: 'sentence'} if i > 0
    flat_terms = flat_terms.concat sent
flat_terms.push {splitter: 'end'}
json_data = flat_terms

# One display token per term: text, optional lemma/pos/splitter and a skip
# flag marking tokens that get no ruby annotation and no underline.
data = []
for term in json_data
    token = {text: term.token}

    if term.lemma?
        token.lemma = term.lemma
    else
        # terms without lemma have to be skipped
        token.skip = true

    if term.pos?
        token.pos = if POS_NAMES.hasOwnProperty(term.pos) then POS_NAMES[term.pos] else undefined

        # skip 'other' parts of speech
        if term.pos is 'u'
            token.skip = true

    # if term.entity?
    #     token.netype = entity_map[term.entity].type

    if term.splitter?
        token.splitter = term.splitter
        token.skip = true

    data.push token
# Predicates and accessors shared by the text rendering below.
is_skipped = (d) -> d.skip
is_annotated = (d) -> not d.skip
token_text = (d) -> d.text

# One <span> per token inside the #text container.
elems = d3.select('#text').selectAll('span')
    .data(data)
  .enter().append('span')

# Skipped tokens are written directly into their span, without a ruby wrapper.
elems.filter(is_skipped)
    .html(token_text)

### splitters need some room ###
elems.filter((d) -> d.splitter?)
    .style('padding-left', (d) -> if d.splitter in ['begin','end'] then '11px' else '22px')

# Every other token becomes <ruby><rb>text</rb><rt class="lemma">lemma</rt></ruby>.
rubys = elems.filter(is_annotated).append('ruby')

# html (not text) is used on purpose here; the original note explaining what
# it is needed for is cut off in the source.
rubys.append('rb')
    .html(token_text)

### lemma ###
rubys.filter((d) -> d.lemma?).append('rt')
    .attr('class', 'lemma')
    .text((d) -> d.lemma)

### store textual representations into data ###
# Each datum keeps a reference to its span so redraw can measure it.
elems.each (d) ->
    d.elem = this
### VISUALIZATION ###
svg = d3.select('#annotations')

# Binds the tokens accepted by `keep` to newly appended <tag> elements of
# class `cls` inside the annotation svg and returns the entered selection.
# Marks are only created here; redraw positions them.
add_marks = (cls, tag, keep) ->
    svg.selectAll(".#{cls}")
        .data(data.filter(keep))
      .enter().append(tag)
        .attr('class', cls)

# Tooltip text shared by the proper-noun halos and the pos symbols.
pos_title = (d) -> (if d.proper then 'proper ' else '') + d.pos

# SVG lemma (disabled in the original; lemmas are rendered as ruby text instead)
# lemmas = svg.selectAll('.lemma')
#     .data(data.filter((d) -> d.lemma?))
#   .enter().append('text')
#     .attr('class', 'lemma')
#     .text((d) -> d.lemma)

### proper noun halo ###
proper_r = 9
propers = add_marks('proper', 'circle', (d) -> d.proper)
    .attr('r', proper_r)
propers.append('title')
    .text(pos_title)

### pos ###
# Each symbol references one of the <path id="pos_..."> definitions of the page.
poss = add_marks('pos', 'use', (d) -> d.pos?)
    .attr('xlink:href', (d) -> "#pos_#{d.pos}")
poss.append('title')
    .text(pos_title)

### named entity halo ###
nes = add_marks('ne', 'circle', (d) -> d.netype?)

### normal token underline ###
tokens = add_marks('token', 'rect', (d) -> not d.skip)

### polarity underline (positive, negative or neutral) ###
polarities = add_marks('polarity', 'path', (d) -> d.polarity in ['positive','negative','neutral'])
polarities.append('title')
    .text((d) -> "#{d.polarity} polarity")

### sentiment modifier underline (intensifier or weakener) ###
senmods = add_marks('senmod', 'path', (d) -> d.sentiment_modifier in ['intensifier','weakener'])
senmods.append('title')
    .text((d) -> d.sentiment_modifier)

### splitters ###
splitters = add_marks('splitter', 'path', (d) -> d.splitter?)
### visualization parameters ###

# token underline geometry
gap = 0     # distance between token underlines
dist = 1    # distance between text and token underlines
th = 1      # thickness of token and polarity underlines

# vertical offsets measured from the token underline
ldist = 10  # distance between token underlines and lemma baselines
pold = 22   # vertical offset used by the polarity and sentiment-modifier paths

# pos symbol center (from underline bottom left corner)
pos_dx = 4
pos_dy = 6

# parameters that control the curvature of the polarity underline
xc = 2
yc = 12

# named entity halos: radius, and one fill color per entity type
neradius = 22
necolor = d3.scale.ordinal()
    .domain(['person','location','date','organization','misc'])
    .range(['#00A777','#F58020','#999','#00A1CF','#E08566'])

# height and offset of splitters
hsplit = 58
osplit = 6
### redraw the annotations ###
# Resizes the annotation svg to the size of the #text div, measures the span
# of every token again and repositions all annotation marks accordingly.
# Called once at load time and again on every window resize.
redraw = () ->
    ### adapt the annotation svg to the text div ###
    text_rect = d3.select('#text').node().getBoundingClientRect()
    svg
        .attr('width', text_rect.width)
        .attr('height', text_rect.height)

    ### compute new bboxes ###
    # getBoundingClientRect is relative to the viewport, while the marks are
    # drawn in the coordinate system of the svg. Measuring against the svg's
    # own rect keeps them aligned even when the page has been scrolled.
    origin = svg.node().getBoundingClientRect()
    for d in data
        rect = d.elem.getBoundingClientRect()
        # a plain object is stored instead of writing onto the live DOMRect
        d.bbox =
            left: rect.left - origin.left
            right: rect.right - origin.left
            top: rect.top - origin.top
            bottom: rect.bottom - origin.top
            width: rect.right - rect.left
            height: rect.bottom - rect.top

    tokens
        .attr('x', (d) -> d.bbox.left+gap/2)
        .attr('y', (d) -> d.bbox.bottom+dist)
        .attr('width', (d) -> d.bbox.width-gap)
        .attr('height', th)

    # SVG lemma positioning (disabled in the original, like the lemma marks)
    # lemmas
    #     .attr('x', (d) -> d.bbox.left+d.bbox.width/2)
    #     .attr('y', (d) -> d.bbox.bottom+dist+th+ldist)

    poss
        .attr('x', (d) -> d.bbox.left+gap+pos_dx)
        .attr('y', (d) -> d.bbox.bottom+dist+th+pos_dy)

    propers
        .attr('cx', (d) -> d.bbox.left+gap+pos_dx)
        .attr('cy', (d) -> d.bbox.bottom+dist+th+pos_dy)

    polarities
        .attr('d', (d) ->
            x1 = d.bbox.left+gap/2
            x2 = d.bbox.right-gap/2
            y = d.bbox.bottom+pold+dist-2*yc/3
            y_eq = d.bbox.bottom+pold+dist-2*th
            #y_eq2 = d.bbox.bottom+pold+dist+2
            if d.polarity is 'neutral'
                # flat bar of thickness th
                return "M#{x1} #{y_eq} L#{x2} #{y_eq} L#{x2} #{y_eq+th} L#{x1} #{y_eq+th}"
            else
                # curved band of thickness th; mirrored below for 'negative'
                return "M#{x1} #{y} C#{x1+xc} #{y+yc} #{x2-xc} #{y+yc} #{x2} #{y} L#{x2} #{y+th} C#{x2-xc} #{y+th+yc} #{x1+xc} #{y+th+yc} #{x1} #{y+th} z"
        )
    # negative polarity reuses the curved path, flipped vertically
    polarities.filter((d)->d.polarity is 'negative')
        .attr('transform', (d)->"scale(1,-1) translate(0,#{-2*(d.bbox.bottom+pold+dist-2)})")

    senmods
        .attr('d', (d) ->
            x1 = d.bbox.left+gap/2
            x2 = d.bbox.right-gap/2
            yl = d.bbox.bottom+pold+dist-2*th+2
            yh = yl-5
            if d.sentiment_modifier is 'intensifier'
                # wedge rising from left to right
                return "M#{x1} #{yl} L#{x2} #{yh} L#{x2} #{yl} L#{x1} #{yl}"
            else
                # wedge falling from left to right
                return "M#{x1} #{yh} L#{x2} #{yl} L#{x2} #{yl} L#{x1} #{yl}"
        )

    nes
        .attr('cx', (d) -> d.bbox.left + d.bbox.width/2)
        .attr('cy', (d) -> d.bbox.top + d.bbox.height/2)
        .attr('r', neradius)
        .attr('fill', (d) -> necolor(d.netype))

    splitters
        .attr
            d: (d) ->
                switch d.splitter
                    when 'sentence' then "M#{d.bbox.left+d.bbox.width/2} #{d.bbox.top+d.bbox.height/2-hsplit/2+osplit} l0 #{hsplit}"
                    when 'begin' then "M#{d.bbox.left+0.5} #{d.bbox.top+d.bbox.height/2-hsplit/2+osplit} l0 #{hsplit} l2 0 l6 0 l-6 0 l0 #{-hsplit} l12 0 z"
                    when 'end' then "M#{d.bbox.right-0.5} #{d.bbox.top+d.bbox.height/2-hsplit/2+osplit} l0 #{hsplit} l-2 0 l-12 0 l12 0 l0 #{-hsplit} l-6 0 z"

# initial layout, then keep the annotations aligned when the text reflows
redraw()

window.onresize = redraw
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Page reset: the text div and the annotation svg are both absolutely
   positioned, so they share the same origin. */
html, body {
  margin: 0;
  padding: 0;
  background: white;
}

/* Text layer */
#text {
  position: absolute;
  /* this is needed to have svg events work */
  pointer-events: none;

  /* tall lines leave room for the marks drawn around each token */
  line-height: 4em;
  font-family: Georgia, serif;
  font-size: 18px;
  /*text-align: justify;*/

  /* padding is used to make sure the svg fits */
  padding: 12px;
}
#text > span {
  padding-left: 1px;
  padding-right: 1px;
}
rb {
  /* this enables text selection */
  pointer-events: all;
  padding-bottom: 2px;
}
rt {
  padding-left: 16px;
  padding-right: 16px;
}

/* Annotation layer */
#annotations {
  position: absolute;
}

.token {
  fill: #999;
}

/* Lemma labels: the rules cover both svg text (text-anchor) and html ruby
   text (color, text-align). */
.lemma {
  font-size: 9px;
  font-family: sans-serif;
  text-anchor: middle;
  color: #999;
  text-align: center;
}

/* Lemmas go below the base text. "after" is the legacy WebKit keyword;
   "under" is the standard value and is declared last so that browsers
   which understand it use it. */
ruby {
  -webkit-ruby-position: after;
  ruby-position: after;
  ruby-position: under;
}

.pos, .polarity, .senmod {
  fill: #2A9DC2;
}
.proper {
  fill: #555;
}
.ne, .proper {
  fill-opacity: 0.15;
}

.splitter {
  fill: #999;
  stroke: #999;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="description" content="Clavius - Latin text annotation visualization" />
    <title>Clavius - Latin text annotation visualization</title>
    <link rel="stylesheet" href="index.css">
    <!-- Libraries are loaded over https so that they are not blocked as mixed
         content when the page itself is served over https. -->
    <script src="https://d3js.org/d3.v3.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/2.4.1/lodash.min.js"></script>
  </head>
  <body>
    <!-- Annotation layer; the script sizes it to match the #text div. -->
    <svg id="annotations">
      <defs>
        <!-- Part-of-speech symbols, referenced by the script as #pos_<name>. -->
        <path id="pos_noun" d="m -3,-3 6,0 0,6 -6,0 z"/>
        <path id="pos_verb" d="M -3,-4 4,0 -3,4 z"/>
        <path id="pos_adjective" d="m -3,-3 0,6 6,0 0,-6 z m 2,2 2,0 0,2 -2,0 z"/>
        <path id="pos_adverb" d="M -3 -4 L -3 4 L 4 0 L -3 -4 z M -1.4375 -1.5 L 1.1875 0 L -1.4375 1.5 L -1.4375 -1.5 z"/>
        <path id="pos_pronoun" d="M -3 -3 L -3 -1 L -1 -1 L -1 -3 L -3 -3 z M 1 -3 L 1 -1 L 3 -1 L 3 -3 L 1 -3 z M -3 1 L -3 3 L -1 3 L -1 1 L -3 1 z M 1 1 L 1 3 L 3 3 L 3 1 L 1 1 z"/>
        <path id="pos_preposition" d="m -1,-6 0,5 2,0 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_determiner" d="m -1,-6 0,5 2,0 0,-5 z"/>
        <path id="pos_conjunction" d="m -1,-6 0,3 -2,0 0,2 2,0 0,2 2,0 0,-2 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_participle" d="M 0 -4.25 L -4.25 0 L 0 4.25 L 4.25 0 L 0 -4.25 z M 0 -1.40625 L 1.40625 0 L 0 1.40625 L -1.40625 0 L 0 -1.40625 z"/>
        <path id="pos_other" d="m -1,-6 0,2 2,0 0,-2 z"/>
      </defs>
    </svg>
    <!-- Text layer; filled with one span per token by the script. -->
    <div id="text"></div>
    <script src="index.js"></script>
  </body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
var data, dist, elems, gap, hsplit, json_data, ldist, necolor, neradius, nes, osplit, polarities, pold, pos_dx, pos_dy, poss, proper_r, propers, redraw, rubys, senmods, splitters, svg, term, th, token, tokens, xc, yc, _i, _len; | |
json_data = [ | |
[ | |
{ | |
"case": "n", | |
"form": "botvitus", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "botvitus", | |
"token": "Botvitus" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"form": "nericius", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "nericius", | |
"token": "Nericius" | |
}, { | |
"token": " " | |
}, { | |
"case": "d", | |
"form": "christophoro", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "christophorus", | |
"token": "Christophoro" | |
}, { | |
"token": " " | |
}, { | |
"case": "d", | |
"form": "clavio", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "clavius", | |
"token": "Clavio" | |
}, { | |
"token": " " | |
}, { | |
"case": "d", | |
"form": "s", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "suus", | |
"token": "S" | |
}, { | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
}, { | |
"token": "\n" | |
} | |
], [ | |
{ | |
"case": "d", | |
"form": "occupationibus", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "occupatio", | |
"token": "Occupationibus" | |
}, { | |
"token": " " | |
}, { | |
"case": "d", | |
"form": "tuis", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "tuus", | |
"token": "tuis" | |
}, { | |
"token": " " | |
}, { | |
"case": "v", | |
"degree": "s", | |
"form": "doctissime", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "doctus", | |
"token": "Doctissime" | |
}, { | |
"token": " " | |
}, { | |
"case": "v", | |
"form": "clavi", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "clavius", | |
"token": "Clavi" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"mood": "p", | |
"form": "parcens", | |
"gender": "m", | |
"pos": "t", | |
"number": "s", | |
"lemma": "parco", | |
"token": "parcens", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": "\n" | |
}, { | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "tertia", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "tertius", | |
"token": "tertia" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "mea", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "meus", | |
"token": "mea" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "ad", | |
"token": "ad", | |
"pos": "r", | |
"form": "ad" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "te", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "tu", | |
"token": "te" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "epistola", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "epistula", | |
"token": "epistola" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "praesertim", | |
"token": "praesertim", | |
"pos": "d", | |
"form": "praesertim" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "vero", | |
"token": "vero", | |
"pos": "d", | |
"form": "vero" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, { | |
"token": "\n" | |
}, { | |
"case": "b", | |
"form": "secunda", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "secundus", | |
"token": "secunda" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "illius", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "ille", | |
"token": "illius" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "parte", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "pars", | |
"token": "parte" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"form": "quae", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "qui", | |
"token": "quae" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "erat", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "erat", | |
"tense": "i", | |
"voice": "a" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "de", | |
"token": "de", | |
"pos": "r", | |
"form": "de" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "puncto", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "punctum", | |
"token": "puncto" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "dinostrati", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "dinostratus", | |
"token": "Dino\nstrati" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "ultimo", | |
"gender": "n", | |
"pos": "a", | |
"number": "s", | |
"lemma": "ultimus", | |
"token": "ultimo" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "abs", | |
"token": "abs", | |
"pos": "r", | |
"form": "abs" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "te", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "tu", | |
"token": "te" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"mood": "p", | |
"form": "invento", | |
"gender": "n", | |
"pos": "t", | |
"number": "s", | |
"lemma": "invenio", | |
"token": "invento", | |
"tense": "r", | |
"voice": "p" | |
}, { | |
"lemma": ";", | |
"token": ";", | |
"pos": "u", | |
"form": ";" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"degree": "c", | |
"form": "brevior", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "brevis", | |
"token": "brevior" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "fortasse", | |
"token": "fortasse", | |
"pos": "d", | |
"form": "fortasse" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "fui", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "fui", | |
"tense": "r", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": "\n" | |
}, { | |
"lemma": "quam", | |
"token": "quam", | |
"pos": "d", | |
"form": "quam" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "oportuit", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "oportet", | |
"token": "oportuit", | |
"tense": "r", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "ideo", | |
"token": "ideoque", | |
"pos": "d", | |
"form": "ideoque" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "hac", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "hic", | |
"token": "hac" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "scriptione", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "scriptio", | |
"token": "scriptione" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "rem", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "res", | |
"token": "rem" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "totam", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "totus", | |
"token": "totam" | |
}, { | |
"token": "\n" | |
}, { | |
"case": "b", | |
"form": "paullo", | |
"gender": "m", | |
"pos": "a", | |
"number": "s", | |
"lemma": "paulus", | |
"token": "paullo" | |
}, { | |
"token": " " | |
}, { | |
"degree": "c", | |
"lemma": "uber", | |
"token": "uberius", | |
"pos": "d", | |
"form": "uberius" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "si", | |
"token": "si", | |
"pos": "c", | |
"form": "si" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "potero", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "possum", | |
"token": "potero", | |
"tense": "f", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "explicabo", | |
"person": "1", | |
"number": "s", | |
"pos": "v", | |
"lemma": "explico", | |
"token": "explicabo", | |
"tense": "f", | |
"voice": "a" | |
}, { | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
}, { | |
"token": " " | |
} | |
], [ | |
{ | |
"case": "b", | |
"form": "principio", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "principium", | |
"token": "Principio" | |
}, { | |
"token": "\n" | |
}, { | |
"lemma": "igitur", | |
"token": "igitur", | |
"pos": "c", | |
"form": "igitur" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "conchoidis", | |
"gender": "f", | |
"pos": "a", | |
"number": "s", | |
"lemma": "conchoidis", | |
"token": "conchoidis" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"form": "accidens", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "accidens", | |
"token": "accidens" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"degree": "s", | |
"form": "certissimum", | |
"gender": "n", | |
"pos": "a", | |
"number": "s", | |
"lemma": "certus", | |
"token": "certissimum" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "est", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "sum", | |
"token": "est", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": "\n" | |
}, { | |
"lemma": "nimirum", | |
"token": "nimirum", | |
"pos": "d", | |
"form": "nimirum" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "si", | |
"token": "si", | |
"pos": "c", | |
"form": "si" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "per", | |
"token": "per", | |
"pos": "r", | |
"form": "per" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "lineam", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineam" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "dinostrati", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "dinostratus", | |
"token": "Dinostrati" | |
}, { | |
"token": " " | |
}, { | |
"mood": "s", | |
"form": "transeat", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "transeo", | |
"token": "transeat", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "eam", | |
"gender": "f", | |
"pos": "p", | |
"number": "s", | |
"lemma": "is", | |
"token": "eam" | |
}, { | |
"token": "\n" | |
}, { | |
"lemma": "per", | |
"token": "per", | |
"pos": "r", | |
"form": "per" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "lineas", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "linea", | |
"token": "lineas" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "rectas", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "rectus", | |
"token": "rectas" | |
}, { | |
"token": " " | |
}, { | |
"mood": "n", | |
"form": "dividere", | |
"pos": "v", | |
"lemma": "divido", | |
"token": "dividere", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "partes", | |
"gender": "f", | |
"pos": "n", | |
"number": "p", | |
"lemma": "pars", | |
"token": "partes" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "supra", | |
"token": "supra", | |
"pos": "d", | |
"form": "supra" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "infraque", | |
"token": "infraque", | |
"pos": "d", | |
"form": "infraque" | |
}, { | |
"token": "\n" | |
}, { | |
"case": "a", | |
"form": "aequales", | |
"gender": "f", | |
"pos": "a", | |
"number": "p", | |
"lemma": "aequalis", | |
"token": "aequales" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "propter", | |
"token": "propter", | |
"pos": "r", | |
"form": "propter" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "motum", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "motus", | |
"token": "motus" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"form": "lineae", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "linea", | |
"token": "lineae" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "a", | |
"token": "a", | |
"pos": "r", | |
"form": "a" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "summo", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "summum", | |
"token": "summo" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "velut", | |
"token": "velut", | |
"pos": "d", | |
"form": "velut" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "polo", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "polus", | |
"token": "polo" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": "\n" | |
}, { | |
"case": "a", | |
"form": "basim", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "basis", | |
"token": "basim" | |
}, { | |
"token": " " | |
}, { | |
"case": "g", | |
"mood": "p", | |
"form": "permeantis", | |
"gender": "f", | |
"pos": "t", | |
"number": "s", | |
"lemma": "permeo", | |
"token": "permeantis", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"lemma": ";", | |
"token": ";", | |
"pos": "u", | |
"form": ";" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"form": "qui", | |
"gender": "m", | |
"pos": "p", | |
"number": "s", | |
"lemma": "qui", | |
"token": "qui" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "quidem", | |
"token": "quidem", | |
"pos": "d", | |
"form": "quidem" | |
}, { | |
"token": " " | |
}, { | |
"case": "n", | |
"form": "motus", | |
"gender": "m", | |
"pos": "n", | |
"number": "s", | |
"lemma": "motus", | |
"token": "motus" | |
}, { | |
"token": " " | |
}, { | |
"case": "a", | |
"form": "familiam", | |
"gender": "f", | |
"pos": "n", | |
"number": "s", | |
"lemma": "familia", | |
"token": "familiam" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, { | |
"token": "\n" | |
}, { | |
"case": "b", | |
"form": "hoc", | |
"gender": "n", | |
"pos": "p", | |
"number": "s", | |
"lemma": "hic", | |
"token": "hoc" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "negozio", | |
"gender": "n", | |
"pos": "n", | |
"number": "s", | |
"lemma": "negotium", | |
"token": "negozio" | |
}, { | |
"lemma": ",", | |
"token": ",", | |
"pos": "u", | |
"form": "," | |
}, { | |
"token": " " | |
}, { | |
"lemma": "vel", | |
"token": "vel", | |
"pos": "c", | |
"form": "vel" | |
}, { | |
"token": " " | |
}, { | |
"lemma": "in", | |
"token": "in", | |
"pos": "r", | |
"form": "in" | |
}, { | |
"token": " " | |
}, { | |
"case": "b", | |
"form": "primis", | |
"gender": "n", | |
"pos": "a", | |
"number": "p", | |
"lemma": "primus", | |
"token": "primis" | |
}, { | |
"token": " " | |
}, { | |
"mood": "i", | |
"form": "ducit", | |
"person": "3", | |
"number": "s", | |
"pos": "v", | |
"lemma": "duco", | |
"token": "ducit", | |
"tense": "p", | |
"voice": "a" | |
}, { | |
"lemma": ".", | |
"token": ".", | |
"pos": "u", | |
"form": "." | |
} | |
] | |
]; | |
/* convert to VAnn format | |
*/ | |
json_data = _.reduce(json_data, function(acc, sent) { | |
return acc.concat([ | |
{ | |
splitter: 'sentence' | |
} | |
], sent); | |
}); | |
json_data = [ | |
{ | |
splitter: 'begin' | |
} | |
].concat(json_data, [ | |
{ | |
splitter: 'end' | |
} | |
]); | |
data = []; | |
for (_i = 0, _len = json_data.length; _i < _len; _i++) { | |
term = json_data[_i]; | |
token = { | |
text: term.token | |
}; | |
if (term.lemma != null) { | |
token.lemma = term.lemma; | |
} | |
if (term.pos != null) { | |
token.pos = (function() { | |
switch (term.pos) { | |
case 'n': | |
return 'noun'; | |
case 'v': | |
return 'verb'; | |
case 't': | |
return 'participle'; | |
case 'a': | |
return 'adjective'; | |
case 'd': | |
return 'adverb'; | |
case 'c': | |
return 'conjunction'; | |
case 'r': | |
return 'preposition'; | |
case 'p': | |
return 'pronoun'; | |
case 'm': | |
return 'numeral'; | |
case 'i': | |
return 'interjection'; | |
case 'e': | |
return 'exclamation'; | |
default: | |
return void 0; | |
} | |
})(); | |
if (term.pos === 'u') { | |
token.skip = true; | |
} | |
} | |
if (term.splitter != null) { | |
token.splitter = term.splitter; | |
token.skip = true; | |
} | |
data.push(token); | |
if (term.lemma == null) { | |
token.skip = true; | |
} | |
} | |
elems = d3.select('#text').selectAll('span').data(data).enter().append('span'); | |
elems.filter(function(d) { | |
return (d.skip != null) && d.skip; | |
}).html(function(d) { | |
return d.text; | |
}); | |
/* splitters need some room | |
*/ | |
elems.filter(function(d) { | |
return d.splitter != null; | |
}).style({ | |
'padding-left': function(d) { | |
var _ref; | |
if ((_ref = d.splitter) === 'begin' || _ref === 'end') { | |
return '11px'; | |
} else { | |
return '22px'; | |
} | |
} | |
}); | |
rubys = elems.filter(function(d) { | |
return (d.skip == null) || !d.skip; | |
}).append('ruby'); | |
rubys.append('rb').html(function(d) { | |
return d.text; | |
}); | |
/* lemma | |
*/ | |
rubys.filter(function(d) { | |
return d.lemma != null; | |
}).append('rt').attr('class', 'lemma').text(function(d) { | |
return d.lemma; | |
}); | |
/* store textual representations into data | |
*/ | |
elems.each(function(d) { | |
return d.elem = this; | |
}); | |
/* VISUALIZATION | |
*/ | |
svg = d3.select('#annotations'); | |
/* SVG lemma | |
lemmas = svg.selectAll('.lemma') | |
.data(data.filter((d) -> d.lemma?)) | |
.enter().append('text') | |
.attr('class', 'lemma') | |
.text((d) -> d.lemma) | |
*/ | |
/* proper noun halo | |
*/ | |
proper_r = 9; | |
propers = svg.selectAll('.proper').data(data.filter(function(d) { | |
return (d.proper != null) && d.proper; | |
})).enter().append('circle').attr('class', 'proper').attr('r', proper_r); | |
propers.append('title').text(function(d) { | |
return ((d.proper != null) && d.proper ? 'proper ' : '') + d.pos; | |
}); | |
/* pos | |
*/ | |
poss = svg.selectAll('.pos').data(data.filter(function(d) { | |
return d.pos != null; | |
})).enter().append('use').attr('class', 'pos').attr('xlink:href', function(d) { | |
return "#pos_" + d.pos; | |
}); | |
poss.append('title').text(function(d) { | |
return ((d.proper != null) && d.proper ? 'proper ' : '') + d.pos; | |
}); | |
/* named entity halo | |
*/ | |
nes = svg.selectAll('.ne').data(data.filter(function(d) { | |
return d.netype != null; | |
})).enter().append('circle').attr('class', 'ne'); | |
/* normal token underline | |
*/ | |
tokens = svg.selectAll('.token').data(data.filter(function(d) { | |
return !d.skip; | |
})).enter().append('rect').attr('class', 'token'); | |
/* polarity underline (positive, negative or neutral) | |
*/ | |
polarities = svg.selectAll('.polarity').data(data.filter(function(d) { | |
var _ref; | |
return (d.polarity != null) && ((_ref = d.polarity) === 'positive' || _ref === 'negative' || _ref === 'neutral'); | |
})).enter().append('path').attr('class', 'polarity'); | |
polarities.append('title').text(function(d) { | |
return "" + d.polarity + " polarity"; | |
}); | |
/* sentiment modifier underline (intensifier or weakener) | |
*/ | |
senmods = svg.selectAll('.senmod').data(data.filter(function(d) { | |
var _ref; | |
return (d.sentiment_modifier != null) && ((_ref = d.sentiment_modifier) === 'intensifier' || _ref === 'weakener'); | |
})).enter().append('path').attr('class', 'senmod'); | |
senmods.append('title').text(function(d) { | |
return d.sentiment_modifier; | |
}); | |
/* splitters | |
*/ | |
splitters = svg.selectAll('.splitter').data(data.filter(function(d) { | |
return d.splitter != null; | |
})).enter().append('path').attr('class', 'splitter'); | |
/* visualization parameters | |
*/ | |
gap = 0; | |
dist = 1; | |
th = 1; | |
ldist = 10; | |
pold = 22; | |
pos_dx = 4; | |
pos_dy = 6; | |
xc = 2; | |
yc = 12; | |
neradius = 22; | |
necolor = d3.scale.ordinal().domain(['person', 'location', 'date', 'organization', 'misc']).range(['#00A777', '#F58020', '#999', '#00A1CF', '#E08566']); | |
hsplit = 58; | |
osplit = 6; | |
/* redraw the annotations | |
*/ | |
redraw = function() { | |
/* adpat the annotation svg to the text div | |
*/ | |
var d, new_svg_bbox, _j, _len1; | |
new_svg_bbox = d3.select('#text')[0][0].getBoundingClientRect(); | |
svg.attr('width', new_svg_bbox.width).attr('height', new_svg_bbox.height); | |
/* compute new bboxes | |
*/ | |
for (_j = 0, _len1 = data.length; _j < _len1; _j++) { | |
d = data[_j]; | |
d.bbox = d.elem.getBoundingClientRect(); | |
d.bbox.width = d.bbox.right - d.bbox.left; | |
d.bbox.height = d.bbox.bottom - d.bbox.top; | |
} | |
tokens.attr('x', function(d) { | |
return d.bbox.left + gap / 2; | |
}).attr('y', function(d) { | |
return d.bbox.bottom + dist; | |
}).attr('width', function(d) { | |
return d.bbox.width - gap; | |
}).attr('height', th); | |
/* | |
lemmas | |
.attr('x', (d) -> d.bbox.left+d.bbox.width/2) | |
.attr('y', (d) -> d.bbox.bottom+dist+th+ldist) | |
*/ | |
poss.attr('x', function(d) { | |
return d.bbox.left + gap + pos_dx; | |
}).attr('y', function(d) { | |
return d.bbox.bottom + dist + th + pos_dy; | |
}); | |
propers.attr('cx', function(d) { | |
return d.bbox.left + gap + pos_dx; | |
}).attr('cy', function(d) { | |
return d.bbox.bottom + dist + th + pos_dy; | |
}); | |
polarities.attr('d', function(d) { | |
var x1, x2, y, y_eq; | |
x1 = d.bbox.left + gap / 2; | |
x2 = d.bbox.right - gap / 2; | |
y = d.bbox.bottom + pold + dist - 2 * yc / 3; | |
y_eq = d.bbox.bottom + pold + dist - 2 * th; | |
if (d.polarity === 'neutral') { | |
return "M" + x1 + " " + y_eq + " L" + x2 + " " + y_eq + " L" + x2 + " " + (y_eq + th) + " L" + x1 + " " + (y_eq + th); | |
} else { | |
return "M" + x1 + " " + y + " C" + (x1 + xc) + " " + (y + yc) + " " + (x2 - xc) + " " + (y + yc) + " " + x2 + " " + y + " L" + x2 + " " + (y + th) + " C" + (x2 - xc) + " " + (y + th + yc) + " " + (x1 + xc) + " " + (y + th + yc) + " " + x1 + " " + (y + th) + " z"; | |
} | |
}); | |
polarities.filter(function(d) { | |
return d.polarity === 'negative'; | |
}).attr('transform', function(d) { | |
return "scale(1,-1) translate(0," + (-2 * (d.bbox.bottom + pold + dist - 2)) + ")"; | |
}); | |
senmods.attr('d', function(d) { | |
var x1, x2, yh, yl; | |
x1 = d.bbox.left + gap / 2; | |
x2 = d.bbox.right - gap / 2; | |
yl = d.bbox.bottom + pold + dist - 2 * th + 2; | |
yh = yl - 5; | |
if (d.sentiment_modifier === 'intensifier') { | |
return "M" + x1 + " " + yl + " L" + x2 + " " + yh + " L" + x2 + " " + yl + " L" + x1 + " " + yl; | |
} else { | |
return "M" + x1 + " " + yh + " L" + x2 + " " + yl + " L" + x2 + " " + yl + " L" + x1 + " " + yl; | |
} | |
}); | |
nes.attr('cx', function(d) { | |
return d.bbox.left + d.bbox.width / 2; | |
}).attr('cy', function(d) { | |
return d.bbox.top + d.bbox.height / 2; | |
}).attr('r', neradius).attr('fill', function(d) { | |
return necolor(d.netype); | |
}); | |
return splitters.attr({ | |
d: function(d) { | |
switch (d.splitter) { | |
case 'sentence': | |
return "M" + (d.bbox.left + d.bbox.width / 2) + " " + (d.bbox.top + d.bbox.height / 2 - hsplit / 2 + osplit) + " l0 " + hsplit; | |
case 'begin': | |
return "M" + (d.bbox.left + 0.5) + " " + (d.bbox.top + d.bbox.height / 2 - hsplit / 2 + osplit) + " l0 " + hsplit + " l2 0 l6 0 l-6 0 l0 " + (-hsplit) + " l12 0 z"; | |
case 'end': | |
return "M" + (d.bbox.right - 0.5) + " " + (d.bbox.top + d.bbox.height / 2 - hsplit / 2 + osplit) + " l0 " + hsplit + " l-2 0 l-12 0 l12 0 l0 " + (-hsplit) + " l-6 0 z"; | |
} | |
} | |
}); | |
}; | |
redraw(); | |
window.onresize = redraw; | |
}).call(this); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment