Last active
August 29, 2015 14:02
-
-
Save kleem/c8bed17528993f1e0dba to your computer and use it in GitHub Desktop.
OpeNER - Text annotation visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example review ids:
#   52a73a9fae9eef5a506472b9 "Ancienne cours..."
#   533eccf9ae9eef521e6292b1 "Nieuw Dakota..."
#   533eccf9ae9eef521e6294d3 "La peor discoteca..."

# Fetch one review, render its terms as annotated text inside #text and draw
# the matching annotation shapes into the #annotations SVG.
d3.json "http://tour-pedia.org/api/getReviewDetails?id=533eccf9ae9eef521e6292b1", (error, kaf_data) ->
  # d3 v3 invokes a two-argument callback with (error, data). Stop here rather
  # than crashing on a failed request or on a payload without terms.
  terms = kaf_data?.analysis?.json?.terms
  if error? or not terms?
    console.error 'Could not load review annotations', error
    return

  ### convert to VAnn format ###
  # entity_map = objectify kaf_data.analysis.json.entities
  data = []
  for tid, term of terms
    token = {text: term.text}
    token.lemma = term.lemma if term.lemma?

    if term.pos?
      token.pos = switch term.pos
        when 'N' then 'noun'
        when 'R' then 'noun' # proper noun, flagged through token.proper below
        when 'V' then 'verb'
        when 'G' then 'adjective'
        when 'A' then 'adverb'
        when 'Q' then 'pronoun' # check
        when 'P' then 'preposition'
        when 'D' then 'determiner'
        when 'C' then 'conjunction'
        # when 'O' then 'other'
        else undefined
      # 'other' parts of speech are emitted as plain text, without a <ruby>
      token.skip = true if term.pos is 'O'
      token.proper = true if term.pos is 'R'

    if term.sentiment?
      # polarity takes precedence: a modifier is only kept when no polarity is given
      if term.sentiment.polarity?
        token.polarity = term.sentiment.polarity
      else if term.sentiment.sentiment_modifier?
        token.sentiment_modifier = term.sentiment.sentiment_modifier

    # if term.entity?
    #   token.netype = entity_map[term.entity].type

    data.push token
    # separator between consecutive terms
    data.push {text: ' ', skip: true}

  # one <span> per entry of data (terms and separators alike)
  elems = d3.select('#text').selectAll('span')
    .data(data)
    .enter().append('span')

  # NOTE(review): text coming from the remote API is inserted as HTML. The
  # original author states html is needed here; confirm the API output is
  # trusted or sanitize it before relying on this.
  elems.filter((d) -> d.skip)
    .html((d) -> d.text)

  rubys = elems.filter((d) -> not d.skip).append('ruby')
  rubys.append('rb')
    .html((d) -> d.text)

  ### lemma ###
  rubys.filter((d) -> d.lemma?).append('rt')
    .attr('class', 'lemma')
    .text((d) -> d.lemma)

  ### store textual representations into data ###
  elems.each (d) ->
    d.elem = this

  ### VISUALIZATION ###
  svg = d3.select('#annotations')

  # tooltip shared by the part-of-speech symbol and the proper noun halo
  pos_label = (d) -> (if d.proper then 'proper ' else '') + d.pos

  # Disabled: SVG lemma labels (lemmas are rendered as <rt> elements above).
  # lemmas = svg.selectAll('.lemma')
  #   .data(data.filter((d) -> d.lemma?))
  #   .enter().append('text')
  #   .attr('class', 'lemma')
  #   .text((d) -> d.lemma)

  ### proper noun halo ###
  proper_r = 9
  propers = svg.selectAll('.proper')
    .data(data.filter((d) -> d.proper))
    .enter().append('circle')
    .attr('class', 'proper')
    .attr('r', proper_r)
  propers.append('title')
    .text(pos_label)

  ### pos ###
  # each symbol is a <use> of the matching #pos_* path declared in <defs>
  poss = svg.selectAll('.pos')
    .data(data.filter((d) -> d.pos?))
    .enter().append('use')
    .attr('class', 'pos')
    .attr('xlink:href', (d) -> "#pos_#{d.pos}")
  poss.append('title')
    .text(pos_label)

  ### named entity halo ###
  # netype is never assigned while the entity code above stays commented out,
  # so this selection is currently empty
  nes = svg.selectAll('.ne')
    .data(data.filter((d) -> d.netype?))
    .enter().append('circle')
    .attr('class', 'ne')

  ### normal token underline ###
  tokens = svg.selectAll('.token')
    .data(data.filter((d) -> not d.skip))
    .enter().append('rect')
    .attr('class', 'token')

  ### polarity underline (positive, negative or neutral) ###
  polarities = svg.selectAll('.polarity')
    .data(data.filter((d) -> d.polarity in ['positive', 'negative', 'neutral']))
    .enter().append('path')
    .attr('class', 'polarity')
  polarities.append('title')
    .text((d) -> "#{d.polarity} polarity")

  ### sentiment modifier underline (intensifier or weakener) ###
  senmods = svg.selectAll('.senmod')
    .data(data.filter((d) -> d.sentiment_modifier in ['intensifier', 'weakener']))
    .enter().append('path')
    .attr('class', 'senmod')
  senmods.append('title')
    .text((d) -> d.sentiment_modifier)

  ### visualization parameters ###
  gap = 0 # distance between token underlines
  dist = 1 # distance between text and token underlines
  th = 1 # thickness of token and polarity underlines
  ldist = 10 # distance between token underlines and lemma baselines (disabled SVG lemmas only)
  pold = 22
  # pos symbol center (from underline bottom left corner)
  pos_dx = 4
  pos_dy = 6
  # parameters that control the curvature of the polarity underline
  xc = 2
  yc = 12
  neradius = 22 # radius of ne halos
  necolor = d3.scale.ordinal()
    .domain(['person', 'location', 'date', 'organization', 'misc'])
    .range(['#00A777', '#F58020', '#999', '#00A1CF', '#E08566'])

  # path of the polarity underline: a flat bar for neutral, a curved one otherwise
  polarity_path = (d) ->
    x1 = d.bbox.left + gap / 2
    x2 = d.bbox.right - gap / 2
    y = d.bbox.bottom + pold + dist - 2 * yc / 3
    y_eq = d.bbox.bottom + pold + dist - 2 * th
    if d.polarity is 'neutral'
      "M#{x1} #{y_eq} L#{x2} #{y_eq} L#{x2} #{y_eq+th} L#{x1} #{y_eq+th}"
    else
      "M#{x1} #{y} C#{x1+xc} #{y+yc} #{x2-xc} #{y+yc} #{x2} #{y} L#{x2} #{y+th} C#{x2-xc} #{y+th+yc} #{x1+xc} #{y+th+yc} #{x1} #{y+th} z"

  # path of the sentiment modifier wedge: rising for intensifiers, falling otherwise
  senmod_path = (d) ->
    x1 = d.bbox.left + gap / 2
    x2 = d.bbox.right - gap / 2
    yl = d.bbox.bottom + pold + dist - 2 * th + 2
    yh = yl - 5
    if d.sentiment_modifier is 'intensifier'
      "M#{x1} #{yl} L#{x2} #{yh} L#{x2} #{yl} L#{x1} #{yl}"
    else
      "M#{x1} #{yh} L#{x2} #{yl} L#{x2} #{yl} L#{x1} #{yl}"

  ### redraw the annotations ###
  redraw = ->
    ### adapt the annotation svg to the text div ###
    text_bbox = d3.select('#text').node().getBoundingClientRect()
    svg
      .attr('width', text_bbox.width)
      .attr('height', text_bbox.height)

    ### compute new bboxes ###
    # getBoundingClientRect() is relative to the viewport, while the shapes are
    # positioned in the SVG's coordinate system: subtract the SVG origin so a
    # redraw that happens while the page is scrolled stays aligned.
    origin = svg.node().getBoundingClientRect()
    for d in data
      rect = d.elem.getBoundingClientRect()
      d.bbox =
        left: rect.left - origin.left
        right: rect.right - origin.left
        top: rect.top - origin.top
        bottom: rect.bottom - origin.top
        width: rect.right - rect.left
        height: rect.bottom - rect.top

    tokens
      .attr('x', (d) -> d.bbox.left + gap / 2)
      .attr('y', (d) -> d.bbox.bottom + dist)
      .attr('width', (d) -> d.bbox.width - gap)
      .attr('height', th)

    # Disabled together with the SVG lemma labels:
    # lemmas
    #   .attr('x', (d) -> d.bbox.left+d.bbox.width/2)
    #   .attr('y', (d) -> d.bbox.bottom+dist+th+ldist)

    poss
      .attr('x', (d) -> d.bbox.left + gap + pos_dx)
      .attr('y', (d) -> d.bbox.bottom + dist + th + pos_dy)

    propers
      .attr('cx', (d) -> d.bbox.left + gap + pos_dx)
      .attr('cy', (d) -> d.bbox.bottom + dist + th + pos_dy)

    polarities.attr('d', polarity_path)
    # negative polarity reuses the curved shape, mirrored vertically
    polarities.filter((d) -> d.polarity is 'negative')
      .attr('transform', (d) -> "scale(1,-1) translate(0,#{-2*(d.bbox.bottom+pold+dist-2)})")

    senmods.attr('d', senmod_path)

    nes
      .attr('cx', (d) -> d.bbox.left + d.bbox.width / 2)
      .attr('cy', (d) -> d.bbox.top + d.bbox.height / 2)
      .attr('r', neradius)
      .attr('fill', (d) -> necolor(d.netype))

  redraw()
  # addEventListener keeps any resize handler installed elsewhere intact
  window.addEventListener 'resize', redraw
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
html, body {
  margin: 0;
  padding: 0;
  background: white;
}

/* Text layer holding one <span> per token. */
#text {
  position: absolute;
  /* this is needed to have svg events work */
  pointer-events: none;
  line-height: 4em;
  font-family: Georgia;
  font-size: 18px;
  /*text-align: justify;*/
  /* padding is used to make sure the svg fits */
  padding: 12px;
}

#text > span {
  padding-left: 1px;
  padding-right: 1px;
}

rb {
  /* this enables text selection */
  pointer-events: all;
  padding-bottom: 2px;
}

rt {
  padding-left: 16px;
  padding-right: 16px;
}

/* SVG layer holding the annotation shapes. */
#annotations {
  position: absolute;
}

.token {
  fill: #999;
}

.lemma {
  font-size: 9px;
  font-family: sans-serif;
  text-anchor: middle;
  color: #999;
  text-align: center;
}

/* Place the ruby text below its base text. */
ruby {
  /* legacy keyword understood by WebKit-based engines */
  -webkit-ruby-position: after;
  /* kept from the original stylesheet; engines that reject it ignore the line */
  ruby-position: after;
  /* standard keyword; declared last so it wins wherever it is supported */
  ruby-position: under;
}

.pos, .polarity, .senmod {
  fill: #2A9DC2;
}

.proper {
  fill: #555;
}

.ne, .proper {
  fill-opacity: 0.15;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="description" content="OpeNER - Text annotation visualization" />
    <title>OpeNER - Text annotation visualization</title>
    <link rel="stylesheet" href="index.css">
    <!-- Loaded over HTTPS so the scripts are not blocked as mixed content
         when this page itself is served over HTTPS. -->
    <script src="https://d3js.org/d3.v3.min.js"></script>
    <script src="https://d3js.org/queue.v1.min.js"></script>
  </head>
  <body>
    <!-- Annotation layer; index.js appends its shapes here. -->
    <svg id="annotations">
      <defs>
        <!-- Part-of-speech symbols, referenced from index.js as #pos_<name>. -->
        <path id="pos_noun" d="m -3,-3 6,0 0,6 -6,0 z"/>
        <path id="pos_verb" d="M -3,-4 4,0 -3,4 z"/>
        <path id="pos_adjective" d="m -3,-3 0,6 6,0 0,-6 z m 2,2 2,0 0,2 -2,0 z"/>
        <path id="pos_adverb" d="M -3 -4 L -3 4 L 4 0 L -3 -4 z M -1.4375 -1.5 L 1.1875 0 L -1.4375 1.5 L -1.4375 -1.5 z"/>
        <path id="pos_pronoun" d="M -3 -3 L -3 -1 L -1 -1 L -1 -3 L -3 -3 z M 1 -3 L 1 -1 L 3 -1 L 3 -3 L 1 -3 z M -3 1 L -3 3 L -1 3 L -1 1 L -3 1 z M 1 1 L 1 3 L 3 3 L 3 1 L 1 1 z"/>
        <path id="pos_preposition" d="m -1,-6 0,5 2,0 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_determiner" d="m -1,-6 0,5 2,0 0,-5 z"/>
        <path id="pos_conjunction" d="m -1,-6 0,3 -2,0 0,2 2,0 0,2 2,0 0,-2 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_other" d="m -1,-6 0,2 2,0 0,-2 z"/>
      </defs>
    </svg>
    <!-- Text layer; index.js fills it with one <span> per token. -->
    <div id="text"></div>
    <script src="index.js"></script>
  </body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() {
  /*
   * Fetch one review, render its terms as annotated text inside #text and
   * draw the matching annotation shapes into the #annotations SVG.
   *
   * d3 v3 invokes a two-argument callback with (error, data).
   */
  d3.json("http://tour-pedia.org/api/getReviewDetails?id=533eccf9ae9eef521e6292b1", function(error, kaf_data) {
    var terms = kaf_data && kaf_data.analysis && kaf_data.analysis.json && kaf_data.analysis.json.terms;

    // Stop here rather than crashing on a failed request or on a payload
    // without terms.
    if (error || terms == null) {
      console.error('Could not load review annotations', error);
      return;
    }

    /* convert to VAnn format */

    // Part-of-speech codes of the payload mapped to the names used by the
    // #pos_* symbols. 'R' (proper noun) is drawn as a noun and flagged
    // separately through token.proper.
    var POS_NAMES = {
      N: 'noun',
      R: 'noun',
      V: 'verb',
      G: 'adjective',
      A: 'adverb',
      Q: 'pronoun',
      P: 'preposition',
      D: 'determiner',
      C: 'conjunction'
    };

    var data = [];
    Object.keys(terms).forEach(function(tid) {
      var term = terms[tid];
      var token = {
        text: term.text
      };
      if (term.lemma != null) {
        token.lemma = term.lemma;
      }
      if (term.pos != null) {
        // unknown codes (including 'O') yield undefined, i.e. no symbol
        token.pos = Object.prototype.hasOwnProperty.call(POS_NAMES, term.pos) ? POS_NAMES[term.pos] : void 0;
        // 'other' parts of speech are emitted as plain text, without a <ruby>
        if (term.pos === 'O') {
          token.skip = true;
        }
        if (term.pos === 'R') {
          token.proper = true;
        }
      }
      if (term.sentiment != null) {
        // polarity takes precedence: a modifier is only kept when no polarity is given
        if (term.sentiment.polarity != null) {
          token.polarity = term.sentiment.polarity;
        } else if (term.sentiment.sentiment_modifier != null) {
          token.sentiment_modifier = term.sentiment.sentiment_modifier;
        }
      }
      data.push(token);
      // separator between consecutive terms
      data.push({
        text: ' ',
        skip: true
      });
    });

    var isSkipped = function(d) {
      return Boolean(d.skip);
    };
    var isAnnotated = function(d) {
      return !d.skip;
    };
    var textOf = function(d) {
      return d.text;
    };
    // tooltip shared by the part-of-speech symbol and the proper noun halo
    var posLabel = function(d) {
      return (d.proper ? 'proper ' : '') + d.pos;
    };

    // one <span> per entry of data (terms and separators alike)
    var elems = d3.select('#text').selectAll('span').data(data).enter().append('span');

    // NOTE(review): text coming from the remote API is inserted as HTML. The
    // CoffeeScript source states html is needed here; confirm the API output
    // is trusted or sanitize it before relying on this.
    elems.filter(isSkipped).html(textOf);
    var rubys = elems.filter(isAnnotated).append('ruby');
    rubys.append('rb').html(textOf);

    /* lemma */
    rubys.filter(function(d) {
      return d.lemma != null;
    }).append('rt').attr('class', 'lemma').text(function(d) {
      return d.lemma;
    });

    /* store textual representations into data */
    elems.each(function(d) {
      d.elem = this;
    });

    /* VISUALIZATION */
    var svg = d3.select('#annotations');

    /* proper noun halo */
    var proper_r = 9;
    var propers = svg.selectAll('.proper').data(data.filter(function(d) {
      return Boolean(d.proper);
    })).enter().append('circle').attr('class', 'proper').attr('r', proper_r);
    propers.append('title').text(posLabel);

    /* pos: each symbol is a <use> of the matching #pos_* path in <defs> */
    var poss = svg.selectAll('.pos').data(data.filter(function(d) {
      return d.pos != null;
    })).enter().append('use').attr('class', 'pos').attr('xlink:href', function(d) {
      return "#pos_" + d.pos;
    });
    poss.append('title').text(posLabel);

    /* named entity halo (netype is never assigned in this file, so this
       selection is currently empty) */
    var nes = svg.selectAll('.ne').data(data.filter(function(d) {
      return d.netype != null;
    })).enter().append('circle').attr('class', 'ne');

    /* normal token underline */
    var tokens = svg.selectAll('.token').data(data.filter(isAnnotated)).enter().append('rect').attr('class', 'token');

    /* polarity underline (positive, negative or neutral) */
    var polarities = svg.selectAll('.polarity').data(data.filter(function(d) {
      return d.polarity === 'positive' || d.polarity === 'negative' || d.polarity === 'neutral';
    })).enter().append('path').attr('class', 'polarity');
    polarities.append('title').text(function(d) {
      return "" + d.polarity + " polarity";
    });

    /* sentiment modifier underline (intensifier or weakener) */
    var senmods = svg.selectAll('.senmod').data(data.filter(function(d) {
      return d.sentiment_modifier === 'intensifier' || d.sentiment_modifier === 'weakener';
    })).enter().append('path').attr('class', 'senmod');
    senmods.append('title').text(function(d) {
      return d.sentiment_modifier;
    });

    /* visualization parameters */
    var gap = 0; // distance between token underlines
    var dist = 1; // distance between text and token underlines
    var th = 1; // thickness of token and polarity underlines
    var pold = 22;
    // pos symbol center (from underline bottom left corner)
    var pos_dx = 4;
    var pos_dy = 6;
    // parameters that control the curvature of the polarity underline
    var xc = 2;
    var yc = 12;
    var neradius = 22; // radius of ne halos
    var necolor = d3.scale.ordinal().domain(['person', 'location', 'date', 'organization', 'misc']).range(['#00A777', '#F58020', '#999', '#00A1CF', '#E08566']);

    // path of the polarity underline: a flat bar for neutral, a curved one otherwise
    var polarityPath = function(d) {
      var x1 = d.bbox.left + gap / 2;
      var x2 = d.bbox.right - gap / 2;
      var y = d.bbox.bottom + pold + dist - 2 * yc / 3;
      var y_eq = d.bbox.bottom + pold + dist - 2 * th;
      if (d.polarity === 'neutral') {
        return "M" + x1 + " " + y_eq + " L" + x2 + " " + y_eq + " L" + x2 + " " + (y_eq + th) + " L" + x1 + " " + (y_eq + th);
      }
      return "M" + x1 + " " + y + " C" + (x1 + xc) + " " + (y + yc) + " " + (x2 - xc) + " " + (y + yc) + " " + x2 + " " + y + " L" + x2 + " " + (y + th) + " C" + (x2 - xc) + " " + (y + th + yc) + " " + (x1 + xc) + " " + (y + th + yc) + " " + x1 + " " + (y + th) + " z";
    };

    // path of the sentiment modifier wedge: rising for intensifiers, falling otherwise
    var senmodPath = function(d) {
      var x1 = d.bbox.left + gap / 2;
      var x2 = d.bbox.right - gap / 2;
      var yl = d.bbox.bottom + pold + dist - 2 * th + 2;
      var yh = yl - 5;
      if (d.sentiment_modifier === 'intensifier') {
        return "M" + x1 + " " + yl + " L" + x2 + " " + yh + " L" + x2 + " " + yl + " L" + x1 + " " + yl;
      }
      return "M" + x1 + " " + yh + " L" + x2 + " " + yl + " L" + x2 + " " + yl + " L" + x1 + " " + yl;
    };

    /* redraw the annotations */
    var redraw = function() {
      /* adapt the annotation svg to the text div */
      var textBox = d3.select('#text').node().getBoundingClientRect();
      svg.attr('width', textBox.width).attr('height', textBox.height);

      /* compute new bboxes */
      // getBoundingClientRect() is relative to the viewport, while the shapes
      // are positioned in the SVG's coordinate system: subtract the SVG origin
      // so a redraw that happens while the page is scrolled stays aligned.
      var origin = svg.node().getBoundingClientRect();
      data.forEach(function(d) {
        var rect = d.elem.getBoundingClientRect();
        d.bbox = {
          left: rect.left - origin.left,
          right: rect.right - origin.left,
          top: rect.top - origin.top,
          bottom: rect.bottom - origin.top,
          width: rect.right - rect.left,
          height: rect.bottom - rect.top
        };
      });

      tokens.attr('x', function(d) {
        return d.bbox.left + gap / 2;
      }).attr('y', function(d) {
        return d.bbox.bottom + dist;
      }).attr('width', function(d) {
        return d.bbox.width - gap;
      }).attr('height', th);

      poss.attr('x', function(d) {
        return d.bbox.left + gap + pos_dx;
      }).attr('y', function(d) {
        return d.bbox.bottom + dist + th + pos_dy;
      });

      propers.attr('cx', function(d) {
        return d.bbox.left + gap + pos_dx;
      }).attr('cy', function(d) {
        return d.bbox.bottom + dist + th + pos_dy;
      });

      polarities.attr('d', polarityPath);
      // negative polarity reuses the curved shape, mirrored vertically
      polarities.filter(function(d) {
        return d.polarity === 'negative';
      }).attr('transform', function(d) {
        return "scale(1,-1) translate(0," + (-2 * (d.bbox.bottom + pold + dist - 2)) + ")";
      });

      senmods.attr('d', senmodPath);

      nes.attr('cx', function(d) {
        return d.bbox.left + d.bbox.width / 2;
      }).attr('cy', function(d) {
        return d.bbox.top + d.bbox.height / 2;
      }).attr('r', neradius).attr('fill', function(d) {
        return necolor(d.netype);
      });
    };

    redraw();
    // addEventListener keeps any resize handler installed elsewhere intact
    window.addEventListener('resize', redraw);
  });
}).call(this);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This experiment needs a readme! :D