Skip to content

Instantly share code, notes, and snippets.

@erochest
Created February 3, 2012 16:43
Show Gist options
  • Save erochest/1731042 to your computer and use it in GitHub Desktop.
Save erochest/1731042 to your computer and use it in GitHub Desktop.
Playing around with a CoffeeScript console on a Prism document page.
# http://jashkenas.github.com/coffee-script/
# https://developer.mozilla.org/en/JavaScript/Reference/
# http://docs.jquery.com/Main_Page
# http://mbostock.github.com/d3/
# 1.
#
# The simplest possible snippet: pop up a browser alert dialog.
alert 'Danger, Will Robinson!'
# 2.
#
# This highlights all the words that start with 'N' (including the 'gn...' and
# 'kn...' spellings) by wrapping each one in <span class="hi">.
#
# Only text nodes are rewritten. Running the regex over `text.html()` would
# also match inside the markup itself -- tag names, attribute values, and
# entities such as `&nbsp;` -- and corrupt the page.
text = $('#text')

# Turn one plain-text string into HTML with the N-words wrapped. The string is
# parsed as HTML afterwards, so the markup characters & < > are escaped in the
# same pass.
highlightNWords = (plain) ->
  entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' }
  plain.replace(
    /\b[gk]?n\w*|[&<>]/gi,
    (hit) ->
      if /^\w/.test(hit)
        '<span class="hi">' + hit + '</span>'
      else
        entities[hit]
  )

# Collect #text and all of its descendants. `.andSelf()` was renamed to
# `.addBack()` in newer jQuery releases, so use whichever one is available.
elements = text.find('*')
elements = if elements.addBack? then elements.addBack() else elements.andSelf()

elements
  .contents()
  # nodeType 3 is a text node.
  .filter( -> this.nodeType == 3 )
  .replaceWith( -> highlightNWords(this.nodeValue) )

$('.hi').css(
  background: '#ccc'
)
# 3.
#
# Let's tokenize everything.

# Return a jQuery set holding every text node that is a child of `el` or of
# any of its descendants.
#
# el -- anything `$()` accepts (selector string, DOM element, jQuery object).
getTextNodes = (el) ->
  descendants = $(el).find('*')
  # `.andSelf()` was deprecated in jQuery 1.8 and removed in 3.0 in favour of
  # `.addBack()`; prefer the new name but keep working on old jQuery.
  everything =
    if descendants.addBack?
      descendants.addBack()
    else
      descendants.andSelf()
  everything
    .contents()
    # 3 is the value of Node.TEXT_NODE; the literal avoids depending on the
    # constant being exposed on every node instance.
    .filter( -> this.nodeType == 3 )
# Build the HTML that replaces one text node: every run of word characters is
# wrapped in a span whose classes are the lower-cased word and 't'.
#
# text -- an object with a `textContent` string (normally a DOM text node).
#
# Returns an HTML string. `textContent` is plain text, but the result is parsed
# as HTML, so any literal & < > between the words is escaped here; otherwise
# text such as "a <b> c" would be turned into real markup.
addTokenSpans = (text) ->
  entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' }
  text.textContent.replace(
    /\w+|[&<>]/g,
    (input) ->
      # A match is either a word or exactly one of the three markup characters.
      if /^\w/.test(input)
        "<span class='#{input.toLowerCase()} t'>#{ input }</span>"
      else
        entities[input]
  )
# Swap every text node under #text for its tokenized HTML.
texts = getTextNodes '#text'
texts.replaceWith ->
  addTokenSpans this
# Inspect the elements now: every token sits in its own span, classed with the
# lower-cased word plus 't'.
#
# Let's pretend we're Yossarian: black out one word, then black out all of
# them, and sign the letter.
blackout = { background: '#000' }
$('.the').css blackout
$('.t').css blackout

signature = '<p>I yearn for you tragically. A. T. Tappman, Chaplain, U.S. Army</p>'
$('#text p:last-child').after signature

# Now we can undo all that: drop the added paragraph and make the token
# backgrounds transparent again.
root = $('#text')
root.find('p:last-child').remove()
root.find('.t').css 'background-color', 'transparent'
# 4.
#
# Finally, something a little more complicated. Let's do some data
# visualizations.
#
# Everything's still tokenized, so let's use that to get the frequencies of
# each token.
#
# texts -- a collection with a jQuery-style `each(callback)` that invokes the
#          callback with `this` bound to each node; each node's `textContent`
#          is one token.
#
# Returns a JavaScript object whose property names are the lower-cased tokens
# and whose values are their frequencies. For the purposes of this, think of
# it as a Hash.
countTokens = (texts) ->
  hasOwn = Object::hasOwnProperty
  freq = {}
  texts.each( ->
    token = this.textContent.toLowerCase()
    # Use an own-property check: a plain `freq[token]?` also sees members
    # inherited from Object.prototype, so a token such as "constructor" would
    # be "incremented" into NaN instead of being counted.
    if hasOwn.call(freq, token)
      freq[token]++
    else
      freq[token] = 1
    # Return nothing so the callback can never return false, which jQuery
    # treats as "stop iterating".
    return
  )
  freq
# The visualization library wants an array of records rather than one big
# object, so each (token, frequency) pair gets converted into a record.
#
# type  -- the token.
# count -- how many times it occurred.
#
# Returns a JS object with the properties `type` and `count`.
toData = (type, count) ->
  type: type
  count: count
# Count the tokens on the page and keep only those that occur more than once.
tokens = countTokens $('#text .t')
data = []
for type, count of tokens
  data.push toData(type, count) if count > 1
# Sort the records by frequency, largest first.
#
# A sort comparison function returns
#
# * a negative number if a should come before b
# * 0 if they are equal
# * a positive number if a should come after b
#
# Only the sign matters, so subtracting two numbers is enough. The operands
# are swapped (b minus a) to get descending order.
byCountDescending = (a, b) ->
  b.count - a.count
data.sort byCountDescending
# Now let's add a place to hang the visualization: a new DIV inserted before
# the first paragraph of the text and floated right, 450px wide.
$('#text p:first').before '<div id="viz"></div>'
$('#viz')
  .css('float', 'right')
  .css('width', '450px')
# Second, we need something to scale the data: a linear scale mapping counts
# from 0 up to the largest count onto widths from 10px to 450px.
scale = d3.scale.linear()
scale.domain [0, d3.max(data, (d) -> d.count)]
scale.range ['10px', '450px']
# Now we create the visualization: one div per record, appended inside #viz,
# sized by the scale and labelled "token (count)". Run it, and then we can
# step through the styling.
viz = d3.select('#viz').selectAll('div').data(data).enter().append('div')

# Fixed styling shared by every bar, applied in order.
barStyles = [
  ['font', '10pt sans-serif']
  ['background-color', '#4682B4']
  ['text-align', 'right']
  ['padding', '3px']
  ['margin', '1px']
  ['color', 'white']
]
for [property, value] in barStyles
  viz.style property, value

# Data-driven parts: bar width and label.
viz.style 'width', (d) -> scale(d.count)
viz.text (d) -> "#{d.type} (#{d.count})"
# 5.
#
# Now let's take that data and create a histogram of the frequencies.
#
# The first bit is copied from above, just to make sure we have access to the
# data.
#
# texts -- a collection with a jQuery-style `each(callback)` that invokes the
#          callback with `this` bound to each node; each node's `textContent`
#          is one token.
#
# Returns a JavaScript object mapping each lower-cased token to the number of
# times it occurs.
countTokens = (texts) ->
  hasOwn = Object::hasOwnProperty
  freq = {}
  texts.each( ->
    token = this.textContent.toLowerCase()
    # Use an own-property check: a plain `freq[token]?` also sees members
    # inherited from Object.prototype, so a token such as "constructor" would
    # be "incremented" into NaN instead of being counted.
    if hasOwn.call(freq, token)
      freq[token]++
    else
      freq[token] = 1
    # Return nothing so the callback can never return false, which jQuery
    # treats as "stop iterating".
    return
  )
  freq
# The visualization library wants an array of records rather than one big
# object, so each (token, frequency) pair gets converted into a record.
#
# type  -- the token.
# count -- how many times it occurred.
#
# Returns a JS object with the properties `type` and `count`.
toData = (type, count) ->
  type: type
  count: count
# Count the tokens on the page; this time every token is kept, including the
# ones that occur only once.
tokens = countTokens $('#text .t')
data = []
for type, count of tokens
  data.push toData(type, count)
# Now to histogramming.
#
# data -- an array of records that each have a `count` property.
#
# Returns an object keyed by count value; each value is the number of records
# in `data` that have that count.
makeBuckets = (data) ->
  buckets = {}
  for { count } in data
    buckets[count] = 0 unless buckets[count]?
    buckets[count]++
  buckets
# Package one histogram bucket as a record.
#
# freq  -- the token frequency this bucket stands for.
# count -- how many tokens have that frequency.
#
# Returns a JS object with the properties `freq` and `count`.
toHistData = (freq, count) ->
  freq: freq
  count: count
# Bucket the token records by frequency, convert the buckets to an array of
# records, and order them by ascending frequency.
buckets = makeBuckets data
histData = []
for freq, count of buckets
  histData.push toHistData(freq, count)
histData.sort (a, b) -> a.freq - b.freq
# We'll need a new scale, keyed off of the histogram: bucket sizes from 0 up
# to the largest bucket map onto widths from 10px to 450px.
scale = d3.scale.linear()
scale.domain [0, d3.max(histData, (d) -> d.count)]
scale.range ['10px', '450px']
# First, clear out the visualization space by removing the child elements
# currently inside #viz.
$('#viz').children().remove()

# Then draw one bar per histogram bucket: the width comes from the bucket
# size and the label is the frequency.
histBars = d3.select('#viz').selectAll('div').data(histData).enter().append('div')
histBars.style 'font', '10pt sans-serif'
histBars.style 'background-color', 'steelblue'
histBars.style 'padding', '3px'
histBars.style 'margin', '1px'
histBars.style 'color', 'white'
histBars.style 'width', (d) -> scale(d.count)
histBars.text (d) -> d.freq.toString()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment