erochest · February 3, 2012 16:43
diff --git a/scratch.coffee b/scratch.coffee
 # http://jashkenas.github.com/coffee-script/
 # https://developer.mozilla.org/en/JavaScript/Reference/
 # http://docs.jquery.com/Main_Page
 # http://mbostock.github.com/d3/
 # 1.
 #
 # Alert!
 # Show the warning message in a browser alert dialog.
 alert 'Danger, Will Robinson!'

 # 2.
 #
 # This highlights all the words that start with 'N', including words spelled
 # with a 'g' or 'k' in front of the 'n' (e.g. "gnat", "know").

 # Grab the container once; its markup is rewritten in place below.
 text = $('#text')

 # Wrap every match in <span class="hi">; `$&` re-inserts the matched text.
 # Note that the pattern runs over the serialized HTML string rather than over
 # text nodes, so it can also match inside tag names, attributes, or entities.
 text.html text.html().replace(/\b[gk]?n\w*/gi, '<span class="hi">$&</span>')

 # Give each highlighted word a light grey background.
 $('.hi').css background: '#ccc'

 # 3.
 #
 # Let's tokenize everything.

 # Collect every text node inside `el`, at any depth.
 #
 # el - anything `$()` accepts (this file passes the selector '#text').
 #
 # Returns a jQuery set holding the child nodes of `el` and of all of its
 # descendants, filtered down to text nodes.
 getTextNodes = (el) ->
   descendants = $(el).find('*')
   # `.andSelf()` was deprecated in jQuery 1.8 and removed in 3.0 in favour of
   # `.addBack()`; use whichever one this jQuery build provides.
   withRoot = if descendants.addBack? then descendants.addBack() else descendants.andSelf()
   # 3 is the DOM nodeType of a text node. The literal is used because the
   # per-node `TEXT_NODE` constant is not defined in every browser.
   withRoot.contents().filter( -> this.nodeType == 3 )

 # Build the HTML that replaces one text node: each run of word characters is
 # wrapped in <span class='WORD t'>, where WORD is the lowercased token.
 #
 # text - an object with a `textContent` string (this file passes text nodes).
 #
 # Returns an HTML string. The text between tokens is HTML-escaped: the result
 # is inserted as markup, and `textContent` is already entity-decoded, so a
 # literal "</" or "<!" in the text would otherwise be parsed as markup.
 addTokenSpans = (text) ->
   escapeHtml = (raw) ->
     raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')

   # `\w+|\W+` splits the whole string into alternating word / non-word runs,
   # so every character is handled exactly once.
   text.textContent.replace(/\w+|\W+/g, (piece) ->
     if /^\w/.test(piece)
       "<span class='#{piece.toLowerCase()} t'>#{piece}</span>"
     else
       escapeHtml(piece)
   )

 # Swap every text node under #text for its tokenized markup.
 texts = getTextNodes '#text'
 texts.replaceWith -> addTokenSpans @

 # Now look at the elements. Each token is in its own span, with a class of the
 # word and 't'.
 #
 # Let's pretend we're Yossarian.

 # Black out every occurrence of "the" first...
 $('.the').css background: '#000'

 # ...then every token on the page.
 $('.t').css background: '#000'

 # Add the sign-off as a new paragraph after each <p> matched by
 # `#text p:last-child`.
 $('#text p:last-child').after '<p>I yearn for you tragically. A. T. Tappman, Chaplain, U.S. Army</p>'

 # Now we can undo all that.

 # Drop the paragraph(s) that are now last children inside #text...
 $('#text p:last-child').remove()
 # ...and make every token's background transparent again.
 $('#text .t').css 'background-color', 'transparent'

 # 4.
 #
 # Finally, something a little more complicated. Let's do some data
 # visualizations.
 #
 # Everything's still tokenized, so let's use that to get the frequencies of
 # each token.
 #
 # This returns a JavaScript object with properties of the tokens and values of
 # the frequencies. For the purposes of this, think of it as a Hash.

 # Count how often each token occurs.
 #
 # texts - a collection with a jQuery-style `each(fn)` that calls `fn` with
 #         `this` bound to each element; each element exposes `textContent`.
 #
 # Returns a plain object mapping each lowercased token to its count.
 countTokens = (texts) ->
   counts = {}
   hasOwn = Object::hasOwnProperty
   texts.each( ->
     # Named `token`, not `text`: CoffeeScript has no `var`, so assigning to
     # `text` here would overwrite the file-level `text` set in section 2.
     token = this.textContent.toLowerCase()
     # Own-property test instead of `counts[token]?`: the existence check is
     # also true for names inherited from Object.prototype (for example
     # "constructor"), which made `++` run on a function and store NaN.
     if hasOwn.call(counts, token)
       counts[token]++
     else
       counts[token] = 1
   )
   counts

 # The visualization library that we're going to use needs the data formatted
 # slightly differently, however. This handles the conversion.
 #
 # `toData` takes one token and its frequency (one entry of the object returned
 # by countTokens) and returns a JS object with the properties `type` and
 # `count`. Applying it to every entry yields the array the library needs.

 # Pair one token with its frequency.
 #
 # type  - the token.
 # count - how many times it occurred.
 #
 # Returns an object with `type` and `count` properties.
 toData = (type, count) -> { type, count }

 # Tally the tokenized spans, then keep only tokens that occur more than once.
 tokens = countTokens $('#text .t')
 data = []
 for type, count of tokens when count > 1
   data.push toData(type, count)

 # Order by descending count. A sort comparator only has to return a negative
 # number, zero, or a positive number (only the sign matters), so a plain
 # subtraction is enough; putting `b` first reverses the order.
 data.sort (a, b) -> b.count - a.count

 # Now let's add a place to hang the visualization. The visualization will be
 # built from plain HTML DIVs (one bar per token), so let's put a container DIV
 # that's floated right at the top of the text.

 # First, we need someplace to put the data. We'll create a new div and hang it
 # on the right next to the text.

 # Insert an empty container just before the first paragraph of the text...
 $('#text p:first').before '<div id="viz"></div>'

 # ...and float it right at a fixed width.
 $('#viz').css float: 'right', width: '450px'

 # Second, we need to have something to scale the data.

 # Linear scale from counts (0 up to the largest count in `data`) to CSS
 # widths between 10px and 450px.
 scale = d3.scale.linear()
 scale.domain [0, d3.max(data, (d) -> d.count)]
 scale.range ['10px', '450px']

 # Now we create the visualization. Run it, and then we can style through it.

 # Bind `data` to the <div>s under #viz and append one <div> per entering datum.
 viz = d3.select('#viz').selectAll('div').data(data).enter().append('div')

 # Fixed styling shared by all bars.
 viz
   .style('font', '10pt sans-serif')
   .style('background-color', '#4682B4')
   .style('text-align', 'right')
   .style('padding', '3px')
   .style('margin', '1px')
   .style('color', 'white')

 # Per-datum width (via `scale`) and a "token (count)" label.
 viz
   .style('width', (d) -> scale(d.count))
   .text((d) -> "#{d.type} (#{d.count})")

 # 5.
 #
 # Now let's take that data and create a histogram of the frequencies.
 #
 # The first bit is copied from above, just to make sure we have access to the
 # data.

 # Count how often each token occurs (redefined here so this section can be
 # run without the previous one).
 #
 # texts - a collection with a jQuery-style `each(fn)` that calls `fn` with
 #         `this` bound to each element; each element exposes `textContent`.
 #
 # Returns a plain object mapping each lowercased token to its count.
 countTokens = (texts) ->
   counts = {}
   hasOwn = Object::hasOwnProperty
   texts.each( ->
     # Named `token`, not `text`: CoffeeScript has no `var`, so assigning to
     # `text` here would overwrite the file-level `text` set in section 2.
     token = this.textContent.toLowerCase()
     # Own-property test instead of `counts[token]?`: the existence check is
     # also true for names inherited from Object.prototype (for example
     # "constructor"), which made `++` run on a function and store NaN.
     if hasOwn.call(counts, token)
       counts[token]++
     else
       counts[token] = 1
   )
   counts

 # The visualization library that we're going to use needs the data formatted
 # slightly differently, however. This handles the conversion.
 #
 # `toData` takes one token and its frequency (one entry of the object returned
 # by countTokens) and returns a JS object with the properties `type` and
 # `count`. Applying it to every entry yields the array the library needs.

 # Wrap a token and its frequency in a single record.
 #
 # type  - the token.
 # count - how many times it occurred.
 #
 # Returns an object with `type` and `count` properties.
 toData = (type, count) -> { type, count }

 # Recount the tokenized spans; unlike section 4, tokens that occur only once
 # are kept.
 tokens = countTokens $('#text .t')
 data = []
 for type, count of tokens
   data.push toData(type, count)

 # Now to histogramming.

 # Histogram the frequencies: how many entries share each count.
 #
 # data - an array of objects with a `count` property.
 #
 # Returns an object keyed by count; each value is the number of entries in
 # `data` that have that count.
 makeBuckets = (data) ->
   tally = {}
   for entry in data
     key = entry.count
     tally[key] = if tally[key]? then tally[key] + 1 else 1
   tally

 # Pair one frequency with the number of tokens that have it.
 #
 # freq  - the frequency (the bucket key).
 # count - how many tokens occur with that frequency.
 #
 # Returns an object with `freq` and `count` properties.
 toHistData = (freq, count) -> { freq, count }

 # Bucket the counts, reshape them for d3, and order by ascending frequency.
 buckets = makeBuckets data
 histData = []
 for freq, count of buckets
   histData.push toHistData(freq, count)
 # `freq` arrives as an object key (a string); the subtraction coerces it to
 # a number for the comparison.
 histData.sort (a, b) -> a.freq - b.freq

 # We'll need a new scale, keyed off of the histograms.
 # Linear scale from bucket sizes (0 up to the largest `count` in `histData`)
 # to CSS widths between 10px and 450px.
 scale = d3.scale.linear()
 scale.domain [0, d3.max(histData, (d) -> d.count)]
 scale.range ['10px', '450px']

 # First, clear out the visualization space.
 # Remove the element children left in #viz by the previous chart.
 $('#viz').children().remove()

 # Append one bar per bucket: width from the bucket's size, label from its
 # frequency.
 d3.select('#viz').selectAll('div').data(histData).enter().append('div')
   .style('font', '10pt sans-serif')
   .style('background-color', 'steelblue')
   .style('padding', '3px')
   .style('margin', '1px')
   .style('color', 'white')
   .style('width', (d) -> scale(d.count))
   .text((d) -> d.freq.toString())
	# http://jashkenas.github.com/coffee-script/
	# https://developer.mozilla.org/en/JavaScript/Reference/
	# http://docs.jquery.com/Main_Page
	# http://mbostock.github.com/d3/
	# 1.
	#
	# Alert!
	# Show the warning message in a browser alert dialog.
	alert 'Danger, Will Robinson!'

	# 2.
	#
	# This highlights all the words that start with 'N', including words spelled
	# with a 'g' or 'k' in front of the 'n' (e.g. "gnat", "know").

	# Grab the container once; its markup is rewritten in place below.
	text = $('#text')

	# Wrap every match in <span class="hi">; `$&` re-inserts the matched text.
	# The calls are written on one line each because the multi-line argument
	# lists had lost the indentation CoffeeScript relies on.
	# Note that the pattern runs over the serialized HTML string rather than over
	# text nodes, so it can also match inside tag names, attributes, or entities.
	text.html text.html().replace(/\b[gk]?n\w*/gi, '<span class="hi">$&</span>')

	# Give each highlighted word a light grey background.
	$('.hi').css background: '#ccc'

	# 3.
	#
	# Let's tokenize everything.

	# Collect every text node inside `el`, at any depth.
	#
	# el - anything `$()` accepts (this file passes the selector '#text').
	#
	# Returns a jQuery set holding the child nodes of `el` and of all of its
	# descendants, filtered down to text nodes.
	getTextNodes = (el) ->
		# The body must be indented under the `->`; without indentation the
		# function is empty and the chain runs at file level.
		descendants = $(el).find('*')
		# `.andSelf()` was deprecated in jQuery 1.8 and removed in 3.0 in favour
		# of `.addBack()`; use whichever one this jQuery build provides.
		withRoot = if descendants.addBack? then descendants.addBack() else descendants.andSelf()
		# 3 is the DOM nodeType of a text node. The literal is used because the
		# per-node `TEXT_NODE` constant is not defined in every browser.
		withRoot.contents().filter( -> this.nodeType == 3 )

	# Build the HTML that replaces one text node: each run of word characters is
	# wrapped in <span class='WORD t'>, where WORD is the lowercased token.
	#
	# text - an object with a `textContent` string (this file passes text nodes).
	#
	# Returns an HTML string. The text between tokens is HTML-escaped: the result
	# is inserted as markup, and `textContent` is already entity-decoded, so a
	# literal "</" or "<!" in the text would otherwise be parsed as markup.
	addTokenSpans = (text) ->
		escapeHtml = (raw) ->
			raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')

		# `\w+|\W+` splits the whole string into alternating word / non-word
		# runs, so every character is handled exactly once.
		text.textContent.replace(/\w+|\W+/g, (piece) ->
			if /^\w/.test(piece)
				"<span class='#{piece.toLowerCase()} t'>#{piece}</span>"
			else
				escapeHtml(piece)
		)

	# Swap every text node under #text for its tokenized markup.
	texts = getTextNodes '#text'
	texts.replaceWith -> addTokenSpans @

	# Now look at the elements. Each token is in its own span, with a class of the
	# word and 't'.
	#
	# Let's pretend we're Yossarian.

	# Each call is written on one line: the multi-line argument lists had lost
	# the indentation CoffeeScript relies on.

	# Black out every occurrence of "the" first...
	$('.the').css background: '#000'

	# ...then every token on the page.
	$('.t').css background: '#000'

	# Add the sign-off as a new paragraph after each <p> matched by
	# `#text p:last-child`.
	$('#text p:last-child').after '<p>I yearn for you tragically. A. T. Tappman, Chaplain, U.S. Army</p>'

	# Now we can undo all that.

	# Drop the paragraph(s) that are now last children inside #text...
	$('#text p:last-child').remove()
	# ...and make every token's background transparent again.
	$('#text .t').css 'background-color', 'transparent'

	# 4.
	#
	# Finally, something a little more complicated. Let's do some data
	# visualizations.
	#
	# Everything's still tokenized, so let's use that to get the frequencies of
	# each token.
	#
	# This returns a JavaScript object with properties of the tokens and values of
	# the frequencies. For the purposes of this, think of it as a Hash.

	# Count how often each token occurs.
	#
	# texts - a collection with a jQuery-style `each(fn)` that calls `fn` with
	#         `this` bound to each element; each element exposes `textContent`.
	#
	# Returns a plain object mapping each lowercased token to its count.
	countTokens = (texts) ->
		# The body, the callback, and the if/else branches are re-indented:
		# flattened, the function was empty and the `else` had no `if`.
		counts = {}
		hasOwn = Object::hasOwnProperty
		texts.each( ->
			# Named `token`, not `text`: CoffeeScript has no `var`, so assigning
			# to `text` here would overwrite the file-level `text`.
			token = this.textContent.toLowerCase()
			# Own-property test instead of `counts[token]?`: the existence check
			# is also true for names inherited from Object.prototype (for
			# example "constructor"), which made `++` store NaN.
			if hasOwn.call(counts, token)
				counts[token]++
			else
				counts[token] = 1
		)
		counts

	# The visualization library that we're going to use needs the data formatted
	# slightly differently, however. This handles the conversion.
	#
	# `toData` takes one token and its frequency (one entry of the object returned
	# by countTokens) and returns a JS object with the properties `type` and
	# `count`. Applying it to every entry yields the array the library needs.

	# Pair one token with its frequency.
	#
	# type  - the token.
	# count - how many times it occurred.
	#
	# Returns an object with `type` and `count` properties. The body sits on the
	# same line as the `->`; on its own unindented line it was not part of the
	# function.
	toData = (type, count) -> { type: type, count: count }

	# Tally the tokenized spans, then keep only tokens that occur more than once.
	tokens = countTokens $('#text .t')
	data = []
	for type, count of tokens when count > 1
		data.push toData(type, count)

	# Order by descending count. A sort comparator only has to return a negative
	# number, zero, or a positive number (only the sign matters), so a plain
	# subtraction is enough; putting `b` first reverses the order.
	data.sort (a, b) -> b.count - a.count

	# Now let's add a place to hang the visualization. The visualization will be
	# built from plain HTML DIVs (one bar per token), so let's put a container DIV
	# that's floated right at the top of the text.

	# First, we need someplace to put the data. We'll create a new div and hang it
	# on the right next to the text.

	# Insert an empty container just before the first paragraph of the text...
	$('#text p:first').before '<div id="viz"></div>'

	# ...and float it right at a fixed width. The style map is passed on one
	# line because the multi-line argument list had lost its indentation.
	$('#viz').css float: 'right', width: '450px'

	# Second, we need to have something to scale the data.

	# Linear scale from counts (0 up to the largest count in `data`) to CSS
	# widths between 10px and 450px.
	scale = d3.scale.linear()
	scale.domain [0, d3.max(data, (d) -> d.count)]
	scale.range ['10px', '450px']

	# Now we create the visualization. Run it, and then we can style through it.

	# Bind `data` to the <div>s under #viz and append one <div> per entering datum.
	viz = d3.select('#viz').selectAll('div').data(data).enter().append('div')

	# Fixed styling shared by all bars.
	viz
		.style('font', '10pt sans-serif')
		.style('background-color', '#4682B4')
		.style('text-align', 'right')
		.style('padding', '3px')
		.style('margin', '1px')
		.style('color', 'white')

	# Per-datum width (via `scale`) and a "token (count)" label.
	viz
		.style('width', (d) -> scale(d.count))
		.text((d) -> "#{d.type} (#{d.count})")

	# 5.
	#
	# Now let's take that data and create a histogram of the frequencies.
	#
	# The first bit is copied from above, just to make sure we have access to the
	# data.

	# Count how often each token occurs (redefined here so this section can be
	# run without the previous one).
	#
	# texts - a collection with a jQuery-style `each(fn)` that calls `fn` with
	#         `this` bound to each element; each element exposes `textContent`.
	#
	# Returns a plain object mapping each lowercased token to its count.
	countTokens = (texts) ->
		# The body, the callback, and the if/else branches are re-indented:
		# flattened, the function was empty and the `else` had no `if`.
		counts = {}
		hasOwn = Object::hasOwnProperty
		texts.each( ->
			# Named `token`, not `text`: CoffeeScript has no `var`, so assigning
			# to `text` here would overwrite the file-level `text`.
			token = this.textContent.toLowerCase()
			# Own-property test instead of `counts[token]?`: the existence check
			# is also true for names inherited from Object.prototype (for
			# example "constructor"), which made `++` store NaN.
			if hasOwn.call(counts, token)
				counts[token]++
			else
				counts[token] = 1
		)
		counts

	# The visualization library that we're going to use needs the data formatted
	# slightly differently, however. This handles the conversion.
	#
	# `toData` takes one token and its frequency (one entry of the object returned
	# by countTokens) and returns a JS object with the properties `type` and
	# `count`. Applying it to every entry yields the array the library needs.

	# Wrap a token and its frequency in a single record.
	#
	# type  - the token.
	# count - how many times it occurred.
	#
	# Returns an object with `type` and `count` properties. The body sits on the
	# same line as the `->`; on its own unindented line it was not part of the
	# function.
	toData = (type, count) -> { type: type, count: count }

	# Recount the tokenized spans; unlike section 4, tokens that occur only once
	# are kept.
	tokens = countTokens $('#text .t')
	data = []
	for type, count of tokens
		data.push toData(type, count)

	# Now to histogramming.

	# Histogram the frequencies: how many entries share each count.
	#
	# data - an array of objects with a `count` property.
	#
	# Returns an object keyed by count; each value is the number of entries in
	# `data` that have that count.
	makeBuckets = (data) ->
		# The loop and its branches are re-indented under the `->`; flattened,
		# the function was empty. The accumulator is named `tally` so that it
		# does not assign to the file-level `buckets` variable.
		tally = {}
		for datum in data
			if tally[datum.count]?
				tally[datum.count]++
			else
				tally[datum.count] = 1
		tally

	# Pair one frequency with the number of tokens that have it.
	#
	# freq  - the frequency (the bucket key).
	# count - how many tokens occur with that frequency.
	#
	# Returns an object with `freq` and `count` properties. The body sits on the
	# same line as the `->`; on its own unindented line it was not part of the
	# function.
	toHistData = (freq, count) -> { freq: freq, count: count }

	# Bucket the counts, reshape them for d3, and order by ascending frequency.
	buckets = makeBuckets data
	histData = []
	for freq, count of buckets
		histData.push toHistData(freq, count)
	# `freq` arrives as an object key (a string); the subtraction coerces it to
	# a number for the comparison.
	histData.sort (a, b) -> a.freq - b.freq

	# We'll need a new scale, keyed off of the histograms.
	# Linear scale from bucket sizes (0 up to the largest `count` in `histData`)
	# to CSS widths between 10px and 450px.
	scale = d3.scale.linear()
	scale.domain [0, d3.max(histData, (d) -> d.count)]
	scale.range ['10px', '450px']

	# First, clear out the visualization space.
	# Remove the element children left in #viz by the previous chart.
	$('#viz').children().remove()

	# Append one bar per bucket: width from the bucket's size, label from its
	# frequency.
	d3.select('#viz').selectAll('div').data(histData).enter().append('div')
		.style('font', '10pt sans-serif')
		.style('background-color', 'steelblue')
		.style('padding', '3px')
		.style('margin', '1px')
		.style('color', 'white')
		.style('width', (d) -> scale(d.count))
		.text((d) -> d.freq.toString())