garbados · December 17, 2015 21:09
diff --git a/gistfile1.coffee b/gistfile1.coffee
 # Map function: for a document that has both `user` and a non-empty string
 # `text`, emit one row keyed by `doc.user.screen_name` whose value is the
 # list of word n-grams found in `doc.text`.
 # `emit` is not defined in this file; presumably it is the global supplied
 # by the CouchDB view server -- confirm against the deployment.
 map = (doc) ->
   # Number of tokens per n-gram (the "n" in n-gram).
   size = 4

   # Split `arr` into consecutive, non-overlapping slices of exactly `len`
   # items. A shorter trailing remainder is dropped.
   chunk = (arr, len) ->
     chunks = []
     i = 0
     while i + len <= arr.length
       chunks.push arr.slice(i, i + len)
       i += len
     chunks

   # Split `value` on runs of whitespace. String::split yields empty strings
   # for leading/trailing whitespace (and for an empty input); those are not
   # tokens, so they are filtered out instead of leaking into the n-grams.
   tokenize = (value) ->
     (token for token in value.split(/\s+/) when token.length > 0)

   # Return every run of `size` consecutive tokens in `value`.
   # Chunking the token list once per starting offset 0...size covers all
   # sliding windows; results are grouped by offset, not by position.
   process = (value) ->
     tokens = tokenize value
     ngrams = []
     for offset in [0...size]
       ngrams = ngrams.concat chunk(tokens.slice(offset), size)
     ngrams

   # Skip documents without a user or without usable text; the type check
   # keeps a non-string `text` from throwing inside `tokenize`.
   if doc.user and doc.text and typeof doc.text is 'string'
     emit(doc.user.screen_name, process(doc.text))
	# Map function: for a document that has both `user` and a non-empty string
	# `text`, emit one row keyed by `doc.user.screen_name` whose value is the
	# list of word n-grams found in `doc.text`.
	# `emit` is not defined in this file; presumably it is the global supplied
	# by the CouchDB view server -- confirm against the deployment.
	map = (doc) ->
		# Number of tokens per n-gram (the "n" in n-gram).
		size = 4

		# Split `arr` into consecutive, non-overlapping slices of exactly `len`
		# items. A shorter trailing remainder is dropped.
		chunk = (arr, len) ->
			chunks = []
			i = 0
			while i + len <= arr.length
				chunks.push arr.slice(i, i + len)
				i += len
			chunks

		# Split `value` on runs of whitespace. String::split yields empty strings
		# for leading/trailing whitespace (and for an empty input); those are not
		# tokens, so they are filtered out instead of leaking into the n-grams.
		tokenize = (value) ->
			(token for token in value.split(/\s+/) when token.length > 0)

		# Return every run of `size` consecutive tokens in `value`.
		# Chunking the token list once per starting offset 0...size covers all
		# sliding windows; results are grouped by offset, not by position.
		process = (value) ->
			tokens = tokenize value
			ngrams = []
			for offset in [0...size]
				ngrams = ngrams.concat chunk(tokens.slice(offset), size)
			ngrams

		# Skip documents without a user or without usable text; the type check
		# keeps a non-string `text` from throwing inside `tokenize`.
		if doc.user and doc.text and typeof doc.text is 'string'
			emit(doc.user.screen_name, process(doc.text))