Skip to content

Instantly share code, notes, and snippets.

@pthrasher
Last active December 15, 2015 07:29
Show Gist options
  • Select an option

  • Save pthrasher/5223308 to your computer and use it in GitHub Desktop.

Select an option

Save pthrasher/5223308 to your computer and use it in GitHub Desktop.
_ = require 'underscore'
fs = require 'fs'
# Read the whole input text into memory once; everything below works on this string.
corpusPath = 'alice.txt'
rawText = fs.readFileSync corpusPath, 'utf8'
# Collect the distinct words of `text`.
#
# The text is lowercased first, then every run of four or more letters a-z
# that sits between word boundaries is taken as a word.
#
# text - String to scan.
#
# Returns the matches with duplicates removed by `_.uniq`.
getWords = (text) ->
  lowered = text.toLowerCase()
  matches = lowered.match(/\b[a-z]{4,}\b/g)
  _.uniq(matches)
# Bucket every unordered pair of words by the sorted letters of the two words
# concatenated, so that pairs which are anagrams of one another share a key.
#
# words - Array of Strings.
#
# Returns an Object mapping each sorted-letter key to a flat Array holding the
# pairs that produced it: [first1, second1, first2, second2, ...]. Within a
# pair, the word with the lower index in `words` comes first.
genAndRoll = (words) ->
  stash = {}
  for firstword, a in words
    # Starting at a + 1 visits each unordered pair exactly once, in the same
    # order a full scan would first meet it, without tracking seen pairs.
    for b in [a + 1...words.length] by 1
      secondword = words[b]
      key = (firstword + secondword).split('').sort().join('')
      if stash.hasOwnProperty(key)
        stash[key].push firstword, secondword
      else
        stash[key] = [firstword, secondword]
  stash
# Print every bucket that contains at least `minPairs` pairs sharing no word.
#
# rolled   - Object whose values are flat Arrays of word pairs
#            ([first1, second1, first2, second2, ...]), as built by genAndRoll.
# minPairs - Minimum number of word-disjoint pairs a bucket needs in order to
#            be printed (default: 10).
#
# Pairs are picked greedily in array order: a pair is kept only if neither of
# its words was used by an earlier kept pair. Each qualifying bucket is
# written with console.log as "a b, c d, ...". The return value (the implicit
# loop comprehension) carries no meaningful data.
filterRolled = (rolled, minPairs = 10) ->
  for key, val of rolled
    # Two entries per pair: a shorter array can never reach minPairs.
    if val.length >= 2 * minPairs
      good = []
      seen = {}
      for word, i in val by 2
        _word = val[i + 1]
        unless seen.hasOwnProperty(word) or seen.hasOwnProperty(_word)
          good.push "#{word} #{_word}"
          seen[word] = 1
          seen[_word] = 1
      if good.length >= minPairs
        console.log good.join ', '
# Run `step` between console.time / console.timeEnd for `label` and hand back
# whatever it returned.
timed = (label, step) ->
  console.time label
  result = step()
  console.timeEnd label
  result

# Pipeline: extract the words, bucket word pairs, then print the qualifying
# buckets, timing each stage under its own label.
words = timed 'getWords', -> getWords rawText
stashed = timed 'stashed', -> genAndRoll words
timed 'filtered', -> filterRolled stashed
@pthrasher
Copy link
Copy Markdown
Author

@tebriel Latest revision runs in ~33s

    ¬ anagram: time coffee anagrams.coffee

getWords: 97ms
stashed: 33516ms
earth soles, other seals, those earls, later shoes, roast heels, hoarse lest, shore tales, lose hearts, least horse, share stole
cats shore, reach toss, shoes cart, sorts each, case short, coast hers, rats chose, hate cross, crash toes, cost share
filtered: 1467ms
coffee anagrams.coffee  34.89s user 0.64s system 100% cpu 35.313 total

@pthrasher
Copy link
Copy Markdown
Author

I bet you could get even better perf using linked lists instead of arrays for each key.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment