bmaland · November 18, 2010 19:05
diff --git a/fdist.py b/fdist.py
 # Lower-case every entry of `words` first, then build the frequency distribution.
 lowered_words = [w.lower() for w in words]
 fdist = nltk.FreqDist(lowered_words)
diff --git a/fdist2.py b/fdist2.py
 # Map every value in `fdist` to its group() label, then count the labels.
 bucket_labels = [group(freq) for freq in fdist.values()]
 fdist_freq = nltk.FreqDist(bucket_labels)
diff --git a/group.py b/group.py
 def group(i):
     """Return the bucket label for the count ``i``.

     Counts below 11 are labelled with their own value (``str(i)``);
     larger counts are binned as "11-50", "51-100" or ">100".
     """
     # Ordered comparisons instead of ``i in range(...)``: membership in a
     # range only matches exact integers, so a non-integer count such as
     # 75.5 used to fall through to ">100". This also avoids creating a
     # range object on every call.
     if i < 11:
         return str(i)
     if i <= 50:
         return "11-50"
     if i <= 100:
         return "51-100"
     return ">100"
diff --git a/words.py b/words.py
 ## Assuming that nltk is available, and that the text is in the current
 ## directory, named 'twain-tomsawyer.txt'
 import nltk
 # The same file name is used both to build the reader and to select the file to read.
 corpus_file = 'twain-tomsawyer.txt'
 reader = nltk.corpus.reader.PlaintextCorpusReader('.', corpus_file)
 words = reader.words(corpus_file)  # List of all the words in the text
	## Assuming that nltk is available, and that the text is in the current
	## directory, named 'twain-tomsawyer.txt'
	import nltk
	# The same file name is used both to build the reader and to select the file to read.
	corpus_file = 'twain-tomsawyer.txt'
	reader = nltk.corpus.reader.PlaintextCorpusReader('.', corpus_file)
	words = reader.words(corpus_file)  # List of all the words in the text
No results found