bmaland · August 25, 2009 10:49
diff --git a/freq-of-freq.py b/freq-of-freq.py
 #!/usr/bin/env python

 import nltk

 ## Tom Sawyer, by Mark Twain.
 ## Available from the Gutenberg Project:
 ## http://www.gutenberg.org/files/74/74.txt
 # Reader for 'twain-tomsawyer.txt', looked up in the current directory.
 reader = nltk.corpus.reader.PlaintextCorpusReader(
     '.', 'twain-tomsawyer.txt')
 # List of all the words in the text
 words = reader.words('twain-tomsawyer.txt')

 # Tally how often each lower-cased word occurs.
 fdist = nltk.FreqDist(token.lower() for token in words)

 ## Returns which of the 13 labels ("1".."10", "11-50", "51-100", ">100") a frequency count is tallied under
 def group(i):
     """Return the bucket label for a frequency count ``i``.

     Counts below 11 are labelled with their own value (``str(i)``);
     anything larger is labelled "11-50", "51-100" or ">100".
     """
     # Plain comparisons instead of ``i in range(...)``: no list has to be
     # built and scanned on Python 2, and a non-integer count between 11 and
     # 100 no longer fails every membership test and ends up in ">100".
     if i < 11:
         return str(i)
     if i < 51:
         return "11-50"
     if i < 101:
         return "51-100"
     return ">100"

 ## Now we can create another frequency distribution of the previously obtained
 ## values:
 # Run every word count through group() and tally the resulting labels.
 fdist_freq = nltk.FreqDist(map(group, fdist.values()))

 # Plot the frequency-of-frequencies distribution.
 fdist_freq.plot()
	#!/usr/bin/env python

	import nltk

	## Tom Sawyer, by Mark Twain.
	## Available from the Gutenberg Project:
	## http://www.gutenberg.org/files/74/74.txt
	# Reader for 'twain-tomsawyer.txt', looked up in the current directory.
	reader = nltk.corpus.reader.PlaintextCorpusReader(
	    '.', 'twain-tomsawyer.txt')
	# List of all the words in the text
	words = reader.words('twain-tomsawyer.txt')

	# Tally how often each lower-cased word occurs.
	fdist = nltk.FreqDist(token.lower() for token in words)

	## Returns which of the 13 labels ("1".."10", "11-50", "51-100", ">100") a frequency count is tallied under
	def group(i):
	    """Return the bucket label for a frequency count ``i``.

	    Counts below 11 are labelled with their own value (``str(i)``);
	    anything larger is labelled "11-50", "51-100" or ">100".
	    """
	    # The body is indented under the ``def`` (it had been flattened to the
	    # same level, which does not parse).  Plain comparisons replace
	    # ``i in range(...)``: no list is built and scanned on Python 2, and a
	    # non-integer count between 11 and 100 no longer fails every
	    # membership test and ends up in ">100".
	    if i < 11:
	        return str(i)
	    if i < 51:
	        return "11-50"
	    if i < 101:
	        return "51-100"
	    return ">100"

	## Now we can create another frequency distribution of the previously obtained
	## values:
	# Run every word count through group() and tally the resulting labels.
	fdist_freq = nltk.FreqDist(map(group, fdist.values()))

	# Plot the frequency-of-frequencies distribution.
	fdist_freq.plot()
No results found