ixtel · October 19, 2015 12:58
diff --git a/tokens.py b/tokens.py
 import nltk
 import string

 def tokenize(text):
    """Yield the lowercased, meaningful tokens of *text*.

    The text is split with ``nltk.word_tokenize``; English stopwords and
    tokens made up solely of punctuation characters are dropped.

    Args:
        text: The string to tokenize.

    Yields:
        Each remaining token, lowercased.
    """
    stopwords = set(nltk.corpus.stopwords.words('english'))
    for token in nltk.word_tokenize(text):
        lowered = token.lower()
        # NLTK's English stopword list is lowercase, so the comparison must
        # use the lowercased form; otherwise "The" would not be filtered.
        if lowered in stopwords:
            continue
        # A plain `token in string.punctuation` is a substring test and
        # misses multi-character tokens such as "..." or "``"; check every
        # character instead.
        if all(char in string.punctuation for char in token):
            continue
        yield lowered

 def count(text):
    """Return an ``nltk.FreqDist`` built from the tokens of *text*.

    Args:
        text: The string whose tokens are counted.

    Returns:
        The frequency distribution over ``tokenize(text)``.
    """
    tokens = tokenize(text)
    frequencies = nltk.FreqDist(tokens)
    return frequencies
    
 if __name__ == "__main__":
    # Demo: print the frequency of every token in a sample sentence.
    frequencies = count("The cat in the hat sat on the cat mat, with aplumb.")
    # FreqDist is dict-like: .items() yields (token, frequency) pairs,
    # whereas .values() yields bare counts that cannot be unpacked in two.
    # The loop variable is not named `count`, so the function stays bound.
    for token, frequency in frequencies.items():
        # Single-argument parenthesized print works on Python 2 and 3.
        print("%s: %i" % (token, frequency))
	import nltk
	import string

	def tokenize(text):
	    """Yield the lowercased, meaningful tokens of *text*.

	    The text is split with ``nltk.word_tokenize``; English stopwords and
	    tokens made up solely of punctuation characters are dropped.

	    Args:
	        text: The string to tokenize.

	    Yields:
	        Each remaining token, lowercased.
	    """
	    stopwords = set(nltk.corpus.stopwords.words('english'))
	    for token in nltk.word_tokenize(text):
	        lowered = token.lower()
	        # NLTK's English stopword list is lowercase, so the comparison
	        # must use the lowercased form; otherwise "The" is not filtered.
	        if lowered in stopwords:
	            continue
	        # A plain `token in string.punctuation` is a substring test and
	        # misses multi-character tokens such as "..." or "``"; check
	        # every character instead.
	        if all(char in string.punctuation for char in token):
	            continue
	        yield lowered

	def count(text):
	    """Return an ``nltk.FreqDist`` built from the tokens of *text*.

	    Args:
	        text: The string whose tokens are counted.

	    Returns:
	        The frequency distribution over ``tokenize(text)``.
	    """
	    # The body is nested under the def; with the return at the same
	    # level as the def, the function does not parse.
	    return nltk.FreqDist(tokenize(text))

	if __name__ == "__main__":
	    # Demo: print the frequency of every token in a sample sentence.
	    frequencies = count("The cat in the hat sat on the cat mat, with aplumb.")
	    # FreqDist is dict-like: .items() yields (token, frequency) pairs,
	    # whereas .values() yields bare counts that cannot be unpacked in two.
	    # The loop variable is not named `count`, so the function stays bound.
	    for token, frequency in frequencies.items():
	        # Single-argument parenthesized print works on Python 2 and 3.
	        print("%s: %i" % (token, frequency))
No results found