language-engineering · October 4, 2012 17:17
diff --git a/gistfile1.py b/gistfile1.py
 from sussex_nltk.corpus_readers import ReutersCorpusReader
 from sussex_nltk.stats import expected_token_freq

 # Draw one random sample of sentences from the Reuters corpus and report the
 # expected frequency of a single target token within that sample.
 rcr = ReutersCorpusReader()
 sample_size = 1000  # The number of sentences in a sample
 target_token = "elephant"  # The token whose expected frequency is reported

 # Randomly sample `sample_size` sentences, and get a list of the tokens in those sentences
 tokens = rcr.sample_words_by_sents(sample_size)

 # Calculate and print the expected token frequency for this one sample of tokens.
 # NOTE(review): the "per 5000 tokens" wording presumably reflects the default
 # normalisation used by expected_token_freq -- confirm against sussex_nltk.stats.
 # A single-argument print(...) produces the same output on Python 2 and Python 3,
 # whereas the bare print statement is a syntax error on Python 3.
 print("Expected token frequency per 5000 tokens %s" % expected_token_freq(tokens, target_token))
	from sussex_nltk.corpus_readers import ReutersCorpusReader
	from sussex_nltk.stats import expected_token_freq

	# Draw one random sample of sentences from the Reuters corpus and report the
	# expected frequency of a single target token within that sample.
	rcr = ReutersCorpusReader()
	sample_size = 1000  # The number of sentences in a sample
	target_token = "elephant"  # The token whose expected frequency is reported

	# Randomly sample `sample_size` sentences, and get a list of the tokens in those sentences
	tokens = rcr.sample_words_by_sents(sample_size)

	# Calculate and print the expected token frequency for this one sample of tokens.
	# NOTE(review): the "per 5000 tokens" wording presumably reflects the default
	# normalisation used by expected_token_freq -- confirm against sussex_nltk.stats.
	# A single-argument print(...) produces the same output on Python 2 and Python 3,
	# whereas the bare print statement is a syntax error on Python 3.
	print("Expected token frequency per 5000 tokens %s" % expected_token_freq(tokens, target_token))
No results found