language-engineering · October 1, 2012 16:16
diff --git a/gistfile1.py b/gistfile1.py
 #Take a random sample of sentences via ReutersCorpusReader, in two shapes
 #Import a corpus reader
 from sussex_nltk.corpus_readers import ReutersCorpusReader

 #Create the reader object that both sampling calls below are made on
 rcr = ReutersCorpusReader() 

 #Decide on the number of sentences that should be in your sample
 sample_size = 1000

 #See below: you have 2 different ways to get a random sample.
 #  1. As a list of tokens, or
 #  2. As a list of sentences
 #Which one you want depends on the statistic you want to calculate
 #NOTE(review): these are two separate calls; presumably each draws its own
 #random sample, so tokens and sents need not cover the same sentences - confirm

 #Sample sample_size (here 1000) sentences as a list of tokens
 tokens = rcr.sample_words_by_sents(sample_size)

 #Sample sample_size (here 1000) sentences as a list of sentences, where each sentence is a list of tokens
 sents = rcr.sample_sents(sample_size)
	#Take a random sample of sentences via ReutersCorpusReader, in two shapes
	#Import a corpus reader
	from sussex_nltk.corpus_readers import ReutersCorpusReader

	#Create the reader object that both sampling calls below are made on
	rcr = ReutersCorpusReader()

	#Decide on the number of sentences that should be in your sample
	sample_size = 1000

	#See below: you have 2 different ways to get a random sample.
	# 1. As a list of tokens, or
	# 2. As a list of sentences
	#Which one you want depends on the statistic you want to calculate
	#NOTE(review): these are two separate calls; presumably each draws its own
	#random sample, so tokens and sents need not cover the same sentences - confirm

	#Sample sample_size (here 1000) sentences as a list of tokens
	tokens = rcr.sample_words_by_sents(sample_size)

	#Sample sample_size (here 1000) sentences as a list of sentences, where each sentence is a list of tokens
	sents = rcr.sample_sents(sample_size)
No results found