gupul2k · December 10, 2012 22:59
diff --git a/Most_freq_500_BoWs.py b/Most_freq_500_BoWs.py
 #!/usr/bin/python
# Script to print the N most frequent BoWs (words) of a corpus, i.e. a lexicon.
# Date: Nov 2 2012
# Author: Hota Sobhan
#
# Output: one "<word> <count>" line per entry, most frequent first.
# Runs unchanged on Python 2.7 and Python 3.

from collections import Counter
from string import punctuation
from operator import itemgetter

# Maximum number of entries printed.
# NOTE(review): the script name/header say "500" but the limit is 1000; the
# value is left unchanged here -- confirm which one is intended.
N = 1000

# Raw string so the backslashes can never be read as escape sequences.
CORPUS_PATH = r"C:\Python27\Corpus.txt"


def tokenize(lines):
    """Yield the lower-cased words found in *lines*, one at a time.

    Each line is split on whitespace and every token has its leading and
    trailing punctuation stripped.  Tokens made only of punctuation
    (e.g. "--") strip down to an empty string and are skipped, so the
    empty string is never counted as a word.
    """
    for line in lines:
        for token in line.split():
            word = token.strip(punctuation).lower()
            if word:
                yield word


def count_words(lines):
    """Return a Counter mapping each word in *lines* to its frequency."""
    return Counter(tokenize(lines))


def most_frequent(counts, limit=N):
    """Return up to *limit* ``(word, frequency)`` pairs, most frequent first.

    The sort is stable, so words with equal frequency keep the order in
    which *counts* iterates over them.
    """
    return sorted(counts.items(), key=itemgetter(1), reverse=True)[:limit]


def main(path=CORPUS_PATH, limit=N):
    """Print the *limit* most frequent words of the corpus file at *path*.

    Raises whatever ``open`` raises if the corpus file cannot be read.
    """
    # The with-block closes the corpus file even if counting fails.
    with open(path) as corpus:
        counts = count_words(corpus)

    for word, frequency in most_frequent(counts, limit):
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print("%s %d" % (word, frequency))


if __name__ == "__main__":
    main()
	#!/usr/bin/python
# Script to print the N most frequent BoWs (words) of a corpus, i.e. a lexicon.
# Date: Nov 2 2012
# Author: Hota Sobhan
#
# Output: one "<word> <count>" line per entry, most frequent first.
# Runs unchanged on Python 2.7 and Python 3.

from collections import Counter
from string import punctuation
from operator import itemgetter

# Maximum number of entries printed.
# NOTE(review): the script name/header say "500" but the limit is 1000; the
# value is left unchanged here -- confirm which one is intended.
N = 1000

# Raw string so the backslashes can never be read as escape sequences.
CORPUS_PATH = r"C:\Python27\Corpus.txt"


def tokenize(lines):
    """Yield the lower-cased words found in *lines*, one at a time.

    Each line is split on whitespace and every token has its leading and
    trailing punctuation stripped.  Tokens made only of punctuation
    (e.g. "--") strip down to an empty string and are skipped, so the
    empty string is never counted as a word.
    """
    for line in lines:
        for token in line.split():
            word = token.strip(punctuation).lower()
            if word:
                yield word


def count_words(lines):
    """Return a Counter mapping each word in *lines* to its frequency."""
    return Counter(tokenize(lines))


def most_frequent(counts, limit=N):
    """Return up to *limit* ``(word, frequency)`` pairs, most frequent first.

    The sort is stable, so words with equal frequency keep the order in
    which *counts* iterates over them.
    """
    return sorted(counts.items(), key=itemgetter(1), reverse=True)[:limit]


def main(path=CORPUS_PATH, limit=N):
    """Print the *limit* most frequent words of the corpus file at *path*.

    Raises whatever ``open`` raises if the corpus file cannot be read.
    """
    # The with-block closes the corpus file even if counting fails.
    with open(path) as corpus:
        counts = count_words(corpus)

    for word, frequency in most_frequent(counts, limit):
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print("%s %d" % (word, frequency))


if __name__ == "__main__":
    main()
No results found