jarobins · December 15, 2015 15:19 · jarobins · Mar 31, 2013
diff --git a/word_counter.py b/word_counter.py
 # Tally how often each word occurs in gs2.txt and write the counts,
 # most frequent first, to output.txt.  Runs on Python 2.7 and 3.3+.
 from collections import Counter
 from datetime import datetime
 import io
 import operator
 import string

 # Maps each ASCII punctuation code point to None, which makes translate()
 # delete that character.  Unlike string.maketrans("", "") this also works
 # on decoded (unicode) text.
 table = dict.fromkeys(map(ord, string.punctuation))

 # io.open decodes UTF-8 while reading; the with-block closes the handle.
 with io.open('gs2.txt', encoding='utf-8') as txt_file:
     word_list = txt_file.read().split()

 # Strip punctuation without the old str() round-trip, which raised
 # UnicodeEncodeError on any non-ASCII word.
 # TODO (04APR2013): also lowercase the words so counts ignore case.
 word_list_dup = [word.translate(table) for word in word_list]

 print("Starting find at %s" % datetime.now())
 # One pass over the words replaces calling list.count() once per distinct
 # word, so the periodic progress report is no longer needed.  An empty
 # input simply yields an empty tally instead of an uncaught StopIteration.
 thelist = Counter(word_list_dup)
 print("Finished find at %s" % datetime.now())

 # Highest count first.
 sorted_items = sorted(
     thelist.items(), key=operator.itemgetter(1), reverse=True)
 with io.open('output.txt', 'w', encoding='utf-8') as d:
     for word, count in sorted_items:
         d.write(u'%s %d\n' % (word, count))
	# Tally how often each word occurs in gs2.txt and write the counts,
	# most frequent first, to output.txt.  Runs on Python 2.7 and 3.3+.
	from collections import Counter
	from datetime import datetime
	import io
	import operator
	import string

	# Maps each ASCII punctuation code point to None, which makes translate()
	# delete that character.  Unlike string.maketrans("", "") this also works
	# on decoded (unicode) text.
	table = dict.fromkeys(map(ord, string.punctuation))

	# io.open decodes UTF-8 while reading; the with-block closes the handle.
	with io.open('gs2.txt', encoding='utf-8') as txt_file:
	    word_list = txt_file.read().split()

	# Strip punctuation without the old str() round-trip, which raised
	# UnicodeEncodeError on any non-ASCII word.
	# TODO (04APR2013): also lowercase the words so counts ignore case.
	word_list_dup = [word.translate(table) for word in word_list]

	print("Starting find at %s" % datetime.now())
	# One pass over the words replaces calling list.count() once per distinct
	# word, so the periodic progress report is no longer needed.  An empty
	# input simply yields an empty tally instead of an uncaught StopIteration.
	thelist = Counter(word_list_dup)
	print("Finished find at %s" % datetime.now())

	# Highest count first.
	sorted_items = sorted(
	    thelist.items(), key=operator.itemgetter(1), reverse=True)
	with io.open('output.txt', 'w', encoding='utf-8') as d:
	    for word, count in sorted_items:
	        d.write(u'%s %d\n' % (word, count))
No results found