MattDietz · December 20, 2015 14:09 · MattDietz · Aug 2, 2013
diff --git a/gistfile1.txt b/gistfile1.txt
 # Word-frequency script: tally how often each word occurs in a text file
 # and print the (word, count) pairs ordered from least to most frequent.
 import collections
 import re

 # Punctuation characters deleted from every line before it is tokenized.
 strip_punc = re.compile("[!.,]")


 def count_words(lines):
     """Return a Counter mapping each lower-cased word to its occurrence count.

     ``lines`` is any iterable of strings (an open text file works). The
     characters ``!``, ``.`` and ``,`` are removed from each line, the
     line is lower-cased, and the remainder is split on whitespace.
     """
     words = collections.Counter()
     for line in lines:
         cleaned = strip_punc.sub('', line).lower()
         # split() with no argument splits on runs of whitespace and never
         # yields '' tokens, so trailing newlines and leading blanks are
         # not counted as an empty "word".
         words.update(cleaned.split())
     return words


 def main(path="a bunch of text"):
     """Count the words in the file at ``path`` and print the sorted tally.

     The printed value is a list of ``(word, count)`` tuples in ascending
     order of count.
     """
     # The context manager closes the file even if counting raises.
     with open(path, 'r') as data:
         words = count_words(data)

     sorted_words = sorted(words.items(), key=lambda x: x[1])
     print(sorted_words)


 if __name__ == "__main__":
     main()
	# Word-frequency script: tally how often each word occurs in a text file
	# and print the (word, count) pairs ordered from least to most frequent.
	import collections
	import re

	# Punctuation characters deleted from every line before it is tokenized.
	strip_punc = re.compile("[!.,]")


	def count_words(lines):
	    """Return a Counter mapping each lower-cased word to its occurrence count.

	    ``lines`` is any iterable of strings (an open text file works). The
	    characters ``!``, ``.`` and ``,`` are removed from each line, the
	    line is lower-cased, and the remainder is split on whitespace.
	    """
	    words = collections.Counter()
	    for line in lines:
	        cleaned = strip_punc.sub('', line).lower()
	        # split() with no argument splits on runs of whitespace and never
	        # yields '' tokens, so trailing newlines and leading blanks are
	        # not counted as an empty "word".
	        words.update(cleaned.split())
	    return words


	def main(path="a bunch of text"):
	    """Count the words in the file at ``path`` and print the sorted tally.

	    The printed value is a list of ``(word, count)`` tuples in ascending
	    order of count.
	    """
	    # The context manager closes the file even if counting raises.
	    with open(path, 'r') as data:
	        words = count_words(data)

	    sorted_words = sorted(words.items(), key=lambda x: x[1])
	    print(sorted_words)


	if __name__ == "__main__":
	    main()
No results found