johndavidback · August 13, 2014 14:35
diff --git a/analyze.py b/analyze.py
 # Usage:
 # $ python analyze.py somefile.txt
 # Easy breezy.

 import sys
 import string
 import operator

 # The 50 most common English words, taken from Wikipedia:
 # http://en.wikipedia.org/wiki/Most_common_words_in_English
 # The list is lowercased because analyze() lowercases every token before
 # checking it against this list; a capitalised 'I' would never match.
 COMMON_WORDS = 'the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me'.lower().split()

 def analyze(textfile=None):
     """Print how often each uncommon word occurs in a text file.

     The file content is lowercased, stripped of the punctuation characters
     listed in ``exclude`` (apostrophes and hyphens are kept, so "don't" and
     "well-known" stay single words) and split on whitespace.  Words found in
     COMMON_WORDS are skipped.  One "word count" line is printed per
     remaining word, highest count first; equal counts are ordered
     alphabetically so the output is deterministic.

     Args:
         textfile: Path of the file to analyze.  When omitted, the first
             command-line argument (sys.argv[1]) is used.

     Raises:
         SystemExit: If no path was passed and none is on the command line.
         IOError / OSError: If the file cannot be opened or read.
     """
     # Function-scope import keeps this block self-contained.
     from collections import Counter

     if textfile is None:
         if len(sys.argv) < 2:
             # Exit with a usage hint instead of a bare IndexError.
             sys.exit('Usage: python analyze.py somefile.txt')
         textfile = sys.argv[1]

     with open(textfile) as f:
         text = f.read()

     # Similar to string.punctuation, minus the apostrophe and the hyphen.
     exclude = set('!"#$%&()*+,./:;<=>?@[\\]^_`{|}~')
     words = ''.join(ch.lower() for ch in text if ch not in exclude).split()

     # Tokens are lowercase, so compare against lowercased stop words; a set
     # makes each membership test O(1).
     stopwords = set(word.lower() for word in COMMON_WORDS)
     counts = Counter(word for word in words if word not in stopwords)

     # items() and the parenthesised print work on both Python 2.7 and 3.
     for word, count in sorted(counts.items(),
                               key=lambda item: (-item[1], item[0])):
         print('%s %d' % (word, count))


 # Run the analysis only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == '__main__':
    analyze()
	# Usage:
	# $ python analyze.py somefile.txt
	# Easy breezy.

	import sys
	import string
	import operator

	# The 50 most common English words, taken from Wikipedia:
	# http://en.wikipedia.org/wiki/Most_common_words_in_English
	# The list is lowercased because analyze() lowercases every token before
	# checking it against this list; a capitalised 'I' would never match.
	COMMON_WORDS = 'the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me'.lower().split()

	def analyze(textfile=None):
		"""Print how often each uncommon word occurs in a text file.

		The file content is lowercased, stripped of the punctuation characters
		listed in ``exclude`` (apostrophes and hyphens are kept, so "don't" and
		"well-known" stay single words) and split on whitespace.  Words found
		in COMMON_WORDS are skipped.  One "word count" line is printed per
		remaining word, highest count first; equal counts are ordered
		alphabetically so the output is deterministic.

		Args:
			textfile: Path of the file to analyze.  When omitted, the first
				command-line argument (sys.argv[1]) is used.

		Raises:
			SystemExit: If no path was passed and none is on the command line.
			IOError / OSError: If the file cannot be opened or read.
		"""
		# Function-scope import keeps this block self-contained.
		from collections import Counter

		if textfile is None:
			if len(sys.argv) < 2:
				# Exit with a usage hint instead of a bare IndexError.
				sys.exit('Usage: python analyze.py somefile.txt')
			textfile = sys.argv[1]

		with open(textfile) as f:
			text = f.read()

		# Similar to string.punctuation, minus the apostrophe and the hyphen.
		# The pipe is written plainly: '\|' is not a valid string escape.
		exclude = set('!"#$%&()*+,./:;<=>?@[\\]^_`{|}~')
		words = ''.join(ch.lower() for ch in text if ch not in exclude).split()

		# Tokens are lowercase, so compare against lowercased stop words; a
		# set makes each membership test O(1).
		stopwords = set(word.lower() for word in COMMON_WORDS)
		counts = Counter(word for word in words if word not in stopwords)

		# items() and the parenthesised print work on both Python 2.7 and 3.
		for word, count in sorted(counts.items(),
				key=lambda item: (-item[1], item[0])):
			print('%s %d' % (word, count))


	# Run the analysis only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == '__main__':
		analyze()
No results found