THEMVFFINMAN · May 19, 2016 19:20
diff --git a/reducer.py b/reducer.py
 #!/usr/bin/env python
 # 
 # Adapted from script by Diana MacLean 2011
 #
 # Adapted for CS448G from Michael Noll's tutorial: http://www.michael-noll.com/tutorials/writing-an-hadoop-mapreduce-program-in-python/
 #
 #

 from operator import itemgetter
 import sys
 import operator

 # Reducer step: sum the per-word counts emitted by mapper.py and print them, most frequent first.

# Words that are never reported.  Kept as a frozenset so each membership
# test is O(1); matching is exact (case-sensitive), as before.
stop = frozenset([
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
    'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his',
    'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself',
    'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which',
    'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are',
    'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having',
    'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if',
    'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for',
    'with', 'about', 'against', 'between', 'into', 'through', 'during',
    'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in',
    'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then',
    'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any',
    'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no',
    'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's',
    't', 'can', 'will', 'just', 'don', 'should', 'now',
])


def _count_words(lines, stop_words=stop):
    """Sum the counts of every non-stop word found in ``lines``.

    Each line is expected to look like ``word<TAB>count``.  Lines that do
    not have that shape (blank lines, no tab, non-integer count) are
    skipped instead of aborting the whole reduce step.

    Returns a dict mapping each word to its total count.
    """
    word2count = {}
    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()
        try:
            # Both the unpacking (no tab in the line) and int() (count is
            # not a number) raise ValueError; either way the line is
            # malformed and is silently discarded.
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        # Stop words are never printed, so do not collect them at all.
        if word in stop_words:
            continue
        word2count[word] = word2count.get(word, 0) + count
    return word2count


def _rank_words(word2count):
    """Return ``(word, count)`` pairs sorted by count, highest first."""
    # sorted() works on both Python 2 and 3, unlike items().sort().
    return sorted(word2count.items(), key=operator.itemgetter(1), reverse=True)


def main(stdin=None, stdout=None):
    """Read ``word<TAB>count`` lines and write the summed, ranked totals.

    ``stdin`` and ``stdout`` default to the process streams; any iterable
    of lines and any object with ``write()`` may be passed instead.
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    # write the results to STDOUT (standard output)
    for word, count in _rank_words(_count_words(stdin)):
        stdout.write('%s\t%s\n' % (word, count))


if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	#
	# Adapted from script by Diana MacLean 2011
	#
	# Adapted for CS448G from Michael Noll's tutorial: http://www.michael-noll.com/tutorials/writing-an-hadoop-mapreduce-program-in-python/
	#
	#

	from operator import itemgetter
	import sys
	import operator

	# Reducer step: sum the per-word counts emitted by mapper.py and print them, most frequent first.

# Words that are never reported.  Kept as a frozenset so each membership
# test is O(1); matching is exact (case-sensitive), as before.
stop = frozenset([
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
    'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his',
    'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself',
    'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which',
    'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are',
    'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having',
    'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if',
    'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for',
    'with', 'about', 'against', 'between', 'into', 'through', 'during',
    'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in',
    'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then',
    'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any',
    'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no',
    'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's',
    't', 'can', 'will', 'just', 'don', 'should', 'now',
])


def _count_words(lines, stop_words=stop):
    """Sum the counts of every non-stop word found in ``lines``.

    Each line is expected to look like ``word<TAB>count``.  Lines that do
    not have that shape (blank lines, no tab, non-integer count) are
    skipped instead of aborting the whole reduce step.

    Returns a dict mapping each word to its total count.
    """
    word2count = {}
    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()
        try:
            # Both the unpacking (no tab in the line) and int() (count is
            # not a number) raise ValueError; either way the line is
            # malformed and is silently discarded.
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        # Stop words are never printed, so do not collect them at all.
        if word in stop_words:
            continue
        word2count[word] = word2count.get(word, 0) + count
    return word2count


def _rank_words(word2count):
    """Return ``(word, count)`` pairs sorted by count, highest first."""
    # sorted() works on both Python 2 and 3, unlike items().sort().
    return sorted(word2count.items(), key=operator.itemgetter(1), reverse=True)


def main(stdin=None, stdout=None):
    """Read ``word<TAB>count`` lines and write the summed, ranked totals.

    ``stdin`` and ``stdout`` default to the process streams; any iterable
    of lines and any object with ``write()`` may be passed instead.
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    # write the results to STDOUT (standard output)
    for word, count in _rank_words(_count_words(stdin)):
        stdout.write('%s\t%s\n' % (word, count))


if __name__ == '__main__':
    main()