laclefyoshi · March 5, 2011 04:28 · floer32 · Jan 8, 2015 · floer32 · Jan 8, 2015
diff --git a/mapreduce.py b/mapreduce.py
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # Copyright : (c) SAEKI Yoshiyasu
 # License   : MIT-style license
 #             <http://www.opensource.org/licenses/mit-license.php>
 # last updated: 2011/03/04

 from akka.actor import Actors, UntypedActor
 # from akka.dispatch import Futures
 import collections
 import string


 class MapReduceActor(UntypedActor):
    """Worker actor that applies a function to one piece of input.

    Every message is expected to be a ``(func, input_data)`` pair; the
    actor calls ``func(input_data)`` and replies with the result.
    """

    def onReceive(self, message):
        # Unpack in the body instead of in the signature: tuple parameters
        # were removed from the language by PEP 3113, while this form is
        # accepted by every Python version and behaves the same.
        func, input_data = message
        result = func(input_data)
        self.getContext().reply(result)


 class MapReduce(object):
    """Run a map/reduce job over a pool of ``MapReduceActor`` workers.

    Calling the instance starts ``numWorkers`` actors, maps every input
    item, groups the mapped ``(key, value)`` pairs by key, reduces every
    ``(key, values)`` group and finally stops the actors again.

    Args:
        mapFunc: called with one input item; must return an iterable of
            ``(key, value)`` pairs.
        reduceFunc: called with one ``(key, values)`` pair; its return
            value becomes one element of the final result list.
        numWorkers: number of actors created for each call.
        timeout: handed unchanged to ``sendRequestReplyFuture`` for every
            request (presumably milliseconds -- confirm against the Akka
            API).
    """

    def __init__(self, mapFunc, reduceFunc, numWorkers=4, timeout=180 * 1000):
        self.mapFunc = mapFunc
        self.reduceFunc = reduceFunc
        self.numWorkers = numWorkers
        self.timeout = timeout

    def __call__(self, input_data):
        """Execute the whole job and return the list of reduced values."""
        self.actors = [Actors.actorOf(MapReduceActor)
                       for _ in range(self.numWorkers)]
        started = []
        try:
            # Plain loops instead of map(): map() is lazy on Python 3, so
            # using it only for side effects would silently do nothing.
            for actor in self.actors:
                actor.start()
                started.append(actor)
            mapped_values = self.executeMap(input_data)
            partitioned_data = self.partition(mapped_values)
            return self.executeReduce(partitioned_data.items())
        finally:
            # Stop every actor that was started, even when a step raised,
            # so a failing job does not leave workers running.
            for actor in started:
                actor.stop()

    def executeMap(self, input_data):
        """Apply ``mapFunc`` to every item of ``input_data``."""
        return self.executeFunc(self.mapFunc, input_data)

    def executeReduce(self, input_data):
        """Apply ``reduceFunc`` to every item of ``input_data``."""
        return self.executeFunc(self.reduceFunc, input_data)

    def executeFunc(self, func, input_data):
        """Send one ``(func, item)`` request per item and collect replies.

        Items are dealt to the actors round-robin by index.  All requests
        are sent before any reply is awaited; results are returned in
        input order.
        """
        futures = []
        for i, data in enumerate(input_data):
            actor = self.actors[i % self.numWorkers]
            # The target actor is also passed as the third argument, as in
            # the original call (presumably the sender -- confirm).
            futures.append(
                actor.sendRequestReplyFuture((func, data), self.timeout,
                                             actor))
        for future in futures:
            # ``await`` is a reserved word from Python 3.7 on, so the
            # method is looked up by name to keep this module parseable.
            getattr(future, "await")()
        return [future.result().get() for future in futures]

    def partition(self, mapped_values):
        """Group mapped ``(key, value)`` pairs by key.

        Args:
            mapped_values: iterable of iterables of ``(key, value)`` pairs.

        Returns:
            A ``collections.defaultdict(list)`` mapping each key to the
            list of its values, in the order they were encountered.
        """
        partitioned_data = collections.defaultdict(list)
        for sublist in mapped_values:
            for key, value in sublist:
                partitioned_data[key].append(value)
        return partitioned_data


 def file_to_words(filename):
    """Read a text file and emit a ``(word, 1)`` pair per counted word.

    Punctuation is replaced by spaces, each line is split on whitespace
    and every token is lower-cased.  Tokens that are not purely alphabetic
    or that appear in the stop-word list are dropped.

    Args:
        filename: path of the text file to read.

    Returns:
        A list of ``(word, 1)`` tuples in order of appearance.
    """
    # reference to http://en.wikipedia.org/wiki/Stop_words
    # Spellings such as "amoungst", "fify" and "thickv" are kept exactly as
    # they were, so the set of filtered words is unchanged.
    STOP_WORDS = set([
        "a", "about", "above", "across", "after", "afterwards",
        "again", "against", "all", "almost", "alone", "along", "already",
        "also", "although", "always", "am", "among", "amongst", "amoungst",
        "amount", "an", "and", "another",
        "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
        "around", "as", "at", "back", "be", "became",
        "because", "become", "becomes", "becoming", "been", "before",
        "beforehand", "behind", "being", "below", "beside", "besides",
        "between", "beyond", "bill", "both", "bottom", "but", "by", "call",
        "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de",
        "describe", "detail", "do", "done", "down", "due", "during", "each",
        "eg", "eight", "either", "eleven", "else", "elsewhere", "empty",
        "enough", "etc", "even", "ever", "every", "everyone", "everything",
        "everywhere", "except", "few", "fifteen", "fify", "fill", "find",
        "fire", "first", "five", "for", "former", "formerly", "forty",
        "found", "four", "from", "front", "full", "further", "get", "give",
        "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here",
        "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him",
        "himself", "his", "how", "however", "hundred", "ie", "if", "in",
        "inc", "indeed", "interest", "into", "is", "it", "its", "itself",
        "keep", "last", "latter", "latterly", "least", "less", "ltd", "made",
        "many", "may", "me", "meanwhile", "might", "mill", "mine", "more",
        "moreover", "most", "mostly", "move", "much", "must", "my", "myself",
        "name", "namely", "neither", "never", "nevertheless", "next", "nine",
        "no", "nobody", "none", "noone", "nor", "not", "nothing", "now",
        "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto",
        "or", "other", "others", "otherwise", "our", "ours", "ourselves",
        "out", "over", "own", "part", "per", "perhaps", "please", "put",
        "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems",
        "serious", "several", "she", "should", "show", "side", "since",
        "sincere", "six", "sixty", "so", "some", "somehow", "someone",
        "something", "sometime", "sometimes", "somewhere", "still", "such",
        "system", "take", "ten", "than", "that", "the", "their", "them",
        "themselves", "then", "thence", "there", "thereafter", "thereby",
        "therefore", "therein", "thereupon", "these", "they", "thickv",
        "thin", "third", "this", "those", "though", "three", "through",
        "throughout", "thru", "thus", "to", "together", "too", "top",
        "toward", "towards", "twelve", "twenty", "two", "un", "under",
        "until", "up", "upon", "us", "very", "via", "was", "we", "well",
        "were", "what", "whatever", "when", "whence", "whenever", "where",
        "whereafter", "whereas", "whereby", "wherein", "whereupon",
        "wherever", "whether", "which", "while", "whither", "who", "whoever",
        "whole", "whom", "whose", "why", "will", "with", "within", "without",
        "would", "yet", "you", "your", "yours", "yourself", "yourselves",
        ])
    # string.maketrans exists only on Python 2 and str.maketrans only on
    # Python 3; use whichever this interpreter provides.
    maketrans = getattr(string, "maketrans", None) or str.maketrans
    TR = maketrans(string.punctuation, ' ' * len(string.punctuation))
    output = []
    f = open(filename, 'rt')       # open file with read/text mode
    # try/finally closes the file even when reading fails, and (unlike
    # ``with``) needs no __future__ import on Python 2.5-level interpreters.
    try:
        for line in f:
            line = line.translate(TR)  # replace punctuation with spaces
            for word in line.split():
                word = word.lower()
                if word.isalpha() and word not in STOP_WORDS:
                    output.append((word, 1))
    finally:
        f.close()
    return output


 def count_words(item):
    """Reduce step: collapse one ``(word, counts)`` group to a total.

    ``item`` must unpack into exactly two elements; the second one is
    summed and returned together with the first as ``(word, total)``.
    """
    key, tallies = item
    total = sum(tallies)
    return (key, total)
	#!/usr/bin/python
	# -*- coding: utf-8 -*-
	# Copyright : (c) SAEKI Yoshiyasu
	# License : MIT-style license
	# <http://www.opensource.org/licenses/mit-license.php>
	# last updated: 2011/03/04

	from akka.actor import Actors, UntypedActor
	# from akka.dispatch import Futures
	import collections
	import string


	class MapReduceActor(UntypedActor):
	    """Worker actor that applies a function to one piece of input.

	    Every message is expected to be a ``(func, input_data)`` pair; the
	    actor calls ``func(input_data)`` and replies with the result.
	    """

	    def onReceive(self, message):
	        # Unpack in the body instead of in the signature: tuple
	        # parameters were removed from the language by PEP 3113, while
	        # this form is accepted by every Python version.
	        func, input_data = message
	        result = func(input_data)
	        self.getContext().reply(result)


	class MapReduce(object):
	    """Run a map/reduce job over a pool of ``MapReduceActor`` workers.

	    Calling the instance starts ``numWorkers`` actors, maps every input
	    item, groups the mapped ``(key, value)`` pairs by key, reduces every
	    ``(key, values)`` group and finally stops the actors again.

	    Args:
	        mapFunc: called with one input item; must return an iterable of
	            ``(key, value)`` pairs.
	        reduceFunc: called with one ``(key, values)`` pair; its return
	            value becomes one element of the final result list.
	        numWorkers: number of actors created for each call.
	        timeout: handed unchanged to ``sendRequestReplyFuture`` for
	            every request (presumably milliseconds -- confirm against
	            the Akka API).
	    """

	    def __init__(self, mapFunc, reduceFunc, numWorkers=4, timeout=180 * 1000):
	        self.mapFunc = mapFunc
	        self.reduceFunc = reduceFunc
	        self.numWorkers = numWorkers
	        self.timeout = timeout

	    def __call__(self, input_data):
	        """Execute the whole job and return the list of reduced values."""
	        self.actors = [Actors.actorOf(MapReduceActor)
	                       for _ in range(self.numWorkers)]
	        started = []
	        try:
	            # Plain loops instead of map(): map() is lazy on Python 3,
	            # so using it only for side effects would do nothing.
	            for actor in self.actors:
	                actor.start()
	                started.append(actor)
	            mapped_values = self.executeMap(input_data)
	            partitioned_data = self.partition(mapped_values)
	            return self.executeReduce(partitioned_data.items())
	        finally:
	            # Stop every actor that was started, even when a step
	            # raised, so a failing job does not leave workers running.
	            for actor in started:
	                actor.stop()

	    def executeMap(self, input_data):
	        """Apply ``mapFunc`` to every item of ``input_data``."""
	        return self.executeFunc(self.mapFunc, input_data)

	    def executeReduce(self, input_data):
	        """Apply ``reduceFunc`` to every item of ``input_data``."""
	        return self.executeFunc(self.reduceFunc, input_data)

	    def executeFunc(self, func, input_data):
	        """Send one ``(func, item)`` request per item and collect replies.

	        Items are dealt to the actors round-robin by index.  All
	        requests are sent before any reply is awaited; results are
	        returned in input order.
	        """
	        futures = []
	        for i, data in enumerate(input_data):
	            actor = self.actors[i % self.numWorkers]
	            # The target actor is also passed as the third argument, as
	            # in the original call (presumably the sender -- confirm).
	            futures.append(
	                actor.sendRequestReplyFuture((func, data), self.timeout,
	                                             actor))
	        for future in futures:
	            # ``await`` is a reserved word from Python 3.7 on, so the
	            # method is looked up by name to keep this module parseable.
	            getattr(future, "await")()
	        return [future.result().get() for future in futures]

	    def partition(self, mapped_values):
	        """Group mapped ``(key, value)`` pairs by key.

	        Args:
	            mapped_values: iterable of iterables of ``(key, value)``
	                pairs.

	        Returns:
	            A ``collections.defaultdict(list)`` mapping each key to the
	            list of its values, in the order they were encountered.
	        """
	        partitioned_data = collections.defaultdict(list)
	        for sublist in mapped_values:
	            for key, value in sublist:
	                partitioned_data[key].append(value)
	        return partitioned_data


	def file_to_words(filename):
	    """Read a text file and emit a ``(word, 1)`` pair per counted word.

	    Punctuation is replaced by spaces, each line is split on whitespace
	    and every token is lower-cased.  Tokens that are not purely
	    alphabetic or that appear in the stop-word list are dropped.

	    Args:
	        filename: path of the text file to read.

	    Returns:
	        A list of ``(word, 1)`` tuples in order of appearance.
	    """
	    # reference to http://en.wikipedia.org/wiki/Stop_words
	    # Spellings such as "amoungst", "fify" and "thickv" are kept exactly
	    # as they were, so the set of filtered words is unchanged.
	    STOP_WORDS = set([
	        "a", "about", "above", "across", "after", "afterwards",
	        "again", "against", "all", "almost", "alone", "along", "already",
	        "also", "although", "always", "am", "among", "amongst", "amoungst",
	        "amount", "an", "and", "another",
	        "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
	        "around", "as", "at", "back", "be", "became",
	        "because", "become", "becomes", "becoming", "been", "before",
	        "beforehand", "behind", "being", "below", "beside", "besides",
	        "between", "beyond", "bill", "both", "bottom", "but", "by", "call",
	        "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de",
	        "describe", "detail", "do", "done", "down", "due", "during", "each",
	        "eg", "eight", "either", "eleven", "else", "elsewhere", "empty",
	        "enough", "etc", "even", "ever", "every", "everyone", "everything",
	        "everywhere", "except", "few", "fifteen", "fify", "fill", "find",
	        "fire", "first", "five", "for", "former", "formerly", "forty",
	        "found", "four", "from", "front", "full", "further", "get", "give",
	        "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here",
	        "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him",
	        "himself", "his", "how", "however", "hundred", "ie", "if", "in",
	        "inc", "indeed", "interest", "into", "is", "it", "its", "itself",
	        "keep", "last", "latter", "latterly", "least", "less", "ltd", "made",
	        "many", "may", "me", "meanwhile", "might", "mill", "mine", "more",
	        "moreover", "most", "mostly", "move", "much", "must", "my", "myself",
	        "name", "namely", "neither", "never", "nevertheless", "next", "nine",
	        "no", "nobody", "none", "noone", "nor", "not", "nothing", "now",
	        "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto",
	        "or", "other", "others", "otherwise", "our", "ours", "ourselves",
	        "out", "over", "own", "part", "per", "perhaps", "please", "put",
	        "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems",
	        "serious", "several", "she", "should", "show", "side", "since",
	        "sincere", "six", "sixty", "so", "some", "somehow", "someone",
	        "something", "sometime", "sometimes", "somewhere", "still", "such",
	        "system", "take", "ten", "than", "that", "the", "their", "them",
	        "themselves", "then", "thence", "there", "thereafter", "thereby",
	        "therefore", "therein", "thereupon", "these", "they", "thickv",
	        "thin", "third", "this", "those", "though", "three", "through",
	        "throughout", "thru", "thus", "to", "together", "too", "top",
	        "toward", "towards", "twelve", "twenty", "two", "un", "under",
	        "until", "up", "upon", "us", "very", "via", "was", "we", "well",
	        "were", "what", "whatever", "when", "whence", "whenever", "where",
	        "whereafter", "whereas", "whereby", "wherein", "whereupon",
	        "wherever", "whether", "which", "while", "whither", "who", "whoever",
	        "whole", "whom", "whose", "why", "will", "with", "within", "without",
	        "would", "yet", "you", "your", "yours", "yourself", "yourselves",
	        ])
	    # string.maketrans exists only on Python 2 and str.maketrans only on
	    # Python 3; use whichever this interpreter provides.
	    maketrans = getattr(string, "maketrans", None) or str.maketrans
	    TR = maketrans(string.punctuation, ' ' * len(string.punctuation))
	    output = []
	    f = open(filename, 'rt')  # open file with read/text mode
	    # try/finally closes the file even when reading fails, and (unlike
	    # ``with``) needs no __future__ import on Python 2.5-level
	    # interpreters.
	    try:
	        for line in f:
	            line = line.translate(TR)  # replace punctuation with spaces
	            for word in line.split():
	                word = word.lower()
	                if word.isalpha() and word not in STOP_WORDS:
	                    output.append((word, 1))
	    finally:
	        f.close()
	    return output


	def count_words(item):
	    """Reduce step: turn one ``(word, occurrences)`` pair into a total.

	    Args:
	        item: a two-element sequence holding the word and an iterable
	            of its per-occurrence counts.

	    Returns:
	        A ``(word, total)`` tuple where ``total`` is the sum of the
	        counts.
	    """
	    word, occurrences = item
	    return (word, sum(occurrences))
No results found