Created
September 9, 2009 22:28
-
-
Save pims/184137 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
Example of friendships computing for mapreduce | |
inspired by this post : http://stevekrenzel.com/finding-friends-with-mapreduce | |
""" | |
__author__ = "Tim Bart" | |
__copyright__ = "No copyright" | |
__credits__ = ["Steve Krenzel for the inspiration", "Michael Nielsen for the MapReduce implementation"] | |
__license__ = "MIT" | |
from operator import itemgetter | |
import itertools | |
def map_reduce(i,mapper,reducer): | |
""" | |
Defines a single function, map_reduce, which takes an input | |
dictionary i and applies the user-defined function mapper to each | |
(input_key,input_value) pair, producing a list of intermediate | |
keys and intermediate values. Repeated intermediate keys then | |
have their values grouped into a list, and the user-defined | |
function reducer is applied to the intermediate key and list of | |
intermediate values. The results are returned as a list. | |
All rights reserved to Michael Nielsen | |
http://michaelnielsen.org/blog/write-your-first-mapreduce-program-in-20-minutes/ | |
""" | |
intermediate = [] | |
for (key,value) in i.items(): | |
intermediate.extend(mapper(key,value)) | |
groups = {} | |
for key, group in itertools.groupby(sorted(intermediate),lambda x: x[0]): | |
groups[key] = list([y for x, y in group]) | |
return [reducer(intermediate_key,groups[intermediate_key]) for intermediate_key in groups] | |
def sortedKey(couple): | |
"""sorts list of two strings and returns concatenated string""" | |
couple.sort() | |
return ''.join(couple) | |
def cmpByKey(a,b): | |
"""compare two items by their first key""" | |
return cmp(a[0],b[0]) | |
def mapper(input_key,input_value): | |
""" | |
split input in multiple lines and returns a tuple (key,value) | |
ex: ("AB",[C,D,E]) | |
""" | |
res = [] | |
for line in input_value.split('\n'): | |
person = line.split('|')[0] | |
friends = line.split('|')[1].split(';') | |
for friend in friends: | |
key = sortedKey([person,friend]) | |
value = friends | |
res.append((key,value)) | |
return res | |
def intersect(lists): | |
""" return list of intersections""" | |
intersection = [] | |
if len(lists) > 1: | |
for f in lists[1]: | |
if f in lists[0]: | |
intersection.append(f) | |
return intersection | |
def reducer(intermediate_key,intermediate_value_list): | |
return (intermediate_key,intersect(intermediate_value_list)) | |
def main(): | |
"""run mapreduce job and outputs sorted results""" | |
""" | |
friends.txt should contain something like: | |
A|B;C;D | |
B|A;C;D;E | |
C|A;B;D;E | |
D|A;B;C;E | |
E|B;C;D | |
""" | |
i = {} | |
i["friends"] = open('friends.txt').read() | |
friendships = map_reduce(i,mapper,reducer) | |
friendships.sort(cmpByKey) | |
for friendship in friendships: | |
print "%s & %s have %s as common friends " % (friendship[0][0],friendship[0][1],friendship[1]) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment