Created
August 26, 2012 22:49
-
-
Save marcelcaraciolo/3484056 to your computer and use it in GitHub Desktop.
mapreduce_job2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def count_max_of_mutual_friends(self, key, values):
    '''
    Re-key one (pair, count) record by each member of the pair.

    For a pair of users and their number of mutual friends, emit one
    record per user so that every user ends up keyed with the other
    user as a candidate and the mutual-friend count as its score.

    Args:
        key: a two-element sequence [friend1, friend2].
        values: the number of mutual friends for that pair; a single
            value (anything accepted by int()), not an iterable.

    Yields:
        friend1, (friend2, numberOfMutualFriends)
        friend2, (friend1, numberOfMutualFriends)

    Input ({[friend1, friend2], numberOfMutualFriends}):
        ["fabio", "marcel"] 1
        ["fabiola", "marcel"] 1
        ["marcel", "patricia"] 1
        ["marcel", "paula"] 1
        ["carol", "marcel"] 2

    Output ({friend1, [friend2, numberOfMutualFriends]},
            {friend2, [friend1, numberOfMutualFriends]}):
        "fabio", ["marcel", 1]
        "marcel", ["fabio", 1]
        "fabiola", ["marcel", 1]
        "marcel", ["fabiola", 1]
        "marcel", ["patricia", 1]
        "patricia", ["marcel", 1]
        "marcel", ["paula", 1]
        "paula", ["marcel", 1]
        "carol", ["marcel", 2]
        "marcel", ["carol", 2]
    '''
    f1, f2 = key
    # Convert once; the same score is attached to both directions.
    score = int(values)
    yield f1, (f2, score)
    yield f2, (f1, score)
def top_recommendations(self, key, values):
    '''
    Get the TOP_N highest-scoring recommendations for one user.

    Args:
        key: the user the recommendations are for.
        values: an iterable of (friend, numberOfMutualFriends) pairs
            collected for that user.

    Yields:
        A single record: key, followed by a list of at most TOP_N
        (friend, numberOfMutualFriends) tuples ordered by descending
        numberOfMutualFriends. Pairs with equal scores keep the order
        in which they arrived.

    NOTE(review): TOP_N is not defined in this block; it is expected
    to be provided by the enclosing module -- confirm.

    Input ({friend1, [(friend, numberOfMutualFriends),
                      (friend, numberOfMutualFriends2)]}):
        "fabio", [["marcel", 1]]
        "marcel", [["carol", 2], ["fabio", 1], ["fabiola", 1],
                   ["patricia", 1], ["paula", 1]]
        "fabiola", [["marcel", 1]]
        "patricia", [["marcel", 1]]
        "paula", [["marcel", 1]]
        "carol", [["marcel", 2]]

    Output ({friend1, [(friend, numberOfMutualFriends),
                       (friend, numberOfMutualFriends2)]}):
        Ex: Get the top 3 suggestions.
        "fabio", [["marcel", 1]]
        "marcel", [["carol", 2], ["fabio", 1], ["fabiola", 1]]
        "fabiola", [["marcel", 1]]
        "patricia", [["marcel", 1]]
        "paula", [["marcel", 1]]
        "carol", [["marcel", 2]]
    '''
    # Unpack each entry so malformed (non-pair) items fail loudly here,
    # and normalise every entry to a tuple.
    candidates = [(friend, score) for friend, score in values]
    # sorted() is stable even with reverse=True, so ties keep arrival
    # order, matching a sort on the negated score.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    yield key, ranked[:TOP_N]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment