Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 26, 2012 22:49
Show Gist options
  • Save marcelcaraciolo/3484056 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/3484056 to your computer and use it in GitHub Desktop.
mapreduce_job2
def count_max_of_mutual_friends(self, key, values):
    '''
    Re-key a scored friend pair by each of its two members.

    Every incoming record describes one pair of users together with
    the number of friends they have in common. The record is emitted
    twice, once under each user, so that a later step can gather all
    candidates for a single user under one key.

    Args:
        key: a two-item sequence ``[friend1, friend2]``.
        values: despite the plural name, a single value holding the
            number of mutual friends for the pair; it is passed
            through ``int()``.

    Yields:
        ``(friend1, (friend2, count))`` followed by
        ``(friend2, (friend1, count))``. The friend comes first and
        the count second, which is the order ``top_recommendations``
        unpacks.

    Input ({[friend1, friend2], numberOfMutualFriends}):
        ["fabio", "marcel"] 1
        ["fabiola", "marcel"] 1
        ["marcel", "patricia"] 1
        ["marcel", "paula"] 1
        ["carol", "marcel"] 2

    Output ({friend1, [friend2, numberOfMutualFriends]},
            {friend2, [friend1, numberOfMutualFriends]}):
        "fabio", ["marcel", 1]
        "marcel", ["fabio", 1]
        "fabiola", ["marcel", 1]
        "marcel", ["fabiola", 1]
        "marcel", ["patricia", 1]
        "patricia", ["marcel", 1]
        "marcel", ["paula", 1]
        "paula", ["marcel", 1]
        "carol", ["marcel", 2]
        "marcel", ["carol", 2]
    '''
    f1, f2 = key
    # Convert once; both emitted records share the same count.
    mutual_count = int(values)
    yield f1, (f2, mutual_count)
    yield f2, (f1, mutual_count)
def top_recommendations(self, key, values, top_n=None):
    '''
    Keep the highest-scoring friend suggestions for one user.

    Args:
        key: the user the suggestions are for.
        values: iterable of ``(friend, numberOfMutualFriends)`` pairs.
        top_n: maximum number of suggestions to keep. When omitted
            (or ``None``) the module-level ``TOP_N`` is used, which is
            the original behaviour.

    Yields:
        A single ``(key, suggestions)`` record where ``suggestions``
        is a list of ``(friend, numberOfMutualFriends)`` tuples sorted
        by count, highest first, and cut to at most ``top_n`` entries.
        Pairs with equal counts keep their incoming order.

    Input ({friend1, [(friend, numberOfMutualFriends),
                      (friend2, numberOfMutualFriends2)]}):
        "fabio", [["marcel", 1]]
        "marcel", [["carol", 2], ["fabio", 1], ["fabiola", 1],
                   ["patricia", 1], ["paula", 1]]
        "fabiola", [["marcel", 1]]
        "patricia", [["marcel", 1]]
        "paula", [["marcel", 1]]
        "carol", [["marcel", 2]]

    Output (same shape as the input):
        Ex: Get the top 3 suggestions.
        "fabio", [["marcel", 1]]
        "marcel", [["carol", 2], ["fabio", 1], ["fabiola", 1]]
        "fabiola", [["marcel", 1]]
        "patricia", [["marcel", 1]]
        "paula", [["marcel", 1]]
        "carol", [["marcel", 2]]
    '''
    if top_n is None:
        top_n = TOP_N
    # Rebuild each pair as a tuple (as the original did), then sort by
    # count descending; reverse=True keeps the sort stable for ties.
    ranked = sorted(
        ((friend, count) for friend, count in values),
        key=lambda pair: pair[1],
        reverse=True,
    )
    yield key, ranked[:top_n]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment