Created
August 26, 2012 13:08
-
-
Save marcelcaraciolo/3479031 to your computer and use it in GitHub Desktop.
friends recommender
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class FriendsRecommender(MRJob):
    """Two-step MapReduce job that builds friend recommendations.

    Only ``steps`` and ``map_input`` are part of this excerpt; the other
    callables wired up in ``steps`` are expected to be defined elsewhere
    on the class.
    """

    def steps(self):
        """Return the job's steps as a list of (mapper, reducer) pairs.

        Step 1 uses ``map_input`` as mapper and ``count_number_of_friends``
        as reducer; step 2 uses ``count_max_of_mutual_friends`` as mapper
        and ``top_recommendations`` as reducer.
        """
        return [self.mr(self.map_input, self.count_number_of_friends),
                self.mr(self.count_max_of_mutual_friends,
                        self.top_recommendations)]

    def map_input(self, key, line):
        '''
        Emit one record per pair of users found on an input line.

        The line is split on ';': the first field is the source user and
        the remaining fields are that user's friends. For every friend a
        (source, friend) pair is emitted with value -1, and for every
        unordered pair of friends a (friend_a, friend_b) pair is emitted
        with value 1. The two ids in each emitted key are ordered with
        ``<`` so the same pair always produces the same key no matter
        which line it came from. Empty id fields (e.g. produced by a
        trailing ';') are skipped.

        ``key`` is not used.

        Input (source;friend1;friend2;...):
            marcel;jonas;maria;jose;amanda

        Output {[source, friend1], -1};
               {[friend1, friend2], 1}:
            ["jonas", "marcel"] -1
            ["jonas", "maria"] 1
            ["jonas", "jose"] 1
            ["amanda", "jonas"] 1
            ["marcel", "maria"] -1
            ["jose", "maria"] 1
            ["amanda", "maria"] 1
            ["jose", "marcel"] -1
            ["amanda", "jose"] 1
            ["amanda", "marcel"] -1
        '''
        fields = line.split(';')
        user_id = fields[0]
        if not user_id:
            # Blank line or missing source id: nothing meaningful to emit.
            return
        # Drop empty tokens so '' is never emitted as a user id.
        friends = [friend for friend in fields[1:] if friend]

        for index, f1 in enumerate(friends):
            # -1 flags a pair that is already directly connected.
            if user_id < f1:
                yield (user_id, f1), -1
            else:
                yield (f1, user_id), -1

            # Every later friend shares the source user with f1.
            for f2 in friends[index + 1:]:
                if f1 < f2:
                    yield (f1, f2), 1
                else:
                    yield (f2, f1), 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment