Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 26, 2012 13:08
Show Gist options
  • Save marcelcaraciolo/3479031 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/3479031 to your computer and use it in GitHub Desktop.
friends recommender
class FriendsRecommender(MRJob):
    """Two-step MapReduce job that recommends friends via mutual friends.

    Only ``steps`` and ``map_input`` are defined in this section; the other
    callables referenced by ``steps`` (``count_number_of_friends``,
    ``count_max_of_mutual_friends`` and ``top_recommendations``) must be
    provided elsewhere on the class.
    """

    def steps(self):
        """Return the job pipeline as a list of two ``self.mr`` steps.

        Step 1 is built from ``map_input`` and ``count_number_of_friends``;
        step 2 from ``count_max_of_mutual_friends`` and
        ``top_recommendations``.
        """
        # NOTE(review): the two positional arguments are presumably the
        # step's mapper and reducer -- confirm against the mrjob version in
        # use (newer releases express this with MRStep instead of self.mr).
        return [
            self.mr(self.map_input, self.count_number_of_friends),
            self.mr(self.count_max_of_mutual_friends,
                    self.top_recommendations),
        ]

    def map_input(self, key, line):
        """Emit every pair of names found on one friend-list line.

        ``line`` is a ``;``-separated record: the first field is the source
        user and the remaining fields are that user's friends. ``key`` is
        not used.

        Two kinds of pairs are yielded, each with its two names in
        ascending string order so that the same pair always produces the
        same key regardless of which line it came from:

        * ``(source, friend), -1`` for every friend on the line;
        * ``(friend_a, friend_b), 1`` for every unordered pair of distinct
          positions in the friend list (each pair once, not a full
          cartesian product).

        Example input::

            marcel;jonas;maria;jose;amanda

        Example output, in emission order::

            ("jonas", "marcel")   -1
            ("jonas", "maria")     1
            ("jonas", "jose")      1
            ("amanda", "jonas")    1
            ("marcel", "maria")   -1
            ("jose", "maria")      1
            ("amanda", "maria")    1
            ("jose", "marcel")    -1
            ("amanda", "jose")     1
            ("amanda", "marcel")  -1
        """
        fields = line.split(';')
        user_id, friend_ids = fields[0], fields[1:]

        for position, f1 in enumerate(friend_ids):
            # Pair of the source user with one of its listed friends.
            yield ((user_id, f1) if user_id < f1 else (f1, user_id)), -1

            # Pair f1 with every friend listed after it, so each
            # friend-friend combination is emitted exactly once.
            for f2 in friend_ids[position + 1:]:
                yield ((f1, f2) if f1 < f2 else (f2, f1)), 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment