Skip to content

Instantly share code, notes, and snippets.

@masayang
Created March 10, 2013 23:48
Show Gist options
  • Save masayang/5131047 to your computer and use it in GitHub Desktop.
Save masayang/5131047 to your computer and use it in GitHub Desktop.
友達推奨MrJob
foo, bar, masayang, steve, nakamura, ryoma, corbert
bar, foo, chibi, loki, ichikawa
masayang, foo, steve, loki, corbert, ichikawa, moraimon, sada, matsuno
steve, bar, nakamura, micky, samuel, chibi, ryoma, moses, anko, moraimon
nakamura, foo, bar, masayang, steve, wendy, ryoma, ichikawa, moraimon, matsuno
micky, steve, loki, corbert, ichikawa, regan, moraimon, sada
samuel, bar, chester, loki, regan
wendy, foo, bar, samuel, chibi, ryoma, loki, moses, matsuno
chester, foo, samuel, loki, regan, sada
chibi, masayang, steve, anko
ryoma, bar, steve, nakamura, samuel, loki, anko, ichikawa
loki, bar, masayang, chester, anko, moraimon, sada
moses, bar, samuel, wendy, ichikawa, sada, nakagawa
anko, nakamura, samuel, ryoma, corbert, matsuno
corbert, masayang, chester, chibi, regan, moraimon, sada, matsuno
ichikawa, foo, samuel, chester, chibi, loki, anko, regan, moraimon, sada
regan, masayang, micky, chibi
moraimon, chibi, regan, sada, nakagawa, matsuno
sada, masayang, samuel, chester, ryoma, loki, moses, regan, matsuno
nakagawa, nakamura, wendy, ryoma, loki, moses, anko, regan
matsuno, foo, steve, nakamura, samuel, wendy, chester, loki, moses, anko, regan, sada
from mrjob.job import MRJob
# Maximum number of recommendations kept per user (used as the slice bound
# in Recommendation.step2_reducer).
TOP_N = 3
class Recommendation(MRJob):
    """Two-step MapReduce job that recommends up to TOP_N users per user.

    Each input line has the form ``user, friend1, friend2, ...``.

    Step 1 counts, for every unordered pair of names that appear together
    in some user's friend list, how many lists contain both of them.
    Step 2 regroups those counts by name and keeps the TOP_N partners with
    the highest counts.

    NOTE(review): pairs that are already direct friends are not filtered
    out, so an existing friend can show up as a recommendation -- confirm
    whether that is intended.
    """

    def steps(self):
        """Return the two map/reduce steps in execution order."""
        stages = [
            (self.step1_mapper, self.step1_reducer),
            (self.step2_mapper, self.step2_reducer),
        ]
        try:
            # Imported lazily so the job still loads on older mrjob
            # releases that do not ship MRStep.
            from mrjob.step import MRStep
        except ImportError:
            return [self.mr(mapper, reducer) for mapper, reducer in stages]
        return [MRStep(mapper=mapper, reducer=reducer)
                for mapper, reducer in stages]

    def step1_mapper(self, key, line):
        """Emit ``((a, b), 1)`` for every pair of friends on one line.

        The first field (the owner of the friend list) is ignored; only
        the names after it are paired. Each pair is ordered so the
        smaller name comes first, which makes (a, b) and (b, a) reduce
        under the same key. Empty fields, e.g. from a trailing comma,
        are skipped.
        """
        fields = [name.strip() for name in line.split(',')]
        friends = [name for name in fields[1:] if name]
        for index, first in enumerate(friends):
            for second in friends[index + 1:]:
                if first < second:
                    yield (first, second), 1
                else:
                    yield (second, first), 1

    def step1_reducer(self, key, values):
        """Sum the per-line occurrences of one pair into a single count."""
        first, second = key
        yield (first, second), sum(values)

    def step2_mapper(self, key, values):
        """Fan a pair count out to both members of the pair.

        ``values`` is the single count produced by step1_reducer.
        """
        first, second = key
        count = int(values)
        yield first, (second, count)
        yield second, (first, count)

    def step2_reducer(self, key, values):
        """Yield the TOP_N highest-scoring candidates for ``key``.

        Candidates with equal scores keep the order in which they
        arrived, because the sort is stable.
        """
        candidates = [(name, score) for name, score in values]
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        yield key, candidates[:TOP_N]
# Run the job only when executed as a script, not when imported.
if __name__ == '__main__':
    Recommendation.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment