Created
August 21, 2012 19:14
-
-
Save marcelcaraciolo/3418460 to your computer and use it in GitHub Desktop.
movies count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*-coding: utf-8 -*- | |
''' | |
This module computes, for each movie, the number of ratings it received
and attaches that count to every (user, movie, rating) record.
''' | |
__author__ = 'Marcel Caraciolo <[email protected]>' | |
from mrjob.job import MRJob | |
class MoviesCount(MRJob):
    """MapReduce job that tags every rating with its movie's rating count.

    Each input line is expected to look like ``user_id|item_id|rating``.
    The job emits one record per rating in the form
    ``user_id|item_id|rating|total``, where ``total`` is the number of
    ratings seen for that movie.
    """

    def mapper(self, key, line):
        """Key each rating by the movie it belongs to.

        ``key`` is ignored. ``line`` is split on ``'|'`` into exactly three
        fields; a non-blank line with a different number of fields raises
        ``ValueError``, as does a rating that is not a valid float.

        Yields ``(item_id, (user_id, rating))`` with ``rating`` as a float.
        """
        # A blank line (e.g. a trailing newline at the end of the input)
        # carries no rating; skip it instead of failing on the unpacking.
        if not line.strip():
            return

        user_id, item_id, rating = line.split('|')
        yield item_id, (user_id, float(rating))

    def reducer(self, movie, values):
        """Count the ratings of ``movie`` and re-emit each one with the count.

        ``values`` is an iterable of ``(user_id, rating)`` pairs. Yields
        ``('user_id|movie|rating|total', None)`` for every pair, with the
        rating formatted to two decimals.
        """
        # The total must be known before the first record is emitted, and
        # ``values`` may be a one-shot iterator, so buffer the pairs first.
        ratings = list(values)
        total = len(ratings)

        for user_id, rating in ratings:
            yield '%s|%s|%.2f|%d' % (user_id, movie, rating, total), None
# Launch the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    MoviesCount.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment