Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 21, 2012 19:14
Show Gist options
  • Save marcelcaraciolo/3418460 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/3418460 to your computer and use it in GitHub Desktop.
movies count
#-*-coding: utf-8 -*-
'''
This module counts how many ratings each movie received and
appends that count to every (user, movie, rating) record.
'''
__author__ = 'Marcel Caraciolo <[email protected]>'
from mrjob.job import MRJob
class MoviesCount(MRJob):
    """MapReduce job that tags every rating with its movie's rating count.

    Input lines have the form ``user_id|item_id|rating``. For each input
    record the job emits one key ``user_id|item_id|rating|total`` (with a
    ``None`` value), where ``total`` is the number of ratings that were
    grouped under the same ``item_id``.
    """

    def mapper(self, key, line):
        """Group ratings by movie.

        Args:
            key: Input key; unused.
            line: One raw input record, ``user_id|item_id|rating``.

        Yields:
            ``(item_id, (user_id, rating))`` with ``rating`` as a float.

        Raises:
            ValueError: If a non-blank line does not have exactly three
                ``|``-separated fields or the rating is not numeric.
        """
        # A stray blank line (e.g. at the end of the input file) carries no
        # rating; skip it instead of failing the whole job on unpacking.
        if not line.strip():
            return
        user_id, item_id, rating = line.split('|')
        yield item_id, (user_id, float(rating))

    def reducer(self, movie, values):
        """Emit each rating of ``movie`` along with the movie's rating count.

        Args:
            movie: The item id all ``values`` were grouped under.
            values: Iterable of ``(user_id, rating)`` pairs for this movie.

        Yields:
            ``('user_id|movie|rating|total', None)`` once per rating, where
            ``rating`` is formatted with two decimals and ``total`` is the
            number of pairs in ``values``.
        """
        # The count is only known after every value has been consumed, so
        # the ratings have to be buffered before anything can be emitted.
        ratings = list(values)
        total = len(ratings)
        for user_id, rating in ratings:
            yield '%s|%s|%.2f|%d' % (user_id, movie, rating, total), None
# Launch the job only when this file is executed as a script, so that
# importing the module does not start a run.
if __name__ == '__main__':
    MoviesCount.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment