Created August 22, 2012 20:25
-
-
Save marcelcaraciolo/3429048 to your computer and use it in GitHub Desktop.
VectorSimilarities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class VectorSimilarities(MRJob):
    '''
    MRJob subclass that declares a four-step pipeline and the command-line
    options that go with it.

    Only steps(), configure_options() and input() are visible in this
    excerpt. Judging by the step names the job appears to compute
    item-to-item similarities -- NOTE(review): confirm against the full
    source.
    '''

    def steps(self):
        '''
        Return the ordered list of steps that make up this job.

        Every step is built with self.mr() and two positional arguments,
        presumably (mapper, reducer) -- confirm against the mrjob version
        in use. The second step passes None as its first argument.
        '''
        return [
            self.mr(self.input, self.group_by_user_rating),
            self.mr(None, self.count_ratings_users_freq),
            self.mr(self.pairwise_items, self.calculate_similarity),
            self.mr(self.calculate_ranking, self.top_similar_items),
        ]

    def configure_options(self):
        '''
        Register this job's command-line options on top of the inherited
        ones.

        All options are integers registered through
        add_passthrough_option():

        --priorcount        -> prior_count       (default 10)
        --priorcorrelation  -> prior_correlation (default 0)
        --minraters         -> min_num_raters    (default 3)
        --maxraters         -> max_num_raters    (default 10000)
        --minintersec       -> min_intersection  (default 0)
        '''
        # Let the parent class register its own options first.
        super(VectorSimilarities, self).configure_options()

        self.add_passthrough_option(
            '--priorcount', dest='prior_count', default=10, type='int',
            help='PRIOR_COUNT: Parameter to regularize correlation')
        self.add_passthrough_option(
            '--priorcorrelation', dest='prior_correlation', default=0,
            type='int',
            help='PRIOR_CORRELATION: Parameter to regularize correlation')
        self.add_passthrough_option(
            '--minraters', dest='min_num_raters', default=3, type='int',
            help='the minimum number of raters')
        self.add_passthrough_option(
            '--maxraters', dest='max_num_raters', default=10000, type='int',
            help='the maximum number of raters')
        self.add_passthrough_option(
            '--minintersec', dest='min_intersection', default=0, type='int',
            help='the minimum intersection')

    def input(self, key, line):
        '''
        Subclasses should override this to define their own input.

        It is passed as the first argument of the first step in steps().
        This base implementation ignores `key` and `line` and always raises
        NotImplementedError.
        '''
        raise NotImplementedError('Implement this in the subclass')

    # ... (the excerpt is truncated here; the other methods referenced by
    # steps() are not visible in this view)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.