Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 16, 2012 20:25
Show Gist options
  • Save marcelcaraciolo/3373360 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/3373360 to your computer and use it in GitHub Desktop.
metrics
from math import sqrt
def correlation(size, dot_product, rating_sum,
                rating2sum, rating_norm_squared, rating2_norm_squared):
    '''
    Return the Pearson correlation of two vectors A and B, computed from
    pre-aggregated sums instead of the raw vectors:

        cov(A, B) / (stdDev(A) * stdDev(B))

    For the result to be that correlation the arguments must be:

    size                 -- number of paired elements n
    dot_product          -- sum(a_i * b_i)
    rating_sum           -- sum(a_i)
    rating2sum           -- sum(b_i)
    rating_norm_squared  -- sum(a_i ** 2)
    rating2_norm_squared -- sum(b_i ** 2)

    Returns a float bounded to [-1.0, 1.0]. Returns 0.0 when the
    denominator is zero (no elements, or either vector has no variance).
    '''
    numerator = size * dot_product - rating_sum * rating2sum

    # n * sum(x^2) - sum(x)^2 is never negative mathematically, but
    # floating-point cancellation can leave it a hair below zero, which
    # would make sqrt() raise ValueError. Clamp so such inputs take the
    # zero-variance path instead of crashing.
    spread = max(size * rating_norm_squared - rating_sum * rating_sum, 0.0)
    spread2 = max(size * rating2_norm_squared - rating2sum * rating2sum, 0.0)
    denominator = sqrt(spread) * sqrt(spread2)

    if not denominator:
        return 0.0

    # Rounding can push the quotient marginally outside [-1, 1]; bound it.
    # The value is kept as the first argument so a NaN passes through.
    return min(max(numerator / denominator, -1.0), 1.0)
def normalized_correlation(size, dot_product, rating_sum,
                           rating2sum, rating_norm_squared,
                           rating2_norm_squared):
    '''
    Correlation of two vectors A and B, shifted and halved so that a
    correlation in [-1, 1] lands in [0, 1].

    The arguments are forwarded unchanged to correlation(); its result r
    is returned as (r + 1) / 2, so -1 maps to 0.0, 0 maps to 0.5 and
    1 maps to 1.0.
    '''
    raw = correlation(
        size,
        dot_product,
        rating_sum,
        rating2sum,
        rating_norm_squared,
        rating2_norm_squared,
    )
    shifted = raw + 1.0
    return shifted / 2.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment