Created
August 14, 2008 21:05
-
-
Save jessehattabaugh/5496 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Cron job: picks one user (the "subject") and refreshes the similarity scores
// between that user and up to 20 other users.
//
// For every compared pair, three scores (euclidean, pearson, tanimoto) are
// written to the subject->object relation and mirrored onto the
// object->subject relation; both relations are created when missing.
// Finally the subject itself is stamped with the current time.
//
// Takes no arguments and returns nothing; all results are written to `storage`.
function cron_analyze_users(){
  // get a user that needs to be analyzed
  var subject = storage.users.sort(lastAnalyzed).first()
  if(!subject) return // no users yet, nothing to analyze

  // make a hash of their interests and the difference between the volume of
  // the term for the user and the volume of the term on the whole site.
  // A prototype-less object is used instead of an Array so that terms such as
  // "length" or "__proto__" are stored like any other key.
  var interests = Object.create(null)
  storage.interests.filter({user:subject,relevant:true}).forEach(function(i){
    interests[i.term.string] = i.difference
  })

  // this is a long way of sorting users by the last time they were compared to the subject user
  var analyzed = Object.create(null)
  storage.relations.filter({subject:subject}).forEach(function(r){
    analyzed[r.object.id] = r.analyzed
  })

  // NOTE(review): users that already have a relation sort ahead of users that
  // have none, so together with limit(20) never-compared users may be starved.
  // Confirm this ordering is intended.
  storage.users.sort(function(a,b){
    var aTime = analyzed[a.id]
    var bTime = analyzed[b.id]
    if(!aTime && bTime) return 1
    if(aTime && !bTime) return -1
    if(aTime && bTime) return aTime - bTime
    return a.created - b.created
  }).limit(20).forEach(function(object){
    if(object.email == subject.email) return // don't compare a user to themselves

    var relation = storage.relations.filter({subject:subject, object:object}).first()
    if(!relation) relation = storage.relations.add({subject:subject, object:object})
    relation.euclidean = euclidean(subject, object, interests)
    relation.pearson = pearson(subject, object, interests)
    relation.tanimoto = tanimoto(subject, object, interests)
    relation.analyzed = new Date()

    // copy all that to the flip side of this relationship, screw normal-form!
    var flip = storage.relations.filter({subject:object, object:subject}).first()
    if(!flip) flip = storage.relations.add({subject:object, object:subject})
    flip.euclidean = relation.euclidean
    flip.pearson = relation.pearson
    flip.tanimoto = relation.tanimoto
    flip.analyzed = new Date()
  })
  subject.analyzed = new Date()
}
// Euclidean-distance similarity between the subject's interest hash and the
// relevant interests stored for `object`.
//
// subject   - not read by this function (kept for a signature parallel to
//             pearson/tanimoto)
// object    - user whose relevant interests are fetched from storage.interests
// interests - hash of term string -> the subject's difference for that term
//
// Returns 1/(1+distance), i.e. a number in (0, 1] where 1 means the two users
// agree exactly on every shared term. Returns false when no term is shared or
// when the distance is not a number.
function euclidean(subject, object, interests){
  // propertyIsEnumerable is true only for own, enumerable keys: it accepts
  // terms whose stored difference is 0 and rejects inherited names such as
  // "constructor" as well as an Array's built-in "length".
  var isOwnTerm = Object.prototype.propertyIsEnumerable
  var shared = 0
  var sum = 0
  storage.interests.filter({user:object,relevant:true}).forEach(function(i){
    var key = i.term.string
    if(!isOwnTerm.call(interests, key)) return
    shared++
    sum += Math.pow(interests[key] - i.difference, 2)
  })
  if(!shared || isNaN(sum)) return false
  return 1/(1+Math.sqrt(sum)) // convert to number between 1-0
}
// Pearson correlation between the subject's interest hash and the relevant
// interests stored for `object`, computed over the terms both have.
//
// subject   - not read by this function
// object    - user whose relevant interests are fetched from storage.interests
// interests - hash of term string -> the subject's difference for that term
//
// Returns the correlation rounded to two decimals, or false when no term is
// shared or the denominator is zero / not a number (e.g. a single shared term
// or no variance on one side).
function pearson(subject, object, interests){
  // Own, enumerable keys only: keeps terms whose difference is 0 and ignores
  // inherited names such as "constructor" and an Array's built-in "length".
  var isOwnTerm = Object.prototype.propertyIsEnumerable
  var shared = 0
  var subjectSum = 0
  var objectSum = 0
  var subjectSqSum = 0
  var objectSqSum = 0
  var productSum = 0
  storage.interests.filter({user:object,relevant:true}).forEach(function(i){
    var key = i.term.string
    if(!isOwnTerm.call(interests, key)) return
    var mine = interests[key]
    var theirs = i.difference
    shared++
    subjectSum += mine
    objectSum += theirs
    subjectSqSum += Math.pow(mine, 2)
    objectSqSum += Math.pow(theirs, 2)
    productSum += mine * theirs
  })
  if(!shared) return false

  var num = productSum - (subjectSum * objectSum / shared)
  var den = Math.sqrt(
    (subjectSqSum - Math.pow(subjectSum, 2) / shared) *
    (objectSqSum - Math.pow(objectSum, 2) / shared)
  )
  if(!den) return false // zero or NaN denominator: correlation undefined
  return Math.round((num / den) * 100)/100
}
// "Tanimoto" score between the subject's interest hash and the relevant
// interests stored for `object`, computed over the terms both have.
//
// subject   - not read by this function
// object    - user whose relevant interests are fetched from storage.interests
// interests - hash of term string -> the subject's difference for that term
//
// Returns num/den as computed below, or false when no term is shared or the
// denominator is zero / not a number.
//
// NOTE(review): the formula works on sums of differences and on
// `term.difference` read from the term record itself; it does not look like
// the usual Tanimoto coefficient. Kept unchanged - confirm the intent.
function tanimoto(subject, object, interests){
  // Own, enumerable keys only: keeps terms whose difference is 0 and ignores
  // inherited names such as "constructor" and an Array's built-in "length".
  var isOwnTerm = Object.prototype.propertyIsEnumerable
  var shared = 0
  var subjectSum = 0
  var objectSum = 0
  var termSum = 0
  storage.interests.filter({user:object,relevant:true}).forEach(function(i){
    var key = i.term.string
    if(!isOwnTerm.call(interests, key)) return
    shared++
    subjectSum += interests[key]
    objectSum += i.difference
    termSum += i.term.difference
  })
  if(!shared) return false

  var num = subjectSum * objectSum / shared
  var den = (Math.pow(termSum, 2) / shared) * 2 - (subjectSum * objectSum) / shared
  if(!den) return false // zero or NaN denominator
  return num/den
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment