Created
December 16, 2018 19:18
-
-
Save berstend/5864e430e9d29aa9ad9f46bcc27fa4e3 to your computer and use it in GitHub Desktop.
Given star ratings and votes, calculate bayesian estimate ratings to sort by popularity (taking number of votes into consideration)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
bayesian rating = (v ÷ (v+m)) × R + (m ÷ (v+m)) × C
where:
R = stars for the entry
v = number of votes for the entry
m = minimum votes required to be listed (eg. 10)
C = the mean stars across the whole list (eg. 4.1)

The ranking referenced below only lists entries that have at least m
(there: 10) votes. This function applies no such cutoff itself; m only
acts as the weight given to the list-wide mean.

The bayesian estimate is a statistical technique used to reduce the noise
due to low sample counts. In effect, the fewer votes an entry has, the more
it is pulled towards the mean (e.g. 4.1). In other words, the entries that
rank highest are the ones that many people agree are great.
https://www.animenewsnetwork.com/encyclopedia/ratings-anime.php#best_bayesian
*/
/**
 * Compute the bayesian estimate rating for a single entry.
 *
 * @param {number} R - stars for the entry
 * @param {number} v - number of votes for the entry
 * @param {number} m - weight of the prior (the "minimum votes" value)
 * @param {number} C - mean stars across the whole list
 * @returns {number} weighted average of R and C; C when v + m is 0
 */
const calculateBayesianEstimate = (R, v, m, C) => {
  const totalWeight = v + m;
  // With no votes and no prior weight both fractions would be 0 / 0 (NaN);
  // the only information left is the list-wide mean, so return that.
  if (totalWeight === 0) {
    return C;
  }
  return (v / totalWeight) * R + (m / totalWeight) * C;
};
/**
 * Calculate the mean (= average) of an array of numbers.
 *
 * @param {number[]} arr - values to average
 * @returns {number} sum of the values divided by their count;
 *   NaN for an empty array (0 / 0)
 */
const mean = (arr) => {
  const sum = arr.reduce((total, value) => total + value, 0);
  return sum / arr.length;
};
// Sample data: one record per entry, with its average star rating and the
// number of votes that rating is based on.
const entries = [
  { id: 1, stars: 4.5, votes: 700 },
  { id: 2, stars: 3.7, votes: 1000 },
  { id: 3, stars: 5, votes: 10 },
  { id: 4, stars: 4.8, votes: 169 },
  { id: 5, stars: 2.3, votes: 8000 },
];
// List-wide averages. totalStarsMean is passed as C (the mean the estimate
// pulls towards) to calculateBayesianEstimate.
const totalVotesMean = mean(entries.map((e) => e.votes));
const totalStarsMean = mean(entries.map((e) => e.stars));
// 10 percent of mean votes is minimum; passed as m to the estimate.
const minVotes = totalVotesMean * 0.1;

console.log({
  totalVotesMean,
  totalStarsMean,
  minVotes,
});
// => {totalVotesMean: 1975.8, totalStarsMean: 4.0600000000000005, minVotes: 197.58}

// Attach the bayesian estimate to every entry as `rank`.
for (const entry of entries) {
  entry.rank = calculateBayesianEstimate(entry.stars, entry.votes, minVotes, totalStarsMean);
}

// Sort in place, highest rank first. A descending comparator replaces the
// former ascending sort + reverse(), which flipped the relative order of
// entries with equal rank and walked the array a second time.
console.log(entries.sort((a, b) => b.rank - a.rank));
/* =>
[
  {
    "id": 1,
    "stars": 4.5,
    "votes": 700,
    "rank": 4.403144900733082
  },
  {
    "id": 4,
    "stars": 4.8,
    "votes": 169,
    "rank": 4.401153363522287
  },
  {
    "id": 3,
    "stars": 5,
    "votes": 10,
    "rank": 4.105283746025629
  },
  {
    "id": 2,
    "stars": 3.7,
    "votes": 1000,
    "rank": 3.759393777451194
  },
  {
    "id": 5,
    "stars": 2.3,
    "votes": 8000,
    "rank": 2.3424199336877467
  }
]
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This took way too long to figure out 😅