-
-
Save matt-west/6500993 to your computer and use it in GitHub Desktop.
| /** | |
| * @fileoverview Pearson correlation score algorithm. | |
| * @author [email protected] (Matt West) | |
| * @license Copyright 2013 Matt West. | |
| * Licensed under MIT (http://opensource.org/licenses/MIT). | |
| */ | |
| /** | |
| * Calculate the person correlation score between two items in a dataset. | |
| * | |
| * @param {object} prefs The dataset containing data about both items that | |
| * are being compared. | |
| * @param {string} p1 Item one for comparison. | |
| * @param {string} p2 Item two for comparison. | |
| * @return {float} The pearson correlation score. | |
| */ | |
| function pearsonCorrelation(prefs, p1, p2) { | |
| var si = []; | |
| for (var key in prefs[p1]) { | |
| if (prefs[p2][key]) si.push(key); | |
| } | |
| var n = si.length; | |
| if (n == 0) return 0; | |
| var sum1 = 0; | |
| for (var i = 0; i < si.length; i++) sum1 += prefs[p1][si[i]]; | |
| var sum2 = 0; | |
| for (var i = 0; i < si.length; i++) sum2 += prefs[p2][si[i]]; | |
| var sum1Sq = 0; | |
| for (var i = 0; i < si.length; i++) { | |
| sum1Sq += Math.pow(prefs[p1][si[i]], 2); | |
| } | |
| var sum2Sq = 0; | |
| for (var i = 0; i < si.length; i++) { | |
| sum2Sq += Math.pow(prefs[p2][si[i]], 2); | |
| } | |
| var pSum = 0; | |
| for (var i = 0; i < si.length; i++) { | |
| pSum += prefs[p1][si[i]] * prefs[p2][si[i]]; | |
| } | |
| var num = pSum - (sum1 * sum2 / n); | |
| var den = Math.sqrt((sum1Sq - Math.pow(sum1, 2) / n) * | |
| (sum2Sq - Math.pow(sum2, 2) / n)); | |
| if (den == 0) return 0; | |
| return num / den; | |
| } |
@joracha your version is perfect! It's fastest version at this time!
Unfortunately I found that it doesn't respect to "empty" values, but it have to.
I've improved it a bit:
function pearson (x, y) {
const promedio = l => l.reduce((s, a) => s + a, 0) / l.length
const calc = (v, prom) => Math.sqrt(v.reduce((s, a) => (s + a * a), 0) - n * prom * prom)
let n = x.length
let nn = 0
for (let i = 0; i < n; i++, nn++) {
if ((!x[i] && x[i] !== 0) || (!y[i] && y[i] !== 0)) {
nn--
continue
}
x[nn] = x[i]
y[nn] = y[i]
}
if (n !== nn) {
x = x.splice(0, nn)
y = y.splice(0, nn)
n = nn
}
const prom_x = promedio(x), prom_y = promedio(y)
return (x
.map((e, i) => ({ x: e, y: y[i] }))
.reduce((v, a) => v + a.x * a.y, 0) - n * prom_x * prom_y
) / (calc(x, prom_x) * calc(y, prom_y))
}Test showed that the speed degradation is insignificant (6.92%):
// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')
const series1 = [80.026413,80.330908,76.68564,72.095302,75.473899,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]
console.log('initialPearson', initialPearson(series1, series2))
console.log('improvedPearson', improvedPearson(series1, series2))
benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)Output
$ node test.js
initialPearson 0.6917288370450843
improvedPearson 0.6917288370450843
[
{
name: 'initialPearson',
elapsed: 4397,
checks: 3,
totalIterations: 48518134,
perSecondIterations: 11034372,
isOptimized: null,
diff: '0%'
},
{
name: 'improvedPearson',
elapsed: 4395,
checks: 3,
totalIterations: 45139154,
perSecondIterations: 10270569,
isOptimized: null,
diff: '6.92%'
}
]In case of having a null it works as expected (and even faster actually):
// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')
const series1 = [80.026413,80.330908,76.68564,72.095302,null,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]
benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)$ node test.js
initialPearson 0.5995223578720159
improvedPearson 0.6570987279478532
[
{
name: 'initialPearson',
elapsed: 4136,
checks: 2,
totalIterations: 26454098,
perSecondIterations: 6396058,
isOptimized: null,
diff: '0%'
},
{
name: 'improvedPearson',
elapsed: 4139,
checks: 2,
totalIterations: 26027407,
perSecondIterations: 6288332,
isOptimized: null,
diff: '1.68%'
}
]Thank you!
The above code has a bug, n is used before being defined.
let n = x.length
needs to be before the const calc ... line.
The above code has a bug, n is used before being defined.
let n = x.lengthneeds to be before the
const calc ...line.
No it hasn't. The third line defines function calc without calling it.
The above code has a bug, n is used before being defined.
let n = x.length
needs to be before theconst calc ...line.No it hasn't. The third line defines function
calcwithout calling it.
Well, it's still bad form. You're referencing a global that's defined later in scope and generally that's bad form.
The current version of ESLint doesn't like this, but as you're only giving an example here I guess it might be fine.

This is my version!. I just did it as a practice for an exam that I have soon