Skip to content

Instantly share code, notes, and snippets.

@matt-west
Created September 9, 2013 20:24
Show Gist options
  • Select an option

  • Save matt-west/6500993 to your computer and use it in GitHub Desktop.

Select an option

Save matt-west/6500993 to your computer and use it in GitHub Desktop.
Pearson Correlation (JavaScript)
/**
* @fileoverview Pearson correlation score algorithm.
* @author matt.west@kojilabs.com (Matt West)
* @license Copyright 2013 Matt West.
* Licensed under MIT (http://opensource.org/licenses/MIT).
*/
/**
* Calculate the person correlation score between two items in a dataset.
*
* @param {object} prefs The dataset containing data about both items that
* are being compared.
* @param {string} p1 Item one for comparison.
* @param {string} p2 Item two for comparison.
* @return {float} The pearson correlation score.
*/
function pearsonCorrelation(prefs, p1, p2) {
var si = [];
for (var key in prefs[p1]) {
if (prefs[p2][key]) si.push(key);
}
var n = si.length;
if (n == 0) return 0;
var sum1 = 0;
for (var i = 0; i < si.length; i++) sum1 += prefs[p1][si[i]];
var sum2 = 0;
for (var i = 0; i < si.length; i++) sum2 += prefs[p2][si[i]];
var sum1Sq = 0;
for (var i = 0; i < si.length; i++) {
sum1Sq += Math.pow(prefs[p1][si[i]], 2);
}
var sum2Sq = 0;
for (var i = 0; i < si.length; i++) {
sum2Sq += Math.pow(prefs[p2][si[i]], 2);
}
var pSum = 0;
for (var i = 0; i < si.length; i++) {
pSum += prefs[p1][si[i]] * prefs[p2][si[i]];
}
var num = pSum - (sum1 * sum2 / n);
var den = Math.sqrt((sum1Sq - Math.pow(sum1, 2) / n) *
(sum2Sq - Math.pow(sum2, 2) / n));
if (den == 0) return 0;
return num / den;
}
@nahidakbar
Copy link
Copy Markdown

function pearson(values) {
const n = values.length;

if (n == 0) return 0;

let meanX = 0;
let meanY = 0;
for (var i = 0; i < n; i++) {
meanX += values[i].x / n
meanY += values[i].y / n
}

let num = 0;
let den1 = 0;
let den2 = 0;

for (var i = 0; i < n; i++) {
let dx = (values[i].x - meanX);
let dy = (values[i].y - meanY);
num += dx * dy
den1 += dx * dx
den2 += dy * dy
}

const den = Math.sqrt(den1) * Math.sqrt(den2);

if (den == 0) return 0;

return num / den;
}

pearson([{x: 0, y: 0}, {x:1, y:1}])

Copy link
Copy Markdown

ghost commented Jun 7, 2018

thank u ,i just need this

@comfuture
Copy link
Copy Markdown

I've rewrote this function in es6.
The code is more concise, but it does not work with associative arrays that were possible in the original code.

/**
 * calculates pearson correlation
 * @param {number[]} d1
 * @param {number[]} d2
 */
export function corr(d1, d2) {
  let { min, pow, sqrt } = Math
  let add = (a, b) => a + b
  let n = min(d1.length, d2.length)
  if (n === 0) {
    return 0
  }
  [d1, d2] = [d1.slice(0, n), d2.slice(0, n)]
  let [sum1, sum2] = [d1, d2].map(l => l.reduce(add))
  let [pow1, pow2] = [d1, d2].map(l => l.reduce((a, b) => a + pow(b, 2), 0))
  let mulSum = d1.map((n, i) => n * d2[i]).reduce(add)
  let dense = sqrt((pow1 - pow(sum1, 2) / n) * (pow2 - pow(sum2, 2) / n))
  if (dense === 0) {
    return 0
  }
  return (mulSum - (sum1 * sum2 / n)) / dense
}

@joracha
Copy link
Copy Markdown

joracha commented Nov 10, 2019

function pearson(x, y){
	let promedio = (lista) => { return lista.reduce((s, a) => s + a, 0) / lista.length };
	let n = x.length, prom_x = promedio(x) , prom_y = promedio(y);
	return (x.map( (e, i, r) => (r = {x:e, y:y[i]}) ).reduce( (s, a) => s + a.x * a.y, 0) - n * prom_x * prom_y) / 
	((Math.sqrt(x.reduce( (s, a) => (s + a * a) , 0) - n * prom_x * prom_x)) *
	(Math.sqrt(y.reduce( (s, a) => (s + a * a) , 0) - n * prom_y * prom_y)));
} 

This is my version!. I just did it as a practice for an exam that I have soon

@mbaev
Copy link
Copy Markdown

mbaev commented Apr 24, 2021

@joracha your version is perfect! It's fastest version at this time!

Unfortunately I found that it doesn't respect to "empty" values, but it have to.

image

I've improved it a bit:

function pearson (x, y) {
  const promedio = l => l.reduce((s, a) => s + a, 0) / l.length
  const calc = (v, prom) => Math.sqrt(v.reduce((s, a) => (s + a * a), 0) - n * prom * prom)
  let n = x.length
  let nn = 0
  for (let i = 0; i < n; i++, nn++) {
    if ((!x[i] && x[i] !== 0) || (!y[i] && y[i] !== 0)) {
      nn--
      continue
    }
    x[nn] = x[i]
    y[nn] = y[i]
  }
  if (n !== nn) {
    x = x.splice(0, nn)
    y = y.splice(0, nn)
    n = nn
  }
  const prom_x = promedio(x), prom_y = promedio(y)
  return (x
      .map((e, i) => ({ x: e, y: y[i] }))
      .reduce((v, a) => v + a.x * a.y, 0) - n * prom_x * prom_y
  ) / (calc(x, prom_x) * calc(y, prom_y))
}

Test showed that the speed degradation is insignificant (6.92%):

// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')

const series1 = [80.026413,80.330908,76.68564,72.095302,75.473899,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]

console.log('initialPearson', initialPearson(series1, series2))
console.log('improvedPearson', improvedPearson(series1, series2))

benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)

Output

$ node test.js
initialPearson 0.6917288370450843
improvedPearson 0.6917288370450843
[
  {
    name: 'initialPearson',
    elapsed: 4397,
    checks: 3,
    totalIterations: 48518134,
    perSecondIterations: 11034372,
    isOptimized: null,
    diff: '0%'
  },
  {
    name: 'improvedPearson',
    elapsed: 4395,
    checks: 3,
    totalIterations: 45139154,
    perSecondIterations: 10270569,
    isOptimized: null,
    diff: '6.92%'
  }
]

In case of having a null it works as expected (and even faster actually):

// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')

const series1 = [80.026413,80.330908,76.68564,72.095302,null,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]

benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)
$ node test.js
initialPearson 0.5995223578720159
improvedPearson 0.6570987279478532
[
  {
    name: 'initialPearson',
    elapsed: 4136,
    checks: 2,
    totalIterations: 26454098,
    perSecondIterations: 6396058,
    isOptimized: null,
    diff: '0%'
  },
  {
    name: 'improvedPearson',
    elapsed: 4139,
    checks: 2,
    totalIterations: 26027407,
    perSecondIterations: 6288332,
    isOptimized: null,
    diff: '1.68%'
  }
]

Thank you!

@netik
Copy link
Copy Markdown

netik commented Jul 7, 2021

The above code has a bug, n is used before being defined.

let n = x.length

needs to be before the const calc ... line.

@mbaev
Copy link
Copy Markdown

mbaev commented Jul 11, 2021

The above code has a bug, n is used before being defined.

let n = x.length

needs to be before the const calc ... line.

No it hasn't. The third line defines function calc without calling it.

@netik
Copy link
Copy Markdown

netik commented Jul 11, 2021

The above code has a bug, n is used before being defined.
let n = x.length
needs to be before the const calc ... line.

No it hasn't. The third line defines function calc without calling it.

Well, it's still bad form. You're referencing a global that's defined later in scope and generally that's bad form.
The current version of ESLint doesn't like this, but as you're only giving an example here I guess it might be fine.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment