Skip to content

Instantly share code, notes, and snippets.

@ppseprus
Forked from jabney/entropy.js
Last active August 22, 2024 10:13
Show Gist options
  • Save ppseprus/afab8500dec6394c401734cb6922d220 to your computer and use it in GitHub Desktop.
Save ppseprus/afab8500dec6394c401734cb6922d220 to your computer and use it in GitHub Desktop.
Javascript implementation of a Shannon entropy calculation in bits per symbol
// Shannon entropy
/**
 * Calculate the Shannon entropy of a string in bits per symbol.
 *
 * A symbol is one Unicode code point (the unit produced by iterating a
 * string), so characters outside the Basic Multilingual Plane, such as
 * emoji, count as a single symbol.
 *
 * @param {string} str - The text to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty string.
 */
const entropy = str => {
  // Tally symbols in a single pass. Building a RegExp from each raw character
  // breaks on metacharacters: '+' or '(' throw, and '.' matches everything.
  const counts = new Map();
  let total = 0;
  for (const chr of str) {
    counts.set(chr, counts.has(chr) ? counts.get(chr) + 1 : 1);
    total += 1;
  }

  // Divide by the number of symbols actually counted rather than str.length,
  // which counts UTF-16 code units and would skew p for astral characters.
  return [...counts.values()].reduce((sum, frequency) => {
    const p = frequency / total;
    return sum + p * Math.log2(1 / p);
  }, 0);
};
// entropy.js MIT License © 2014 James Abney http://github.com/jabney
// ES6 portation MIT License © 2017 Peter Seprus http://github.com/ppseprus
// Calculate the Shannon entropy of a string in bits per symbol.
(function (root) {
  'use strict';

  // Reuse an existing namespace if one is already installed on the global
  // object; otherwise start from a prototype-free object.
  root.shannon = root.shannon || Object.create(null);
  const shannon = root.shannon;

  /**
   * Create an array of symbol frequencies, in order of first appearance.
   *
   * Symbols are Unicode code points. Counting is done with a Map rather than
   * a RegExp built from each character, because regex metacharacters in the
   * input ('+', '(', '.', ...) would otherwise throw or miscount.
   *
   * @param {string} str - The text to analyse.
   * @returns {number[]} Occurrence count of each distinct symbol.
   */
  const getFrequencies = str => {
    const counts = new Map();
    for (const chr of str) {
      counts.set(chr, counts.has(chr) ? counts.get(chr) + 1 : 1);
    }
    return [...counts.values()];
  };

  /**
   * Measure the entropy of a string in bits per symbol.
   *
   * @param {string} str - The text to measure.
   * @returns {number} Entropy in bits per symbol; 0 for an empty string.
   */
  shannon.entropy = str => {
    const frequencies = getFrequencies(str);
    // Total number of symbols counted (code points, not UTF-16 code units).
    const total = frequencies.reduce((count, frequency) => count + frequency, 0);
    return frequencies.reduce((sum, frequency) => {
      const p = frequency / total;
      return sum - (p * Math.log(p) / Math.log(2));
    }, 0);
  };

  /**
   * Measure the entropy of a string in total bits.
   *
   * @param {string} str - The text to measure.
   * @returns {number} Bits per symbol multiplied by the number of symbols.
   */
  shannon.bits = str => shannon.entropy(str) * [...str].length;

  /**
   * Log the entropy of a string to the console.
   *
   * @param {string} str - The text to measure.
   * @returns {void}
   */
  shannon.log = str => console.log(`Entropy of "${str}" in bits per symbol:`, shannon.entropy(str));
  // globalThis is the same object as window in browsers and also exists in
  // Node and workers; fall back to window on engines that predate it.
})(typeof globalThis !== 'undefined' ? globalThis : window);
// Demo: log the entropy of each sample string, in this order.
// The expected bits-per-symbol value is noted beside each sample.
for (const sample of [
  '1223334444', // 1.8464393446710154
  '0', // 0
  '01', // 1
  '0123', // 2
  '01234567', // 3
  '0123456789abcdef', // 4
]) {
  shannon.log(sample);
}
@tiagocesar
Copy link

Amazing, thanks for sharing!

@KooiInc
Copy link

KooiInc commented Aug 22, 2024

new RegExp(chr, 'g') will fail with unescaped characters like +*{} etc. This may be circumvented by using new RegExp('[' + chr + ']', 'g'). Or by not using a RegExp at all:

function frequencies(str) {
  const res = {};
  str = [...str];
  for (let chr of str) { res[chr] = (res[chr] ?? 0) + 1;  }
  return Object.values(res);
}

@ppseprus
Copy link
Author

This is a fork from 2017, so that I'd have a copy... You should probably go to the original.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment