-
-
Save ppseprus/afab8500dec6394c401734cb6922d220 to your computer and use it in GitHub Desktop.
Javascript implementation of a Shannon entropy calculation in bits per symbol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shannon entropy
/**
 * Calculate the Shannon entropy of a string in bits per symbol.
 *
 * A "symbol" is one Unicode code point (what string iteration yields), so
 * characters outside the BMP count once rather than as two UTF-16 units.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty string.
 */
const entropy = str => {
  // Tally symbols by direct iteration instead of building a RegExp per
  // character: a RegExp throws on metacharacters such as '+', '*' or '(' and
  // miscounts '.', and rescanning the string per distinct symbol is O(n * k).
  const counts = new Map();
  let total = 0;
  for (const chr of str) {
    counts.set(chr, (counts.get(chr) || 0) + 1);
    total += 1;
  }

  // Divide by the number of symbols actually counted, not str.length, so the
  // probabilities always sum to 1 even when surrogate pairs are present.
  return [...counts.values()].reduce((sum, frequency) => {
    const p = frequency / total;
    return sum + p * Math.log2(1 / p);
  }, 0);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// entropy.js MIT License © 2014 James Abney http://github.com/jabney | |
// ES6 portation MIT License © 2017 Peter Seprus http://github.com/ppseprus | |
// Calculate the Shannon entropy of a string in bits per symbol. | |
/**
 * Shannon entropy helpers, published on a shared global `shannon` namespace.
 *
 * In a browser the namespace lives on `window`, as before; in environments
 * without `window` (Node, workers) it falls back to `globalThis` so the
 * script no longer throws on load. An already existing `shannon` object is
 * reused rather than replaced.
 *
 * A "symbol" is one Unicode code point, i.e. what string iteration yields.
 */
(function (root) {
  'use strict';

  // Reuse an existing namespace if another script already created one.
  root.shannon = root.shannon || Object.create(null);
  const shannon = root.shannon;

  /**
   * Create an array of symbol frequencies, in order of first appearance.
   *
   * Counting is done by direct iteration rather than with a RegExp built
   * from each character: such a RegExp throws on metacharacters like '+',
   * '*' or '(' and miscounts '.'.
   *
   * @param {string} str - Text to analyse.
   * @returns {number[]} Occurrence count of each distinct symbol.
   */
  const getFrequencies = str => {
    const counts = new Map();
    for (const chr of str) {
      counts.set(chr, (counts.get(chr) || 0) + 1);
    }
    return [...counts.values()];
  };

  /**
   * Measure the entropy of a string in bits per symbol.
   *
   * @param {string} str - Text to measure.
   * @returns {number} Entropy in bits per symbol; 0 for an empty string.
   */
  shannon.entropy = str => {
    const frequencies = getFrequencies(str);
    // Use the number of counted symbols, not str.length (UTF-16 units), so
    // the probabilities sum to 1 even with surrogate pairs in the input.
    const total = frequencies.reduce((count, frequency) => count + frequency, 0);
    return frequencies.reduce((sum, frequency) => {
      const p = frequency / total;
      return sum - (p * Math.log(p) / Math.log(2));
    }, 0);
  };

  /**
   * Measure the entropy of a string in total bits.
   *
   * @param {string} str - Text to measure.
   * @returns {number} Bits per symbol multiplied by the number of symbols.
   */
  shannon.bits = str => shannon.entropy(str) * [...str].length;

  /**
   * Log the entropy of a string to the console.
   *
   * @param {string} str - Text to measure and report.
   * @returns {void}
   */
  shannon.log = str => console.log(`Entropy of "${str}" in bits per symbol:`, shannon.entropy(str));
})(typeof window !== 'undefined' ? window : globalThis);
// Demo: log the entropy of sample strings; the expected bits-per-symbol
// value is noted next to each sample.
for (const sample of [
  '1223334444', // 1.8464393446710154
  '0', // 0
  '01', // 1
  '0123', // 2
  '01234567', // 3
  '0123456789abcdef', // 4
]) {
  shannon.log(sample);
}
`new RegExp(chr, 'g')` will fail with unescaped regex metacharacters such as `+`, `*`, `{`, `}`, etc.
This may be circumvented by using `new RegExp('[' + chr + ']', 'g')` (although a character class still mishandles `\`, `]` and `^`),
or by not using a RegExp at all:
/**
 * Count how often each symbol occurs in a string.
 *
 * The tally is kept in a plain object, so the returned counts follow
 * `Object.values` ordering: integer-like keys such as digits come first in
 * ascending order, then the remaining symbols in order of first appearance.
 *
 * @param {string} str - Text to analyse; iterated symbol by symbol.
 * @returns {number[]} Occurrence count of each distinct symbol.
 */
function frequencies(str) {
  const tally = [...str].reduce((counts, symbol) => {
    counts[symbol] = (counts[symbol] ?? 0) + 1;
    return counts;
  }, {});
  return Object.values(tally);
}
This is a fork from 2017, so that I'd have a copy... You should probably go to the original.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Amazing, thanks for sharing!