Last active
January 7, 2025 21:17
-
-
Save jabney/5018b4adc9b2bf488696 to your computer and use it in GitHub Desktop.
Javascript implementation of a Shannon entropy calculation in bits per symbol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// entropy.js MIT License © 2014 James Abney http://github.com/jabney | |
/*************************************** | |
* ES2015 | |
***************************************/ | |
/**
 * Shannon entropy of a string, in bits per symbol.
 *
 * A symbol is one Unicode code point, so a character outside the Basic
 * Multilingual Plane (an emoji, for example) counts as a single symbol.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty string.
 */
function entropy(str) {
  // Array.from walks the string by code point, keeping surrogate pairs intact.
  const symbols = Array.from(str)
  // Normalise by the number of symbols actually counted. str.length counts
  // UTF-16 code units, which would make the probabilities sum to less than 1
  // whenever the text contains astral characters.
  const len = symbols.length

  // Build a frequency map from the symbols.
  const frequencies = {}
  for (const c of symbols) {
    frequencies[c] = (frequencies[c] || 0) + 1
  }

  // Accumulate -p * log2(p) over every distinct symbol.
  let sum = 0
  for (const f of Object.values(frequencies)) {
    sum -= f / len * Math.log2(f / len)
  }
  return sum
}
// Print the entropy of each sample input; the expected result is noted
// beside every entry.
for (const sample of [
  '1223334444',       // 1.8464393446710154
  '0',                // 0
  '01',               // 1
  '0123',             // 2
  '01234567',         // 3
  '0123456789abcdef', // 4
]) {
  console.log(entropy(sample))
}
/*************************************** | |
* ES5 | |
***************************************/ | |
// Calculate the Shannon entropy of a string in bits per symbol.
//
// Attaches entropy(), bits() and log() to a `shannon` namespace object on the
// global object, reusing an existing namespace if one is already defined.
// Symbols are UTF-16 code units, because the input is split with split('').
(function(root) {
  'use strict';

  var shannon = root.shannon = root.shannon || Object.create(null);

  // Create a dictionary of character frequencies and iterate over it.
  // `evaluator`, when given, is called as evaluator(symbol, count) for every
  // distinct symbol. Returns the frequency dictionary.
  function process(s, evaluator) {
    // A prototype-less object, so inherited properties can never be mistaken
    // for symbols.
    var h = Object.create(null), k;
    s.split('').forEach(function(c) {
      h[c] = (h[c] || 0) + 1;
    });
    if (evaluator) {
      for (k in h) evaluator(k, h[k]);
    }
    return h;
  }

  // Measure the entropy of a string in bits per symbol.
  // Returns 0 for an empty string (no symbols are visited).
  shannon.entropy = function(s) {
    var sum = 0, len = s.length;
    process(s, function(k, f) {
      var p = f / len;
      // Math.log2 is not available in ES5, so change base explicitly.
      sum -= p * Math.log(p) / Math.log(2);
    });
    return sum;
  };

  // Measure the entropy of a string in total bits.
  shannon.bits = function(s) {
    return shannon.entropy(s) * s.length;
  };

  // Log the entropy of a string to the console.
  shannon.log = function(s) {
    console.log('Entropy of "' + s + '" in bits per symbol:', shannon.entropy(s));
  };
// Resolve the global object without assuming a browser window, so the same
// code also loads in Node and in workers. The typeof guards never throw on
// undeclared identifiers.
})(typeof globalThis !== 'undefined' ? globalThis :
   typeof window !== 'undefined' ? window :
   typeof self !== 'undefined' ? self :
   typeof global !== 'undefined' ? global : this);
// Log the entropy of each sample string; the expected value in bits per
// symbol is noted beside every entry.
[
  '1223334444',      // 1.8464393446710154
  '0',               // 0
  '01',              // 1
  '0123',            // 2
  '01234567',        // 3
  '0123456789abcdef' // 4
].forEach(function(sample) {
  shannon.log(sample);
});
Thank you very much, quite useful!
Glad you found it helpful. Here's an updated (and more concise) version for ECMAScript 2015:
/**
 * Shannon entropy in bits per symbol.
 *
 * Symbols are Unicode code points: a surrogate pair such as an emoji is
 * counted once rather than as two UTF-16 code units.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty string.
 */
function entropy(str) {
  // Spread splits the string by code point.
  const symbols = [...str]
  // Divide by the symbol count, not str.length: the latter counts UTF-16
  // units and disagrees with the frequency map for astral characters.
  const len = symbols.length
  // Build a frequency map from the symbols.
  const frequencies = symbols.reduce((freq, c) => {
    freq[c] = (freq[c] || 0) + 1
    return freq
  }, {})
  // Sum -p * log2(p) over the frequency of each distinct symbol.
  return Object.values(frequencies)
    .reduce((sum, f) => sum - f / len * Math.log2(f / len), 0)
}
// Sample inputs, each annotated with the entropy it is expected to print.
[
  '1223334444',       // 1.8464393446710154
  '0',                // 0
  '01',               // 1
  '0123',             // 2
  '01234567',         // 3
  '0123456789abcdef', // 4
].forEach((sample) => {
  console.log(entropy(sample))
})
And here's a version for people who don't have an exobrain and don't wear the Hubble telescope as glasses:
/**
 * Shannon entropy of a text, in bits per symbol, written out step by step.
 *
 * A symbol is one Unicode code point, so a character stored as a surrogate
 * pair (an emoji, for example) is counted as one symbol, not two.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Entropy in bits per symbol; 0 for an empty text.
 */
function entropy(text) {
  // Count the occurrences of every symbol. for...of walks the string by code
  // point, whereas text[i] would split surrogate pairs into two halves.
  const symbolCount = {};
  let textLength = 0;
  for (const symbol of text) {
    if (symbolCount[symbol] === undefined) {
      symbolCount[symbol] = 1;
    } else {
      symbolCount[symbol]++;
    }
    // Track the length in symbols so it matches what was counted above.
    textLength++;
  }

  // Add up -p * log2(p), where p is the share of the text each symbol takes.
  let complexity = 0;
  for (const count of Object.values(symbolCount)) {
    const probability = count / textLength;
    complexity = complexity - probability * Math.log2(probability);
  }
  return complexity;
}
Thanks for this, @jabney! Just what I needed after busting my head trying to figure out the math behind this for the last two nights.
I should add that was really thoughtful of you to provide different iterations of this through the years, and for demonstrating good use of built-in objects and methods.
Maybe it's just a style I'm familiar with, but somehow, the currying and closures made it easy to navigate the code without even paying much attention to the inline comments.
Love it! And thanks again for sharing!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you very much, quite useful!