Forked from jaradc/entropy_calculation_in_python.py
Created September 6, 2020 at 21:53.
Save virtadpt/a129f94e47c113f983a1ee361f837eb8 to your computer and use it in GitHub Desktop.
Four different ways to calculate entropy in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard library first, then third-party packages (PEP 8 grouping).
import timeit
from math import e, log

import numpy as np
import pandas as pd
from scipy.stats import entropy
def entropy1(labels, base=None):
    """Entropy of the label distribution, delegated to scipy.stats.entropy.

    Counts are passed unnormalized; scipy normalizes them internally.
    Natural log is used when `base` is None (scipy's default).
    """
    _, counts = np.unique(labels, return_counts=True)
    return entropy(counts, base=base)
def entropy2(labels, base=None):
    """Compute the entropy of a label distribution with a manual loop.

    Parameters
    ----------
    labels : sequence of hashable values.
    base : log base; natural log (base e) when None.

    Returns
    -------
    float entropy; 0.0 for empty/singleton input or a single class.
    """
    n_labels = len(labels)
    if n_labels <= 1:
        # Fewer than two observations carry no information.
        return 0.0
    # `value` from np.unique was unused in the original; discard it.
    _, counts = np.unique(labels, return_counts=True)
    probs = counts / n_labels
    # counts from np.unique are always >= 1, so this equals len(probs);
    # kept for parity with the original guard against a single class.
    n_classes = np.count_nonzero(probs)
    if n_classes <= 1:
        return 0.0
    base = e if base is None else base
    ent = 0.0
    for p in probs:
        ent -= p * log(p, base)
    return ent
def entropy3(labels, base=None):
    """Entropy via pandas value_counts.

    `normalize=True` yields relative frequencies directly; the log base
    defaults to e (natural log) and is applied via the change-of-base rule.
    """
    if base is None:
        base = e
    freq = pd.Series(labels).value_counts(normalize=True, sort=False)
    return -np.sum(freq * np.log(freq) / np.log(base))
def entropy4(labels, base=None):
    """Entropy via numpy unique counts, fully vectorized.

    Probabilities are the unique-value counts normalized by their sum;
    natural log is used unless a `base` is supplied (change-of-base rule).
    """
    _, counts = np.unique(labels, return_counts=True)
    probs = counts / counts.sum()
    if base is None:
        base = e
    return -np.sum(probs * np.log(probs) / np.log(base))
# Demo: print the entropy of the same sample from all four implementations
# (they should agree up to floating-point noise).
labels = [1, 3, 5, 2, 3, 5, 3, 2, 1, 3, 4, 5]
for compute in (entropy1, entropy2, entropy3, entropy4):
    print(compute(labels))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.