Skip to content

Instantly share code, notes, and snippets.

@meddulla
Last active March 8, 2019 06:58
Show Gist options
  • Save meddulla/f545a6a521029313bb17dd796573c699 to your computer and use it in GitHub Desktop.
Claude Shannon's entropy
import itertools as it
import math
from collections import Counter
def window(iterable, size):
    """Yield overlapping tuples of ``size`` consecutive items of *iterable*.

    e.g. window("abcd", 2) -> ('a','b'), ('b','c'), ('c','d').

    Args:
        iterable: any iterable (sequence or one-shot iterator).
        size: window length; if it exceeds the input length, nothing is yielded.

    Returns:
        A zip iterator of ``size``-tuples.
    """
    # tee gives `size` independent copies, so this also works on one-shot
    # iterators; the original built islices over the same object, which
    # mutually consumed a generator input and produced wrong windows.
    copies = it.tee(iterable, size)
    for skip, copy in enumerate(copies):
        # Stagger the i-th copy by i elements.
        for _ in range(skip):
            next(copy, None)
    return zip(*copies)
def calculate_shannon_entropy(mystring, ngram=1):
    """Return the Shannon entropy of *mystring*, in bits per n-gram.

    The overlapping n-grams of length ``ngram`` are the symbols of the
    distribution: H = -sum(p * log2(p)) over the relative frequency p of
    each distinct n-gram.

    Args:
        mystring: the text to measure.
        ngram: sliding-window length (default 1 = single characters).

    Returns:
        Entropy as a float; 0.0 for an empty string or when ``ngram``
        exceeds ``len(mystring)`` (the original raised ZeroDivisionError
        in those cases).
    """
    # Overlapping n-grams, e.g. "abc", ngram=2 -> ('a','b'), ('b','c').
    chunks = list(zip(*(mystring[i:] for i in range(ngram))))
    total = len(chunks)
    if total == 0:
        return 0.0
    # Counter builds the frequency table in O(n); the original called
    # list.count() and list.index() per unique n-gram, which is O(n^2).
    counts = Counter(chunks)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())
# print(calculate_shannon_entropy("aaaaaaaaaaaaaaaaaa"))
# print(calculate_shannon_entropy("abababababababababab"))
# print(calculate_shannon_entropy("12313542132422344"))
# As the ngram param goes up, the certainty regarding the prediction goes up
# and so the entropy goes down.
# Expected: 4.7, 4.643856189774723, 4.584962500721156, 4.523561956057013
for n in (1, 2, 3, 4):
    print(calculate_shannon_entropy("abcdefghijklmnopqrstuvwxyz", ngram=n))
import math
def calculate_shannon_entropy(mystring):
    """Return the Shannon entropy of *mystring*, in bits per character.

    H = -sum(p * log2(p)) over the relative frequency p of each distinct
    character. NOTE: this redefines (shadows) the n-gram variant above —
    the gist originally held two separate files.

    Args:
        mystring: the text to measure.

    Returns:
        Entropy as a float; 0.0 for an empty string (the original raised
        ZeroDivisionError on "").
    """
    total_chars = len(mystring)
    if total_chars == 0:
        return 0.0
    # Counter counts every character in one O(n) pass; the original called
    # str.count() once per unique character, which is O(n^2).
    counts = Counter(mystring)
    return -sum((c / total_chars) * math.log2(c / total_chars) for c in counts.values())
# print(calculate_entropy("anasofia"))
for sample in ("12313542132422344", "12111111111111111"):
    print(calculate_shannon_entropy(sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment