Claude Shannon's entropy
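Both files below estimate the Shannon entropy of a string, H = -sum_i p_i * log2(p_i), where p_i is the empirical frequency of each symbol (or of each n-gram, in the first version); the result is measured in bits per symbol.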
import math
import itertools as it


def window(iterable, size):
    # Produce overlapping n-grams by zipping `size` shifted copies of the iterable.
    shifted_starts = [it.islice(iterable, s, None) for s in range(size)]
    return zip(*shifted_starts)


def calculate_shannon_entropy(mystring, ngram=1):
    as_list = list(mystring)
    chunked_items = list(window(as_list, ngram))
    total_chars = float(len(chunked_items))
    # Unique n-grams, kept in order of first appearance.
    sorted_uniques = sorted(set(chunked_items), key=chunked_items.index)
    freq_table = [chunked_items.count(l) / total_chars for l in sorted_uniques]
    entropies = []
    for letter_freq in freq_table:
        entropy = letter_freq * math.log2(letter_freq)
        entropies.append(entropy)
    return -sum(entropies)


# print(calculate_shannon_entropy("aaaaaaaaaaaaaaaaaa"))
# print(calculate_shannon_entropy("abababababababababab"))
# print(calculate_shannon_entropy("12313542132422344"))

# As the ngram param goes up, the certainty regarding the prediction goes up,
# and so the entropy goes down. (For this string every n-gram occurs exactly
# once, so the entropy is log2(26 - ngram + 1), which shrinks as ngram grows.)
print(calculate_shannon_entropy("abcdefghijklmnopqrstuvwxyz", ngram=1))  # 4.7 = log2(26)
print(calculate_shannon_entropy("abcdefghijklmnopqrstuvwxyz", ngram=2))  # 4.643856189774723 = log2(25)
print(calculate_shannon_entropy("abcdefghijklmnopqrstuvwxyz", ngram=3))  # 4.584962500721156 = log2(24)
print(calculate_shannon_entropy("abcdefghijklmnopqrstuvwxyz", ngram=4))  # 4.523561956057013 = log2(23)
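For comparison, here is a minimal sketch of the same per-n-gram calculation using collections.Counter, which avoids the quadratic cost of calling .count() once per unique chunk; the name shannon_entropy_counter is my own, not from the gist.

import math
from collections import Counter


def shannon_entropy_counter(s, ngram=1):
    # Slide a length-`ngram` window over the string; each slice is one chunk.
    chunks = [s[i:i + ngram] for i in range(len(s) - ngram + 1)]
    counts = Counter(chunks)
    total = len(chunks)
    # H = -sum(p * log2(p)) over the empirical n-gram distribution.
    return -sum((c / total) * math.log2(c / total) for c in counts.values())


print(shannon_entropy_counter("abcdefghijklmnopqrstuvwxyz", ngram=2))  # ~4.6439, i.e. log2(25)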
The second file applies the same formula directly to single characters (the ngram=1 case):
import math


def calculate_shannon_entropy(mystring):
    total_chars = float(len(mystring))
    # Unique characters, kept in order of first appearance.
    sorted_uniques = sorted(set(mystring), key=mystring.index)
    freq_table = [mystring.count(l) / total_chars for l in sorted_uniques]
    entropies = []
    for letter_freq in freq_table:
        entropy = letter_freq * math.log2(letter_freq)
        entropies.append(entropy)
    return -sum(entropies)


# print(calculate_shannon_entropy("anasofia"))
print(calculate_shannon_entropy("12313542132422344"))
print(calculate_shannon_entropy("12111111111111111"))
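As a worked check (my own arithmetic, not from the gist): in "12111111111111111" the symbol '1' occurs 16 times out of 17 and '2' once, so H = -(16/17)*log2(16/17) - (1/17)*log2(1/17) ≈ 0.082 + 0.240 ≈ 0.32 bits, while the more varied "12313542132422344" comes out around 2.18 bits.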