Skip to content

Instantly share code, notes, and snippets.

@theabbie
Last active April 10, 2022 05:10
Show Gist options
  • Save theabbie/c903a774aed3716a18e773da154beb72 to your computer and use it in GitHub Desktop.
Save theabbie/c903a774aed3716a18e773da154beb72 to your computer and use it in GitHub Desktop.
Huffman coding implementation
from collections import Counter
import heapq
class Node:
def __init__(self, char = None, left = None, right = None):
self.char = char
self.left = left
self.right = right
def __gt__(self, node):
return True
class Huffman:
def getLookup(s):
ctr = Counter(s)
heap = []
for c, count in ctr.items():
heapq.heappush(heap, (count, Node(char = c)))
while len(heap) > 1:
a, anode = heapq.heappop(heap)
b, bnode = heapq.heappop(heap)
heapq.heappush(heap, (a + b, Node(left = anode, right = bnode)))
rootctr, root = heapq.heappop(heap)
lookup = {}
nodes = [(root, "")]
while len(nodes) > 0:
curr, currpath = nodes.pop()
if curr:
if curr.char != None:
lookup[curr.char] = currpath
nodes.append((curr.left, currpath + "0"))
nodes.append((curr.right, currpath + "1"))
return lookup
def invlookup(lookup):
inv = {}
for c, code in lookup.items():
inv[code] = c
return inv
def encode(s, lookup):
return "".join([lookup[c] for c in s])
def decode(encoded, lookup):
n = len(encoded)
decoded = []
i = 0
for j in range(n + 1):
if encoded[i:j] in lookup:
decoded.append(lookup[encoded[i:j]])
i = j
return "".join(decoded)
s = "abbcccddddeeeeeffffffggggggg"
lookup = Huffman.getLookup(s)
encoded = Huffman.encode(s, lookup)
decoded = Huffman.decode(encoded, Huffman.invlookup(lookup))
ascii_encoded = "".join(["{:b}".format(ord(c)) for c in s])
print(f'PLAIN TEXT = "{s}"\n')
print("LOOKUP TABLE:")
for c, code in lookup.items():
print(f'"{c}" <=> {code}')
print(f'\nENCODED = "{encoded}"')
print("VS.")
print(f'ASCII ENCODED = "{ascii_encoded}"')
print(f'\nDECODED = "{decoded}"')
# OUTPUT
# PLAIN TEXT = "abbcccddddeeeeeffffffggggggg"
# LOOKUP TABLE:
# "e" <=> 111
# "d" <=> 110
# "g" <=> 10
# "f" <=> 01
# "c" <=> 001
# "b" <=> 0001
# "a" <=> 0000
# ENCODED = "00000001000100100100111011011011011111111111111101010101010110101010101010"
# VS.
# ASCII ENCODED = "1100001110001011000101100011110001111000111100100110010011001001100100110010111001011100101110010111001011100110110011011001101100110110011011001101100111110011111001111100111110011111001111100111"
# DECODED = "abbcccddddeeeeeffffffggggggg"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment