Skip to content

Instantly share code, notes, and snippets.

@TobeTek
Created March 20, 2026 14:09
Show Gist options
  • Select an option

  • Save TobeTek/0e7cd69ef9c2a1c6ce7cc2ebcfc1d8dd to your computer and use it in GitHub Desktop.

Select an option

Save TobeTek/0e7cd69ef9c2a1c6ce7cc2ebcfc1d8dd to your computer and use it in GitHub Desktop.
An Attempt at Huffman Compression with Python: Supporting code for the tutorial walkthrough
class MinHeap:
@classmethod
def from_list(cls, arr: list):
instance = cls()
instance.heap = arr
print(f"Creating heap from list: {arr}")
n = len(instance.heap)
for i in range(n // 2 - 1, -1, -1):
print(f"\tHeapifying at index {i}")
instance._sift_down(i)
print(f"Final heap: {instance.heap}")
return instance
def __init__(self):
self.heap = []
def push(self, val):
self.heap.append(val)
print(f"Pushed {val}, heap: {self.heap}")
self._sift_up(len(self.heap) - 1)
def pop(self):
if len(self.heap) == 0:
print("Heap is empty")
return None
if len(self.heap) == 1:
return self.heap.pop()
root = self.heap[0]
self.heap[0] = self.heap.pop()
print(f"Popped {root}, heap: {self.heap}")
self._sift_down(0)
return root
def _sift_up(self, index):
while index > 0:
parent = (index - 1) // 2
if self.heap[index] < self.heap[parent]:
print(f"\tSift up: swapping {self.heap[index]} with {self.heap[parent]}")
self.heap[index], self.heap[parent] = self.heap[parent], self.heap[index]
index = parent
else:
break
def _sift_down(self, index):
smallest = index
left, right = 2 * index + 1, 2 * index + 2
if left < len(self.heap) and self.heap[left] < self.heap[smallest]:
smallest = left
if right < len(self.heap) and self.heap[right] < self.heap[smallest]:
smallest = right
if smallest != index:
print(f"\tSift down: swapping {self.heap[index]} with {self.heap[smallest]}")
self.heap[index], self.heap[smallest] = self.heap[smallest], self.heap[index]
self._sift_down(smallest)
def __len__(self):
return len(self.heap)
if __name__ == "__main__":
heap = MinHeap.from_list([10, 5, 20, 1, 15])
heap.pop()
heap.pop()
print('Final heap:', heap.heap)
from collections import Counter
from typing import Any, Dict, List, Tuple
from heap import MinHeap
def count_frequencies(text: str) -> Dict[str, int]:
words = text.split()
return dict(Counter(words))
def build_huffman_tree(frequencies: Dict[str, int]) -> Tuple[Dict[str, str], Any]:
heap = [(f, [(s, (0, 0))]) for s, f in frequencies.items()]
heap = MinHeap.from_list(heap)
while len(heap) > 1:
a_freq, a_list = heap.pop()
b_freq, b_list = heap.pop()
merged_list = []
for symbol, (bits, val) in a_list:
merged_list.append((symbol, (bits + 1, val)))
for symbol, (bits, val) in b_list:
# Logic: (1 << bits) + val effectively adds a '1' to the front
merged_list.append((symbol, (bits + 1, (1 << bits) + val)))
new_freq = a_freq + b_freq
heap.push((new_freq, merged_list))
final_list = heap.pop()[1]
# Convert their (bitsize, value) format back to your string format "0101"
codes = {}
for symbol, (bits, val) in final_list:
codes[symbol] = bin(val)[2:].rjust(bits, "0")
return codes, final_list
def huffman_encode(text: str, codes: Dict[str, str]) -> str:
words = text.split()
return "".join(codes[word] for word in words if word in codes)
def huffman_decode(encoded: str, codes: Dict[str, str]) -> str:
reverse_codes = {v: k for k, v in codes.items()}
decoded = []
current_buffer = ""
for bit in encoded:
current_buffer += bit
if current_buffer in reverse_codes:
symbol = reverse_codes[current_buffer]
decoded.append(symbol)
current_buffer = ""
return " ".join(decoded)
if __name__ == "__main__":
text= "one ring to rule them all one ring to find them one ring to bring them and in the darkness bind them"
frequencies = count_frequencies(text)
codes, _ = build_huffman_tree(frequencies)
print("\nHUFFMAN CODES:")
for symbol, code in sorted(codes.items(), key=lambda x: (len(x[1]), x[0])):
print(f"{symbol:10}: {code}")
encoded = huffman_encode(text, codes)
decoded = huffman_decode(encoded, codes)
print(f"\nEncoded : {encoded}")
print(f"Decoded : {decoded}")
print(f"Success : {text == decoded}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment