Last active
May 29, 2020 19:46
-
-
Save MNoorFawi/09d8789edb66efd9344153d3841fa63b to your computer and use it in GitHub Desktop.
Huffman Coding in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bisect import insort | |
| from collections import deque | |
| ## to store codes for huffman coding | |
| class CodeHeap(): | |
| def __init__(self): | |
| self._container = deque([]) | |
| def push(self, item): | |
| insort(self._container, item) # in by sort | |
| def pop(self): | |
| return self._container.popleft() # FIFO | |
| def __getitem__(self, item): | |
| return self._container[item] | |
| def __len__(self): | |
| return len(self._container) | |
| def __repr__(self): | |
| return repr(self._container) | |
| def huffman_coding(symb2freq): | |
| """Huffman coding to encode symbols/sequences. | |
| It takes the Counter of Compressed object and | |
| returns a code dictionary based on occurences""" | |
| if len(symb2freq) == 1: | |
| symb, _ = symb2freq | |
| return {symb: '0'} | |
| symb_code_queue = CodeHeap() | |
| code_dict = {symbs: "" for symbs in symb2freq} | |
| for symbs, freq in symb2freq.items(): | |
| symb_code_queue.push((freq, [symbs])) | |
| while len(symb_code_queue) > 1: | |
| low_freq, low_symbs = symb_code_queue.pop() | |
| high_freq, high_symbs = symb_code_queue.pop() | |
| for symb in low_symbs: | |
| code_dict[symb] = "0" + code_dict[symb] | |
| for symb in high_symbs: | |
| code_dict[symb] = "1" + code_dict[symb] | |
| symb_code_queue.push( | |
| (low_freq + high_freq, | |
| sorted(low_symbs + high_symbs) | |
| ) | |
| ) | |
| return code_dict |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment