Last active
November 17, 2022 06:25
-
-
Save piyush01123/2966470dea04227144630951d784e18b to your computer and use it in GitHub Desktop.
Huffman Encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Huffman Encoding: Given a frequency map of signs, create a new compressed representation. | |
Algorithm: | |
1. Create a data structure for node with prob, sign, left, right pointers | |
2. Create a min heap of nodes with prob as key. For each item in frequency map; insert node(p, s, NULL, NULL) into the min heap. | |
3. while the min heap has more than 1 element: Do | |
a) pop two nodes | |
b) create a parent node with these two nodes as its children | |
c) push parent node into the min heap | |
4. The last remaining node in the min heap is the root of the Huffman Tree. | |
5. Traverse through the tree and keep adding 0 for left and 1 for right to a prefix started with empty string. | |
6. Our signs reside only at the leaf nodes. Huffman Code for a sign is the accumulated prefix at the corresponding leaf node. | |
Complexity: nlog(n) | |
Entropy H = -Sum over signs [p(sign)*log_2(p(sign))] | |
Average code length = H | |
*/ | |
#include <iostream> | |
#include <unordered_map> | |
#include <vector> | |
#include <queue> | |
#include <string> | |
using namespace std; | |
class Node | |
{ | |
public: | |
char ch; | |
double prob; | |
Node *left, *right; | |
Node(char c, double p, Node *l, Node *r): ch(c),prob(p),left(l),right(r) {} | |
}; | |
bool comp(Node *a, Node *b){return a->prob > b->prob;} | |
void dfs(Node *root, unordered_map<char, string> &H, string cur="") | |
{ | |
if (!root->left && !root->right) | |
{ | |
H[root->ch] = cur; | |
return; | |
} | |
dfs(root->left, H, cur+'0'); | |
dfs(root->right, H, cur+'1'); | |
} | |
unordered_map<char, string> huffman( const unordered_map<char, double> freq_map) | |
{ | |
unordered_map<char, string> H; | |
priority_queue<Node *,vector<Node *>, function<bool(Node*,Node*)>> pq(comp); | |
for (auto item: freq_map) pq.push(new Node(item.first,item.second,NULL,NULL)); | |
while(pq.size()>1) | |
{ | |
Node *u = pq.top(); | |
pq.pop(); | |
Node *v = pq.top(); | |
pq.pop(); | |
pq.push(new Node('*',u->prob+v->prob,u,v)); | |
} | |
Node *root = pq.top(); | |
dfs(root,H); | |
return H; | |
} | |
string encode(string s, unordered_map<char, string> huffman_codes) | |
{ | |
string enc = ""; | |
for (char ch: s) enc+=huffman_codes[ch]; | |
return enc; | |
} | |
string decode(string s, unordered_map<char, string> huffman_codes) | |
{ | |
unordered_map<string,char> H; | |
for (auto p: huffman_codes) H[p.second] = p.first; | |
string cur = "",dec=""; | |
for (char ch: s) | |
{ | |
cur += ch; | |
if (!H.count(cur)) continue; | |
else | |
{ | |
dec += H[cur]; | |
cur = ""; | |
} | |
} | |
return dec; | |
} | |
int main() | |
{ | |
unordered_map<char, double> freq_map = {{'A', 0.1}, {'B', 0.4}, {'C', 0.06}, {'D', 0.1}, {'E', 0.04}, {'F', 0.3}}; | |
unordered_map<char, string> huffman_codes = huffman(freq_map); | |
for (auto p: huffman_codes) cout << p.first << ':' << p.second << endl; | |
string s = "AABACDEFBCDEFCBCABDEDCDAEEFF"; | |
string enc = encode(s,huffman_codes); | |
string dec = decode(enc,huffman_codes); | |
cout << "Original:" << s << endl << "Encoded:" << enc << endl << "Decoded:" << dec << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment