Last active
October 6, 2021 05:44
-
-
Save misterpoloy/27387ed9d6ded2106a3d53e5d165d7d7 to your computer and use it in GitHub Desktop.
Huffman Encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://www.youtube.com/watch?v=co4_ahEDCho | |
#include <iostream> | |
#include <unordered_map> | |
#include "minHeap.hpp" | |
using namespace std; | |
// used to store frequency of each caracter | |
unordered_map<char, int> freq; | |
// min heap is the base structure to build the code tree | |
MinHeap* minHeap = new MinHeap(); | |
// // to map each character its huffman value | |
unordered_map<char, string> codes; | |
// Util used to count frequency | |
void calFreq(string str) { | |
for (char c : str) | |
freq[c]++; | |
} | |
// utility function to store characters along with | |
// there huffman value in a hash table | |
void storeCodes(struct MinHeapNode* root, string str) { | |
if (root == NULL) return; | |
if (root->data != '$') | |
codes[root->data]=str; | |
storeCodes(root->left, str + "0"); | |
storeCodes(root->right, str + "1"); | |
} | |
// The main function that builds Huffman tree | |
void huffmanCodes() { | |
struct MinHeapNode *left, *right, *top; | |
// Step 1. Build a min heap that contains n nodes | |
for (auto i = freq.begin(); i != freq.end(); i++) | |
minHeap->push(new MinHeapNode(i->first, i->second)); | |
// Step 2 Extract two minimum frequency nodes from min heap | |
while (minHeap->size() != 1) { | |
left = minHeap->top(); | |
minHeap->pop(); | |
right = minHeap->top(); | |
minHeap->pop(); | |
top = new MinHeapNode('$', left->freq + right->freq); | |
top->left = left; | |
top->right = right; | |
minHeap->push(top); | |
} | |
storeCodes(minHeap->top(), ""); | |
} | |
// function iterates through the encoded string s | |
// if s[i]=='1' then move to node->right | |
// if s[i]=='0' then move to node->left | |
// if leaf node append the node->data to our output string | |
string decode_file(struct MinHeapNode* root, string s) | |
{ | |
string ans = ""; | |
struct MinHeapNode* curr = root; | |
for (int i=0;i<s.size();i++) | |
{ | |
if (s[i] == '0') | |
curr = curr->left; | |
else | |
curr = curr->right; | |
// reached leaf node | |
if (curr->left==NULL and curr->right==NULL) | |
{ | |
ans += curr->data; | |
curr = root; | |
} | |
} | |
return ans; | |
} | |
int main(int argc, const char * argv[]) { | |
string str = "geeksforgeeks"; | |
string encodedString, decodedString; | |
// STEP 1: calculate frequency | |
calFreq(str); | |
// STEP 2: Get huffman codes | |
huffmanCodes(); | |
// RESULT codes and frequencies | |
cout << "Character With there Frequencies:\n"; | |
for (auto v=codes.begin(); v!=codes.end(); v++) | |
cout << v->first <<' ' << v->second << endl; | |
// RESULT coded | |
for (auto i: str) | |
encodedString+=codes[i]; | |
cout << "\nEncoded Huffman data:\n" << encodedString << "\n" << endl; | |
// RESULT decoded | |
decodedString = decode_file(minHeap->top(), encodedString); | |
cout << "\nDecoded Huffman Data:\n" << decodedString << endl; | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "minHeap.hpp" | |
#include <iostream> | |
#include <vector> | |
using namespace std; | |
MinHeapNode::MinHeapNode(char data, int freq) { | |
left = right = NULL; | |
this->data = data; | |
this->freq = freq; | |
} | |
// Returns the parent index of a given index | |
int MinHeap::parent(int i) {return (i - 1) / 2;} | |
// Returns the left index | |
int MinHeap::left(int i) {return 2 * i + 1;} | |
// Returns the right index child of a given index | |
int MinHeap::right(int i) {return 2 * i + 2;} | |
// Return the size of the heap | |
unsigned int MinHeap::size() { return (int)heap.size(); } | |
void MinHeap::push(MinHeapNode* node) { | |
heap.push_back(node); | |
int index = size() - 1; | |
// Fix the min heap property | |
heapify_up(index); | |
} | |
void MinHeap::pop() { | |
heap[0] = heap.back(); | |
heap.pop_back(); | |
// call heapify down on root node | |
heapify_down(0); | |
} | |
MinHeapNode* MinHeap::top() { | |
return heap.at(0); | |
} | |
// // Recursive Heapify-down algorithm, used in remove/pop | |
void MinHeap::heapify_down(int i) { | |
// Set initial values | |
int l = left(i); | |
int r = right(i); | |
int smallest = i; | |
//Find the smallest element | |
if ( l < size() && heap[l]->freq < heap[i]->freq){ | |
smallest = l; | |
} else if ( r < size() && heap[r]->freq < heap[i]->freq){ | |
smallest = r; | |
} | |
// Swap elements to their correct place | |
if (smallest != i) { | |
swap(heap[i], heap[smallest]); | |
heapify_down(smallest); | |
} | |
} | |
// Recursive Heapigy-up algorithm (is the bubble of the min value when added) / push | |
void MinHeap::heapify_up(int i) { | |
// Check if node at index i and it's parent violates the heap property | |
if (i && heap[parent(i)]->freq > heap[i]->freq) { | |
swap(heap[i], heap[parent(i)]); | |
heapify_up(parent(i)); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef minHeap_hpp | |
#define minHeap_hpp | |
#include <iostream> | |
#include <vector> | |
using namespace std; | |
struct MinHeapNode { | |
char data; | |
unsigned freq; | |
struct MinHeapNode *left, *right; | |
MinHeapNode(char data, int freq); | |
}; | |
class MinHeap { | |
private: | |
vector<MinHeapNode*> heap; | |
int parent(int i); | |
int left(int i); | |
int right(int i); | |
public: | |
void push(MinHeapNode* node); | |
unsigned int size() ; | |
void heapify_down(int i); | |
void heapify_up(int i); | |
void pop(); | |
MinHeapNode* top(); | |
}; | |
#endif /* minHeap_hpp */ |
Author
misterpoloy
commented
Aug 28, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment