Created
February 20, 2018 11:38
-
-
Save vincentius15/72f7679558c38df0b953085d028cb8d2 to your computer and use it in GitHub Desktop.
File compressor using huffman code algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<algorithm> | |
#include<math.h> | |
using namespace std; | |
typedef struct tree{ | |
int count; | |
int character; | |
struct tree *left, *right; | |
}node; | |
unsigned long long huffmanCodeTable[256]; | |
unsigned long long dataLength=0; | |
char fileName[100]; | |
char keyFileName[100]; | |
char compressedFileName[100]; | |
char extractedFileName[100]; | |
bool compare(node *a, node *b) | |
{ | |
return a->count < b->count; | |
} | |
int codeLength(unsigned long long code) | |
{ | |
return log10(code)+1; | |
} | |
void printHuffmanCodeTable() | |
{ | |
int i; | |
for(i=0;i<256;i++) | |
{ | |
if(huffmanCodeTable[i]>0) | |
{ | |
printf("%d ", i); | |
printf("%llu\n", huffmanCodeTable[i]); | |
} | |
} | |
} | |
void keyGenerator() | |
{ | |
FILE *inp, *out; | |
int charCount[256], i; | |
memset(charCount,0,sizeof(charCount)); | |
int buffer; | |
//inp = fopen("M12.pptx", "rb"); | |
inp = fopen(fileName, "rb"); | |
//inp = fopen("battery.png", "rb"); | |
//inp = fopen("inp.txt", "rb"); | |
//inp = fopen("char.txt", "rb"); | |
if(inp == NULL) | |
{ | |
printf("File not found!\n"); | |
return; | |
} | |
out = fopen(keyFileName, "w"); | |
//out = fopen("key.key", "w"); | |
while(fscanf(inp,"%c", &buffer) != EOF) | |
{ | |
//printf("%c", buffer); | |
charCount[buffer]+=1; | |
} | |
for(i=0;i<256;i++) | |
{ | |
if(charCount[i]>0) | |
{ | |
fprintf(out, "%d ", i); | |
fprintf(out, "%d\n", charCount[i]); | |
} | |
} | |
fclose(inp); | |
fclose(out); | |
return; | |
} | |
node * huffmanTreeGenerator() | |
{ | |
FILE *inp; | |
int charCount[256],i,j,k=0,l=0,temp; | |
node *arr[256] = {NULL}; | |
inp = fopen(keyFileName, "r"); | |
if(inp == NULL) | |
{ | |
return 0; | |
} | |
//inp = fopen("key.key", "r"); | |
memset(charCount,0,sizeof(charCount)); | |
while(fscanf(inp, "%d ", &j) != EOF) | |
{ | |
printf("key detected: %d\n", j); | |
if(j<=255) | |
{ | |
fscanf(inp, "%d\n", &temp); | |
if(temp!=0) | |
{ | |
charCount[j] = temp; | |
printf("frequency value detected: %d\n", charCount[j]); | |
node *huffman = (node*) malloc(sizeof(node)); | |
huffman->character = j; | |
huffman->count = charCount[j]; | |
huffman->left = NULL; | |
huffman->right = NULL; | |
arr[k] = huffman; | |
k++; | |
} | |
} | |
if(j>255||temp==0) | |
{ | |
dataLength = j; | |
printf("bit length is %d\n", dataLength); | |
} | |
temp = 0; | |
} | |
fclose(inp); | |
for(l=0;l<k-1;l++) | |
{ | |
sort(arr+l,arr+k,compare); | |
/*for(i=l;i<k;i++) //for debugging | |
{ | |
printf("%d ", i); | |
printf("%c ", arr[i]->character); | |
printf("%d\n", arr[i]->count); | |
} | |
printf("\n\n");*/ | |
node *temp = (node*) malloc(sizeof(node)); | |
temp->left = arr[l]; | |
temp->right = arr[l+1]; | |
temp->count = arr[l]->count + arr[l+1]->count; | |
temp->character = 1000; | |
arr[l+1] = temp; | |
temp = NULL; | |
} | |
return arr[k-1]; | |
} | |
void huffmanExplorer(node *root, unsigned long long codeBuffer) | |
{ | |
if(root==NULL) | |
{ | |
return; | |
} | |
if(root->character != 1000) | |
{ | |
if(codeBuffer>0) | |
{ | |
huffmanCodeTable[root->character] = codeBuffer; | |
} | |
else | |
{ | |
huffmanCodeTable[root->character] = 1; | |
} | |
} | |
huffmanExplorer(root->left, (codeBuffer*10)+1); | |
huffmanExplorer(root->right, (codeBuffer*10)+2); | |
} | |
void compress() | |
{ | |
int bitLength=0, bit,i,buffer=0, bitCounter=8; | |
unsigned long long bin, div; | |
int j; | |
FILE *inp, *out, *key; | |
//inp = fopen("inp.txt", "rb"); | |
//inp = fopen("M12.pptx", "rb"); | |
inp = fopen(fileName, "rb"); | |
if(inp == NULL) | |
{ | |
return; | |
} | |
//inp = fopen("battery.png", "rb"); | |
//inp = fopen("char.txt", "rb"); | |
key = fopen(keyFileName, "a"); | |
//key = fopen("key.key", "a"); | |
out = fopen(compressedFileName, "wb"); | |
//out = fopen("compressed.cm", "wb"); | |
while(fscanf(inp, "%c", &j) != EOF) | |
{ | |
bin = huffmanCodeTable[j]; | |
bitLength = codeLength(bin); | |
//printf("huffman code of %c : %d\n", j, bin); | |
//printf("length : %d\n", bitLength); | |
div = pow(10,(int)log10(bin)); | |
for(i=0;i<bitLength;i++) | |
{ | |
bit = ((bin/div)%10)-1; | |
buffer = buffer | bit; | |
dataLength++; | |
//printf("cur buffer : %d\n", buffer); | |
bitCounter--; | |
if(!bitCounter) | |
{ | |
fprintf(out, "%c", buffer); | |
//printf("printed : %d\n", buffer); | |
bitCounter=8; | |
buffer = 0; | |
} | |
div = div/10; | |
buffer = buffer << 1; | |
} | |
} | |
fprintf(key,"%d\n",dataLength); | |
if (bitCounter < 8) | |
{ | |
buffer = buffer << (bitCounter-1); | |
fprintf(out, "%c", buffer); | |
//printf("printed : %d\n", buffer); | |
} | |
fclose(inp); | |
fclose(out); | |
fclose(key); | |
printf("Compress successful\n\n"); | |
return; | |
} | |
void extract(node *root) | |
{ | |
int bitLength=0, bit,i,buffer=0; | |
char j; | |
node *ptr; | |
FILE *inp, *out; | |
ptr = root; | |
inp = fopen(compressedFileName, "rb"); | |
if(inp == NULL) | |
{ | |
printf("File not found!\n"); | |
return; | |
} | |
//inp = fopen("M12_2PAA2.pptx.cm", "rb"); | |
out = fopen(extractedFileName, "wb"); | |
//out = fopen("extract.txt", "wb"); | |
while(fscanf(inp, "%c", &j) != EOF) | |
{ | |
for(i=0;i<8;i++) | |
{ | |
if(dataLength>0) | |
{ | |
//printf("datalength %d\n", dataLength); | |
bit = j & 128; | |
dataLength--; | |
if(bit == 0) | |
{ | |
if(ptr->character != 1000) | |
{ | |
fprintf(out, "%c", ptr->character); | |
ptr = root; | |
} | |
else | |
{ | |
ptr = ptr->left; | |
if(ptr->character != 1000) | |
{ | |
fprintf(out, "%c", ptr->character); | |
ptr = root; | |
} | |
} | |
} | |
else | |
{ | |
if(ptr->character != 1000) | |
{ | |
fprintf(out, "%c", ptr->character); | |
ptr = root; | |
} | |
else | |
{ | |
ptr = ptr->right; | |
if(ptr->character != 1000) | |
{ | |
fprintf(out, "%c", ptr->character); | |
ptr = root; | |
} | |
} | |
} | |
j = j << 1; | |
} | |
} | |
} | |
fclose(inp); | |
fclose(out); | |
printf("Extract successful\n\n"); | |
return; | |
} | |
int main() | |
{ | |
int choice; | |
while(1) | |
{ | |
node *ptr; | |
memset(huffmanCodeTable,0,sizeof(huffmanCodeTable)); | |
dataLength = 0; | |
printf("Enter Choice:\n1.Compress\n2.Extract\n"); | |
scanf("%d", &choice); | |
if(choice==1) | |
{ | |
printf("Enter filename :\n"); | |
scanf("%s", fileName); | |
strcpy(keyFileName,fileName); | |
strcat(keyFileName,".key"); | |
strcpy(compressedFileName,fileName); | |
strcat(compressedFileName,".cm"); | |
keyGenerator(); | |
ptr = huffmanTreeGenerator(); | |
huffmanExplorer(ptr,0); | |
printHuffmanCodeTable(); | |
compress(); | |
} | |
else if(choice==2) | |
{ | |
printf("Enter filename :\n"); | |
scanf("%s", compressedFileName); | |
strcpy(fileName,compressedFileName); | |
fileName[strlen(fileName)-3] = '\0'; | |
strcpy(keyFileName,fileName); | |
strcat(keyFileName,".key"); | |
printf("%s\n", keyFileName); | |
strcpy(extractedFileName,fileName); | |
strcat(extractedFileName,"-"); | |
ptr = huffmanTreeGenerator(); | |
//printf("%d", dataLength); | |
extract(ptr); | |
} | |
else | |
{ | |
continue; | |
} | |
keyFileName[0] = '\0'; | |
compressedFileName[0] = '\0'; | |
fileName[0] = '\0'; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment