Created
February 2, 2014 10:15
-
-
Save sahutd/8765901 to your computer and use it in GitHub Desktop.
Gist created using YAGC - Yet Another Gist CLI . Visit https://github.com/sahutd/YAGC-Yet-Another-Gist-CLI to for the Repo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
@author : saimadhav heblikar | |
usage: python compr.py [-c | -d] <input file> <output file> | |
Call -c for compress | |
Call -d for decompress | |
Make sure to escape the input file and output with with double quotes if you | |
are on a NT System!!! | |
Purpose:Given a input file,a compressed output file is generated along with the | |
overhead required to decompress it. | |
""" | |
import heapq | |
from sys import argv | |
class node(): | |
def __init__(self,char,freq,left_child,right_child,parent): | |
self.char=char | |
self.freq=freq | |
self.left_child=left_child | |
self.right_child=right_child | |
self.parent=parent | |
def extract_code(root): | |
global d | |
if root != None: | |
extract_code(root[1].left_child) | |
if root[1].right_child==None and root[1].left_child==None: | |
s="" | |
child=root[1] | |
parent=child.parent | |
while(parent!=None): | |
if(child==parent.left_child[1]): | |
s='0'+s | |
else: | |
s='1'+s | |
child=parent | |
parent=parent.parent | |
d[root[1].char]=s | |
extract_code(root[1].right_child) | |
def compress(msg): | |
#Create freq dict | |
global d | |
for i in msg: | |
if i in d: | |
d[i]=d[i]+1 | |
else: | |
d[i]=1 | |
queue=[] | |
for char,freq in d.iteritems(): | |
temp_node=node(char,freq,None,None,None) | |
heapq.heappush(queue,(freq,temp_node)) | |
while len(queue)>1: | |
temp_node1=heapq.heappop(queue) | |
temp_node2=heapq.heappop(queue) | |
new_node=node('*',temp_node1[0]+temp_node2[0],temp_node1,temp_node2,None) | |
temp_node1[1].parent=new_node | |
temp_node2[1].parent=new_node | |
heapq.heappush(queue,(new_node.freq,new_node)) | |
#print queue[0] | |
queue[0][1].left_child,queue[0][1].right_child=queue[0][1].right_child,queue[0][1].left_child | |
d={} | |
extract_code(queue[0]) | |
compressed_msg='' | |
for i in msg: | |
compressed_msg=compressed_msg+d[i] | |
return compressed_msg | |
def decompress(): | |
pass | |
#todo | |
d={} | |
msg=argv[2] | |
print msg | |
c=compress(msg) | |
print c | |
print 'msg size in bytes:'+str(len(msg)) | |
print 'compressed msg size in bytes:'+str(len(c)/8) #BIT ARRAY LENGTH |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment