Last active
July 18, 2024 09:04
-
-
Save pabloem/7974e7a1dc755190b7f1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import json | |
import pickle | |
def decode(dic, bitstr): | |
res = [] | |
length = bitstr.bit_length() - 1 | |
if bitstr >> length != 1: | |
raise Error("Corrupt file!") | |
done = False | |
while length > 0 and not done: | |
shift = length - 1 | |
while True: | |
num = bitstr >> shift | |
bitnum = bin(num)[3:] # Quitamos '0b1' - el 1 inicial y el 0b de formato | |
if bitnum not in dic: | |
shift -= 1 | |
continue | |
char = dic[bitnum] | |
if char == 'end': | |
done = True | |
break | |
res.append(char) | |
bitstr = bitstr - ((num - 1) << shift) | |
length = shift | |
return ''.join(res) | |
if __name__ == "__main__": | |
usage = """Usage: ./huffman_de.py infile outfile""" | |
if len(sys.argv) < 3: | |
print(usage) | |
sys.exit(1) | |
f = open(sys.argv[1]+".dic") | |
dic = json.load(f) | |
nwdic = {} | |
for i,e in dic.items(): nwdic[e] = i | |
f.close() | |
f = open(sys.argv[1],'rb') | |
bstr = pickle.load(f) | |
f.close() | |
content = decode(nwdic,bstr) | |
f = open(sys.argv[2],'w') | |
f.write(content) | |
f.close() | |
print("File decompressed!") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import heapq | |
from collections import Counter | |
import pickle | |
import json | |
def get_probabilities(content): | |
total = len(content) + 1 # Agregamos uno por el caracter FINAL | |
c = Counter(content) | |
res = {} | |
for char,count in c.items(): | |
res[char] = float(count)/total | |
res['end'] = 1.0/total | |
return res | |
def make_tree(probs): | |
q = [] | |
for ch,pr in probs.items(): | |
# La fila de prioridad está ordenada por prioridad y PROFUNDIDAD | |
heapq.heappush(q,(pr,0,ch)) | |
while len(q) > 1: | |
e1 = heapq.heappop(q) | |
e2 = heapq.heappop(q) | |
nw_e = (e1[0]+e2[0],max(e1[1],e2[1])+1,[e1,e2]) | |
heapq.heappush(q,nw_e) | |
return q[0] | |
def make_dictionary(tree): | |
res = {} | |
search_stack = [] | |
search_stack.append(tree+("",)) # El último elemento de la lista es el prefijo! | |
while len(search_stack) > 0: | |
elm = search_stack.pop() | |
if type(elm[2]) == list: | |
prefix = elm[-1] | |
search_stack.append(elm[2][1]+(prefix+"0",)) | |
search_stack.append(elm[2][0]+(prefix+"1",)) | |
continue | |
else: | |
res[elm[2]] = elm[-1] | |
pass | |
return res | |
def compress(dic,content): | |
res = "" | |
for ch in content: | |
code = dic[ch] | |
res = res + code | |
res = '1' + res + dic['end'] # Agregamos el caracter final y el 1 inicial | |
res = res + (len(res) % 8 * "0") # Agregamos ceros para convertir en multiplo de 8 | |
return int(res,2) | |
def store(data,dic,outfile): | |
# Lo guardamos en un archivo | |
outf = open(outfile,'wb') | |
pickle.dump(compressed,outf) | |
outf.close() | |
# Guardamos el diccionario en otro archivo | |
outf = open(outfile+".dic",'w') | |
json.dump(dic,outf) | |
outf.close() | |
pass | |
if __name__ == "__main__": | |
usage = """Usage: ./huffman_en.py infile outfile""" | |
if len(sys.argv) < 3: | |
print(usage) | |
sys.exit(1) | |
# Leemos el archivo de entrada completo a cont | |
inf = open(sys.argv[1]) | |
cont = inf.read() | |
inf.close() | |
# Usamos counter para calcular la probabilidad de cada símbolo | |
probs = get_probabilities(cont) | |
# Construimos el árbol de parseo! : ) | |
tree = make_tree(probs) | |
# Construimos el diccionario para codificar | |
dic = make_dictionary(tree) | |
# Creamos el contenido del nuevo archivo | |
compressed = compress(dic,cont) | |
store(compressed,dic,sys.argv[2]) | |
print("Archivo comprimido!") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"j": "1001111", "e": "101", "h": "10010000", "\u00e9": "01100011", "end": "0110000001", "m": "011001", "y": "111001", "g": "100101", "\u00ed": "10011100", "o": "0100", "p": "11000", " ": "000", "\n": "011000101", "u": "1101", "\u00bf": "011000010", ".": "011000100", "q": "11101", "s": "0101", "v": "10011101", "a": "0010", "t": "11001", "\u00f3": "0110000000", "l": "1111", "d": "00111", "i": "00110", ",": "100110", "?": "011000011", "r": "1000", "b": "1001001", "n": "0111", "z": "1110001", "\u00e1": "011000001", "f": "1110000", "\u00fa": "10010001", "c": "01101"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ni tampoco hay nadie que ame, persiga y quiera alcanzar el dolor mismo porque sea dolor, sino porque a veces se dan las circunstancias de tal manera, que con esfuerzo y dolor puede obtener algún gran placer. en efecto, para ir a cosas insignificantes, ¿quién de nosotros asume algún ejercicio físico trabajoso si no es para conseguir alguna ventaja de él? por otra parte, ¿quién censuraría con razón a aquel que quiere estar en un placer al que no siga ninguna molestia, o a aquel que huye del dolor con el que no se produce ningún placer? pero sin duda acusamos y juzgamos como los más dignos de un justo aborrecimiento a aquellos que, ablandados y corrompidos por el encanto de los placeres presentes, cegados por el deseo, no prevén los dolores y las molestias que han de sucederles, y están en falta semejante quienes abandonan sus deberes por debilidad de espíritu, es decir, por huir de esfuerzos y dolores. | |
Thedoctor1989
commented
Jul 18, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment