Last active
August 5, 2019 04:49
-
-
Save WangYihang/3c94b6c9606b11c01ff7b06c2f2af642 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding:utf-8 | |
import struct | |
class HuffmanNode: | |
def __init__(self, times, left, right, char): | |
self.times = times | |
self.left = left | |
self.right = right | |
self.char = char | |
def MinHuffmanNode(nodes): | |
nodes = nodes[::-1] | |
result = nodes[0] | |
for i in nodes[1:]: | |
if i.times < result.times: | |
result = i | |
return result | |
def huffman(data): | |
# Count | |
table = [0 for i in range(0x100)] | |
for i in data: | |
table[ord(i)] += 1 | |
# Build huffman tree | |
nodes = [] | |
for i in range(len(table)): | |
if table[i] != 0: | |
nodes.append(HuffmanNode(table[i], None, None, chr(i))) | |
while len(nodes) != 1: | |
x = MinHuffmanNode(nodes) | |
nodes.remove(x) | |
y = MinHuffmanNode(nodes) | |
nodes.remove(y) | |
z = HuffmanNode(x.times + y.times, x, y, None) | |
nodes.append(z) | |
return nodes[0] | |
encoding = {} | |
def traversal(tree, prefix=""): | |
if tree.left != None: | |
traversal(tree.left, prefix + "0") | |
if tree.right != None: | |
traversal(tree.right, prefix + "1") | |
if tree.left == None and tree.right == None: | |
encoding[tree.char] = prefix | |
def encode(plain): | |
result = "" | |
for i in plain: | |
result += encoding[i] | |
return result | |
def decode(compressed, tree): | |
i = 0 | |
current = tree | |
result = "" | |
while True: | |
if current.left != None and compressed[i] == "0": | |
current = current.left | |
i += 1 | |
continue | |
if current.right != None and compressed[i] == "1": | |
current = current.right | |
i += 1 | |
continue | |
if current.left == None and current.right == None: | |
result += current.char | |
current = tree | |
if i > len(compressed) - 1: | |
break | |
return result | |
def uncompressed(data): | |
result = "" | |
for i in data: | |
b = bin(ord(i))[2:] | |
result += "0" * (8 - len(b)) + b | |
return result | |
def save(data, filename): | |
# TODO: Save Huffman Tree at the same time | |
data += (8 - len(data) % 8) * "0" | |
result = "" | |
for i in range(0, len(data), 8): | |
result += chr(int(data[i:i+8], base=2)) | |
with open(filename, "wb") as f: | |
f.write(bytes(result, encoding="utf-8")) | |
def main(): | |
plain = "If you can read assembly language, then every thing is open source." | |
print("Plain", plain) | |
tree = huffman(plain) | |
traversal(tree) | |
print("Encoding table", encoding) | |
origin = uncompressed(plain) | |
print("Uncompressed", origin) | |
save(origin, "origin.bin") | |
compressed = encode(plain) | |
print("Compressed", compressed) | |
save(compressed, "compressed.bin") | |
print("Compressing Propotion", len(compressed) / len(origin)) | |
decoded = decode(compressed, tree) | |
print("Decoded", decoded) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment