Skip to content

Instantly share code, notes, and snippets.

@WangYihang
Last active August 5, 2019 04:49
Show Gist options
  • Save WangYihang/3c94b6c9606b11c01ff7b06c2f2af642 to your computer and use it in GitHub Desktop.
Save WangYihang/3c94b6c9606b11c01ff7b06c2f2af642 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# encoding:utf-8
import struct
class HuffmanNode:
def __init__(self, times, left, right, char):
self.times = times
self.left = left
self.right = right
self.char = char
def MinHuffmanNode(nodes):
nodes = nodes[::-1]
result = nodes[0]
for i in nodes[1:]:
if i.times < result.times:
result = i
return result
def huffman(data):
# Count
table = [0 for i in range(0x100)]
for i in data:
table[ord(i)] += 1
# Build huffman tree
nodes = []
for i in range(len(table)):
if table[i] != 0:
nodes.append(HuffmanNode(table[i], None, None, chr(i)))
while len(nodes) != 1:
x = MinHuffmanNode(nodes)
nodes.remove(x)
y = MinHuffmanNode(nodes)
nodes.remove(y)
z = HuffmanNode(x.times + y.times, x, y, None)
nodes.append(z)
return nodes[0]
encoding = {}
def traversal(tree, prefix=""):
if tree.left != None:
traversal(tree.left, prefix + "0")
if tree.right != None:
traversal(tree.right, prefix + "1")
if tree.left == None and tree.right == None:
encoding[tree.char] = prefix
def encode(plain):
result = ""
for i in plain:
result += encoding[i]
return result
def decode(compressed, tree):
i = 0
current = tree
result = ""
while True:
if current.left != None and compressed[i] == "0":
current = current.left
i += 1
continue
if current.right != None and compressed[i] == "1":
current = current.right
i += 1
continue
if current.left == None and current.right == None:
result += current.char
current = tree
if i > len(compressed) - 1:
break
return result
def uncompressed(data):
result = ""
for i in data:
b = bin(ord(i))[2:]
result += "0" * (8 - len(b)) + b
return result
def save(data, filename):
# TODO: Save Huffman Tree at the same time
data += (8 - len(data) % 8) * "0"
result = ""
for i in range(0, len(data), 8):
result += chr(int(data[i:i+8], base=2))
with open(filename, "wb") as f:
f.write(bytes(result, encoding="utf-8"))
def main():
plain = "If you can read assembly language, then every thing is open source."
print("Plain", plain)
tree = huffman(plain)
traversal(tree)
print("Encoding table", encoding)
origin = uncompressed(plain)
print("Uncompressed", origin)
save(origin, "origin.bin")
compressed = encode(plain)
print("Compressed", compressed)
save(compressed, "compressed.bin")
print("Compressing Propotion", len(compressed) / len(origin))
decoded = decode(compressed, tree)
print("Decoded", decoded)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment