Skip to content

Instantly share code, notes, and snippets.

@bgschiller
Created February 24, 2017 02:48
Show Gist options
  • Select an option

  • Save bgschiller/1b80103ab54421f46b78c18c773f4837 to your computer and use it in GitHub Desktop.

Select an option

Save bgschiller/1b80103ab54421f46b78c18c773f4837 to your computer and use it in GitHub Desktop.
from collections import namedtuple, Counter
import heapq
from pprint import pprint
from operator import attrgetter
import struct
Tree = namedtuple('Tree', 'weight left right')
Letter = namedtuple('Letter', 'weight char')
get_weight = attrgetter('weight') # same as def get_weight(o): return o.weight
text = (
'on the far end of town where the grickle grass grows ' +
'and the wind smells slow and sour when it blows ' +
'is the street of the lifted lorax'
)
freq = Counter(text).most_common()
# freq is like [(' ', 27), ('e', 20), ...]
letters = [Letter(weight=weight, char=char) for char, weight in freq]
heapq.heapify(letters)
while len(letters) > 1:
l = heapq.heappop(letters)
r = heapq.heappop(letters)
heapq.heappush(letters, Tree(l.weight + r.weight, l, r))
# We've combined all our Letters into a single large Tree
assert len(letters) == 1, "if it's not length 1, why are we out of the loop?"
tree = letters[0]
addresses = {}
def fill_address_book(t, a, prefix=''):
if isinstance(t, Letter):
a[t.char] = prefix
else:
fill_address_book(t.left, a, prefix + '0')
fill_address_book(t.right, a, prefix + '1')
fill_address_book(tree, addresses)
print 'addresses maps from each letter to its bitstring:'
pprint(addresses)
compressed = ''.join([addresses[c] for c in text])
print 'the whole sentence compresses to:'
print compressed
def eat_one_char(t, bitstring):
if isinstance(t, Letter):
return t.char, bitstring
if bitstring[0] == '0':
return eat_one_char(t.left, bitstring[1:])
else:
assert bitstring[0] == '1', "what? that's no bitstring!"
return eat_one_char(t.right, bitstring[1:])
decoded = []
bitstring = compressed
while bitstring:
new_char, bitstring = eat_one_char(tree, bitstring)
decoded.append(new_char)
if ''.join(decoded) == text:
print 'decoded successfully!'
else:
print 'uh oh, something went wrong...'
def write_bitstring_to_file(fname, bitstring):
with open(fname, 'wb') as f:
while len(bitstring) >= 8:
# next_eight is a byte, so we can write it efficiently
next_eight, bitstring = bitstring[:8], bitstring[8:]
# bitstring has had the first 8 chars trimmed off
# read the '00011101' and interpret it as a character
char = chr(int(next_eight, 2))
# I don't really know what this line is about *shrug*
# I copied it from http://stackoverflow.com/a/25916006/1586229
packed = struct.pack('@B', char)
f.write(packed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment