Created
February 24, 2017 02:48
-
-
Save bgschiller/1b80103ab54421f46b78c18c773f4837 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import namedtuple, Counter | |
| import heapq | |
| from pprint import pprint | |
| from operator import attrgetter | |
| import struct | |
| Tree = namedtuple('Tree', 'weight left right') | |
| Letter = namedtuple('Letter', 'weight char') | |
| get_weight = attrgetter('weight') # same as def get_weight(o): return o.weight | |
| text = ( | |
| 'on the far end of town where the grickle grass grows ' + | |
| 'and the wind smells slow and sour when it blows ' + | |
| 'is the street of the lifted lorax' | |
| ) | |
| freq = Counter(text).most_common() | |
| # freq is like [(' ', 27), ('e', 20), ...] | |
| letters = [Letter(weight=weight, char=char) for char, weight in freq] | |
| heapq.heapify(letters) | |
| while len(letters) > 1: | |
| l = heapq.heappop(letters) | |
| r = heapq.heappop(letters) | |
| heapq.heappush(letters, Tree(l.weight + r.weight, l, r)) | |
| # We've combined all our Letters into a single large Tree | |
| assert len(letters) == 1, "if it's not length 1, why are we out of the loop?" | |
| tree = letters[0] | |
| addresses = {} | |
| def fill_address_book(t, a, prefix=''): | |
| if isinstance(t, Letter): | |
| a[t.char] = prefix | |
| else: | |
| fill_address_book(t.left, a, prefix + '0') | |
| fill_address_book(t.right, a, prefix + '1') | |
| fill_address_book(tree, addresses) | |
| print 'addresses maps from each letter to its bitstring:' | |
| pprint(addresses) | |
| compressed = ''.join([addresses[c] for c in text]) | |
| print 'the whole sentence compresses to:' | |
| print compressed | |
| def eat_one_char(t, bitstring): | |
| if isinstance(t, Letter): | |
| return t.char, bitstring | |
| if bitstring[0] == '0': | |
| return eat_one_char(t.left, bitstring[1:]) | |
| else: | |
| assert bitstring[0] == '1', "what? that's no bitstring!" | |
| return eat_one_char(t.right, bitstring[1:]) | |
| decoded = [] | |
| bitstring = compressed | |
| while bitstring: | |
| new_char, bitstring = eat_one_char(tree, bitstring) | |
| decoded.append(new_char) | |
| if ''.join(decoded) == text: | |
| print 'decoded successfully!' | |
| else: | |
| print 'uh oh, something went wrong...' | |
| def write_bitstring_to_file(fname, bitstring): | |
| with open(fname, 'wb') as f: | |
| while len(bitstring) >= 8: | |
| # next_eight is a byte, so we can write it efficiently | |
| next_eight, bitstring = bitstring[:8], bitstring[8:] | |
| # bitstring has had the first 8 chars trimmed off | |
| # read the '00011101' and interpret it as a character | |
| char = chr(int(next_eight, 2)) | |
| # I don't really know what this line is about *shrug* | |
| # I copied it from http://stackoverflow.com/a/25916006/1586229 | |
| packed = struct.pack('@B', char) | |
| f.write(packed) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment