Last active
October 6, 2015 03:30
-
-
Save colltoaction/8fd5d376134ab5e603af to your computer and use it in GitHub Desktop.
Naïve LZW implementation for a school assignment using Python.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LZW: | |
def __init__(self): | |
self.ctx = "" | |
self.table = [ chr(i) for i in range(256) ] | |
self.empty_index = 256 | |
self.last_index = None | |
self._model = [] | |
def encode(self, c): | |
"""Receives a character to encode. Updates the table.""" | |
curr = self.ctx + c | |
try: | |
self.last_index = self.table.index(curr) | |
self.ctx = curr | |
except ValueError: | |
self.table.append(curr) | |
self._model.append(self.last_index) | |
self.ctx = c | |
self.last_index = self.table.index(c) | |
def model(self): | |
"""Returns a human-readable version of the compressed file.""" | |
m = "" | |
for c in self._model: | |
if c < 256: | |
m += "({})".format(repr(chr(c))) | |
else: | |
m += "({})".format(c) | |
return m | |
def compressed(self): | |
"""Returns a string containing a text representation of the compressed binary file.""" | |
c = "".join("{:09b}".format(c) for c in self._model) | |
return c + "0" * (len(c) % 8) | |
def print_table(self): | |
"""Prints the current table from 256 onwards.""" | |
print("pos | chars") | |
print("~~~~~~~~~~~") | |
for i in range(256, len(self.table)): | |
print("{} | {}".format(i, repr(self.table[i]))) | |
if __name__ == '__main__': | |
import argparse | |
import os | |
parser = argparse.ArgumentParser(description='Encodes the text passed through stdin using the LZW algorithm. Input should end with an empty line.') | |
parser.add_argument('-no-info', action="store_true", help='Hide the extra information like the model representation and the table.') | |
args = parser.parse_args() | |
coder = LZW() | |
for line in os.sys.stdin: | |
for c in line: | |
coder.encode(c) | |
if args.no_info: | |
print(coder.compressed()) | |
else: | |
print("compressed: {}".format(coder.compressed())) | |
print() | |
print("model: {}".format(coder.model())) | |
print() | |
print("table:") | |
coder.print_table() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment