Last active
August 10, 2020 22:35
-
-
Save SeijiEmery/c8a4071bb087a7dc1133b2b4b4bc7528 to your computer and use it in GitHub Desktop.
variable length encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
data-driven variable length encoder :) | |
(started as a toy program to encode / decode morse code; | |
extended to cover any variable-length encodings, and works provided that all | |
encodings are 1-1, ie. bidirectionally unambiguous, which this encoder assumes | |
but does not check for) | |
''' | |
from utils import fmt_or, ignore_errors | |
class VariableLengthEncoder: | |
def __init__(self, file=None): | |
self.encoding_table = {} | |
self.decoding_table = {} | |
if file is not None: | |
self.load_encoding(file) | |
def add_encoding(self, seq, encoding): | |
insert_trie(self.encoding_table, seq, encoding) | |
insert_trie(self.decoding_table, encoding, seq) | |
def load_encoding(self, file, comment='#'): | |
with open(file, 'r') as f: | |
for line in f.readlines(): | |
line = line.split(comment)[0].strip() | |
if line: | |
seq, encoding = line.split() | |
self.add_encoding(seq, encoding) | |
def encode(self, text): | |
return trie_decode(self.encoding_table, text) | |
def decode(self, text): | |
return trie_decode(self.decoding_table, text) | |
def insert_trie(d, seq, value): | |
''' inserts a sequence (and assigns value) into the trie d ''' | |
assert(type(d) == dict) | |
seq, last = seq[:-1], seq[-1] | |
# insert elements + get final dict | |
for elem in seq: | |
# extend the trie if / as needed | |
if elem not in d: | |
d[elem] = {} | |
# if run into a terminal leaf node, convert into a branch | |
# w/ a special None value indiciating that this can terminate | |
if type(d[elem]) != dict: | |
d[elem] = { | |
None: d[elem] | |
} | |
# walk through the trie | |
d = d[elem] | |
# insert final leaf node | |
d[last] = value | |
class DecodingException(Exception): | |
pass | |
def trie_decode(decoding_table, text, result=''): | |
tbl = decoding_table | |
for i, char in enumerate(text): | |
if char not in tbl: | |
if None in tbl: | |
result += tbl[None] | |
tbl = decoding_table | |
else: | |
raise DecodingException( | |
"decoding error in '%s' at '%s': expecting %s" % ( | |
text, text[i], fmt_or(tbl.keys()) | |
)) | |
entry = tbl[char] | |
if type(entry) == dict: | |
tbl = entry | |
else: | |
result += entry | |
tbl = decoding_table | |
if tbl != decoding_table: | |
raise DecodingException( | |
"unterminated sequence '%s' at '%s': expecting %s\n(in input '%s')" % ( | |
text[i], text[:i + 1], fmt_or(tbl.keys()), text)) | |
return result | |
if __name__ == '__main__': | |
encoder = VariableLengthEncoder('encoding.txt') | |
# encode, decode = encoder.encode, encoder.decode | |
encode = lambda *args: ignore_errors(lambda: encoder.encode(*args), DecodingException) | |
decode = lambda *args: ignore_errors(lambda: encoder.decode(*args), DecodingException) | |
print(encoder.encoding_table) | |
print(encoder.decoding_table) | |
print(encode('ABC')) | |
print(encode('ABBC')) | |
print(decode(encode('ABC'))) | |
print(decode(encode('ABBC'))) | |
print(decode('.')) | |
print(decode('.--')) | |
print(decode('XYZ')) | |
print(encode('XYZ')) | |
print(encode('Y')) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A .- | |
B -... | |
C -.-. | |
BB --.- | |
YY -.-- |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{'A': '.-', 'B': {None: '-...', 'B': '--.-'}, 'C': '-.-.', 'Y': {'Y': '-.--'}} | |
{'.': {'-': 'A'}, '-': {'.': {'.': {'.': 'B'}, '-': {'.': 'C', '-': 'YY'}}, '-': {'.': {'-': 'BB'}}}} | |
.--...-.-. | |
.---.--.-. | |
ABC | |
ABBC | |
unterminated sequence '.' at '.': expecting '-' | |
(in input '.') | |
None | |
unterminated sequence '-' at '.--': expecting '.' or '-' | |
(in input '.--') | |
None | |
decoding error in 'XYZ' at 'X': expecting '.' or '-' | |
None | |
decoding error in 'XYZ' at 'X': expecting 'A', 'B', 'C' or 'Y' | |
None | |
unterminated sequence 'Y' at 'Y': expecting 'Y' | |
(in input 'Y') | |
None | |
[Finished in 0.5s] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fmt_or(items): | |
''' formats a list ['a', 'b', 'c'] as "'a', 'b', or 'c'" ''' | |
if len(items) == 0: | |
return '<none>' | |
elif len(items) <= 2: | |
return ' or '.join(map(repr, items)) | |
else: | |
items = list(map(repr, items)) | |
return ' or '.join(( | |
', '.join(items[:-1]), | |
items[-1] | |
)) | |
def ignore_errors(expr, *exception_types): | |
''' used to wrap a function that may throw, catching specific exception types. | |
usage: ignore_errors(lamda: <function call that may throw>, ExceptionType1, ExceptionType2, ...) | |
''' | |
try: | |
return expr() | |
except Exception as e: | |
if not exception_types or type(e) in exception_types: | |
print(e) | |
else: | |
raise e |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment