Skip to content

Instantly share code, notes, and snippets.

@SeijiEmery
Last active August 10, 2020 22:35
Show Gist options
  • Save SeijiEmery/c8a4071bb087a7dc1133b2b4b4bc7528 to your computer and use it in GitHub Desktop.
Save SeijiEmery/c8a4071bb087a7dc1133b2b4b4bc7528 to your computer and use it in GitHub Desktop.
variable length encoding
'''
data-driven variable length encoder :)
(started as a toy program to encode / decode morse code;
extended to cover any variable-length encodings, and works provided that all
encodings are 1-1, ie. bidirectionally unambiguous, which this encoder assumes
but does not check for)
'''
from utils import fmt_or, ignore_errors
class VariableLengthEncoder:
def __init__(self, file=None):
self.encoding_table = {}
self.decoding_table = {}
if file is not None:
self.load_encoding(file)
def add_encoding(self, seq, encoding):
insert_trie(self.encoding_table, seq, encoding)
insert_trie(self.decoding_table, encoding, seq)
def load_encoding(self, file, comment='#'):
with open(file, 'r') as f:
for line in f.readlines():
line = line.split(comment)[0].strip()
if line:
seq, encoding = line.split()
self.add_encoding(seq, encoding)
def encode(self, text):
return trie_decode(self.encoding_table, text)
def decode(self, text):
return trie_decode(self.decoding_table, text)
def insert_trie(d, seq, value):
''' inserts a sequence (and assigns value) into the trie d '''
assert(type(d) == dict)
seq, last = seq[:-1], seq[-1]
# insert elements + get final dict
for elem in seq:
# extend the trie if / as needed
if elem not in d:
d[elem] = {}
# if run into a terminal leaf node, convert into a branch
# w/ a special None value indiciating that this can terminate
if type(d[elem]) != dict:
d[elem] = {
None: d[elem]
}
# walk through the trie
d = d[elem]
# insert final leaf node
d[last] = value
class DecodingException(Exception):
pass
def trie_decode(decoding_table, text, result=''):
tbl = decoding_table
for i, char in enumerate(text):
if char not in tbl:
if None in tbl:
result += tbl[None]
tbl = decoding_table
else:
raise DecodingException(
"decoding error in '%s' at '%s': expecting %s" % (
text, text[i], fmt_or(tbl.keys())
))
entry = tbl[char]
if type(entry) == dict:
tbl = entry
else:
result += entry
tbl = decoding_table
if tbl != decoding_table:
raise DecodingException(
"unterminated sequence '%s' at '%s': expecting %s\n(in input '%s')" % (
text[i], text[:i + 1], fmt_or(tbl.keys()), text))
return result
if __name__ == '__main__':
encoder = VariableLengthEncoder('encoding.txt')
# encode, decode = encoder.encode, encoder.decode
encode = lambda *args: ignore_errors(lambda: encoder.encode(*args), DecodingException)
decode = lambda *args: ignore_errors(lambda: encoder.decode(*args), DecodingException)
print(encoder.encoding_table)
print(encoder.decoding_table)
print(encode('ABC'))
print(encode('ABBC'))
print(decode(encode('ABC')))
print(decode(encode('ABBC')))
print(decode('.'))
print(decode('.--'))
print(decode('XYZ'))
print(encode('XYZ'))
print(encode('Y'))
A .-
B -...
C -.-.
BB --.-
YY -.--
{'A': '.-', 'B': {None: '-...', 'B': '--.-'}, 'C': '-.-.', 'Y': {'Y': '-.--'}}
{'.': {'-': 'A'}, '-': {'.': {'.': {'.': 'B'}, '-': {'.': 'C', '-': 'YY'}}, '-': {'.': {'-': 'BB'}}}}
.--...-.-.
.---.--.-.
ABC
ABBC
unterminated sequence '.' at '.': expecting '-'
(in input '.')
None
unterminated sequence '-' at '.--': expecting '.' or '-'
(in input '.--')
None
decoding error in 'XYZ' at 'X': expecting '.' or '-'
None
decoding error in 'XYZ' at 'X': expecting 'A', 'B', 'C' or 'Y'
None
unterminated sequence 'Y' at 'Y': expecting 'Y'
(in input 'Y')
None
[Finished in 0.5s]
def fmt_or(items):
''' formats a list ['a', 'b', 'c'] as "'a', 'b', or 'c'" '''
if len(items) == 0:
return '<none>'
elif len(items) <= 2:
return ' or '.join(map(repr, items))
else:
items = list(map(repr, items))
return ' or '.join((
', '.join(items[:-1]),
items[-1]
))
def ignore_errors(expr, *exception_types):
''' used to wrap a function that may throw, catching specific exception types.
usage: ignore_errors(lamda: <function call that may throw>, ExceptionType1, ExceptionType2, ...)
'''
try:
return expr()
except Exception as e:
if not exception_types or type(e) in exception_types:
print(e)
else:
raise e
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment