Last active
March 22, 2017 11:33
-
-
Save arjunattam/9195fc1425d3c5812ca2754fec3b16c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encode(data, modelers):
    '''
    Delta-encode a list of vectors into a compact ASCII string.

    data is a list of vectors. Each vector has n elements. modelers is a
    list of n functions, one per element of the vector; each must map the
    raw element to an integer.

    For every vector, each modeled integer is stored as its difference from
    the same dimension of the previous vector (the first vector is diffed
    against zeros). Each difference is written with the same variable-length
    scheme as Google's encoded polyline format: the sign is folded into the
    lowest bit, then the value is emitted in 5-bit chunks, low bits first,
    each chunk offset by 63 and flagged with 0x20 when more chunks follow.

    Returns the encoded string, or '' when data is empty.
    '''
    if not data:
        return ''
    # Collect characters in a list and join once; repeated string += in the
    # inner loop is quadratic on interpreters without CPython's special case.
    chunks = []
    prev = [0] * len(modelers)
    for vector in data:
        # modeled has n dimensions, each dimension is an integer
        modeled = [modeler(vector[i]) for i, modeler in enumerate(modelers)]
        for current, previous in zip(modeled, prev):
            v = current - previous
            # Fold the sign into bit 0 so small negative deltas stay short.
            v = ~(v << 1) if v < 0 else v << 1
            while v >= 0x20:
                # 0x20 marks "another chunk follows".
                chunks.append(chr((0x20 | (v & 0x1f)) + 63))
                v >>= 5
            chunks.append(chr(v + 63))
        prev = modeled
    return ''.join(chunks)
# Stock ticker example: 13536 vs 91495 (gzip: 34057) | |
from dateutil import parser | |
def price_modeler(x):
    '''Convert a price (number or numeric string) to an int, dropping the fraction.'''
    return int(float(x))


def date_modeler(x):
    '''Return the number of whole days from 2012-03-22 to the date string x.'''
    return (parser.parse(x) - parser.parse('2012-03-22')).days


# One modeler per column: a date followed by six price-like columns.
modeler = [
    date_modeler, price_modeler,
    price_modeler, price_modeler,
    price_modeler, price_modeler,
    price_modeler,
]
# Time aware polyline example | |
def coordinate_modeler(x):
    '''Scale x by 1e5 and round to the nearest integer (five decimal places kept).'''
    return int(round(x * 1e5))


def datetime_modeler(x):
    '''
    Return the whole seconds from 1970-01-01T00:00:00Z to the timestamp string x.

    NOTE(review): the reference instant is parsed with a 'Z' suffix, so x
    presumably needs to carry a timezone too; subtracting a naive datetime
    from an aware one raises TypeError. Confirm against callers.
    '''
    delta = parser.parse(x) - parser.parse('1970-01-01T00:00:00Z')
    # total_seconds() includes the days component. timedelta.seconds alone is
    # only the within-day remainder (0..86399) and would drop whole days.
    return int(delta.total_seconds())


# One modeler per column: two coordinates followed by a timestamp.
modeler = [
    coordinate_modeler, coordinate_modeler,
    datetime_modeler,
]
## Decoder | |
def decode(encoded, inverse_modelers):
    '''
    Decode a string of delta-encoded integers back into a list of vectors.

    encoded is read as consecutive groups of n integers, where n is
    len(inverse_modelers). Each group is a per-dimension delta that is added
    to a running state starting at zeros. After each group, every dimension
    of the running state is passed through the matching function in
    inverse_modelers, and the resulting list is appended to the output.

    Returns a list of lists, or [] when encoded is empty.
    Raises ValueError if encoded is non-empty but inverse_modelers is empty,
    because no characters could ever be consumed.
    '''
    data = []
    n_dimension = len(inverse_modelers)
    if encoded and not n_dimension:
        # Without this guard the loop below would never advance index.
        raise ValueError('inverse_modelers must not be empty for non-empty input')
    index = 0
    state = [0] * n_dimension
    while index < len(encoded):
        deltas = []
        # range (not xrange) so this runs on both Python 2 and Python 3.
        for _ in range(n_dimension):
            index, element = get_decoded_dimension(encoded, index)
            deltas.append(element)
        # Undo the delta step: accumulate onto the previous vector's values.
        state = [s + d for s, d in zip(state, deltas)]
        data.append([
            inverse(value) for inverse, value in zip(inverse_modelers, state)
        ])
    return data
def get_decoded_dimension(encoded, index):
    '''
    Decode one signed integer from encoded, starting at position index.

    Characters are consumed as 5-bit chunks, low bits first, each offset by
    63; a chunk value of 0x20 or more means another chunk follows.

    Returns a tuple (next_index, value), where next_index is the position
    just past the last character consumed.
    Raises IndexError if encoded ends before a terminating chunk is read.
    '''
    # result starts at 1 and every chunk is biased by -1. For a continuation
    # chunk (0x20 | low), adding (chunk - 1) << shift leaves a stray
    # 1 << (shift + 5), which the next chunk's -1 cancels. The net effect is
    # to strip the 0x20 continuation flags without masking.
    result = 1
    shift = 0
    while True:
        b = ord(encoded[index]) - 63 - 1
        index += 1
        result += b << shift
        shift += 5
        if b < 0x1f:
            # Unbiased chunk was below 0x20: this was the last chunk.
            break
    # Bit 0 carries the sign: odd means the original value was negative.
    if result & 1:
        value = ~result >> 1
    else:
        value = result >> 1
    return index, value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment