Skip to content

Instantly share code, notes, and snippets.

@arjunattam
Last active March 22, 2017 11:33
Show Gist options
  • Save arjunattam/9195fc1425d3c5812ca2754fec3b16c1 to your computer and use it in GitHub Desktop.
Save arjunattam/9195fc1425d3c5812ca2754fec3b16c1 to your computer and use it in GitHub Desktop.
def encode(data, modelers):
    '''
    Encode a list of vectors as a compact, polyline-style ASCII string.

    data is a list of vectors; each vector has (at least) n elements.
    modelers is a list of n functions; modelers[i] maps the i-th element
    of a vector to an integer.

    Every vector is modeled to integers, the difference to the previously
    modeled vector is taken (the first vector is diffed against zeros),
    and each difference is written as a variable-length run of printable
    characters carrying 5 payload bits each.

    Returns the encoded string; an empty data list yields ''.
    Raises IndexError if a vector has fewer elements than modelers.
    '''
    if not data:
        return ''

    # Collect the output characters and join once at the end instead of
    # growing a string with += inside three nested loops.
    chunks = []
    prev = [0] * len(modelers)
    for vector in data:
        # modeled has n dimensions, each dimension is an integer
        modeled = [modeler(vector[i]) for i, modeler in enumerate(modelers)]
        for current, previous in zip(modeled, prev):
            v = current - previous
            # Fold the sign into the lowest bit so that small negative
            # deltas stay as short as small positive ones.
            v = ~(v << 1) if v < 0 else v << 1
            # Emit 5 bits at a time, least significant group first; 0x20
            # flags "more groups follow", +63 shifts into printable ASCII.
            while v >= 0x20:
                chunks.append(chr((0x20 | (v & 0x1f)) + 63))
                v >>= 5
            chunks.append(chr(v + 63))
        prev = modeled
    return ''.join(chunks)
# Stock ticker example: 13536 vs 91495 (gzip: 34057)
from dateutil import parser
def price_modeler(x):
    # Parse as float first so strings like '12.70' are accepted, then
    # truncate toward zero to get the integer the encoder works with.
    return int(float(x))


def date_modeler(x):
    # Number of whole days between the parsed date and 2012-03-22.
    reference = parser.parse('2012-03-22')
    return (parser.parse(x) - reference).days


# One date column followed by six price columns.
modeler = [date_modeler] + [price_modeler] * 6
# Time aware polyline example
def coordinate_modeler(x):
    # Keep five decimal places of the coordinate as an integer.
    return int(round(x * 1e5))


def datetime_modeler(x):
    # Whole seconds elapsed since 1970-01-01T00:00:00Z.
    # timedelta.seconds alone is only the within-day remainder (0..86399)
    # and would drop the date, so the days component is added explicitly.
    # The reference instant is timezone-aware, so x has to parse to a
    # timezone-aware datetime as well for the subtraction to work.
    elapsed = parser.parse(x) - parser.parse('1970-01-01T00:00:00Z')
    return elapsed.days * 86400 + elapsed.seconds


# latitude, longitude, timestamp
modeler = [
    coordinate_modeler, coordinate_modeler,
    datetime_modeler,
]
## Decoder
def decode(encoded, inverse_modelers):
    '''
    Decode a string produced by encode back into a list of vectors.

    encoded is the encoded string. inverse_modelers is a list of n
    functions; inverse_modelers[i] maps the accumulated integer of the
    i-th dimension back to its original representation.

    Returns a list of vectors with n elements each.
    Raises ValueError if inverse_modelers is empty while encoded is not,
    and IndexError if encoded ends in the middle of a vector.
    '''
    n_dimension = len(inverse_modelers)
    if n_dimension == 0 and encoded:
        # With zero dimensions nothing is ever consumed from encoded, so
        # the loop below would never terminate.
        raise ValueError('inverse_modelers must not be empty')

    data = []
    index = 0
    # Running sum per dimension; the encoded stream stores differences.
    state = [0] * n_dimension
    while index < len(encoded):
        deltas = []
        for _ in range(n_dimension):
            index, element = get_decoded_dimension(encoded, index)
            deltas.append(element)
        state = [s + d for s, d in zip(state, deltas)]
        data.append(
            [inverse(value) for inverse, value in zip(inverse_modelers, state)]
        )
    return data
def get_decoded_dimension(encoded, index):
    '''
    Read one variable-length signed integer from encoded, starting at
    position index.

    Returns a tuple (next_index, value), where next_index is the position
    of the first character after the value that was read.
    Raises IndexError if encoded ends before the value is complete.
    '''
    cursor = index
    # Starting at 1 cancels the extra -1 applied to every character
    # below; the surplus carried by continuation characters is cancelled
    # by the following group in turn.
    accumulated = 1
    bit_offset = 0
    more_groups = True
    while more_groups:
        group = ord(encoded[cursor]) - 64
        cursor += 1
        accumulated += group << bit_offset
        bit_offset += 5
        # Only the last character of a value falls below 0x1f here.
        more_groups = group >= 0x1f

    # The lowest bit carries the sign; the remaining bits the magnitude.
    if accumulated & 1:
        value = ~accumulated >> 1
    else:
        value = accumulated >> 1
    return cursor, value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment