Skip to content

Instantly share code, notes, and snippets.

@spotco
Created July 8, 2018 21:07
Show Gist options
  • Select an option

  • Save spotco/cbff37aff9a1d94f8eec1d9484ea34a1 to your computer and use it in GitHub Desktop.

Select an option

Save spotco/cbff37aff9a1d94f8eec1d9484ea34a1 to your computer and use it in GitHub Desktop.
bincompress.py
import sys
import struct
import binascii
def to_bytes(n, length):
    """Encode a non-negative integer as big-endian bytes, left-padded with zeros.

    Args:
        n: Non-negative integer to encode.
        length: Number of bytes the result must fit in; shorter encodings
            are zero-padded on the left up to this width.

    Returns:
        A byte string of exactly ``length`` bytes.

    Raises:
        ValueError: If ``n`` is negative (there is no sign encoding here).
        Exception: If ``n`` needs more than ``length`` bytes.
    """
    if n < 0:
        raise ValueError("Value(%d) must be non-negative" % n)
    h = '%x' % n
    # Hex decoding needs an even digit count, so pad odd-length strings with
    # one '0' first; zfill then widens the result to the requested byte width.
    padded = ('0' * (len(h) % 2) + h).zfill(length * 2)
    # binascii.unhexlify works on both Python 2 and 3, unlike str.decode('hex').
    s = binascii.unhexlify(padded)
    if len(s) > length:
        raise Exception("Value(%d) bytes(%d) expected(%d)" % (n, len(s), length))
    return s
# Per-column interning tables, keyed by column name. Each entry has the shape
# {"incr": <next id to hand out>, "map": {<value>: <assigned id>}}.
_type_states = {}


def convert_to_int_repr(val, type):
    """Return the integer id assigned to ``val`` within the column ``type``.

    Ids are handed out per column, starting at 0, in order of first
    appearance; asking again for a value already seen returns the same id.

    Args:
        val: Hashable value to intern (used as a dictionary key).
        type: Column name selecting which interning table to use. The name
            shadows the builtin but is kept so existing callers still work.

    Returns:
        The integer id of ``val`` in that column's table.
    """
    # setdefault creates the column's table on first use. No `global`
    # statement is needed because the module-level dict is only mutated,
    # never rebound.
    type_state = _type_states.setdefault(type, {"incr": 0, "map": {}})
    type_state_map = type_state["map"]
    if val not in type_state_map:
        type_state_map[val] = type_state["incr"]
        type_state["incr"] += 1
    return type_state_map[val]
def float_to_bytes(val, byte_order="@"):
    """Pack ``val`` as a 4-byte single-precision float.

    Args:
        val: Number to pack; precision beyond a C ``float`` is lost.
        byte_order: ``struct`` byte-order prefix. The default ``"@"`` is the
            machine's native order, which is what a bare ``"f"`` format
            means, so existing callers get the same bytes as before. Pass
            ``">"`` or ``"<"`` for output that does not depend on the
            machine that produced it.

    Returns:
        A 4-byte byte string.
    """
    return struct.pack(byte_order + "f", val)
def write_type_state(type, val_bytes_len, file):
    """Append the value-to-id dictionary of column ``type`` to ``file``.

    Each entry is written as three fields, in this order: the key's raw
    bytes, the key's byte length (1 byte), and the key's id
    (``val_bytes_len`` bytes, big-endian). After all entries, the number of
    entries is written as 2 big-endian bytes.

    NOTE(review): lengths and counts trail the data they describe, which
    suggests the file is meant to be parsed from the end backwards --
    confirm against the decoder.

    Args:
        type: Column name whose table in ``_type_states`` is written.
        val_bytes_len: Width in bytes of each id field.
        file: Writable binary file object.

    Raises:
        KeyError: If no value was ever interned for ``type``.
        Exception: From ``to_bytes`` when a key is longer than 255 bytes,
            an id does not fit in ``val_bytes_len`` bytes, or the table has
            more than 65535 entries.
    """
    type_state_map = _type_states[type]["map"]
    for key, value in type_state_map.items():
        # A binary file only accepts bytes. On Python 3 the keys are text
        # and must be encoded; on Python 2 `str` already is `bytes`, so
        # this is a no-op there.
        raw_key = key if isinstance(key, bytes) else key.encode("utf-8")
        file.write(raw_key)
        # Record the encoded length so it matches the bytes just written.
        file.write(to_bytes(len(raw_key), 1))
        file.write(to_bytes(value, val_bytes_len))
    file.write(to_bytes(len(type_state_map), 2))
# Usage: <script> INPUT_CSV OUTPUT_BIN
#
# Each input line with at least 8 comma-separated fields becomes one
# fixed-width 12-byte record:
#   2 bytes  id of field 0                          (COLUMN_SYMBOL)
#   2 bytes  id of fields 1+2+3 concatenated        (COLUMN_SYM3)
#   2 bytes  id of the timestamp delta (field 4)    (COLUMN_TIMESTAMP_DELTA)
#   4 bytes  field 6 as a single-precision float
#   2 bytes  id of field 7 parsed as an integer     (COLUMN_ENDINT)
# Field 5 is not used. After the records come the four id dictionaries and
# finally the first timestamp as 4 bytes.

# Read the whole input up front; `with` closes the handle straight away
# instead of leaving it open for the rest of the run.
with open(sys.argv[1], 'r') as _input_file:
    _lines = _input_file.read().split("\n")

# Field 4 of the first line seeds the delta chain, so the first record's
# delta is 0 when that line is itself a valid record.
_last_timestamp = int(_lines[0].split(",")[4])
_first_timestamp = _last_timestamp

COLUMN_SYMBOL = "symbol"
COLUMN_SYM3 = "sym3"
COLUMN_TIMESTAMP_DELTA = "timestamp_delta"
COLUMN_ENDINT = "endint"

# `with` guarantees the output is flushed and closed even if a malformed
# row raises part-way through.
with open(sys.argv[2], 'wb') as _output_compressed_file:
    for line in _lines:
        tokens = line.split(",")
        if len(tokens) < 8:
            # Skips blank lines (including the one after a trailing
            # newline) and any row too short to hold every field read below.
            continue
        cur_timestamp = int(tokens[4])
        # Deltas are interned as strings because dictionary keys are later
        # written to the file verbatim by write_type_state.
        timestamp_delta = str(cur_timestamp - _last_timestamp)
        _output_compressed_file.write(to_bytes(
            convert_to_int_repr(tokens[0], COLUMN_SYMBOL),
            2
        ))
        _output_compressed_file.write(to_bytes(
            convert_to_int_repr(tokens[1] + tokens[2] + tokens[3], COLUMN_SYM3),
            2
        ))
        _output_compressed_file.write(to_bytes(
            convert_to_int_repr(timestamp_delta, COLUMN_TIMESTAMP_DELTA),
            2
        ))
        _output_compressed_file.write(float_to_bytes(float(tokens[6])))
        # str(int(...)) normalises the text (e.g. surrounding whitespace)
        # so equal integers share one dictionary entry.
        _output_compressed_file.write(to_bytes(
            convert_to_int_repr(str(int(tokens[7])), COLUMN_ENDINT),
            2
        ))
        _last_timestamp = cur_timestamp

    # Trailer: one dictionary per interned column, then the base timestamp.
    write_type_state(COLUMN_SYMBOL, 2, _output_compressed_file)
    write_type_state(COLUMN_SYM3, 2, _output_compressed_file)
    write_type_state(COLUMN_TIMESTAMP_DELTA, 2, _output_compressed_file)
    write_type_state(COLUMN_ENDINT, 2, _output_compressed_file)
    _output_compressed_file.write(to_bytes(_first_timestamp, 4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment