Skip to content

Instantly share code, notes, and snippets.

@1328
Created February 20, 2015 16:35
Show Gist options
  • Select an option

  • Save 1328/ac768193c9debdc6be87 to your computer and use it in GitHub Desktop.

Select an option

Save 1328/ac768193c9debdc6be87 to your computer and use it in GitHub Desktop.
parser
import time
import datetime
import os
import csv
import re
import logging
from pprint import pprint
from collections import namedtuple, defaultdict
# Module-level logging setup: runs at import time and sets the root logger's
# threshold to DEBUG, so every warning emitted by the parser is shown.
logging.basicConfig(level=logging.DEBUG)
# getLogger() with no name returns the root logger; all functions in this
# module log through this shared object.
logger = logging.getLogger()
# Sample input line whose subject ends in ".FPN" and whose values use the
# "VP=" field. Presumably kept for manual testing of the parser.
# NOTE: the second "TS=" is preceded by a space (", TS=").
l1 = '''
2014:12:27:00:00:36:GMT: subject=BMRA.BM.T_ARCHW-1.FPN, message={SD=2014:12:27:00:00:00:GMT,SP=3,NP=3,TS=2014:12:27:01:00:00:GMT,VP=6.0, TS=2014:12:27:01:01:00:GMT,VP=4.0,TS=2014:12:27:01:30:00:GMT,VP=4.0}
'''.strip()
# Sample input line whose subject ends in ".MEL" and whose values use the
# "VE=" field.
l = '''
2014:12:27:03:01:28:GMT: subject=BMRA.BM.T_FARR-2.MEL, message={SD=2014:12:27:00:00:00:GMT,SP=9,NP=2,TS=2014:12:27:04:00:00:GMT,VE=46.0,TS=2014:12:27:04:30:00:GMT,VE=46.0}
'''.strip()
# Uncomment to make `l` refer to the FPN sample instead of the MEL sample:
#l = l1
# Pasted log output (wrapped over three lines); it looks like a warning seen
# for the MEL sample during development -- TODO confirm whether still relevant:
#WARNING:root:Could not find transactions of type "MEL" in line 2014:12:27:03:01:28:GMT: subject=BMRA
#.BM.T_FARR-2.MEL, message={SD=2014:12:27:00:00:00:GMT,SP=9,NP=2,TS=2014:12:27:04:00:00:GMT,VE=46.0,T
#S=2014:12:27:04:30:00:GMT,VE=46.0}
# Captures the party name (NAME) and transaction type (TX) from the
# "subject=BMRA.BM.<NAME>.<TX>," header of a line.
RE_PARTY_TYPE = re.compile(r'subject=BMRA\.BM\.(?P<NAME>.+?)\.(?P<TX>.+?),')

# One pattern per value field. Each captures (TS, VALUE) pairs of the form
# "TS=<timestamp>,<field>=<value>" terminated by "," or "}".
RE_VP = re.compile(r'TS=(?P<TS>.+?),VP=(?P<VALUE>.+?)[},]')
RE_VF = re.compile(r'TS=(?P<TS>.+?),VF=(?P<VALUE>.+?)[},]')
RE_VB = re.compile(r'TS=(?P<TS>.+?),VB=(?P<VALUE>.+?)[},]')
RE_VE = re.compile(r'TS=(?P<TS>.+?),VE=(?P<VALUE>.+?)[},]')

# Transaction type (the TX group of RE_PARTY_TYPE) -> pattern used to pull
# the timestamp/value pairs out of a line of that type.
RE_DATA_TYPES = {'FPN': RE_VP, 'QPN': RE_VP, 'MIL': RE_VF, 'MEL': RE_VE}
# Every BOD level shares the same "VB=" pattern.
RE_DATA_TYPES.update(dict.fromkeys(
    (
        'BOD.-1', 'BOD.-2', 'BOD.-3', 'BOD.-4', 'BOD.-5',
        'BOD.1', 'BOD.2', 'BOD.3', 'BOD.4', 'BOD.5',
    ),
    RE_VB,
))
# One parsed transaction: party name, transaction type, value and time.
Record = namedtuple(
    typename='Record',
    field_names=('name', 'type', 'value', 'time'),
)
def process_time(time_string):
    ''' convert a colon-separated time string into a datetime object

    Every field except the last is converted to int and passed, in order,
    to datetime.datetime(); the last field (e.g. 'GMT') is discarded.
    '''
    fields = time_string.split(':')
    fields.pop()  # discard the trailing zone label
    numbers = [int(field) for field in fields]
    return datetime.datetime(*numbers)
def find_values(tx_type, line):
    ''' generate all transaction times/values from line of tx_type data

    tx_type -- key into RE_DATA_TYPES selecting the pattern to apply
    line    -- the raw text line to search

    Yields (time, value) tuples where time is the datetime produced by
    process_time() and value is the matched text, left as a string.

    A warning is logged and nothing is yielded when tx_type has no pattern
    registered or when the pattern matches nothing in the line.
    '''
    pattern = RE_DATA_TYPES.get(tx_type)
    if pattern is None:
        # Unknown types are skipped with a warning rather than raising
        # KeyError, so one unsupported line cannot abort a whole file.
        logger.warning('No pattern for transaction type "{}" in line {}'.format(
            tx_type, line))
        return
    found = pattern.findall(line)
    if not found:
        logger.warning('Could not find transactions of type "{}" in line {}'.format(
            tx_type, line))
    for timestamp, value in found:
        yield process_time(timestamp), value
def process_line(line):
    ''' process a line and yield a Record for each transaction identified

    line -- one raw text line of the data file

    The party name and transaction type are taken from the NAME and TX
    groups of RE_PARTY_TYPE; one Record(name, type, value, time) is yielded
    per (time, value) pair produced by find_values().

    If the line has no recognisable party/type header a warning is logged
    and nothing is yielded.
    '''
    match = RE_PARTY_TYPE.search(line)
    if not match:
        logger.warning('Could not find party/type in {}'.format(line))
        # End the generator with a plain return: raising StopIteration inside
        # a generator is converted to RuntimeError on Python 3.7+ (PEP 479).
        return
    name = match.group('NAME')
    tx_type = match.group('TX')
    for time, value in find_values(tx_type, line):
        yield Record(name, tx_type, value, time)
def process_transactions(data):
    ''' process the lines of a single data file

    data -- iterable of text lines

    returns a dict {party: [Records for that party, sorted by time]}
    '''
    by_party = defaultdict(list)
    for raw_line in data:
        for record in process_line(raw_line):
            by_party[record.name].append(record)

    # Hand back a plain dict with each party's records in time order.
    ordered = {}
    for party, records in by_party.items():
        ordered[party] = sorted(records, key=lambda rec: rec.time)
    return ordered
def get_data(fn = 'bm.txt'):
    ''' read the file fn and return its lines as a list

    Leading and trailing whitespace (including the newline) is stripped
    from every line; blank lines are kept as empty strings.
    '''
    stripped = []
    with open(fn) as handle:
        for raw in handle:
            stripped.append(raw.strip())
    return stripped
def main():
    ''' parse the default data file and pretty-print the records per party '''
    # Debug aid: parse a single sample line instead of the whole file.
    #   print(list(process_line(l)))
    #   return
    pprint(process_transactions(get_data()))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment