-
-
Save 1328/ac768193c9debdc6be87 to your computer and use it in GitHub Desktop.
parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import time | |
| import datetime | |
| import os | |
| import csv | |
| import re | |
| import logging | |
| from pprint import pprint | |
| from collections import namedtuple, defaultdict | |
# Configure the root logger at DEBUG level and keep a module-wide handle to it.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()

# Sample feed line carrying TS/VP pairs (transaction type FPN).
l1 = (
    '2014:12:27:00:00:36:GMT: subject=BMRA.BM.T_ARCHW-1.FPN, '
    'message={SD=2014:12:27:00:00:00:GMT,SP=3,NP=3,'
    'TS=2014:12:27:01:00:00:GMT,VP=6.0, '
    'TS=2014:12:27:01:01:00:GMT,VP=4.0,'
    'TS=2014:12:27:01:30:00:GMT,VP=4.0}'
)

# Sample feed line carrying TS/VE pairs (transaction type MEL).
l = (
    '2014:12:27:03:01:28:GMT: subject=BMRA.BM.T_FARR-2.MEL, '
    'message={SD=2014:12:27:00:00:00:GMT,SP=9,NP=2,'
    'TS=2014:12:27:04:00:00:GMT,VE=46.0,'
    'TS=2014:12:27:04:30:00:GMT,VE=46.0}'
)

# To try the FPN sample instead, rebind:  l = l1
#
# Log output recorded for the MEL sample (kept for reference):
#   WARNING:root:Could not find transactions of type "MEL" in line
#   2014:12:27:03:01:28:GMT: subject=BMRA.BM.T_FARR-2.MEL,
#   message={SD=2014:12:27:00:00:00:GMT,SP=9,NP=2,
#   TS=2014:12:27:04:00:00:GMT,VE=46.0,TS=2014:12:27:04:30:00:GMT,VE=46.0}
# Extracts the party name (NAME) and transaction type (TX) from the
# "subject=BMRA.BM.<NAME>.<TX>," part of a line.
RE_PARTY_TYPE = re.compile(r'subject=BMRA\.BM\.(?P<NAME>.+?)\.(?P<TX>.+?),')


def _pair_pattern(value_key):
    '''Compile the pattern matching one "TS=<time>,<value_key>=<value>" pair.

    Both groups are limited to characters other than "," and "}" so a match
    can never run across a field boundary (a lazy ".+?" would happily swallow
    "a,XX=1,TS=b" as a single timestamp).  The pair must still be terminated
    by "," or "}".
    '''
    return re.compile(
        r'TS=(?P<TS>[^,}]+),' + re.escape(value_key) + r'=(?P<VALUE>[^,}]+)[},]'
    )


# One pattern per value key; findall() on each yields (TS, VALUE) tuples.
RE_VP = _pair_pattern('VP')
RE_VF = _pair_pattern('VF')
RE_VB = _pair_pattern('VB')
RE_VE = _pair_pattern('VE')

# Transaction type (the TX group of RE_PARTY_TYPE) -> pattern for its pairs.
RE_DATA_TYPES = {
    'FPN': RE_VP,
    'QPN': RE_VP,
    'MIL': RE_VF,
    'MEL': RE_VE,
    'BOD.-1': RE_VB,
    'BOD.-2': RE_VB,
    'BOD.-3': RE_VB,
    'BOD.-4': RE_VB,
    'BOD.-5': RE_VB,
    'BOD.1': RE_VB,
    'BOD.2': RE_VB,
    'BOD.3': RE_VB,
    'BOD.4': RE_VB,
    'BOD.5': RE_VB,
}
# One parsed transaction, in field order: party name, transaction type,
# value and time.
Record = namedtuple('Record', 'name type value time')
def process_time(time_string):
    '''Convert a colon-separated time string into a datetime.datetime.

    The string is split on ":"; the trailing element (e.g. "GMT") is
    discarded and the remaining elements are passed, as integers, to the
    datetime.datetime constructor in order.
    '''
    fields = time_string.split(':')
    numbers = [int(field) for field in fields[:-1]]
    return datetime.datetime(*numbers)
def find_values(tx_type, line):
    '''Yield a (time, value) pair for every tx_type transaction found in line.

    tx_type selects the pattern from RE_DATA_TYPES.  Each matched timestamp is
    converted with process_time; the value is yielded as the matched string.
    A warning is logged, and nothing is yielded, when tx_type has no
    registered pattern or when the pattern finds nothing in the line.
    '''
    pattern = RE_DATA_TYPES.get(tx_type)
    if pattern is None:
        # An unregistered type would otherwise surface as a KeyError and abort
        # processing of every remaining line; warn and skip instead.
        logger.warning('No pattern registered for transactions of type "{}" in line {}'.format(
            tx_type, line))
        return
    found = pattern.findall(line)
    if not found:
        logger.warning('Could not find transactions of type "{}" in line {}'.format(
            tx_type, line))
    for stamp, value in found:
        yield process_time(stamp), value
def process_line(line):
    '''Yield a Record for each transaction identified in line.

    The party name and transaction type are taken from the NAME and TX groups
    of RE_PARTY_TYPE; the time/value pairs come from find_values.  When the
    line has no party/type, a warning is logged and nothing is yielded.
    '''
    match = RE_PARTY_TYPE.search(line)
    if match is None:
        logger.warning('Could not find party/type in {}'.format(line))
        # A bare return ends the generator.  Raising StopIteration here would
        # be turned into RuntimeError on Python 3.7+ (PEP 479).
        return
    name = match.group('NAME')
    tx_type = match.group('TX')
    for when, value in find_values(tx_type, line):
        yield Record(name, tx_type, value, when)
def process_transactions(data):
    '''Group the transactions found in data by party.

    data is an iterable of lines; each one is handed to process_line.
    Returns a dict {party: [transactions sorted by time]}.
    '''
    grouped = defaultdict(list)
    for line in data:
        for record in process_line(line):
            grouped[record.name].append(record)

    ordered = {}
    for party, records in grouped.items():
        ordered[party] = sorted(records, key=lambda rec: rec.time)
    return ordered
def get_data(fn = 'bm.txt'):
    '''Read the file fn and return its lines, each stripped of surrounding whitespace.'''
    lines = []
    with open(fn) as source:
        for raw in source:
            lines.append(raw.strip())
    return lines
def main():
    '''Load the default data file, group its transactions by party and pretty-print them.'''
    # Quick single-line check (disabled):
    #     print(list(process_line(l)))
    pprint(process_transactions(get_data()))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment