Created
September 4, 2014 21:58
-
-
Save 1328/64c01ce440a223c2f0e1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pprint import pprint | |
| import dicttoxml | |
| from xml.dom.minidom import parseString | |
# Input file names handed to process() by main(), in list order.
FILES = [
    'FILE2.COMPAS',
    'FILE1.COMPAS',
]
def read_csv(fn):
    '''
    Read a comma-separated text file into a list of rows.

    Every non-blank line is stripped of surrounding whitespace and split on
    each comma. Blank and whitespace-only lines are skipped. Fields are
    returned as raw text, so double quotes present in the file are kept.

    NOTE: the split is purely textual, so a comma inside a quoted field is
    treated as a separator too.

    fn -- path of the file to read
    returns -- list of rows, each row being a list of field strings
    '''
    data = []
    with open(fn, mode='r') as fh:
        for raw_line in fh:
            line = raw_line.strip()
            # A line read from a file still carries its newline, so it is
            # only falsy for a blank line once it has been stripped.
            if line:
                data.append(line.split(','))
    return data
def parse_hdrs(hdrlist):
    '''
    takes a list of split 'hdr' lines and returns a list of dictionaries
    later access will allow lookups like: result[3]['SellerName'] to reference
    the SellerName for the fourth header line fed into this function.

    The translation table numbers columns from 1, column 1 being the record
    tag itself (the field that sits at list index 0 of a split line), so a
    column number is turned into a list index by subtracting one.

    A line with fewer columns than the table describes is reported on stdout
    and left out of the result.
    '''
    hdr_translation = {
        1: 'Header',
        2: 'blank1',
        3: 'blank2',
        4: 'CustomerNumber',
        5: 'CustomerName',
        6: 'blank3',
        7: 'UnitNumber1',
        8: 'SellerName',
        9: 'blank4',
        10: 'blank5',
        11: 'UnitNumber2',
        12: 'DeliveryAddress',
        13: 'blank6',
        14: 'blank7',
        15: 'blank8',
        16: 'blank9',
        17: 'blank10',
        18: 'RitterInvoiceNumber',
        19: 'DateofInvoice',
        20: 'TaxPointDate',
        21: 'blank11',
        22: 'blank12',
        23: 'CustomerOrderNumber',
        24: 'blank13',
        25: 'blank14',
        26: 'blank15',
        27: 'blank16',
        28: 'blank17',
        29: 'blank18',
        30: 'blank19',
        31: 'PaymentTerms',
        32: 'PaymentDueDate',
        33: 'discounttobeappliedtoinvoice',
        34: 'blank20',
        35: 'blank21',
        36: 'FileType',
        37: 'PayTermsCode',
    }
    result = []
    for hdr in hdrlist:
        try:
            # column numbers are 1-based, list indexes are 0-based
            header_dict = {name: hdr[column - 1]
                           for column, name in hdr_translation.items()}
        except IndexError:
            print('could not parse headers from {}'.format(hdr))
        else:
            result.append(header_dict)
    return result
def parse_ilds(ildlist):
    '''
    takes a list of split 'ild' lines and returns a list of dictionaries,
    one per line, keyed by the field names in the translation table below.

    The translation table numbers columns from 1, column 1 being the record
    tag itself (the field that sits at list index 0 of a split line), so a
    column number is turned into a list index by subtracting one.

    A line with fewer columns than the table describes is reported on stdout
    and left out of the result.
    '''
    ild_translation = {
        1: 'InvoiceLines',
        2: 'Blank1',
        3: 'Product Code',
        4: 'Blank2',
        5: 'Qty',
        6: 'Blank3',
        7: 'Selling Unit',
        8: 'ProductDescription',
        9: 'blank4',
        10: 'UnitCostafterdiscount',
        11: 'ExtendedLinecostexVATincDiscount',
        12: 'VatCode',
        13: 'Vat',
        14: 'UnitCostExVatBeforeDiscount',
        15: 'DiscountValue',
        16: 'Discount',
    }
    result = []
    for ild in ildlist:
        try:
            # column numbers are 1-based, list indexes are 0-based
            ild_dict = {name: ild[column - 1]
                        for column, name in ild_translation.items()}
        except IndexError:
            print('could not parse ild line from {}'.format(ild))
        else:
            result.append(ild_dict)
    return result
def parse_stls(stllist):
    '''
    takes a list of split 'stl' lines and returns a list of dictionaries,
    one per line, keyed by the field names in the translation table below.

    The translation table numbers columns from 1, column 1 being the record
    tag itself (the field that sits at list index 0 of a split line), so a
    column number is turned into a list index by subtracting one.

    A line with fewer columns than the table describes is reported on stdout
    and left out of the result.
    '''
    stl_translation = {
        1: 'VatSummary',
        2: 'VatCode',
        3: 'VatRate',
        4: 'VattableAmount',
        5: 'Blank1',
        6: 'Blank2',
        7: 'Blank3',
        8: 'Blank4',
        9: 'Blank5',
        10: 'TotalVattableAmount',
        11: 'VatAmount',
        12: 'Blank6',
        13: 'LineTotalIncVatAmount',
    }
    result = []
    for stl in stllist:
        try:
            # column numbers are 1-based, list indexes are 0-based
            stl_dict = {name: stl[column - 1]
                        for column, name in stl_translation.items()}
        except IndexError:
            print('could not parse stl line from {}'.format(stl))
        else:
            result.append(stl_dict)
    return result
def parse_tlrs(tlrlist):
    '''
    takes a list of split 'tlr' lines and returns a list of dictionaries,
    one per line, keyed by the field names in the translation table below.

    The translation table numbers columns from 1, column 1 being the record
    tag itself (the field that sits at list index 0 of a split line), so a
    column number is turned into a list index by subtracting one.

    A line with fewer columns than the table describes is reported on stdout
    and left out of the result.
    '''
    tlr_translation = {
        # 'InvoiceTotal' is used for both column 1 and column 11; the later
        # entry overwrites the earlier one, so each resulting dictionary
        # carries the column 11 value under that name.
        1: 'InvoiceTotal',
        2: 'Blank1',
        3: 'Blank2',
        4: 'Blank3',
        5: 'Blank4',
        6: 'Blank5',
        7: 'Blank6',
        8: 'TotalBeforeVat(subtotalsaddedtogether)',
        9: 'VatAmount',
        10: 'Blank7',
        11: 'InvoiceTotal',
    }
    result = []
    for tlr in tlrlist:
        try:
            # column numbers are 1-based, list indexes are 0-based
            tlr_dict = {name: tlr[column - 1]
                        for column, name in tlr_translation.items()}
        except IndexError:
            print('could not parse tlr line from {}'.format(tlr))
        else:
            result.append(tlr_dict)
    return result
def split(data):
    '''
    Sort split lines into four lists according to their record tag.

    The tag is the first field of a line, compared with its double quotes
    still attached: '"HDR"', '"ILD"', '"STL"' or '"TLR"'. Lines keep their
    original relative order inside each list.

    A line with any other tag, or with no fields at all, is printed to
    stdout between two warning banners and dropped.

    data -- iterable of lines, each a list of field strings
    returns -- the tuple (hdrlist, ildlist, stllist, tlrlist)
    '''
    hdrlist = []
    ildlist = []
    stllist = []
    tlrlist = []
    buckets = {
        '"HDR"': hdrlist,
        '"ILD"': ildlist,
        '"STL"': stllist,
        '"TLR"': tlrlist,
    }
    for line in data:
        # An empty line has no tag to look at; treat it as unexpected
        # instead of failing on line[0].
        tag = line[0] if line else None
        bucket = buckets.get(tag)
        if bucket is not None:
            bucket.append(line)
        else:
            print("!!!!! Unexpected item in file !!!!!")
            print(line)
            print("!!!!! Unexpected item in file !!!!!")
    return hdrlist, ildlist, stllist, tlrlist
def strip_quotes(l):
    '''
    Return new dictionaries mirroring those in l, with every value stripped
    of its leading and trailing double quote characters. Keys are left
    untouched and the input dictionaries are not modified.
    '''
    return [
        {key: value.strip('"') for key, value in record.items()}
        for record in l
    ]
def process(fn):
    '''
    Read one input file and return its parsed records grouped by record type.

    The file is read with read_csv(), its lines are sorted by record tag with
    split(), and each group is run through its parse_* function. The number
    of parsed rows in each group is printed, then the double quotes are
    stripped from every value.

    returns -- {fn: [{'hdrs': [...]}, {'ilds': [...]},
                     {'stls': [...]}, {'tlrs': [...]}]}
    '''
    rows = read_csv(fn)
    hdr_rows, ild_rows, stl_rows, tlr_rows = split(rows)

    # (caption printed after the count, key in the result, parsed records)
    sections = [
        ("rows in header", 'hdrs', parse_hdrs(hdr_rows)),
        ("rows in ild", 'ilds', parse_ilds(ild_rows)),
        ("rows in stl", 'stls', parse_stls(stl_rows)),
        ("rows in tlr", 'tlrs', parse_tlrs(tlr_rows)),
    ]

    for caption, _, records in sections:
        print("There is", len(records), caption)

    # the quotes were bugging me, so I dropped them
    # feel free to delete, it should still work
    return {fn: [{key: strip_quotes(records)} for _, key, records in sections]}
def to_xml(combined):
    '''
    Serialise the combined dictionary to XML and return the parsed document.

    The raw output of dicttoxml is pretty-printed first, then parsed with
    minidom and printed again in indented form.

    returns -- the object produced by parseString() for the generated XML
    '''
    raw_xml = dicttoxml.dicttoxml(
        combined,
        custom_root='xml_file',  # looks like it does not like spaces here
        attr_type=False,
    )
    pprint(raw_xml)

    document = parseString(raw_xml)
    print(document.toprettyxml())
    return document
def main():
    '''
    Process every file named in FILES, merge the per-file results into one
    dictionary, pretty-print that dictionary and render it as XML.
    '''
    combined = {}
    for filename in FILES:
        combined.update(process(filename))
    pprint(combined)
    to_xml(combined)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment