Skip to content

Instantly share code, notes, and snippets.

@1328
Created September 4, 2014 21:58
Show Gist options
  • Select an option

  • Save 1328/64c01ce440a223c2f0e1 to your computer and use it in GitHub Desktop.

Select an option

Save 1328/64c01ce440a223c2f0e1 to your computer and use it in GitHub Desktop.
from pprint import pprint
import dicttoxml
from xml.dom.minidom import parseString
# Input files to convert; main() processes them in the order listed here.
FILES = ['FILE2.COMPAS', 'FILE1.COMPAS']
def read_csv(fn):
    """Read a comma-separated text file into a list of rows.

    Every non-blank line is stripped of surrounding whitespace and split on
    each comma. No CSV quoting rules are applied: double quotes remain part
    of the field text, and a comma inside a quoted field still splits it.

    Blank (whitespace-only) lines are skipped so they do not turn into
    spurious single-empty-field rows.

    Args:
        fn: Path of the file to read.

    Returns:
        A list containing one list of field strings per non-blank line.
    """
    data = []
    with open(fn, mode='r') as fh:
        for raw_line in fh:
            line = raw_line.strip()
            # A line read from a file still carries its newline, so the raw
            # text is never empty; test the stripped text to drop blanks.
            if line:
                data.append(line.split(','))
    return data
def parse_hdrs(hdrlist):
    """Convert split 'HDR' rows into dictionaries keyed by field name.

    The n-th name below (counting from 1) is paired with ``row[n]``, so
    ``row[0]`` is never stored and a row needs at least 38 elements.
    Rows that are too short are reported on stdout and left out of the
    result.

    NOTE(review): the first name is 'Header' yet it is read from row[1],
    not row[0]; the field numbers may have been meant as 1-based positions
    (i.e. row[n - 1]). Confirm against a sample file before relying on the
    field alignment.

    Args:
        hdrlist: Iterable of rows, each an indexable sequence of fields.

    Returns:
        A list with one dict per successfully parsed row, e.g.
        ``result[3]['SellerName']`` is the SellerName of the fourth
        parsed row.
    """
    field_names = (
        # fields 1-5
        'Header', 'blank1', 'blank2', 'CustomerNumber', 'CustomerName',
        # fields 6-10
        'blank3', 'UnitNumber1', 'SellerName', 'blank4', 'blank5',
        # fields 11-15
        'UnitNumber2', 'DeliveryAddress', 'blank6', 'blank7', 'blank8',
        # fields 16-20
        'blank9', 'blank10', 'RitterInvoiceNumber', 'DateofInvoice',
        'TaxPointDate',
        # fields 21-25
        'blank11', 'blank12', 'CustomerOrderNumber', 'blank13', 'blank14',
        # fields 26-30
        'blank15', 'blank16', 'blank17', 'blank18', 'blank19',
        # fields 31-35
        'PaymentTerms', 'PaymentDueDate', 'discounttobeappliedtoinvoice',
        'blank20', 'blank21',
        # fields 36-37
        'FileType', 'PayTermsCode',
    )
    parsed = []
    for row in hdrlist:
        try:
            record = {}
            for position, name in enumerate(field_names, start=1):
                record[name] = row[position]
        except IndexError:
            # Row has fewer fields than the layout requires; skip it.
            print('could not parse headers from {}'.format(row))
        else:
            parsed.append(record)
    return parsed
def parse_ilds(ildlist):
    """Convert split 'ILD' (invoice line) rows into dictionaries.

    Field number n in the table below is read from ``row[n]``, so
    ``row[0]`` is never stored and a row needs at least 17 elements.
    Rows that are too short are reported on stdout and left out of the
    result.

    NOTE(review): field 1 is named 'InvoiceLines' yet it is read from
    row[1], not row[0]; the numbers may have been meant as 1-based
    positions. Confirm against a sample file.

    Args:
        ildlist: Iterable of rows, each an indexable sequence of fields.

    Returns:
        A list with one dict (field name -> field text) per parsed row.
    """
    ild_translation = {
        1: 'InvoiceLines',
        2: 'Blank1',
        3: 'Product Code',
        4: 'Blank2',
        5: 'Qty',
        6: 'Blank3',
        7: 'Selling Unit',
        8: 'ProductDescription',
        9: 'blank4',
        10: 'UnitCostafterdiscount',
        11: 'ExtendedLinecostexVATincDiscount',
        12: 'VatCode',
        13: 'Vat',
        14: 'UnitCostExVatBeforeDiscount',
        15: 'DiscountValue',
        16: 'Discount',
    }
    result = []
    for ild in ildlist:
        try:
            ild_dict = {name: ild[field]
                        for field, name in ild_translation.items()}
        except IndexError:
            # The message used to say "headers" (copied from parse_hdrs),
            # which pointed at the wrong record type when debugging.
            print('could not parse ILD line from {}'.format(ild))
        else:
            result.append(ild_dict)
    return result
def parse_stls(stllist):
    """Convert split 'STL' (VAT summary) rows into dictionaries.

    Field number n in the table below is read from ``row[n]``, so
    ``row[0]`` is never stored and a row needs at least 14 elements.
    Rows that are too short are reported on stdout and left out of the
    result.

    NOTE(review): field 1 is named 'VatSummary' yet it is read from
    row[1], not row[0]; the numbers may have been meant as 1-based
    positions. Confirm against a sample file.

    Args:
        stllist: Iterable of rows, each an indexable sequence of fields.

    Returns:
        A list with one dict (field name -> field text) per parsed row.
    """
    stl_translation = {
        1: 'VatSummary',
        2: 'VatCode',
        3: 'VatRate',
        4: 'VattableAmount',
        5: 'Blank1',
        6: 'Blank2',
        7: 'Blank3',
        8: 'Blank4',
        9: 'Blank5',
        10: 'TotalVattableAmount',
        11: 'VatAmount',
        12: 'Blank6',
        13: 'LineTotalIncVatAmount',
    }
    result = []
    for stl in stllist:
        try:
            stl_dict = {name: stl[field]
                        for field, name in stl_translation.items()}
        except IndexError:
            # The message used to say "headers" (copied from parse_hdrs),
            # which pointed at the wrong record type when debugging.
            print('could not parse STL line from {}'.format(stl))
        else:
            result.append(stl_dict)
    return result
def parse_tlrs(tlrlist):
    """Convert split 'TLR' (invoice total) rows into dictionaries.

    Field number n in the table below is read from ``row[n]``, so
    ``row[0]`` is never stored and a row needs at least 12 elements.
    Rows that are too short are reported on stdout and left out of the
    result.

    Fields 1 and 11 share the name 'InvoiceTotal'. The dict is filled in
    ascending field order, so the value from field 11 overwrites the one
    from field 1 and each result dict has 10 keys, not 11.

    NOTE(review): field 1 is read from row[1], not row[0]; the numbers may
    have been meant as 1-based positions. Confirm against a sample file.

    Args:
        tlrlist: Iterable of rows, each an indexable sequence of fields.

    Returns:
        A list with one dict (field name -> field text) per parsed row.
    """
    tlr_translation = {
        1: 'InvoiceTotal',
        2: 'Blank1',
        3: 'Blank2',
        4: 'Blank3',
        5: 'Blank4',
        6: 'Blank5',
        7: 'Blank6',
        8: 'TotalBeforeVat(subtotalsaddedtogether)',
        9: 'VatAmount',
        10: 'Blank7',
        11: 'InvoiceTotal',
    }
    result = []
    for tlr in tlrlist:
        try:
            tlr_dict = {name: tlr[field]
                        for field, name in tlr_translation.items()}
        except IndexError:
            # The message used to say "headers" (copied from parse_hdrs),
            # which pointed at the wrong record type when debugging.
            print('could not parse TLR line from {}'.format(tlr))
        else:
            result.append(tlr_dict)
    return result
def split(data):
    """Group rows by the record tag held in their first field.

    Rows whose first field is exactly '"HDR"', '"ILD"', '"STL"' or '"TLR"'
    (double quotes included) are collected, in input order, into the
    matching list. Any other row is printed between two warning banners
    and dropped.

    Args:
        data: Iterable of rows; each row must have at least one element.

    Returns:
        A 4-tuple ``(hdr_rows, ild_rows, stl_rows, tlr_rows)`` of lists.
    """
    banner = "!!!!! Unexpected item in file !!!!!"
    buckets = {'"HDR"': [], '"ILD"': [], '"STL"': [], '"TLR"': []}
    for row in data:
        target = buckets.get(row[0])
        if target is None:
            # Unknown record tag: show the offending row and move on.
            print(banner)
            print(row)
            print(banner)
            continue
        target.append(row)
    return buckets['"HDR"'], buckets['"ILD"'], buckets['"STL"'], buckets['"TLR"']
def strip_quotes(l):
    """Return new dicts with double quotes trimmed from each value's ends.

    Args:
        l: Iterable of dicts whose values are strings.

    Returns:
        A new list of new dicts; keys are unchanged and every value has
        all leading and trailing '"' characters removed. Quotes inside a
        value are left alone.
    """
    return [
        {key: value.strip('"') for key, value in record.items()}
        for record in l
    ]
def process(fn):
    """Read one input file and return its parsed records keyed by file name.

    The file is read with read_csv(), its rows are grouped with split(),
    and each group goes through its parse_* function. A row count per group
    is printed, then double quotes are trimmed from all values.

    Args:
        fn: Path of the file to process; also used as the result's key.

    Returns:
        ``{fn: [{'hdrs': [...]}, {'ilds': [...]}, {'stls': [...]},
        {'tlrs': [...]}]}``, where each inner list holds one dict per
        parsed row.
    """
    hdr_rows, ild_rows, stl_rows, tlr_rows = split(read_csv(fn))

    # (output key, label used in the count message, parsed records).
    # All four parsers run here, before any count is printed.
    sections = (
        ('hdrs', 'header', parse_hdrs(hdr_rows)),
        ('ilds', 'ild', parse_ilds(ild_rows)),
        ('stls', 'stl', parse_stls(stl_rows)),
        ('tlrs', 'tlr', parse_tlrs(tlr_rows)),
    )
    for _, label, records in sections:
        print("There is", len(records), "rows in " + label)

    # Trimming the quotes is cosmetic; the output works without it.
    return {
        fn: [{key: strip_quotes(records)} for key, _, records in sections]
    }
def to_xml(combined):
    """Serialize `combined` to XML, print it, and return the parsed document.

    The data is converted with dicttoxml under the root element 'xml_file'
    with attr_type=False. The raw result is pretty-printed with pprint,
    parsed with xml.dom.minidom.parseString, and the document's
    toprettyxml() output is printed.

    Args:
        combined: The data structure to serialize.

    Returns:
        The document object returned by parseString.
    """
    raw_xml = dicttoxml.dicttoxml(
        combined,
        custom_root='xml_file',  # looks like it does not like spaces here
        attr_type=False,
    )
    pprint(raw_xml)
    document = parseString(raw_xml)
    pretty_text = document.toprettyxml()
    print(pretty_text)
    return document
def main():
    """Process every file in FILES, then print the merged data and its XML.

    Each file's process() result is merged into one dict keyed by file
    name. The dict is pretty-printed and then passed to to_xml(), which
    prints the XML forms.
    """
    merged = {}
    for path in FILES:
        merged.update(process(path))
    pprint(merged)
    # The returned document is not used here; to_xml() prints its output.
    to_xml(merged)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment