1328 · September 4, 2014 21:58
diff --git a/gistfile1.txt b/gistfile1.txt

 from pprint import pprint
 import dicttoxml
 from xml.dom.minidom import parseString
 
 
 
 
 
 # input files to convert; main() processes them in this order
 FILES = ['FILE2.COMPAS', 'FILE1.COMPAS']
 
 def read_csv(fn):
    '''
    read a comma separated file and return its lines as lists of fields

    fn is the path of the file to read. every non-blank line is stripped of
    surrounding whitespace and split on ','; blank and whitespace-only lines
    are skipped. double quotes around fields are kept as they are.

    NOTE(review): this is a plain str.split, so a comma inside a quoted field
    would also split that field - confirm the input never contains one.
    '''
    data = []
    with open(fn, mode='r') as fh:
        for raw in fh:
            # iterating a file never yields an empty string (a blank line is
            # '\n'), so test the stripped text to really skip blank lines
            text = raw.strip()
            if text:
                data.append(text.split(','))

    return data
 
 def parse_hdrs(hdrlist):
    '''
    turn split 'hdr' lines into dictionaries keyed by field name

    hdrlist is a list of lines, each one already split into a list of fields.
    returns one dictionary per line that could be parsed, in input order, so
    result[3]['SellerName'] is the SellerName of the fourth parsed line.

    a line with too few fields is reported on stdout and left out.
    '''

    # list index of each field -> name used for it in the result
    # (index 0 is not read)
    columns = {
        1: 'Header',
        2: 'blank1',
        3: 'blank2',
        4: 'CustomerNumber',
        5: 'CustomerName',
        6: 'blank3',
        7: 'UnitNumber1',
        8: 'SellerName',
        9: 'blank4',
        10: 'blank5',
        11: 'UnitNumber2',
        12: 'DeliveryAddress',
        13: 'blank6',
        14: 'blank7',
        15: 'blank8',
        16: 'blank9',
        17: 'blank10',
        18: 'RitterInvoiceNumber',
        19: 'DateofInvoice',
        20: 'TaxPointDate',
        21: 'blank11',
        22: 'blank12',
        23: 'CustomerOrderNumber',
        24: 'blank13',
        25: 'blank14',
        26: 'blank15',
        27: 'blank16',
        28: 'blank17',
        29: 'blank18',
        30: 'blank19',
        31: 'PaymentTerms',
        32: 'PaymentDueDate',
        33: 'discounttobeappliedtoinvoice',
        34: 'blank20',
        35: 'blank21',
        36: 'FileType',
        37: 'PayTermsCode',
    }

    parsed = []
    for row in hdrlist:
        try:
            record = {}
            for index, label in columns.items():
                record[label] = row[index]
        except IndexError:
            # the line is shorter than the table expects
            print('could not parse headers from {}'.format(row))
        else:
            parsed.append(record)

    return parsed
 
 def parse_ilds(ildlist):
    '''
    turn split 'ild' lines into dictionaries keyed by field name

    ildlist is a list of lines, each one already split into a list of fields.
    returns one dictionary per line that could be parsed, in input order.

    a line with too few fields is reported on stdout and left out.
    '''

    # list index of each field -> name used for it in the result
    # (index 0 is not read)
    ild_translation = {
        1: 'InvoiceLines',
        2: 'Blank1',
        3: 'Product Code',
        4: 'Blank2',
        5: 'Qty',
        6: 'Blank3',
        7: 'Selling Unit',
        8: 'ProductDescription',
        9: 'blank4',
        10: 'UnitCostafterdiscount',
        11: 'ExtendedLinecostexVATincDiscount',
        12: 'VatCode',
        13: 'Vat',
        14: 'UnitCostExVatBeforeDiscount',
        15: 'DiscountValue',
        16: 'Discount',
    }

    result = []
    for ild in ildlist:
        try:
            ild_dict = {name: ild[field] for field, name in ild_translation.items()}
        except IndexError:
            # name the record type really being parsed; this message used to
            # say 'headers', which pointed at the wrong kind of line
            print('could not parse ild line from {}'.format(ild))
        else:
            result.append(ild_dict)

    return result
 
 def parse_stls(stllist):
    '''
    turn split 'stl' lines into dictionaries keyed by field name

    stllist is a list of lines, each one already split into a list of fields.
    returns one dictionary per line that could be parsed, in input order.

    a line with too few fields is reported on stdout and left out.
    '''

    # list index of each field -> name used for it in the result
    # (index 0 is not read)
    stl_translation = {
        1: 'VatSummary',
        2: 'VatCode',
        3: 'VatRate',
        4: 'VattableAmount',
        5: 'Blank1',
        6: 'Blank2',
        7: 'Blank3',
        8: 'Blank4',
        9: 'Blank5',
        10: 'TotalVattableAmount',
        11: 'VatAmount',
        12: 'Blank6',
        13: 'LineTotalIncVatAmount',
    }

    result = []
    for stl in stllist:
        try:
            stl_dict = {name: stl[field] for field, name in stl_translation.items()}
        except IndexError:
            # name the record type really being parsed; this message used to
            # say 'headers', which pointed at the wrong kind of line
            print('could not parse stl line from {}'.format(stl))
        else:
            result.append(stl_dict)

    return result
 
 def parse_tlrs(tlrlist):
    '''
    turn split 'tlr' lines into dictionaries keyed by field name

    tlrlist is a list of lines, each one already split into a list of fields.
    returns one dictionary per line that could be parsed, in input order.

    a line with too few fields is reported on stdout and left out.
    '''

    # list index of each field -> name used for it in the result
    # (index 0 is not read)
    #
    # NOTE(review): 'InvoiceTotal' names both index 1 and index 11. dictionary
    # keys are unique, so each result holds a single 'InvoiceTotal' entry and
    # the index 11 value (assigned last on python 3.7+) is the one kept -
    # confirm the index 1 value is not needed.
    tlr_translation = {
        1: 'InvoiceTotal',
        2: 'Blank1',
        3: 'Blank2',
        4: 'Blank3',
        5: 'Blank4',
        6: 'Blank5',
        7: 'Blank6',
        8: 'TotalBeforeVat(subtotalsaddedtogether)',
        9: 'VatAmount',
        10: 'Blank7',
        11: 'InvoiceTotal',
    }

    result = []
    for tlr in tlrlist:
        try:
            tlr_dict = {name: tlr[field] for field, name in tlr_translation.items()}
        except IndexError:
            # name the record type really being parsed; this message used to
            # say 'headers', which pointed at the wrong kind of line
            print('could not parse tlr line from {}'.format(tlr))
        else:
            result.append(tlr_dict)

    return result
 
 def split(data):
    '''
    sort the lines of a file into one list per record type

    data is a list of lines, each one a list of fields. the first field is
    compared against '"HDR"', '"ILD"', '"STL"' and '"TLR"' (quotes included).

    returns the tuple (hdr lines, ild lines, stl lines, tlr lines), each list
    keeping the input order. any other line is printed between two warning
    banners and dropped.
    '''
    headers, invoice_lines, vat_lines, trailers = [], [], [], []

    # record tag -> list collecting the lines that carry it
    routes = (
        ('"HDR"', headers),
        ('"ILD"', invoice_lines),
        ('"STL"', vat_lines),
        ('"TLR"', trailers),
    )

    for line in data:
        for tag, bucket in routes:
            if line[0] == tag:
                bucket.append(line)
                break
        else:
            # no record tag matched this line
            print("!!!!! Unexpected item in file !!!!!")
            print(line)
            print("!!!!! Unexpected item in file !!!!!")

    return headers, invoice_lines, vat_lines, trailers
 
 
 def strip_quotes(l):
    '''
    return copies of the dictionaries in l with '"' stripped from both ends
    of every value

    keys are kept unchanged and the input dictionaries are not modified.
    '''
    return [
        {key: value.strip('"') for key, value in record.items()}
        for record in l
    ]

 
 def process(fn):
    '''
    read one file and return its parsed records grouped by record type

    fn is the path handed to read_csv. the lines are split by record type,
    parsed, counted on stdout and finally stripped of their double quotes.

    returns {fn: [{'hdrs': [...]}, {'ilds': [...]}, {'stls': [...]},
    {'tlrs': [...]}]} where every inner list holds one dictionary per parsed
    line.
    '''

    hdr_rows, ild_rows, stl_rows, tlr_rows = split(read_csv(fn))

    # (key in the result, text of the count message, parsed lines)
    sections = [
        ('hdrs', "rows in header", parse_hdrs(hdr_rows)),
        ('ilds', "rows in ild", parse_ilds(ild_rows)),
        ('stls', "rows in stl", parse_stls(stl_rows)),
        ('tlrs', "rows in tlr", parse_tlrs(tlr_rows)),
    ]

    for _, message, records in sections:
        print ("There is", len(records), message)

    # strip the surrounding double quotes from every value before returning
    return {fn: [{key: strip_quotes(records)} for key, _, records in sections]}

 def to_xml(combined):
    '''
    convert the combined dictionary to xml, print it and return the parsed
    document

    the raw dicttoxml output is pretty-printed first, followed by the
    indented xml produced by minidom. returns the minidom document.
    '''

    raw = dicttoxml.dicttoxml(
        combined,
        custom_root='xml_file',  # looks like it does not like spaces here
        attr_type=False,
    )
    pprint(raw)

    document = parseString(raw)
    print(document.toprettyxml())
    return document
 
 def main():
    '''
    process every file in FILES, then print the merged result and its xml
    form
    '''
    merged = {}
    for path in FILES:
        merged.update(process(path))

    pprint(merged)
    to_xml(merged)
 
 # run the conversion only when executed as a script, not when imported
 if __name__ == '__main__':
 
    main()

	from pprint import pprint
	import dicttoxml
	from xml.dom.minidom import parseString





	# input files to convert; main() processes them in this order
	FILES = ['FILE2.COMPAS', 'FILE1.COMPAS']

	def read_csv(fn):
	    '''
	    read a comma separated file and return its lines as lists of fields

	    fn is the path of the file to read. every non-blank line is stripped
	    of surrounding whitespace and split on ','; blank and whitespace-only
	    lines are skipped. double quotes around fields are kept as they are.

	    NOTE(review): this is a plain str.split, so a comma inside a quoted
	    field would also split that field - confirm the input never contains
	    one.
	    '''
	    data = []
	    with open(fn, mode='r') as fh:
	        for raw in fh:
	            # iterating a file never yields an empty string (a blank line
	            # is '\n'), so test the stripped text to really skip blanks
	            text = raw.strip()
	            if text:
	                data.append(text.split(','))

	    return data

	def parse_hdrs(hdrlist):
	    '''
	    turn split 'hdr' lines into dictionaries keyed by field name

	    hdrlist is a list of lines, each one already split into a list of
	    fields. returns one dictionary per line that could be parsed, in input
	    order, so result[3]['SellerName'] is the SellerName of the fourth
	    parsed line.

	    a line with too few fields is reported on stdout and left out.
	    '''

	    # list index of each field -> name used for it in the result
	    # (index 0 is not read)
	    hdr_translation = {
	        1: 'Header',
	        2: 'blank1',
	        3: 'blank2',
	        4: 'CustomerNumber',
	        5: 'CustomerName',
	        6: 'blank3',
	        7: 'UnitNumber1',
	        8: 'SellerName',
	        9: 'blank4',
	        10: 'blank5',
	        11: 'UnitNumber2',
	        12: 'DeliveryAddress',
	        13: 'blank6',
	        14: 'blank7',
	        15: 'blank8',
	        16: 'blank9',
	        17: 'blank10',
	        18: 'RitterInvoiceNumber',
	        19: 'DateofInvoice',
	        20: 'TaxPointDate',
	        21: 'blank11',
	        22: 'blank12',
	        23: 'CustomerOrderNumber',
	        24: 'blank13',
	        25: 'blank14',
	        26: 'blank15',
	        27: 'blank16',
	        28: 'blank17',
	        29: 'blank18',
	        30: 'blank19',
	        31: 'PaymentTerms',
	        32: 'PaymentDueDate',
	        33: 'discounttobeappliedtoinvoice',
	        34: 'blank20',
	        35: 'blank21',
	        36: 'FileType',
	        37: 'PayTermsCode',
	    }

	    result = []
	    for hdr in hdrlist:
	        try:
	            header_dict = {name: hdr[field] for field, name in hdr_translation.items()}
	        except IndexError:
	            # the line is shorter than the table expects
	            print('could not parse headers from {}'.format(hdr))
	        else:
	            result.append(header_dict)

	    return result

	def parse_ilds(ildlist):
	    '''
	    turn split 'ild' lines into dictionaries keyed by field name

	    ildlist is a list of lines, each one already split into a list of
	    fields. returns one dictionary per line that could be parsed, in input
	    order.

	    a line with too few fields is reported on stdout and left out.
	    '''

	    # list index of each field -> name used for it in the result
	    # (index 0 is not read)
	    ild_translation = {
	        1: 'InvoiceLines',
	        2: 'Blank1',
	        3: 'Product Code',
	        4: 'Blank2',
	        5: 'Qty',
	        6: 'Blank3',
	        7: 'Selling Unit',
	        8: 'ProductDescription',
	        9: 'blank4',
	        10: 'UnitCostafterdiscount',
	        11: 'ExtendedLinecostexVATincDiscount',
	        12: 'VatCode',
	        13: 'Vat',
	        14: 'UnitCostExVatBeforeDiscount',
	        15: 'DiscountValue',
	        16: 'Discount',
	    }

	    result = []
	    for ild in ildlist:
	        try:
	            ild_dict = {name: ild[field] for field, name in ild_translation.items()}
	        except IndexError:
	            # name the record type really being parsed; this message used
	            # to say 'headers', which pointed at the wrong kind of line
	            print('could not parse ild line from {}'.format(ild))
	        else:
	            result.append(ild_dict)

	    return result

	def parse_stls(stllist):
	    '''
	    turn split 'stl' lines into dictionaries keyed by field name

	    stllist is a list of lines, each one already split into a list of
	    fields. returns one dictionary per line that could be parsed, in input
	    order.

	    a line with too few fields is reported on stdout and left out.
	    '''

	    # list index of each field -> name used for it in the result
	    # (index 0 is not read)
	    stl_translation = {
	        1: 'VatSummary',
	        2: 'VatCode',
	        3: 'VatRate',
	        4: 'VattableAmount',
	        5: 'Blank1',
	        6: 'Blank2',
	        7: 'Blank3',
	        8: 'Blank4',
	        9: 'Blank5',
	        10: 'TotalVattableAmount',
	        11: 'VatAmount',
	        12: 'Blank6',
	        13: 'LineTotalIncVatAmount',
	    }

	    result = []
	    for stl in stllist:
	        try:
	            stl_dict = {name: stl[field] for field, name in stl_translation.items()}
	        except IndexError:
	            # name the record type really being parsed; this message used
	            # to say 'headers', which pointed at the wrong kind of line
	            print('could not parse stl line from {}'.format(stl))
	        else:
	            result.append(stl_dict)

	    return result

	def parse_tlrs(tlrlist):
	    '''
	    turn split 'tlr' lines into dictionaries keyed by field name

	    tlrlist is a list of lines, each one already split into a list of
	    fields. returns one dictionary per line that could be parsed, in input
	    order.

	    a line with too few fields is reported on stdout and left out.
	    '''

	    # list index of each field -> name used for it in the result
	    # (index 0 is not read)
	    #
	    # NOTE(review): 'InvoiceTotal' names both index 1 and index 11.
	    # dictionary keys are unique, so each result holds a single
	    # 'InvoiceTotal' entry and the index 11 value (assigned last on
	    # python 3.7+) is the one kept - confirm the index 1 value is not
	    # needed.
	    tlr_translation = {
	        1: 'InvoiceTotal',
	        2: 'Blank1',
	        3: 'Blank2',
	        4: 'Blank3',
	        5: 'Blank4',
	        6: 'Blank5',
	        7: 'Blank6',
	        8: 'TotalBeforeVat(subtotalsaddedtogether)',
	        9: 'VatAmount',
	        10: 'Blank7',
	        11: 'InvoiceTotal',
	    }

	    result = []
	    for tlr in tlrlist:
	        try:
	            tlr_dict = {name: tlr[field] for field, name in tlr_translation.items()}
	        except IndexError:
	            # name the record type really being parsed; this message used
	            # to say 'headers', which pointed at the wrong kind of line
	            print('could not parse tlr line from {}'.format(tlr))
	        else:
	            result.append(tlr_dict)

	    return result

	def split(data):
	    '''
	    sort the lines of a file into one list per record type

	    data is a list of lines, each one a list of fields. the first field is
	    compared against '"HDR"', '"ILD"', '"STL"' and '"TLR"' (quotes
	    included).

	    returns the tuple (hdrlist, ildlist, stllist, tlrlist), each list
	    keeping the input order. any other line is printed between two warning
	    banners and dropped.
	    '''
	    hdrlist = []
	    ildlist = []
	    stllist = []
	    tlrlist = []

	    for line in data:
	        if line[0] == '"HDR"':
	            hdrlist.append(line)
	        elif line[0] == '"ILD"':
	            ildlist.append(line)
	        elif line[0] == '"STL"':
	            stllist.append(line)
	        elif line[0] == '"TLR"':
	            tlrlist.append(line)
	        else:
	            # no record tag matched this line
	            print("!!!!! Unexpected item in file !!!!!")
	            print(line)
	            print("!!!!! Unexpected item in file !!!!!")

	    return hdrlist, ildlist, stllist, tlrlist


	def strip_quotes(l):
	    '''
	    return copies of the dictionaries in l with '"' stripped from both
	    ends of every value

	    keys are kept unchanged and the input dictionaries are not modified.
	    '''
	    return [
	        {k: v.strip('"') for k, v in d.items()}
	        for d in l
	    ]


	def process(fn):
	    '''
	    read one file and return its parsed records grouped by record type

	    fn is the path handed to read_csv. the lines are split by record type,
	    parsed, counted on stdout and finally stripped of their double quotes.

	    returns {fn: [{'hdrs': [...]}, {'ilds': [...]}, {'stls': [...]},
	    {'tlrs': [...]}]} where every inner list holds one dictionary per
	    parsed line.
	    '''

	    data = read_csv(fn)
	    hdrlist, ildlist, stllist, tlrlist = split(data)

	    all_hdrs = parse_hdrs(hdrlist)
	    all_ilds = parse_ilds(ildlist)
	    all_stls = parse_stls(stllist)
	    all_tlrs = parse_tlrs(tlrlist)

	    print ("There is", len(all_hdrs), "rows in header")
	    print ("There is", len(all_ilds), "rows in ild")
	    print ("There is", len(all_stls), "rows in stl")
	    print ("There is", len(all_tlrs), "rows in tlr")

	    # strip the surrounding double quotes from every value before
	    # building the result
	    all_hdrs = strip_quotes(all_hdrs)
	    all_ilds = strip_quotes(all_ilds)
	    all_stls = strip_quotes(all_stls)
	    all_tlrs = strip_quotes(all_tlrs)

	    combined = {
	        fn: [
	            {'hdrs': all_hdrs},
	            {'ilds': all_ilds},
	            {'stls': all_stls},
	            {'tlrs': all_tlrs},
	        ]
	    }

	    return combined

	def to_xml(combined):
	    '''
	    convert the combined dictionary to xml, print it and return the parsed
	    document

	    the raw dicttoxml output is pretty-printed first, followed by the
	    indented xml produced by minidom. returns the minidom document.
	    '''

	    xml = dicttoxml.dicttoxml(
	        combined,
	        custom_root='xml_file',  # looks like it does not like spaces here
	        attr_type=False,
	    )
	    pprint(xml)
	    dom = parseString(xml)
	    print(dom.toprettyxml())
	    return dom

	def main():
	    '''
	    process every file in FILES, then print the merged result and its xml
	    form
	    '''
	    combined = {}
	    for filename in FILES:
	        combined.update(process(filename))
	    pprint(combined)
	    # the returned document is not used here; to_xml prints it itself
	    to_xml(combined)

	# run the conversion only when executed as a script, not when imported
	if __name__ == '__main__':
	    main()
No results found