bfocht · August 29, 2015 14:07
diff --git a/readprogressdumps.py b/readprogressdumps.py
 #!/usr/bin/python

 #Example usage
 #python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
 # -i input file is a binary dump file from Progress Software database
 # -o output file
 # -b columns that contain binary integers
 # -p columns that contain packed decimals
 # -h column headers  (first row)
 # --debug  (make the first row contain column numbers instead of column headers)

 import struct 
 from decimal import Decimal
 import csv
 import sys,getopt

 def unpack(val):
 	"""Return the single byte *val* as an unsigned integer (0-255).

 	Raises struct.error when *val* is not exactly one byte long.
 	"""
 	(byte_value,) = struct.unpack('B', val)
 	return int(byte_value)

 def getSize(fileobject):
     """Return the size of *fileobject* in bytes.

     Side effect: the file position is left at the end of the file.
     """
     whence_end = 2  # seek() whence value meaning "relative to end of file"
     fileobject.seek(0, whence_end)
     return fileobject.tell()

 def processfile (inputfile, outputfile, header, int_list, packedlist, show_field_no):
 	with open(inputfile, 'rb') as f:
 		w = open(outputfile, 'wb')
 		writer = csv.writer(w) #,quoting=csv.QUOTE_NONNUMERIC

 		print 'Input file is ', inputfile
 		print 'Output file is ', outputfile
 		
 		filesize = getSize(f)
 		print 'filesize is %dkb' % filesize

 		f.seek(312) #field sepearater
 		sep = f.read(1)
 		f.seek(320)
 		fldcount = unpack(f.read(1))
 		print 'field count is %d' % fldcount

 		f.seek(636) #jump to first record
 		while  f.read(1) != sep  and f.tell() < filesize:
 				pass

 		
 		writer.writerow(header)
 		#create header based on field number
 		if show_field_no:
 			currentfld=1
 			header=[]
 			while currentfld < fldcount:
 				header.append("%d" %(currentfld))
 				currentfld+=1
 			writer.writerow(header)
 		

 		lineno = 0		
 		#process file
 	 	while  f.tell() < filesize:
 			lineno +=1
 			val = f.read(1) 		
 			fld  = struct.unpack('B',val)[0]
 			num = int(fld)
 			if num == 231:
 					val =f.read(1)
 					fld1  = struct.unpack('B',val)[0]
 					val =f.read(1)
 					fld2  = struct.unpack('B',val)[0]
 					num = 256 * fld1 + fld2
 					f.read(num)
 			if num != 1:
 				val = f.read(1) 		
 				fld  = struct.unpack('B',val)[0]
 				num = int(fld)
 			val = f.read(num)
 			if  val != sep:
 				print 'invalid start character found on line number %d ...attempting to recover' % lineno
 				f.seek(f.tell()-num-12)
 				while  int(struct.unpack('B',f.read(1))[0]) != 231  and f.tell() < filesize:
 					pass
 				f.seek(f.tell()-1)
 				continue

 		 	array=[]
 		 	currentfld=1
 		 	while currentfld < fldcount and f.tell() < filesize:	 		
 		 		val = f.read(1) 		
 				fld  = struct.unpack('B',val)[0]
 				num = int(fld)
 				#char string
 				if num == 250:
 					val =f.read(2)
 					charlen  = struct.unpack('>h',val)[0]
 					charcount  = struct.unpack('>b',f.read(1))[0]
 					if charlen > 0: 
 						encvalue = ''
 						for x in range(1, charlen):
 							val = f.read(1)
 							num = struct.unpack('>b',val)[0]
 							if num == -1:
 								pass
 							elif num < 32:
 								encvalue += str(num)
 							else:
 								encvalue += val
 						array.append(encvalue)
 					else :
 						array.append('')
 				elif num == 230: 
 					val =f.read(2)
 					charlen  = struct.unpack('>h',val)[0]
 					if charlen > 0:
 						array.append(f.read(charlen))
 					else :
 						array.append('')
 				elif num > 0 and num !=253:
 					val = f.read(num)

 					packed = ord(struct.unpack('c', val[0])[0])
 					#binary values
 					if  int_list and num == 3 and currentfld in int_list:					
 							array.append(struct.unpack('>bh', val)[0])
 					elif int_list and num == 2 and currentfld in int_list:					
 							array.append(struct.unpack('>h', val)[0])
 					elif int_list and num == 1 and currentfld in int_list:					
 							array.append(struct.unpack('>b', val)[0])
 					#packed decimal-1	
 					elif (packed >= 128 and packed <= 137):
 						encvalue = ''
 						for x in range(1, val.__len__()):
 							encvalue += struct.unpack('c', val[x])[0].encode('hex')
 						encvalue = Decimal(encvalue.replace('f',''))
 						for x in range(128, packed):
 							encvalue = encvalue/10
 						array.append(encvalue)
 					#packed decimal-2 signed
 					elif packedlist and currentfld in packedlist:
 						encvalue = ''
 						for x in range(1, val.__len__()):
 							encvalue += struct.unpack('c', val[x])[0].encode('hex')
 						encvalue = Decimal(encvalue.replace('f',''))
 						for x in range(0, packed):
 							encvalue = encvalue/10
 						encvalue = encvalue*-1
 						array.append(encvalue)
 					else:
 						encvalue = ''
 						for x in range(0, val.__len__()):
 							num = struct.unpack('>b',val[x])[0]
 							if num == -1:
 								pass
 							elif num < 32:
 								encvalue += str(num)
 							else:
 								encvalue += val[x]
 						array.append(encvalue)
 				else:
 					array.append('')
 				currentfld += 1
 			writer.writerow(array)
 			
 			f.read(12) #stuff at end of line, I don't know what it is for
 			
 		print 'finished exporting %d rows' % lineno
 		print 'done'

 		
 def main(argv):
 	"""Parse the command-line options in *argv* and run the conversion.

 	Options:
 		-i / --ifile   input dump file (required)
 		-o / --ofile   output CSV file (required)
 		-b / --binary  comma-separated field numbers holding binary integers
 		-p / --packed  comma-separated field numbers holding packed decimals
 		-h             comma-separated column headers
 		--debug        also write a row of field numbers

 	Exits with status 2 when the options cannot be parsed or when the input
 	or output file is missing.
 	"""
 	inputfile = None
 	header = None
 	intlist = None
 	outputfile = None
 	packedlist = None
 	show_field_no = None
 	# --debug is a plain flag.  It used to be declared as "debug=", which made
 	# getopt demand a value, so a bare --debug either failed or swallowed the
 	# next option.  The "--debug=<value>" spelling is still tolerated.
 	argv = ['--debug' if a.startswith('--debug=') else a for a in argv]
 	try:
 		opts, _ = getopt.getopt(argv,"b:p:h:i:o:",["ifile=","ofile=", "binary=", "packed=","debug"])
 	except getopt.GetoptError:
 		print(' -i <inputfile> -o <outputfile> -b <binary field numbers> -h <header record>')
 		sys.exit(2)
 	for opt, arg in opts:
 		if opt == '-h':
 			header = arg.split(',')
 		elif opt == "--debug":
 			show_field_no = 1
 		elif opt in ("-i", "--ifile"):
 			inputfile = arg
 		elif opt in ("-o", "--ofile"):
 			outputfile = arg
 		elif opt in ("-b","--binary"):
 			# Real lists, so repeated "in" tests behave the same everywhere.
 			intlist = [int(n) for n in arg.split(',')]
 		elif opt in ("-p","--packed"):
 			packedlist = [int(n) for n in arg.split(',')]
 	if inputfile is None or outputfile is None:
 		print(' -i <inputfile> -o <outputfile> required')
 		# A usage error must not report success to the calling shell.
 		sys.exit(2)
 	processfile(inputfile,outputfile, header, intlist, packedlist, show_field_no)
 	


 if __name__ == "__main__":
 	# Hand everything after the script name to the option parser.
 	script_args = sys.argv[1:]
 	main(script_args)
	#!/usr/bin/python

	#Example usage
	#python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
	# -i input file is a binary dump file from Progress Software database
	# -o output file
	# -b columns that contain binary integers
	# -p columns that contain packed decimals
	# -h column headers (first row)
	# --debug (make the first row contain column numbers instead of column headers)

	import struct
	from decimal import Decimal
	import csv
	import sys,getopt

	def unpack(val):
		"""Return the single byte *val* as an unsigned integer (0-255).

		Raises struct.error when *val* is not exactly one byte long.
		"""
		fld = struct.unpack('B', val)[0]
		return int(fld)

	def getSize(fileobject):
		"""Return the size of *fileobject* in bytes.

		Side effect: the file position is left at the end of the file.
		"""
		fileobject.seek(0, 2) # move the cursor to the end of the file
		size = fileobject.tell()
		return size

	def processfile (inputfile, outputfile, header, int_list, packedlist, show_field_no):
	with open(inputfile, 'rb') as f:
	w = open(outputfile, 'wb')
	writer = csv.writer(w) #,quoting=csv.QUOTE_NONNUMERIC

	print 'Input file is ', inputfile
	print 'Output file is ', outputfile

	filesize = getSize(f)
	print 'filesize is %dkb' % filesize

	f.seek(312) #field sepearater
	sep = f.read(1)
	f.seek(320)
	fldcount = unpack(f.read(1))
	print 'field count is %d' % fldcount

	f.seek(636) #jump to first record
	while f.read(1) != sep and f.tell() < filesize:
	pass


	writer.writerow(header)
	#create header based on field number
	if show_field_no:
	currentfld=1
	header=[]
	while currentfld < fldcount:
	header.append("%d" %(currentfld))
	currentfld+=1
	writer.writerow(header)


	lineno = 0
	#process file
	while f.tell() < filesize:
	lineno +=1
	val = f.read(1)
	fld = struct.unpack('B',val)[0]
	num = int(fld)
	if num == 231:
	val =f.read(1)
	fld1 = struct.unpack('B',val)[0]
	val =f.read(1)
	fld2 = struct.unpack('B',val)[0]
	num = 256 * fld1 + fld2
	f.read(num)
	if num != 1:
	val = f.read(1)
	fld = struct.unpack('B',val)[0]
	num = int(fld)
	val = f.read(num)
	if val != sep:
	print 'invalid start character found on line number %d ...attempting to recover' % lineno
	f.seek(f.tell()-num-12)
	while int(struct.unpack('B',f.read(1))[0]) != 231 and f.tell() < filesize:
	pass
	f.seek(f.tell()-1)
	continue

	array=[]
	currentfld=1
	while currentfld < fldcount and f.tell() < filesize:
	val = f.read(1)
	fld = struct.unpack('B',val)[0]
	num = int(fld)
	#char string
	if num == 250:
	val =f.read(2)
	charlen = struct.unpack('>h',val)[0]
	charcount = struct.unpack('>b',f.read(1))[0]
	if charlen > 0:
	encvalue = ''
	for x in range(1, charlen):
	val = f.read(1)
	num = struct.unpack('>b',val)[0]
	if num == -1:
	pass
	elif num < 32:
	encvalue += str(num)
	else:
	encvalue += val
	array.append(encvalue)
	else :
	array.append('')
	elif num == 230:
	val =f.read(2)
	charlen = struct.unpack('>h',val)[0]
	if charlen > 0:
	array.append(f.read(charlen))
	else :
	array.append('')
	elif num > 0 and num !=253:
	val = f.read(num)

	packed = ord(struct.unpack('c', val[0])[0])
	#binary values
	if int_list and num == 3 and currentfld in int_list:
	array.append(struct.unpack('>bh', val)[0])
	elif int_list and num == 2 and currentfld in int_list:
	array.append(struct.unpack('>h', val)[0])
	elif int_list and num == 1 and currentfld in int_list:
	array.append(struct.unpack('>b', val)[0])
	#packed decimal-1
	elif (packed >= 128 and packed <= 137):
	encvalue = ''
	for x in range(1, val.__len__()):
	encvalue += struct.unpack('c', val[x])[0].encode('hex')
	encvalue = Decimal(encvalue.replace('f',''))
	for x in range(128, packed):
	encvalue = encvalue/10
	array.append(encvalue)
	#packed decimal-2 signed
	elif packedlist and currentfld in packedlist:
	encvalue = ''
	for x in range(1, val.__len__()):
	encvalue += struct.unpack('c', val[x])[0].encode('hex')
	encvalue = Decimal(encvalue.replace('f',''))
	for x in range(0, packed):
	encvalue = encvalue/10
	encvalue = encvalue*-1
	array.append(encvalue)
	else:
	encvalue = ''
	for x in range(0, val.__len__()):
	num = struct.unpack('>b',val[x])[0]
	if num == -1:
	pass
	elif num < 32:
	encvalue += str(num)
	else:
	encvalue += val[x]
	array.append(encvalue)
	else:
	array.append('')
	currentfld += 1
	writer.writerow(array)

	f.read(12) #stuff at end of line, I don't know what it is for

	print 'finished exporting %d rows' % lineno
	print 'done'


	def main(argv):
		"""Parse the command-line options in *argv* and run the conversion.

		Options:
			-i / --ifile   input dump file (required)
			-o / --ofile   output CSV file (required)
			-b / --binary  comma-separated field numbers holding binary integers
			-p / --packed  comma-separated field numbers holding packed decimals
			-h             comma-separated column headers
			--debug        also write a row of field numbers

		Exits with status 2 when the options cannot be parsed or when the
		input or output file is missing.
		"""
		inputfile = None
		header = None
		intlist = None
		outputfile = None
		packedlist = None
		show_field_no = None
		# --debug is a plain flag.  It used to be declared as "debug=", which
		# made getopt demand a value, so a bare --debug either failed or
		# swallowed the next option.  "--debug=<value>" is still tolerated.
		argv = ['--debug' if a.startswith('--debug=') else a for a in argv]
		try:
			opts, _ = getopt.getopt(argv,"b:p:h:i:o:",["ifile=","ofile=", "binary=", "packed=","debug"])
		except getopt.GetoptError:
			print(' -i <inputfile> -o <outputfile> -b <binary field numbers> -h <header record>')
			sys.exit(2)
		for opt, arg in opts:
			if opt == '-h':
				header = arg.split(',')
			elif opt == "--debug":
				show_field_no = 1
			elif opt in ("-i", "--ifile"):
				inputfile = arg
			elif opt in ("-o", "--ofile"):
				outputfile = arg
			elif opt in ("-b","--binary"):
				# Real lists, so repeated "in" tests behave the same everywhere.
				intlist = [int(n) for n in arg.split(',')]
			elif opt in ("-p","--packed"):
				packedlist = [int(n) for n in arg.split(',')]
		if inputfile is None or outputfile is None:
			print(' -i <inputfile> -o <outputfile> required')
			# A usage error must not report success to the calling shell.
			sys.exit(2)
		processfile(inputfile,outputfile, header, intlist, packedlist, show_field_no)



	# Run the converter only when this file is executed as a script.
	if __name__ == "__main__":
		main(sys.argv[1:])
No results found