Last active
August 29, 2015 14:07
-
-
Save bfocht/3382e97476f4f2001ea3 to your computer and use it in GitHub Desktop.
Script that extracts a Progress database binary dump file and converts it to a comma-separated (CSV) file. Tested on v8.3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Example usage:
#python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
# -i input file: a binary dump file from a Progress Software database
# -o output file
# -b columns that contain binary integers
# -p columns that contain packed decimals
# -h column headers (first row)
# --debug (also write a row containing the column numbers after the header row)
import csv
import getopt
import struct
import sys
from decimal import Decimal
def unpack(val):
    """Return the single byte in *val* as an unsigned integer (0-255).

    *val* must be exactly one byte long; ``struct.unpack`` raises
    ``struct.error`` otherwise (for example on an empty read at end of file).
    """
    fld = struct.unpack('B', val)[0]
    return int(fld)
def getSize(fileobject):
    """Return the end-of-file offset of *fileobject* (its size in bytes for
    a file opened in binary mode).

    The size is measured by seeking to the end, but the position the file
    had on entry is restored before returning, so the call does not disturb
    a caller that is in the middle of reading.
    """
    position = fileobject.tell()
    try:
        fileobject.seek(0, 2)  # whence=2: offset is relative to end of file
        return fileobject.tell()
    finally:
        fileobject.seek(position)
# True on Python 2, where ``str`` is the byte-string type.
_PY2 = str is bytes


def _open_csv_output(path):
    """Open *path* for ``csv.writer`` in the mode the running Python expects."""
    if _PY2:
        return open(path, 'wb')
    # latin-1 maps every byte value to exactly one character, so raw field
    # bytes decoded by _as_text() are written back to disk unchanged.
    return open(path, 'w', newline='', encoding='latin-1')


def _as_text(raw):
    """Return raw field bytes as the native string type used for csv output."""
    return raw if _PY2 else raw.decode('latin-1')


def _printable(raw):
    """Render field bytes as text.

    Each byte is interpreted as a signed value: -1 (0xFF) is dropped, any
    other value below 32 is written as its decimal number, and the remaining
    bytes (32-127) are kept as characters.
    """
    pieces = []
    for byte in bytearray(raw):
        signed = byte - 256 if byte > 127 else byte
        if signed == -1:
            continue
        pieces.append(str(signed) if signed < 32 else chr(byte))
    return ''.join(pieces)


def _packed_decimal(raw, shift, negate=False):
    """Decode the packed-decimal digits stored after the first byte of *raw*.

    The bytes after the first are taken as hex digit pairs with every 'f'
    nibble removed; the result is divided by ten *shift* times and
    multiplied by -1 when *negate* is set.  A value with no digits left
    decodes to zero instead of raising.
    """
    digits = ''.join('%02x' % byte for byte in bytearray(raw)[1:])
    digits = digits.replace('f', '')
    value = Decimal(digits) if digits else Decimal(0)
    for _ in range(shift):
        value = value / 10
    if negate:
        value = value * -1
    return value


def _read_field(f, fieldno, int_list, packedlist):
    """Read one field from *f* and return the value to put in the csv row.

    The first byte is a tag: 250 and 230 introduce strings with a two-byte
    length, 0 and 253 produce an empty value, and any other tag is the
    number of bytes that make up the value.
    """
    tag = unpack(f.read(1))

    if tag == 250:
        charlen = struct.unpack('>h', f.read(2))[0]
        f.read(1)  # one more byte follows the length; its value is not used
        if charlen > 0:
            return _printable(f.read(charlen - 1))
        return ''

    if tag == 230:
        charlen = struct.unpack('>h', f.read(2))[0]
        if charlen > 0:
            return _as_text(f.read(charlen))
        return ''

    if tag > 0 and tag != 253:
        raw = f.read(tag)
        first = bytearray(raw)[0]
        is_int_field = bool(int_list) and fieldno in int_list
        if is_int_field and tag == 3:
            # NOTE(review): only the leading signed byte of the 3-byte value
            # is kept; the trailing two bytes are discarded.  Confirm whether
            # the full 24-bit value was intended.
            return struct.unpack('>bh', raw)[0]
        if is_int_field and tag == 2:
            return struct.unpack('>h', raw)[0]
        if is_int_field and tag == 1:
            return struct.unpack('>b', raw)[0]
        if 128 <= first <= 137:
            # packed decimal-1: the first byte minus 128 is the number of
            # times the digits are divided by ten
            return _packed_decimal(raw, first - 128)
        if packedlist and fieldno in packedlist:
            # packed decimal-2 signed: the first byte itself is the number of
            # divisions and the result is negated
            return _packed_decimal(raw, first, negate=True)
        return _printable(raw)

    return ''


def processfile(inputfile, outputfile, header, int_list, packedlist, show_field_no):
    """Convert a Progress binary dump file into a csv file.

    inputfile     -- path of the binary dump to read
    outputfile    -- path of the csv file to write (overwritten)
    header        -- list of column names written as the first row, or None
                     to write no header row
    int_list      -- field numbers (1-based) decoded as binary integers, or None
    packedlist    -- field numbers (1-based) decoded as signed packed
                     decimals, or None
    show_field_no -- when true, also write a row holding the field numbers

    Progress messages are printed to stdout.  Both files are closed when the
    function returns or raises.
    """
    with open(inputfile, 'rb') as f, _open_csv_output(outputfile) as w:
        writer = csv.writer(w)  # ,quoting=csv.QUOTE_NONNUMERIC
        print('Input file is  %s' % inputfile)
        print('Output file is  %s' % outputfile)
        filesize = getSize(f)
        print('filesize is %d bytes' % filesize)

        f.seek(312)  # field separator
        sep = f.read(1)
        f.seek(320)
        fldcount = unpack(f.read(1))
        print('field count is %d' % fldcount)

        f.seek(636)  # jump to first record
        while f.read(1) != sep and f.tell() < filesize:
            pass

        # Without -h there is no header to write; csv.writer rejects None.
        if header is not None:
            writer.writerow(header)
        # create header based on field number
        if show_field_no:
            writer.writerow(["%d" % fieldno for fieldno in range(1, fldcount)])

        lineno = 0
        # process file
        while f.tell() < filesize:
            lineno += 1
            num = unpack(f.read(1))
            if num == 231:
                # 231 is followed by a two-byte big-endian length and that
                # many bytes, which are skipped
                num = struct.unpack('>H', f.read(2))[0]
                f.read(num)
            # NOTE(review): the source lost its indentation; this check is
            # reconstructed as a sibling of the 231 test above.  Confirm
            # against the original layout.
            if num != 1:
                num = unpack(f.read(1))
            val = f.read(num)
            if val != sep:
                print('invalid start character found on line number %d ...attempting to recover' % lineno)
                # step back over this read and the 12 bytes skipped after the
                # previous record, then scan forward to the next 231 byte
                f.seek(f.tell() - num - 12)
                while unpack(f.read(1)) != 231 and f.tell() < filesize:
                    pass
                f.seek(f.tell() - 1)
                continue

            row = []
            currentfld = 1
            while currentfld < fldcount and f.tell() < filesize:
                row.append(_read_field(f, currentfld, int_list, packedlist))
                currentfld += 1
            writer.writerow(row)
            f.read(12)  # stuff at end of line, I don't know what it is for

        print('finished exporting %d rows' % lineno)
    print('done')
def _parse_field_numbers(arg):
    """Turn a comma separated option value such as '3,4,14' into a list of ints.

    Prints a message and exits with status 2 when an entry is not an integer.
    """
    try:
        return [int(item) for item in arg.split(',')]
    except ValueError:
        print(' field numbers must be comma separated integers, got: %s' % arg)
        sys.exit(2)


def main(argv):
    """Parse the command line in *argv* (without the program name) and run
    the conversion.

    Options:
      -i / --ifile   input dump file (required)
      -o / --ofile   output csv file (required)
      -b / --binary  comma separated field numbers holding binary integers
      -p / --packed  comma separated field numbers holding packed decimals
      -h             comma separated column headers
      --debug        write a row of field numbers; may be given bare or
                     with a value

    Exits with status 2 on an unknown option, a malformed field list, or a
    missing input/output file.
    """
    inputfile = None
    header = None
    intlist = None
    outputfile = None
    packedlist = None
    show_field_no = None

    # --debug is declared to getopt as taking a value, so a bare --debug
    # would swallow the following option (or fail when it comes last).
    # Give it a dummy value in those two cases; "--debug X" and "--debug=X"
    # pass through untouched.
    tokens = list(argv)
    normalized = []
    for pos, token in enumerate(tokens):
        following = tokens[pos + 1] if pos + 1 < len(tokens) else None
        if token == '--debug' and (following is None or following.startswith('-')):
            token = '--debug=1'
        normalized.append(token)

    try:
        opts, _ = getopt.getopt(normalized, "b:p:h:i:o:",
                                ["ifile=", "ofile=", "binary=", "packed=", "debug="])
    except getopt.GetoptError:
        print(' -i <inputfile> -o <outputfile> -b <binary field numbers> -h <header record>')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            header = arg.split(',')
        elif opt == '--debug':
            show_field_no = 1
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-b", "--binary"):
            intlist = _parse_field_numbers(arg)
        elif opt in ("-p", "--packed"):
            packedlist = _parse_field_numbers(arg)

    if inputfile is None or outputfile is None:
        print(' -i <inputfile> -o <outputfile> required')
        sys.exit(2)  # a failed invocation must not report success

    processfile(inputfile, outputfile, header, intlist, packedlist, show_field_no)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment