Python - tool to expand THE ONE FILE columns and rows (with adjusted dates)
#!/usr/bin/env python3
# Expand an HTM log's columns and rows, adjusting the dates on duplicated rows.
from datetime import datetime
import getopt
import sys
# Markers used to locate header cells, data rows, and date cells in the HTM log
TD_HEADER = "<td isi-data-column-header"
TR_DATA = "<tr class=\"data\" isi-data-row"
TR_DATAMARKED = "<tr class=\"data-marked\" isi-data-row"
TD_DATE = "<td class=\"dateTime\">"
TD_OPENTAG = "<td>"
TD_CLOSETAG = "</td>"
TR_CLOSETAG = "</tr>"
def Usage():
    print("Usage: %s -i <file> -o <file> -c <num> -r <num> -m <num>" % sys.argv[0])
    print("  -i <file>   Input HTM file")
    print("  -o <file>   Output HTM file")
    print("  -c <num>    Duplicate columns N times")
    print("  -r <num>    Duplicate rows N times")
    print("  -m <num>    Max total data rows (duplication stops once this cap is reached)")
def DataRowToDate(line):
    # Pull the timestamp text out of the row's dateTime cell.
    dateStartIndex = line.find(TD_DATE) + len(TD_DATE)
    dateEndIndex = line.find(TD_CLOSETAG, dateStartIndex)
    dateStr = line[dateStartIndex:dateEndIndex]
    try:
        dto = datetime.strptime(dateStr, '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        # no fractional part present; fall back to second resolution
        dto = datetime.strptime(dateStr, '%Y-%m-%d %H:%M:%S')
    return dto
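# For illustration, both of these dateTime cell values parse:
#   "2017-09-29 20:14:03.123" -> fractional branch (%f accepts 1-6 digits)
#   "2017-09-29 20:14:03"     -> fallback branch (second resolution)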
def DataRowReplaceDate(line, dto):
    # Rewrite the row's dateTime cell with the adjusted datetime.
    dateStartIndex = line.find(TD_DATE) + len(TD_DATE)
    dateEndIndex = line.find(TD_CLOSETAG, dateStartIndex)
    dateStr = line[dateStartIndex:dateEndIndex]
    try:
        datetime.strptime(dateStr, '%Y-%m-%d %H:%M:%S.%f')
        useMs = True
    except ValueError:
        useMs = False
    newLine = line[:dateStartIndex]
    if useMs:
        # strftime emits 6 fractional digits; trim the last 3 to keep milliseconds
        newLine += dto.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    else:
        newLine += dto.strftime('%Y-%m-%d %H:%M:%S')
    # keep the remainder of the row and re-terminate the line
    newLine += line[dateEndIndex:-1]
    newLine += "\n"
    return newLine
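# For illustration: strftime's %f always emits 6 digits, so 20:14:03.123 formats
# as "20:14:03.123000" and the [:-3] slice trims it back to "20:14:03.123".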
inFilename = ''
outFilename = ''
dupColumns = 1
dupRows = 1
dupRowMax = 100000000000  # effectively no limit unless -m is given
try:
    # process command arguments
    ouropts, args = getopt.getopt(sys.argv[1:], "i:o:c:r:m:h")
    for o, a in ouropts:
        if o == '-i':
            inFilename = a
        elif o == '-o':
            outFilename = a
        elif o == '-c':
            dupColumns = int(a)
        elif o == '-r':
            dupRows = int(a)
        elif o == '-m':
            dupRowMax = int(a)
        elif o == '-h':
            Usage()
            sys.exit(0)
except getopt.GetoptError as e:
    print(str(e))
    Usage()
    sys.exit(2)
if not inFilename:
    print("please use -i for input HTM log file")
    Usage()
    sys.exit(2)
if not outFilename:
    print("please use -o for output HTM log file")
    Usage()
    sys.exit(2)
actualColumns = 0
actualRows = 0
with open(outFilename, 'w') as outFp:
    with open(inFilename, 'r') as inFp:
        contents = inFp.readlines()
        total = len(contents)
        newContents = []
        # adjust all data columns first
        row = 0
        while row < total:
            # duplicate data columns (log has 1 tr per full data row, with multiple tds)
            if contents[row].find(TR_DATA) >= 0 or contents[row].find(TR_DATAMARKED) >= 0:
                line = contents[row]
                # slice between the first plain <td> (the dateTime td is skipped) and </tr>
                rangeDataStart = line.find(TD_OPENTAG)
                rangeDataEnd = line.rfind(TR_CLOSETAG)
                newLine = line[:rangeDataEnd]
                for i in range(1, dupColumns):
                    newLine += line[rangeDataStart:rangeDataEnd]
                newLine += TR_CLOSETAG + "\n"
                contents[row] = newLine
            row += 1
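        # For illustration with -c 3 (hypothetical row): a data row like
        #   <tr class="data" isi-data-row ...><td class="dateTime">t0</td><td>a</td><td>b</td></tr>
        # keeps its single dateTime cell but carries the <td>a</td><td>b</td> run three times in total.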
        # adjust header columns and data rows in the same loop
        rangeHeaderStart = -1
        rangeHeaderEnd = -1
        rangeDataStart = -1
        rangeDataEnd = -1
        row = 0
        while row < total:
            # duplicate header columns (log has 1 td/column per row)
            # start header td range
            if contents[row].find(TD_HEADER) >= 0 and rangeHeaderStart == -1:
                rangeHeaderStart = row + 1  # skip Date/Time
            # track header td range end
            if contents[row].find(TD_HEADER) >= 0 and rangeHeaderStart != -1:
                rangeHeaderEnd = row
            # header range done, duplicate header columns
            if rangeHeaderStart != -1 and contents[row].find(TD_HEADER) == -1:
                for i in range(1, dupColumns):
                    for j in range(rangeHeaderStart, rangeHeaderEnd + 1):
                        newContents.append(contents[j])
                # each copy holds (end - start + 1) non-date columns, plus 1 for Date/Time
                actualColumns = dupColumns * (rangeHeaderEnd - rangeHeaderStart + 1) + 1
                rangeHeaderStart = -1
                rangeHeaderEnd = -1
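            # For illustration with -c 3 (hypothetical header): cells A, B after
            # Date/Time come out as Date/Time, A, B, A, B, A, B -- the original
            # tds pass through the append at the bottom of this loop, and the
            # extra copies are emitted by the block above when the range closes.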
            # start data tr range
            if (contents[row].find(TR_DATA) >= 0 or contents[row].find(TR_DATAMARKED) >= 0) and rangeDataStart == -1:
                rangeDataStart = row
            # track data tr range end
            if (contents[row].find(TR_DATA) >= 0 or contents[row].find(TR_DATAMARKED) >= 0) and rangeDataStart != -1:
                rangeDataEnd = row
            # data range done, duplicate data rows
            if rangeDataStart != -1 and contents[row].find(TR_DATA) == -1 and contents[row].find(TR_DATAMARKED) == -1:
                # delta covers the whole range (dto3 - dto1) plus one row interval
                # (dto2 - dto1), so each duplicated set lands after the previous one
                dto1 = DataRowToDate(contents[rangeDataStart])
                dto2 = DataRowToDate(contents[rangeDataStart + 1])
                dto3 = DataRowToDate(contents[rangeDataEnd])
                rangeDelta = (dto3 - dto1) + (dto2 - dto1)
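                # Illustrative numbers: rows stamped 10:00:00 .. 10:00:09 at 1s
                # intervals give rangeDelta = 9s + 1s = 10s, so copy i is shifted
                # by i*10s and duplicated timestamps never collide with the originals.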
                totalRows = rangeDataEnd - rangeDataStart + 1
                for i in range(1, dupRows):
                    for j in range(rangeDataStart, rangeDataEnd + 1):
                        line = contents[j]
                        # adjust date/time for repeated row sets
                        dto = DataRowToDate(line)
                        dto += i * rangeDelta
                        line = DataRowReplaceDate(line, dto)
                        # add the duplicated row
                        newContents.append(line)
                        totalRows += 1
                        if totalRows > dupRowMax:
                            break
                    if totalRows > dupRowMax:
                        break
                actualRows = totalRows
                rangeDataStart = -1
                rangeDataEnd = -1
            newContents.append(contents[row])
            row += 1
        outFp.write(''.join(newContents))
print("actual columns: {}".format(actualColumns))
print("actual rows: {}".format(actualRows))
if actualRows > 0:
    print("\n(hint: current max rows: {} -> row dup num {})".format(dupRowMax, dupRowMax // actualRows))