Created
September 27, 2017 17:05
-
-
Save alpiepho/35e7c9b0b4eab67b2790ebbf31bf33c5 to your computer and use it in GitHub Desktop.
Python - column and row duplication for HTM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# NOTE: my first significant python script - AJP | |
import codecs | |
import csv | |
from datetime import datetime | |
import getopt | |
import os | |
import sys | |
def processTemplateStart(htmFp, tplFilename):
    """Copy the head of the template file into htmFp.

    Lines of tplFilename are written to htmFp, unchanged, until the first
    line containing ``class="dataHeader``.  That line and everything after
    it are left out.  If no line contains the marker, the whole template is
    copied.

    The template is read as raw bytes, so htmFp must accept bytes (for
    example a file opened with mode 'wb').

    Args:
        htmFp: writable binary file-like object receiving the output.
        tplFilename: path of the template HTM file.
    """
    # Bytes literal: the file is opened in binary mode, and searching bytes
    # for a str pattern raises TypeError on Python 3.
    marker = b'class="dataHeader'
    with open(tplFilename, 'rb') as tplFp:
        for line in tplFp:
            if marker in line:
                break
            htmFp.write(line)
def processTemplateEnd(htmFp, tplFilename):
    """Copy the tail of the template file into htmFp.

    Lines of tplFilename are skipped until a line containing
    ``class="dataHeader`` has been seen and a later line contains
    ``class="sectionHeader``.  That sectionHeader line and every line after
    it are written to htmFp unchanged.  If either marker is missing, nothing
    is written.

    The template is read as raw bytes, so htmFp must accept bytes (for
    example a file opened with mode 'wb').

    Args:
        htmFp: writable binary file-like object receiving the output.
        tplFilename: path of the template HTM file.
    """
    # Bytes literals: the file is opened in binary mode, and searching bytes
    # for a str pattern raises TypeError on Python 3.
    headerMarker = b'class="dataHeader'
    sectionMarker = b'class="sectionHeader'
    dataStart = False
    dataEnd = False
    with open(tplFilename, 'rb') as tplFp:
        for line in tplFp:
            if headerMarker in line:
                dataStart = True
            elif dataStart and sectionMarker in line:
                # First section header after the data table: the tail starts here.
                dataEnd = True
            if dataEnd:
                htmFp.write(line)
def _textWriter(htmFp):
    """Return a write(text) callable for htmFp that also works on binary handles."""
    try:
        # Zero-length probe: on Python 3 a binary handle rejects str.
        htmFp.write('')
    except TypeError:
        return lambda text: htmFp.write(text.encode('utf-8'))
    return htmFp.write


def _csvLines(csvFp):
    """Yield the lines of csvFp as native str, decoding UTF-8 bytes if needed."""
    for rawLine in csvFp:
        yield rawLine if isinstance(rawLine, str) else rawLine.decode('utf-8')


def _reformatTimestamp(text):
    """Re-render an 'm/d/yy H:M:S.f' timestamp with millisecond precision."""
    dto = datetime.strptime(text, '%m/%d/%y %H:%M:%S.%f')
    # TODO finish this: shift dto by the caller's dateOffset
    # Month and day are formatted by hand because the '%-m' / '%-d' strftime
    # directives are platform-specific and not available everywhere.
    return '%d/%d/%s' % (dto.month, dto.day, dto.strftime('%y %H:%M:%S.%f')[:-3])


def _writeHeaderRow(write, row):
    """Emit the <tr class="dataHeader"> row for the CSV header cells."""
    write("<tr class=\"dataHeader\">\n")
    for el in row:
        if "Date Time" in el:
            write("<td isi-data-column-header=\"datetime\">Date Time</td>\n")
        elif "Marked" in el:
            write("<td isi-data-column-header=\"marked\" isi-marked>Marked</td>\n")
        else:
            write("<td isi-data-column-header=\"parameter\" isi-device-id=\"465769\" isi-sensor-id=\"465769\" isi-sensor-type=\"2\" isi-parameter-type=\"2\" isi-unit-type=\"2\">" + el + "</td>\n")
    write("</tr>\n")


def processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset):
    """Convert the first data set of a CSV stream into HTM table rows.

    Rows with fewer than two fields are ignored.  The first row containing
    "Date Time" is the header; a second such row ends processing so only one
    data set is emitted.  Multi-field rows that appear before the header are
    skipped (they previously raised UnboundLocalError because the header
    column count was not yet known).

    Args:
        htmFp: output handle; text or binary (text is UTF-8 encoded for
            binary handles).
        csvFp: input handle, iterated line by line; text or UTF-8 bytes.
        delimEnum: 1 selects a tab delimiter, anything else a comma.
        dupColumns: total number of copies of the non-date columns; values
            of 1 or less leave each row unchanged.
        rows: zero-based index of the current duplicate-rows pass; the
            header row is only written when this is 0.
        dupRows: total number of passes; on the last pass (rows + 1 ==
            dupRows) a trailing empty table row is appended.
        dateOffset: when >= 1 the first cell of each data row is parsed as
            '%m/%d/%y %H:%M:%S.%f' and re-rendered with milliseconds.  The
            offset itself is not applied yet (see TODO in the helper).

    Returns:
        A one-element list holding the original first cell of the last data
        row written, or '' if no data row was written.

    Raises:
        ValueError: if dateOffset >= 1 and a first cell does not match the
            expected timestamp format.
    """
    write = _textWriter(htmFp)
    delim = '\t' if delimEnum == 1 else ','
    lastDate = ''
    totalColumns = 0
    dataStarted = False
    for row in csv.reader(_csvLines(csvFp), delimiter=delim):
        if len(row) <= 1:
            continue
        # duplicate columns: append the non-date cells (dupColumns - 1) times
        row = row + row[1:] * (dupColumns - 1)
        line = ''.join(row)
        if "Date Time" in line:
            # use flag to avoid duplicate sets of data
            if dataStarted:
                break
            dataStarted = True
            # count for padding out empty "Marked" column
            totalColumns = len(row)
            # duplicate rows loop calls this function, only show header lines once
            if rows == 0:
                _writeHeaderRow(write, row)
            continue
        if not dataStarted:
            # Nothing before the header can be a data row.
            continue
        isiMarked = " isi-marked" if "Marked" in line else ""
        write("<tr class=\"data\">\n")
        firstCell = row[0]
        if dateOffset >= 1:
            firstCell = _reformatTimestamp(firstCell)
        write("<td isi-data-column isi-timestamp=\"123456\"" + isiMarked + " class=\"date\">" + firstCell + "</td>\n")
        for el in row[1:]:
            write("<td isi-data-column" + isiMarked + ">" + el + "</td>\n")
        if len(row) < totalColumns:
            # Row is shorter than the header: pad with a single empty cell.
            write("<td />\n")
        write("</tr>\n")
        lastDate = row[0:1]
    if rows + 1 == dupRows:
        write("<tr>\n")
        write(" <td />\n")
        write("</tr>\n")
    return lastDate
def Usage():
    """Print the command-line help for this script to stdout."""
    print("Usage: %s -i <file> -o <file> -t <file> -c <num> -r <num>" % sys.argv[0])
    print(" -i <file> Input CSV file (use utf16toutf8.pv if needed)")
    print(" -o <file> Output HTM file")
    print(" -t <file> Template HTM file, data will be replaced with CSV data")
    # Escaped backslash so the help shows the two characters \t, not a real tab.
    print(" -d 0|1 (optional) 0=','(default) 1='\\t'")
    print(" -c <num> (optional) Duplicate columns N times")
    print(" -r <num> (optional) Duplicate rows N times")
    print("TODO: adjust time for -r")
    print("TODO: parse CSV meta data instead of fixed values from template")
# TODO parse CSV meta data instead of fixed values from template
def main():
    """Parse the command line and build the output HTM file.

    Options: -i input CSV, -o output HTM, -t template HTM (all required),
    -d delimiter selector, -c column duplication count, -r row duplication
    count, -h help.  The output is the template head, the converted CSV rows
    repeated -r times, then the template tail.

    Exits with status 2 on any command-line error (unknown option, missing
    required file, non-integer numeric option) and 0 after -h.
    """
    csvFilename = ''
    htmFilename = ''
    tplFilename = ''
    dupColumns = 1
    dupRows = 1
    delimEnum = 0

    # process command arguments
    try:
        ouropts, _ = getopt.getopt(sys.argv[1:], "i:o:t:d:c:r:h")
    except getopt.GetoptError as e:
        print(str(e))
        Usage()
        sys.exit(2)

    for o, a in ouropts:
        if o == '-h':
            Usage()
            sys.exit(0)
        elif o == '-i':
            csvFilename = a
        elif o == '-o':
            htmFilename = a
        elif o == '-t':
            tplFilename = a
        else:
            # -d, -c and -r all take an integer argument.
            try:
                number = int(a)
            except ValueError:
                print("option %s requires an integer, got '%s'" % (o, a))
                Usage()
                sys.exit(2)
            if o == '-d':
                delimEnum = number
            elif o == '-c':
                dupColumns = number
            elif o == '-r':
                dupRows = number

    required = (
        (csvFilename, "please use -i for input CSV file"),
        (htmFilename, "please use -o for output HTM file"),
        (tplFilename, "please use -t for template HTM file"),
    )
    for value, message in required:
        if not value:
            print(message)
            Usage()
            # A missing required option is an error, so exit non-zero.
            sys.exit(2)

    # Both files are opened in binary mode.  The input is opened first so a
    # missing CSV file does not truncate an existing output file.
    with open(csvFilename, 'rb') as csvFp, open(htmFilename, 'wb') as htmFp:
        processTemplateStart(htmFp, tplFilename)
        # handle duplicate rows (TODO: should adjust date)
        dateOffset = 0
        for rows in range(dupRows):
            processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset)
            dateOffset += 10000
            # Rewind so the next pass re-reads the same data.
            csvFp.seek(0)
        processTemplateEnd(htmFp, tplFilename)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment