jrleeman · March 21, 2016 20:24
diff --git a/datastamp.py b/datastamp.py
 import numpy as np
 import datetime
 import matplotlib.pyplot as plt
 import math
 import scipy
 import argparse


 def mean_downsample(a, r):
     """Downsample a 1-D array by averaging consecutive groups of ``r`` samples.

     The array is padded with NaN up to the next multiple of ``r`` so that a
     final, shorter group is averaged over its real samples only.

     Parameters
     ----------
     a : numpy.ndarray
         One-dimensional array of samples.
     r : int
         Downsample factor, i.e. the number of samples averaged per output
         value. Must be at least 1.

     Returns
     -------
     numpy.ndarray
         The group means, one per ``r`` input samples (rounded up).

     Raises
     ------
     ValueError
         If ``r`` is smaller than 1.
     """
     if r < 1:
         raise ValueError("downsample factor must be >= 1, got %r" % (r,))

     # Number of NaN samples needed to reach a multiple of r. Integer
     # arithmetic keeps this usable as an array length.
     pad_size = (-a.size) % r
     a_padded = np.append(a, np.full(pad_size, np.nan))

     # nanmean skips the NaN padding when averaging each row of r samples.
     return np.nanmean(a_padded.reshape(-1, r), axis=1)

 #
 # Set parameters and parse arguments
 #

 # Adjacent string literals are joined by the parser, which avoids the runs
 # of spaces that backslash continuations inside a literal would embed.
 desc_str = ('Interpolate, downsample, and timestamp a file with relative '
             'time stamps. Relative time should be in milliseconds. See '
             'help for more information and usage instructions.')

 # Usage example shown at the end of the help output. It only uses options
 # that are defined below, so it can be copied and run as-is.
 ex_str = ('Example: python datastamp.py -t0 2016-02-12T01:10:00 '
           '-f LOG00122.TXT -d , -r 30 -i 2000 -tc 0 -o 31000')

 parser = argparse.ArgumentParser(description=desc_str, epilog=ex_str)

 parser.add_argument('-f', required=True, type=str, help='Filename to operate on')
 parser.add_argument('-r', required=True, type=int, help='Downsample factor')
 parser.add_argument('-d', required=True, type=str, help='Column delimiter')
 parser.add_argument('-i', required=True, type=float, help='Interpolation interval in milliseconds')
 parser.add_argument('-t0', required=True, type=str, help='Start time YYYY-MM-DDTHH:MM:SS')
 parser.add_argument('-tc', required=True, type=int, help='Time column, zero indexed')
 parser.add_argument('-o', required=True, type=float, help='Time offset in milliseconds')

 args = parser.parse_args()

 # Replace the start-time string with the datetime object it describes
 args.t0 = datetime.datetime.strptime(args.t0, "%Y-%m-%dT%H:%M:%S")

 # A single-argument print call behaves the same on Python 2 and Python 3
 print("Loading data...")


 def load_data(fname, tcol, delimiter, zerotime=True, printstats=False):
     """Load a delimited numeric text file, optionally zeroing its time column.

     Parameters
     ----------
     fname : str or other input accepted by ``numpy.loadtxt``
         The file to read.
     tcol : int
         Zero-indexed column holding the time values.
     delimiter : str
         Column delimiter passed to ``numpy.loadtxt``.
     zerotime : bool, optional
         If True, subtract the first time value from the whole time column
         so that it starts at zero.
     printstats : bool, optional
         If True, print the row/column counts and, when there are at least
         two rows, the max/min/mean time step.

     Returns
     -------
     numpy.ndarray
         Two-dimensional array with the file's contents.
     """
     # ndmin=2 keeps the result two-dimensional even for a one-row file, so
     # the [row, column] indexing below always applies.
     data = np.loadtxt(fname, delimiter=delimiter, ndmin=2)

     # If requested, zero the time column. The tcol parameter is used on both
     # sides so the function does not depend on the global argument namespace.
     if zerotime:
         data[:, tcol] = data[:, tcol] - data[0, tcol]

     if printstats:
         nrows, ncols = np.shape(data)
         file_dt = np.ediff1d(data[:, tcol])
         # Single-argument print calls behave the same on Python 2 and 3
         print("\n")
         print("-" * 70)
         print("Raw File")
         print("-" * 70)
         print("nrows:  %s" % nrows)
         print("ncols:  %s" % ncols)
         # A single row has no time steps to summarise
         if file_dt.size:
             print("dt max:  %s" % np.max(file_dt))
             print("dt min:  %s" % np.min(file_dt))
             print("dt mean:  %s" % np.mean(file_dt))
     return data

 # Load up the data
 data = load_data(args.f, args.tc, args.d, printstats=True)

 #
 # Interpolation
 #

 # Number of samples, spaced args.i apart and starting at zero, that fit
 # before the last value in the time column
 npts_interpolated = int(data[-1, args.tc] / args.i)

 # Times at which every column is resampled: 0, i, 2i, ...
 interpolated_times = np.arange(0, npts_interpolated) * args.i

 # Create empty array to hold interpolated data, one column per input column
 interpolated_data = np.zeros((len(interpolated_times), np.shape(data)[1]))

 # Linearly interpolate each column against the time column
 for j in range(np.shape(data)[1]):
     interpolated_data[:, j] = np.interp(interpolated_times, data[:, args.tc], data[:, j])

 # Print stats of interpolated data. Single-argument print calls behave the
 # same on Python 2 and Python 3.
 print("\n")
 print("-" * 70)
 print("Interpolated")
 print("-" * 70)
 print("nrows:  %s" % np.shape(interpolated_times)[0])

 #
 # Downsampling of interpolated data
 #

 # Downsampling averages groups of args.r rows, and a final partial group
 # still produces one row, so the output has ceil(npts / r) rows.
 final_rows = int(math.ceil(float(npts_interpolated) / args.r))
 final_cols = np.shape(interpolated_data)[1]

 # Number of padding samples needed to fill the last group. It is reported
 # for reference only and must not be added to the row count.
 pad_size = final_rows * args.r - npts_interpolated

 interpolated_downsampled_data = np.zeros((final_rows, final_cols))

 # Downsample each column
 for col in range(final_cols):
     interpolated_downsampled_data[:, col] = mean_downsample(interpolated_data[:, col], args.r)

 # Apply any offset to the relative time column
 interpolated_downsampled_data[:, args.tc] = interpolated_downsampled_data[:, args.tc] + args.o

 # Convert relative time stamps (milliseconds) to timedelta objects
 relative_time_stamps = [datetime.timedelta(milliseconds=float(ms))
                         for ms in interpolated_downsampled_data[:, args.tc]]

 # Make absolute time stamps by adding each relative offset to the start time
 absolute_time_stamps = np.array([args.t0 + delta for delta in relative_time_stamps])

 # Print stats of downsampled data. Single-argument print calls behave the
 # same on Python 2 and Python 3.
 print("\n")
 print("-" * 70)
 print("Downsampled")
 print("-" * 70)
 print("nrows:  %s" % final_rows)

 #
 # Write output file
 #

 print("Writing output file")

 # The with-block closes the file even if a write fails part-way through
 with open('output.txt', 'w') as f:
     # One output line per downsampled row: the formatted absolute time
     # stamp followed by every data column, separated by the input delimiter
     for stamp, row in zip(absolute_time_stamps, interpolated_downsampled_data):
         fields = [stamp.strftime("%Y-%m-%dT%H:%M:%S")]
         fields.extend(str(value) for value in row)
         f.write(args.d.join(fields) + "\n")
	import numpy as np
	import datetime
	import matplotlib.pyplot as plt
	import math
	import scipy
	import argparse


	def mean_downsample(a, r):
	    """Downsample a 1-D array by averaging consecutive groups of ``r`` samples.

	    The array is padded with NaN up to the next multiple of ``r`` so that a
	    final, shorter group is averaged over its real samples only.

	    Parameters
	    ----------
	    a : numpy.ndarray
	        One-dimensional array of samples.
	    r : int
	        Downsample factor, i.e. the number of samples averaged per output
	        value. Must be at least 1.

	    Returns
	    -------
	    numpy.ndarray
	        The group means, one per ``r`` input samples (rounded up).

	    Raises
	    ------
	    ValueError
	        If ``r`` is smaller than 1.
	    """
	    if r < 1:
	        raise ValueError("downsample factor must be >= 1, got %r" % (r,))

	    # Number of NaN samples needed to reach a multiple of r. Integer
	    # arithmetic keeps this usable as an array length.
	    pad_size = (-a.size) % r
	    a_padded = np.append(a, np.full(pad_size, np.nan))

	    # nanmean skips the NaN padding when averaging each row of r samples.
	    return np.nanmean(a_padded.reshape(-1, r), axis=1)

	#
	# Set parameters and parse arguments
	#

	# Adjacent string literals are joined by the parser, which avoids the
	# whitespace that backslash continuations inside a literal would embed.
	desc_str = ('Interpolate, downsample, and timestamp a file with relative '
	            'time stamps. Relative time should be in milliseconds. See '
	            'help for more information and usage instructions.')

	# Usage example shown at the end of the help output. It only uses options
	# that are defined below, so it can be copied and run as-is.
	ex_str = ('Example: python datastamp.py -t0 2016-02-12T01:10:00 '
	          '-f LOG00122.TXT -d , -r 30 -i 2000 -tc 0 -o 31000')

	parser = argparse.ArgumentParser(description=desc_str, epilog=ex_str)

	parser.add_argument('-f', required=True, type=str, help='Filename to operate on')
	parser.add_argument('-r', required=True, type=int, help='Downsample factor')
	parser.add_argument('-d', required=True, type=str, help='Column delimiter')
	parser.add_argument('-i', required=True, type=float, help='Interpolation interval in milliseconds')
	parser.add_argument('-t0', required=True, type=str, help='Start time YYYY-MM-DDTHH:MM:SS')
	parser.add_argument('-tc', required=True, type=int, help='Time column, zero indexed')
	parser.add_argument('-o', required=True, type=float, help='Time offset in milliseconds')

	args = parser.parse_args()

	# Replace the start-time string with the datetime object it describes
	args.t0 = datetime.datetime.strptime(args.t0, "%Y-%m-%dT%H:%M:%S")

	# A single-argument print call behaves the same on Python 2 and Python 3
	print("Loading data...")


	def load_data(fname, tcol, delimiter, zerotime=True, printstats=False):
	    """Load a delimited numeric text file, optionally zeroing its time column.

	    Parameters
	    ----------
	    fname : str or other input accepted by ``numpy.loadtxt``
	        The file to read.
	    tcol : int
	        Zero-indexed column holding the time values.
	    delimiter : str
	        Column delimiter passed to ``numpy.loadtxt``.
	    zerotime : bool, optional
	        If True, subtract the first time value from the whole time column
	        so that it starts at zero.
	    printstats : bool, optional
	        If True, print the row/column counts and, when there are at least
	        two rows, the max/min/mean time step.

	    Returns
	    -------
	    numpy.ndarray
	        Two-dimensional array with the file's contents.
	    """
	    # ndmin=2 keeps the result two-dimensional even for a one-row file, so
	    # the [row, column] indexing below always applies.
	    data = np.loadtxt(fname, delimiter=delimiter, ndmin=2)

	    # If requested, zero the time column. The tcol parameter is used on both
	    # sides so the function does not depend on the global argument namespace.
	    if zerotime:
	        data[:, tcol] = data[:, tcol] - data[0, tcol]

	    if printstats:
	        nrows, ncols = np.shape(data)
	        file_dt = np.ediff1d(data[:, tcol])
	        # Single-argument print calls behave the same on Python 2 and 3
	        print("\n")
	        print("-" * 70)
	        print("Raw File")
	        print("-" * 70)
	        print("nrows:  %s" % nrows)
	        print("ncols:  %s" % ncols)
	        # A single row has no time steps to summarise
	        if file_dt.size:
	            print("dt max:  %s" % np.max(file_dt))
	            print("dt min:  %s" % np.min(file_dt))
	            print("dt mean:  %s" % np.mean(file_dt))
	    return data

	# Load up the data
	data = load_data(args.f, args.tc, args.d, printstats=True)

	#
	# Interpolation
	#

	# Number of samples, spaced args.i apart and starting at zero, that fit
	# before the last value in the time column
	npts_interpolated = int(data[-1, args.tc] / args.i)

	# Times at which every column is resampled: 0, i, 2i, ...
	interpolated_times = np.arange(0, npts_interpolated) * args.i

	# Create empty array to hold interpolated data, one column per input column
	interpolated_data = np.zeros((len(interpolated_times), np.shape(data)[1]))

	# Linearly interpolate each column against the time column
	for j in range(np.shape(data)[1]):
	    interpolated_data[:, j] = np.interp(interpolated_times, data[:, args.tc], data[:, j])

	# Print stats of interpolated data. Single-argument print calls behave the
	# same on Python 2 and Python 3.
	print("\n")
	print("-" * 70)
	print("Interpolated")
	print("-" * 70)
	print("nrows:  %s" % np.shape(interpolated_times)[0])

	#
	# Downsampling of interpolated data
	#

	# Downsampling averages groups of args.r rows, and a final partial group
	# still produces one row, so the output has ceil(npts / r) rows.
	final_rows = int(math.ceil(float(npts_interpolated) / args.r))
	final_cols = np.shape(interpolated_data)[1]

	# Number of padding samples needed to fill the last group. It is reported
	# for reference only and must not be added to the row count.
	pad_size = final_rows * args.r - npts_interpolated

	interpolated_downsampled_data = np.zeros((final_rows, final_cols))

	# Downsample each column
	for col in range(final_cols):
	    interpolated_downsampled_data[:, col] = mean_downsample(interpolated_data[:, col], args.r)

	# Apply any offset to the relative time column
	interpolated_downsampled_data[:, args.tc] = interpolated_downsampled_data[:, args.tc] + args.o

	# Convert relative time stamps (milliseconds) to timedelta objects
	relative_time_stamps = [datetime.timedelta(milliseconds=float(ms))
	                        for ms in interpolated_downsampled_data[:, args.tc]]

	# Make absolute time stamps by adding each relative offset to the start time
	absolute_time_stamps = np.array([args.t0 + delta for delta in relative_time_stamps])

	# Print stats of downsampled data. Single-argument print calls behave the
	# same on Python 2 and Python 3.
	print("\n")
	print("-" * 70)
	print("Downsampled")
	print("-" * 70)
	print("nrows:  %s" % final_rows)

	#
	# Write output file
	#

	print("Writing output file")

	# The with-block closes the file even if a write fails part-way through
	with open('output.txt', 'w') as f:
	    # One output line per downsampled row: the formatted absolute time
	    # stamp followed by every data column, separated by the input delimiter
	    for stamp, row in zip(absolute_time_stamps, interpolated_downsampled_data):
	        fields = [stamp.strftime("%Y-%m-%dT%H:%M:%S")]
	        fields.extend(str(value) for value in row)
	        f.write(args.d.join(fields) + "\n")