Skip to content

Instantly share code, notes, and snippets.

@jrleeman
Created March 21, 2016 20:24
Show Gist options
  • Save jrleeman/eda29f9f36d939276160 to your computer and use it in GitHub Desktop.
Save jrleeman/eda29f9f36d939276160 to your computer and use it in GitHub Desktop.
Takes data with relative time stamps, interpolates and downsamples it, and adds absolute time stamps for processing.
import numpy as np
import datetime
import matplotlib.pyplot as plt
import math
import scipy
import argparse
def mean_downsample(a, r):
    """Downsample 1-D data by averaging consecutive groups of ``r`` values.

    The data is padded at the end with NaN up to a multiple of ``r`` and the
    mean of each group is taken while ignoring NaN, so the last (possibly
    short) group is averaged over only the real values it contains.

    Parameters
    ----------
    a : array_like
        Data to downsample; it is flattened to one dimension.
    r : int
        Downsample factor, i.e. the number of samples averaged per output
        value.

    Returns
    -------
    numpy.ndarray
        The group means, ``ceil(a.size / r)`` values long.

    Raises
    ------
    ValueError
        If ``r`` is less than 1.
    """
    if r < 1:
        raise ValueError("Downsample factor must be at least 1")
    values = np.asarray(a, dtype=float).ravel()
    # Integer arithmetic keeps the pad length an int on both Python 2 and 3
    # (math.ceil returns a float on Python 2, which np.zeros rejects).
    pad_size = (-values.size) % r
    padded = np.concatenate((values, np.full(pad_size, np.nan)))
    # One row per output sample; nanmean skips the NaN padding.
    return np.nanmean(padded.reshape(-1, r), axis=1)
#
# Set parameters and parse arguments
#
# Adjacent string literals are joined by the parser, which keeps the word
# spacing explicit (backslash continuations inside a literal glue the next
# line's text, including any indentation, straight onto the previous word).
desc_str = ('Interpolate, downsample, and timestamp a file with relative '
            'time stamps. Relative time should be in milliseconds. See '
            'help for more information and usage instructions.')
# Only options that the parser below actually defines appear in the example.
ex_str = ('Example: python datastamp.py -t0 2016-02-12T01:10:00 '
          '-f LOG00122.TXT -d , -r 30 -i 2000 -tc 0 -o 31000')
parser = argparse.ArgumentParser(description=desc_str, epilog=ex_str)
parser.add_argument('-f', required=True, type=str, help='Filename to operate on')
parser.add_argument('-r', required=True, type=int, help='Downsample factor')
parser.add_argument('-d', required=True, type=str, help='Column delimiter')
parser.add_argument('-i', required=True, type=float, help='Interpolation interval in milliseconds')
parser.add_argument('-t0', required=True, type=str, help='Start time YYYY-MM-DDTHH:MM:SS')
parser.add_argument('-tc', required=True, type=int, help='Time column, zero indexed')
parser.add_argument('-o', required=True, type=float, help='Time offset in milliseconds')
args = parser.parse_args()
# Reject values that would later divide by zero or produce an empty grid.
if args.r < 1:
    parser.error('-r (downsample factor) must be at least 1')
if args.i <= 0:
    parser.error('-i (interpolation interval) must be greater than 0')
# Replace the start-time string with a datetime object; report a malformed
# value as a usage error rather than a traceback.
try:
    args.t0 = datetime.datetime.strptime(args.t0, "%Y-%m-%dT%H:%M:%S")
except ValueError:
    parser.error('-t0 must be formatted as YYYY-MM-DDTHH:MM:SS')
print("Loading data...")
def load_data(fname, tcol, delimiter, zerotime=True, printstats=False):
    """Load a delimited numeric text file into a 2-D array.

    Parameters
    ----------
    fname : str or file-like
        Source handed directly to ``numpy.loadtxt``.
    tcol : int
        Zero-indexed column holding the time stamps.
    delimiter : str
        Column delimiter.
    zerotime : bool, optional
        If true, subtract the first row's time from the whole time column so
        that it starts at zero.
    printstats : bool, optional
        If true, print the row and column counts and the max, min, and mean
        spacing between consecutive time stamps.

    Returns
    -------
    numpy.ndarray
        The loaded data, always two-dimensional (rows x columns).

    Raises
    ------
    ValueError
        If no data rows were read.
    """
    # ndmin=2 keeps a single-row or single-column file two-dimensional so the
    # column indexing below works for it as well.
    data = np.loadtxt(fname, delimiter=delimiter, ndmin=2)
    if data.size == 0:
        raise ValueError("No data rows found in %s" % (fname,))
    # If requested, zero the time column (the column passed in as ``tcol``,
    # not a module-level setting).
    if zerotime:
        data[:, tcol] = data[:, tcol] - data[0, tcol]
    if printstats:
        nrows, ncols = data.shape
        file_dt = np.ediff1d(data[:, tcol])
        print("\n")
        print("-" * 70)
        print("Raw File")
        print("-" * 70)
        print("nrows:  %s" % nrows)
        print("ncols:  %s" % ncols)
        # A single row has no time differences to summarise.
        if file_dt.size:
            print("dt max:  %s" % np.max(file_dt))
            print("dt min:  %s" % np.min(file_dt))
            print("dt mean:  %s" % np.mean(file_dt))
    return data
# Load up the data (time column zeroed, raw-file statistics printed)
data = load_data(args.f, args.tc, args.d, printstats=True)
#
# Interpolation
#
# Number of points in the interpolated dataset: how many whole intervals fit
# before the last (zeroed) time stamp.
# NOTE: the grid below runs from 0 to (npts_interpolated - 1) * interval, so a
# point at exactly npts_interpolated * interval is not generated.
npts_interpolated = int(data[-1, args.tc] / args.i)
# Evenly spaced times at which every column is evaluated
interpolated_times = np.arange(npts_interpolated) * args.i
# Output array: one row per interpolated time, one column per input column
interpolated_data = np.zeros((len(interpolated_times), data.shape[1]))
# np.interp handles one column at a time; the time column is interpolated
# against itself like any other column.
# NOTE(review): np.interp expects increasing x values, so the time column is
# presumably monotonically increasing - confirm for the input files.
for j in range(data.shape[1]):
    interpolated_data[:, j] = np.interp(interpolated_times, data[:, args.tc], data[:, j])
# Print stats of interpolated data
print("\n")
print("-" * 70)
print("Interpolated")
print("-" * 70)
print("nrows:  %s" % interpolated_times.shape[0])
#
# Downsampling of interpolated data
#
# Number of NaN samples mean_downsample appends to reach a row count that is
# divisible by the downsampling factor (kept as an integer).
pad_size = (-npts_interpolated) % args.r
# mean_downsample returns one value per group of args.r samples, i.e.
# ceil(npts_interpolated / args.r) values, so the output array needs exactly
# that many rows.
final_rows = (npts_interpolated + pad_size) // args.r
final_cols = interpolated_data.shape[1]
interpolated_downsampled_data = np.zeros((final_rows, final_cols))
# Downsample each column (the relative time column is averaged as well)
for col in range(final_cols):
    interpolated_downsampled_data[:, col] = mean_downsample(interpolated_data[:, col], args.r)
# Apply any offset to the relative time column
interpolated_downsampled_data[:, args.tc] = interpolated_downsampled_data[:, args.tc] + args.o
# Convert relative time stamps (milliseconds) to timedelta objects
relative_time_stamps = [datetime.timedelta(milliseconds=float(ms))
                        for ms in interpolated_downsampled_data[:, args.tc]]
# Absolute time stamps: the start time plus each relative offset
absolute_time_stamps = np.array([args.t0 + delta for delta in relative_time_stamps])
# Print stats of downsampled data
print("\n")
print("-" * 70)
print("Downsampled")
print("-" * 70)
print("nrows:  %s" % final_rows)
#
# Write output file
#
print("Writing output file")
# The with-block closes the file even if a write fails part-way through.
with open('output.txt', 'w') as f:
    for stamp, row in zip(absolute_time_stamps, interpolated_downsampled_data):
        # Each line: formatted date-time string, then every data column,
        # separated by the same delimiter as the input file.
        fields = [stamp.strftime("%Y-%m-%dT%H:%M:%S")]
        fields.extend(str(value) for value in row)
        f.write(args.d.join(fields) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment