Created
March 21, 2016 20:24
-
-
Save jrleeman/eda29f9f36d939276160 to your computer and use it in GitHub Desktop.
Takes data with relative time stamps, interpolates it onto a regular time grid, downsamples it, and adds absolute time stamps for further processing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import datetime | |
| import matplotlib.pyplot as plt | |
| import math | |
| import scipy | |
| import argparse | |
def mean_downsample(a, r):
    """Downsample a 1-D sequence by averaging consecutive groups of ``r`` samples.

    The input is padded with NaN up to the next multiple of ``r`` so that it
    can be reshaped into rows of ``r`` samples; each row is then averaged
    while ignoring NaN, so a short final group is averaged over the samples
    it actually contains.

    Parameters
    ----------
    a : array_like
        One-dimensional numeric data.
    r : int
        Downsample factor (number of samples per averaged group), >= 1.

    Returns
    -------
    numpy.ndarray
        Array of length ``ceil(len(a) / r)`` holding the group means.

    Raises
    ------
    ValueError
        If ``r`` is smaller than 1.
    """
    if r < 1:
        raise ValueError("downsample factor must be >= 1, got %r" % (r,))

    # Work on a flat float array so NaN padding is representable.
    a = np.asarray(a, dtype=float).ravel()

    # Number of NaN samples needed to reach the next multiple of r (0 if
    # the length is already a multiple).
    pad_size = -a.size % r
    a_padded = np.append(a, np.full(pad_size, np.nan))

    return np.nanmean(a_padded.reshape(-1, r), axis=1)
#
# Set parameters and parse arguments
#

# Adjacent string literals are used instead of backslash continuations so
# that no source indentation leaks into the help text.
desc_str = ('Interpolate, downsample, and timestamp a file with relative '
            'time stamps. Relative time should be in milliseconds. See '
            'help for more information and usage instructions.')

# Only options that the parser below actually defines appear in the example.
ex_str = ('Example: python datastamp.py -t0 2016-02-12T01:10:00 '
          '-f LOG00122.TXT -d , -r 30 -i 2000 -tc 0 -o 31000')

parser = argparse.ArgumentParser(description=desc_str, epilog=ex_str)
parser.add_argument('-f', required=True, type=str,
                    help='Filename to operate on')
parser.add_argument('-r', required=True, type=int,
                    help='Downsample factor')
parser.add_argument('-d', required=True, type=str,
                    help='Column delimiter')
parser.add_argument('-i', required=True, type=float,
                    help='Interpolation interval in milliseconds')
parser.add_argument('-t0', required=True, type=str,
                    help='Start time YYYY-MM-DDTHH:MM:SS')
parser.add_argument('-tc', required=True, type=int,
                    help='Time column, zero indexed')
parser.add_argument('-o', required=True, type=float,
                    help='Time offset in milliseconds')
args = parser.parse_args()

# Reject values that would otherwise fail later with an obscure error
# (division by the interval, reshaping by the downsample factor).
if args.r < 1:
    parser.error('-r (downsample factor) must be a positive integer')
if args.i <= 0:
    parser.error('-i (interpolation interval) must be greater than zero')

# Replace the start-time string with a datetime object; report a malformed
# value as a normal usage error instead of a traceback.
try:
    args.t0 = datetime.datetime.strptime(args.t0, "%Y-%m-%dT%H:%M:%S")
except ValueError:
    parser.error('-t0 must have the form YYYY-MM-DDTHH:MM:SS')

print("Loading data...")
def load_data(fname, tcol, delimiter, zerotime=True, printstats=False):
    """Load a delimited numeric text file into a 2-D array.

    Parameters
    ----------
    fname : str
        Path of the file to read.
    tcol : int
        Zero-based index of the time column.
    delimiter : str
        Column delimiter used in the file.
    zerotime : bool, optional
        If True, subtract the first time value from the whole time column
        so that it starts at zero.
    printstats : bool, optional
        If True, print the row/column counts and, when there are at least
        two rows, the max/min/mean spacing between consecutive time values.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with one row per line of the file.
    """
    # ndmin=2 keeps single-row and single-column files two-dimensional so
    # the column indexing below always works.
    data = np.loadtxt(fname, delimiter=delimiter, ndmin=2)

    # If requested, zero the time column (the column named by ``tcol``,
    # not any module-level setting).
    if zerotime and data.shape[0] > 0:
        data[:, tcol] = data[:, tcol] - data[0, tcol]

    if printstats:
        print("\n")
        print("-" * 70)
        print("Raw File")
        print("-" * 70)
        print("nrows:  %s" % np.shape(data)[0])
        print("ncols:  %s" % np.shape(data)[1])

        # Differences between consecutive time stamps; empty when the file
        # has fewer than two rows, in which case there is nothing to report.
        file_dt = np.ediff1d(data[:, tcol])
        if file_dt.size:
            print("dt max:  %s" % np.max(file_dt))
            print("dt min:  %s" % np.min(file_dt))
            print("dt mean:  %s" % np.mean(file_dt))

    return data
# Load up the data (time column zeroed, raw-file statistics printed)
data = load_data(args.f, args.tc, args.d, printstats=True)

#
# Interpolation
#

# Calculate how many points will be in the interpolated dataset: the last
# (zeroed) time value divided by the interpolation interval, truncated.
# NOTE(review): the grid built below ends at (npts_interpolated - 1) * args.i,
# so it always stops before the final sample time -- confirm this is intended.
npts_interpolated = int(data[-1, args.tc] / args.i)

# Create array of times at which interpolation will be done
interpolated_times = np.arange(0, npts_interpolated) * args.i

# Create empty array to hold interpolated data
interpolated_data = np.zeros((len(interpolated_times), np.shape(data)[1]))

# Interpolate each column (the time column included) onto the regular grid
for j in range(np.shape(data)[1]):
    interpolated_data[:, j] = np.interp(interpolated_times,
                                        data[:, args.tc],
                                        data[:, j])

# Print stats of interpolated data. The parenthesised single-argument form
# prints the same text on Python 2 and Python 3.
print("\n")
print("-" * 70)
print("Interpolated")
print("-" * 70)
print("nrows:  %s" % np.shape(interpolated_times)[0])
#
# Downsampling of interpolated data
#

# Calculate any padding needed to get us to a number of rows divisible
# by the downsampling factor so we can average effectively
n_interpolated_rows = np.shape(interpolated_data)[0]
pad_size = -n_interpolated_rows % args.r

# Calculate the final numbers of rows and columns, create an array.
# The row count is the padded length divided by the factor, which is what
# mean_downsample returns for each column.
final_rows = (n_interpolated_rows + pad_size) // args.r
final_cols = np.shape(interpolated_data)[1]
interpolated_downsampled_data = np.zeros((final_rows, final_cols))

# Downsample each column
for i in range(final_cols):
    interpolated_downsampled_data[:, i] = mean_downsample(interpolated_data[:, i], args.r)

# Apply any offset to the relative time column
interpolated_downsampled_data[:, args.tc] = interpolated_downsampled_data[:, args.tc] + args.o

# Convert relative time stamps (milliseconds) to timedelta objects
relative_time_stamps = [datetime.timedelta(milliseconds=float(ms))
                        for ms in interpolated_downsampled_data[:, args.tc]]

# Make absolute time stamps by adding each time delta to the t0 stamp
absolute_time_stamps = np.array([args.t0 + delta for delta in relative_time_stamps],
                                dtype=object)

# Print stats of downsampled data
print("\n")
print("-" * 70)
print("Downsampled")
print("-" * 70)
print("nrows:  %s" % final_rows)
#
# Write output file
#
print("Writing output file")

# The with-block guarantees the file is closed even if a write fails.
with open('output.txt', 'w') as f:
    # One line per downsampled row: a formatted date-time string followed by
    # every data column, all separated by the input delimiter.
    for stamp, row in zip(absolute_time_stamps, interpolated_downsampled_data):
        fields = [datetime.datetime.strftime(stamp, "%Y-%m-%dT%H:%M:%S")]
        fields.extend(str(value) for value in row)
        f.write(args.d.join(fields) + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment