Created
January 10, 2013 00:40
-
-
Save alexlib/4498384 to your computer and use it in GitHub Desktop.
Reads trajAcc.#### files created by the OpenPTV software (3D particle tracking velocimetry) into a Pandas (www.pydata.org) DataFrame and stores it in HDF5 (PyTables) format.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "trajAcc_file_to_h5" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "%config InlineBackend.figure_format = 'svg'\nplt.figsize(10,6)\nfrom pandas import *\nimport pandas as pd\npd.set_option('repr_html', False)\npd.set_option('line_width', 120)\n\nimport time\n# from mpl_toolkits.mplot3d import axes3d\nimport glob\nimport os\nimport numpy as np", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 49 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "# Define the directory:\ndata_dir = 'test_data'\n# the files are trajAcc.\ndir_path = os.path.join(data_dir, 'trajAcc.*')\n# list the files\nfiles = glob.glob(dir_path)\n# try to read one, see how many columns are:\ntmp = read_csv(files[0],'\\t')\nlines,cols = tmp.shape\n# we need to update the options also to 47 columns and provide correct names\nif cols == 32:\n columns = ['x','y','z','u','v','w','ax','ay','az','ux','uy','uz','vx','vy','vz','wx','wy','wz',\\\n 'ut','vt','wt',\\\n 'daxdx','daxdy','daxdz',\\\n 'daydx','daydy','daydz',\\\n 'dazdx','dazdy','dazdz',\\\n 'id','age']\n cols_to_read = [0,1,2,3,4,5,31] # we are going to use only x,y,z,u,v,w and age\nelif cols == 34:\n columns = ['x','y','z','u','v','w','ax','ay','az','ux','uy','uz','vx','vy','vz','wx','wy','wz',\\\n 'ut','vt','wt',\\\n 'daxdx','daxdy','daxdz',\\\n 'daydx','daydy','daydz',\\\n 'dazdx','dazdy','dazdz',\\\n 'nx','ny','nz','age']\n cols_to_read = [0,1,2,3,4,5,33] # we are going to use only x,y,z,u,v,w and age\n", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 50 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "def get_data(f):\n \"\"\" Function get_data(filename) reads the columns from the trajAcc file\n adds the extension of the file which is a frame number to the age, creating\n time sequence \"\"\"\n frame = f.split('.')[1]\n df = read_csv(f,sep='\\t',names=columns,usecols=cols_to_read)\n # df['age'].dtype = np.int\n df['t'] = df['age'].astype(np.int) + np.int(frame)\n return df", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 52 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "t = time.time()\n# Using nice trick of generator we create a list of all the data and then concatenate\n# into a single DataFrame object (see pandas tutorials)\ndf1 = concat(get_data(data_file) for data_file in files)\n# print df1.x.count()\nprint('done reading after ... %d sec ' % (time.time() - t) )\n\n ", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": "11837\ndone reading after ... 0 sec \n" | |
| } | |
| ], | |
| "prompt_number": 53 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "# we store the DataFrame into HDF5 table for later use\nstore = HDFStore('tmp.h5')\nstore['df'] = df1\nstore.close()", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 54 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "# summary statistics (count, mean, std, min, quartiles, max) of every column of the combined table\ndf1.describe()", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "pyout", | |
| "prompt_number": 56, | |
| "text": " x y z u v w age t\ncount 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000 11837.000000\nmean 0.007461 0.002877 -0.050775 0.008799 -0.001952 0.000680 19.658190 10020.199121\nstd 0.003269 0.002449 0.002268 0.005068 0.003025 0.005665 16.776826 16.810213\nmin -0.000795 -0.001697 -0.055244 -0.013424 -0.012890 -0.018831 0.000000 10000.000000\n25% 0.005130 0.000832 -0.052593 0.005572 -0.004035 -0.003723 6.000000 10007.000000\n50% 0.007830 0.002683 -0.050860 0.009434 -0.002110 0.000266 15.000000 10016.000000\n75% 0.010288 0.004845 -0.049000 0.012515 -0.000022 0.004214 29.000000 10029.000000\nmax 0.013838 0.008204 -0.046061 0.020057 0.008560 0.040254 87.000000 10087.000000" | |
| } | |
| ], | |
| "prompt_number": 56 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": "", | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment